# File size: 4,056 Bytes
# (Commit-hash and line-number gutters left over from the web file viewer were removed here;
# they are not part of the program.)
# Clone: https://huggingface.co/spaces/Lum4yx/mcp-sentiment/blob/main/app.py
import gradio as gr
from textblob import TextBlob
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import torch
import numpy as np
import os
import glob
# --- Speech-recognition setup -------------------------------------------------
# Choose the compute target: half precision on the first CUDA device when one is
# available, otherwise full precision on the CPU.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Hub identifier of the Whisper checkpoint used for transcription.
# NOTE(review): the repository name suggests an ONNX export; confirm that it
# actually ships safetensors weights loadable by AutoModelForSpeechSeq2Seq.
model_id = "onnx-community/whisper-podlodka-turbo-ONNX"

# Load the weights with the dtype selected above, then move the model to the
# selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

# The processor supplies both the tokenizer and the feature extractor.
processor = AutoProcessor.from_pretrained(model_id)

# Assemble the ASR pipeline from the pieces loaded above.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    torch_dtype=torch_dtype,
    device=device,
)
def sentiment_analysis(text: str) -> dict:
    """
    Score the sentiment of ``text`` with TextBlob.

    Returns a dict with four keys: ``transcript`` (the input text, unchanged),
    ``polarity`` and ``subjectivity`` (TextBlob's scores rounded to two
    decimals) and ``assessment`` ("positive", "negative" or "neutral").
    """
    scores = TextBlob(text).sentiment
    polarity = scores.polarity

    # The label is derived from the unrounded polarity value.
    if polarity > 0:
        label = "positive"
    elif polarity < 0:
        label = "negative"
    else:
        label = "neutral"

    return {
        "transcript": text,
        "polarity": round(polarity, 2),
        "subjectivity": round(scores.subjectivity, 2),
        "assessment": label,
    }
def _to_mono_float32(audio_data):
    """Return ``audio_data`` as a 1-D float32 waveform.

    Integer PCM samples are scaled by the full-scale value of their own dtype
    (so int16 is divided by 32768, as before) instead of assuming int16 for
    every input. Floating-point samples are only cast. Arrays with more than
    one dimension are averaged over axis 1 to produce a single channel.
    """
    samples = np.asarray(audio_data)
    source_dtype = samples.dtype
    samples = samples.astype(np.float32)

    if np.issubdtype(source_dtype, np.integer):
        info = np.iinfo(source_dtype)
        full_scale = float(info.max) + 1.0
        if info.min == 0:
            # Unsigned PCM is centred on mid-scale rather than on zero.
            half_scale = full_scale / 2.0
            samples = (samples - half_scale) / half_scale
        else:
            samples = samples / full_scale

    if samples.ndim > 1:
        # assumes a (samples, channels) layout as delivered by gr.Audio — TODO confirm
        samples = samples.mean(axis=1)
    return samples


# Main entry point: process audio delivered by Gradio as a NumPy array
def analyze_audio(audio: tuple) -> dict:
    """
    Transcribe an audio clip and analyze the sentiment of the transcript.

    Args:
        audio: ``(sample_rate, data)`` tuple as provided by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was supplied.

    Returns:
        The dict produced by ``sentiment_analysis`` on success, otherwise a
        dict with a single ``"error"`` key describing what went wrong.
    """
    no_audio_error = {"error": "No audio provided. Please upload, record, or select an example."}
    if audio is None:
        return no_audio_error

    sample_rate, audio_data = audio
    waveform = _to_mono_float32(audio_data)
    if waveform.size == 0:
        # An empty recording carries nothing to transcribe.
        return no_audio_error

    try:
        # Pass the sampling rate along with the samples so the pipeline can
        # resample to the rate the model expects; a bare array would be
        # interpreted as already being at the model's rate.
        transcription_result = pipe(
            {"sampling_rate": int(sample_rate), "raw": waveform}
        )
        transcript_text = transcription_result["text"].strip()
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return {"error": f"Failed to transcribe audio: {str(e)}"}

    if not transcript_text:
        return {"error": "Transcription failed or audio was silent."}

    # Perform sentiment analysis on the transcript
    return sentiment_analysis(transcript_text)
# --- Code to find and load examples ---
# Directory scanned for bundled example clips (relative to the working directory).
examples_dir = "examples"
if not os.path.exists(examples_dir):
    # exist_ok=True keeps this safe if the directory appears between the check
    # and the creation (e.g. two workers starting at once).
    os.makedirs(examples_dir, exist_ok=True)
    print(f"Created '{examples_dir}/' directory. Please add your audio examples there.")

# Collect clips grouped by extension (wav, mp3, flac). glob returns matches in
# arbitrary order, so each group is sorted to keep the example order stable
# from run to run.
example_files = []
for _pattern in ("*.wav", "*.mp3", "*.flac"):
    example_files.extend(sorted(glob.glob(os.path.join(examples_dir, _pattern))))

# Gradio expects one row per example, each row holding one value per input.
examples_list = [[path] for path in example_files]
# --- End of example loading ---
# Create the Gradio interface.
# The user-facing text names the checkpoint via `model_id` so it cannot drift
# from the model that is actually loaded (it previously said "Whisper Small").
demo = gr.Interface(
    fn=analyze_audio,
    # type="numpy" makes Gradio hand the function a (sample_rate, data) tuple.
    inputs=gr.Audio(type="numpy", label="Upload Audio File or Record"),
    outputs=gr.JSON(label="Analysis Result"),
    title="🎙️ Audio Sentiment Analysis (Whisper)",
    description="Analyze the sentiment of spoken words. Upload an audio file, record directly, or click an example below.",
    examples=examples_list,
    article=f"""
### How it Works
This tool uses the Whisper speech-recognition model `{model_id}` to transcribe audio into text.
Then, **TextBlob** is used to perform sentiment analysis on the resulting transcript.
By using `type="numpy"`, the interface directly processes audio data, making it more reliable.
""",
    theme='huggingface'
)

# Launch the interface (also exposing it as an MCP server) when run as a script.
if __name__ == "__main__":
    demo.launch(mcp_server=True)