import gradio as gr
from textblob import TextBlob
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import torch
import numpy as np
import os
import glob

# 1. Set up device and data type for optimized performance
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# 2. Define the model ID for the Whisper model
model_id = "openai/whisper-small"

# 3. Load the model from pretrained weights
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)

# 4. Load the processor, which includes the feature extractor and tokenizer
processor = AutoProcessor.from_pretrained(model_id)

# 5. Create the ASR pipeline with the loaded components
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    torch_dtype=torch_dtype,
    device=device,
)
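
# Note: besides a file path or raw bytes, the ASR pipeline accepts a dict of
# the form {"raw": <1-D float32 array>, "sampling_rate": <int>}; that form is
# used in analyze_audio below so recordings at any sample rate get resampled
# to the rate Whisper expects.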


def sentiment_analysis(text: str) -> dict:
    """
    Analyze the sentiment of the given text with TextBlob.
    Polarity ranges from -1 (negative) to 1 (positive); subjectivity
    ranges from 0 (objective) to 1 (subjective).
    """
    blob = TextBlob(text)
    sentiment = blob.sentiment
    return {
        "transcript": text,
        "polarity": round(sentiment.polarity, 2),
        "subjectivity": round(sentiment.subjectivity, 2),
        "assessment": (
            "positive" if sentiment.polarity > 0
            else "negative" if sentiment.polarity < 0
            else "neutral"
        ),
    }
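
# Example (scores come from TextBlob's pattern lexicon, so the exact values
# shown here are approximate):
#   sentiment_analysis("I love this product")
#   -> {"transcript": "I love this product", "polarity": 0.5,
#       "subjectivity": 0.6, "assessment": "positive"}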


# NEW: Simplified main function to process audio from a NumPy array
def analyze_audio(audio: tuple) -> dict:
    """
    Process audio data from a NumPy array, transcribe it, and analyze its sentiment.
    Gradio provides the audio as a tuple (sample_rate, data).
    """
    if audio is None:
        return {"error": "No audio provided. Please upload, record, or select an example."}

    # Unpack the audio tuple
    sample_rate, audio_data = audio

    # Convert to float32 in [-1, 1]: Gradio typically delivers int16 PCM, but
    # skip the rescaling if the samples are already floating point
    if np.issubdtype(audio_data.dtype, np.integer):
        audio_data = audio_data.astype(np.float32) / 32768.0
    else:
        audio_data = audio_data.astype(np.float32)

    # Down-mix stereo recordings to mono, since the pipeline expects a 1-D array
    if audio_data.ndim > 1:
        audio_data = audio_data.mean(axis=1)

    try:
        # Pass the sampling rate along with the raw samples so the pipeline can
        # resample to the 16 kHz that Whisper expects
        transcription_result = pipe({"sampling_rate": sample_rate, "raw": audio_data})
        transcript_text = transcription_result["text"].strip()
        if not transcript_text:
            return {"error": "Transcription failed or audio was silent."}
    except Exception as e:
        return {"error": f"Failed to transcribe audio: {str(e)}"}

    # Perform sentiment analysis on the transcript
    return sentiment_analysis(transcript_text)
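
# A quick smoke test for analyze_audio, mimicking the (sample_rate, data) tuple
# Gradio supplies. This assumes one second of int16 silence yields an empty or
# trivial transcript, so it mainly exercises the error-handling path:
#   result = analyze_audio((16000, np.zeros(16000, dtype=np.int16)))
#   print(result)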


# --- Code to find and load examples ---
examples_dir = "examples"
if not os.path.exists(examples_dir):
    os.makedirs(examples_dir)
    print(f"Created '{examples_dir}/' directory. Please add your audio examples there.")

example_files = (
    glob.glob(os.path.join(examples_dir, "*.wav")) +
    glob.glob(os.path.join(examples_dir, "*.mp3")) +
    glob.glob(os.path.join(examples_dir, "*.flac"))
)
examples_list = [[file] for file in example_files]
# --- End of example loading ---

# Create the Gradio interface
demo = gr.Interface(
    fn=analyze_audio,  # CHANGED: Point to the new, simplified function
    inputs=gr.Audio(type="numpy", label="Upload Audio File or Record"),  # CHANGED: type="numpy"
    outputs=gr.JSON(label="Analysis Result"),
    title="🎙️ Audio Sentiment Analysis (Whisper Small)",
    description="Analyze the sentiment of spoken words. Upload an audio file, record directly, or click an example below.",
    examples=examples_list if examples_list else None,  # pass None rather than an empty list
    article="""
### How it Works
This tool uses OpenAI's **Whisper Small** model to transcribe audio into text.
Then, **TextBlob** performs sentiment analysis on the resulting transcript.
With `type="numpy"`, the interface hands the raw audio data directly to the model, which is more reliable than going through a temporary file.
""",
    theme="huggingface",
)
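
# Note: mcp_server=True exposes analyze_audio as an MCP tool in addition to the
# web UI; it requires Gradio's MCP extra (pip install "gradio[mcp]").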
# Launch the interface
if __name__ == "__main__":
    demo.launch(mcp_server=True)