# mcp-sentiment / app.py
import gradio as gr
from textblob import TextBlob
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import torch
import numpy as np
import os
import glob
# 1. Set up device and data type for optimized performance
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# 2. Define the model ID for the Whisper model
model_id = "openai/whisper-small"
# 3. Load the model from pretrained weights
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)
# 4. Load the processor which includes the feature extractor and tokenizer
processor = AutoProcessor.from_pretrained(model_id)
# 5. Create the ASR pipeline with the loaded components
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    torch_dtype=torch_dtype,
    device=device,
)
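# The ASR pipeline accepts raw audio either as a NumPy array or as a dict of the form
# {"raw": <float32 array>, "sampling_rate": <int>}, and returns a dict like {"text": "..."}.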


def sentiment_analysis(text: str) -> dict:
    """
    Analyze the sentiment of the given text with TextBlob.

    Returns a dict with the transcript, polarity (-1.0 to 1.0),
    subjectivity (0.0 to 1.0), and an overall assessment.
    """
    blob = TextBlob(text)
    sentiment = blob.sentiment
    return {
        "transcript": text,
        "polarity": round(sentiment.polarity, 2),
        "subjectivity": round(sentiment.subjectivity, 2),
        "assessment": "positive" if sentiment.polarity > 0 else "negative" if sentiment.polarity < 0 else "neutral",
    }
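
# Illustrative example (scores come from TextBlob's lexicon; the values below are approximate):
# sentiment_analysis("Great talk!")
# -> {"transcript": "Great talk!", "polarity": 0.8, "subjectivity": 0.75, "assessment": "positive"}
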
# NEW: Simplified main function to process audio from a NumPy array
def analyze_audio(audio: tuple) -> dict:
    """
    Processes audio data from a NumPy array, transcribes it, and analyzes its sentiment.
    Gradio provides the audio as a tuple (sample_rate, data).
    """
    if audio is None:
        return {"error": "No audio provided. Please upload, record, or select an example."}

    # Unpack the audio tuple
    sample_rate, audio_data = audio

    # Gradio delivers PCM samples (typically int16); convert to float32 in [-1.0, 1.0],
    # which is what the Whisper feature extractor expects.
    audio_float32 = audio_data.astype(np.float32)
    if np.issubdtype(audio_data.dtype, np.integer):
        audio_float32 = audio_float32 / 32768.0
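    # Stereo clips arrive as a 2-D array of shape (samples, channels);
    # average the channels so the model receives mono audio.
    if audio_float32.ndim > 1:
        audio_float32 = audio_float32.mean(axis=1)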
    try:
        # Transcribe the audio, passing the sampling rate so the pipeline can resample to 16 kHz
        transcription_result = pipe({"raw": audio_float32, "sampling_rate": sample_rate})
        transcript_text = transcription_result["text"].strip()
        if not transcript_text:
            return {"error": "Transcription failed or audio was silent."}
    except Exception as e:
        return {"error": f"Failed to transcribe audio: {str(e)}"}

    # Perform sentiment analysis on the transcript
    return sentiment_analysis(transcript_text)

# --- Code to find and load examples ---
examples_dir = "examples"
if not os.path.exists(examples_dir):
    os.makedirs(examples_dir)
    print(f"Created '{examples_dir}/' directory. Please add your audio examples there.")

example_files = (
    glob.glob(os.path.join(examples_dir, "*.wav")) +
    glob.glob(os.path.join(examples_dir, "*.mp3")) +
    glob.glob(os.path.join(examples_dir, "*.flac"))
)
examples_list = [[file] for file in example_files]
# --- End of example loading ---
# Create the Gradio interface
demo = gr.Interface(
    fn=analyze_audio,  # CHANGED: Point to the new, simplified function
    inputs=gr.Audio(type="numpy", label="Upload Audio File or Record"),  # CHANGED: type="numpy"
    outputs=gr.JSON(label="Analysis Result"),
    title="🎙️ Audio Sentiment Analysis (Whisper Small)",
    description="Analyze the sentiment of spoken words. Upload an audio file, record directly, or click an example below.",
    examples=examples_list,
    article="""
### How it Works
This tool uses OpenAI's **Whisper Small** model to transcribe audio into text.
Then, **TextBlob** is used to perform sentiment analysis on the resulting transcript.
By using `type="numpy"`, the interface directly processes audio data, making it more reliable.
""",
    theme="huggingface",
)
# Launch the interface
if __name__ == "__main__":
    demo.launch(mcp_server=True)
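# Note: the imports above require gradio (with the [mcp] extra for mcp_server=True),
# transformers, torch, textblob, and numpy, typically listed in the Space's requirements.txt.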