import gradio as gr
from textblob import TextBlob
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import torch
import base64
import numpy as np
import ffmpeg

# 1. Set up device and data type for optimized performance
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# 2. Define the model ID for the Whisper small checkpoint
model_id = "openai/whisper-small"

# 3. Load the model from pretrained weights
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)

# 4. Load the processor, which includes the feature extractor and tokenizer
processor = AutoProcessor.from_pretrained(model_id)

# 5. Create the ASR pipeline with the loaded components
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    torch_dtype=torch_dtype,
    device=device,
)
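
# Usage sketch (not executed at import time): the pipeline accepts a raw
# 16 kHz mono NumPy float32 array and returns a dict with a "text" key, e.g.:
# pipe(np.zeros(16000, dtype=np.float32))["text"]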

def sentiment_analysis(text: str) -> dict:
    """
    Analyze the sentiment of the given text with TextBlob.
    """
    blob = TextBlob(text)
    sentiment = blob.sentiment
    return {
        "transcript": text,
        "polarity": round(sentiment.polarity, 2),
        "subjectivity": round(sentiment.subjectivity, 2),
        "assessment": "positive" if sentiment.polarity > 0 else "negative" if sentiment.polarity < 0 else "neutral",
    }
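
# Illustrative output (approximate values; TextBlob scores can vary by version):
# sentiment_analysis("I love this!")
# -> {"transcript": "I love this!", "polarity": 0.5, "subjectivity": 0.6, "assessment": "positive"}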

def process_base64_audio(base64_data_uri: str) -> dict:
    """
    Decodes a Base64 audio data URI, processes it in memory,
    transcribes it using the Hugging Face Whisper pipeline, and then analyzes its sentiment.

    Args:
        base64_data_uri (str): A string in data URI format (e.g., "data:audio/wav;base64,UklGRi...").

    Returns:
        dict: The sentiment analysis result or an error message.
    """
    if not base64_data_uri or "base64," not in base64_data_uri:
        return {"error": "Invalid or empty Base64 data URI provided."}
    try:
        # Parse the data URI to extract the Base64 encoded data
        _, encoded_data = base64_data_uri.split(',', 1)

        # Decode the Base64 string into binary audio data
        audio_data = base64.b64decode(encoded_data)

        # Use ffmpeg to convert the in-memory audio data to a raw PCM buffer.
        # The pipeline expects a 16 kHz mono audio stream.
        out, _ = (
            ffmpeg
            .input('pipe:0')
            .output('pipe:1', format='s16le', ac=1, ar=16000)
            .run(input=audio_data, capture_stdout=True, capture_stderr=True)
        )

        # Convert the raw PCM buffer to 32-bit floats, normalizing the
        # int16 samples to the [-1.0, 1.0) range the pipeline expects.
        audio_np = np.frombuffer(out, np.int16).astype(np.float32) / 32768.0

        # Transcribe the audio from the NumPy array using the HF pipeline
        transcription_result = pipe(audio_np)
        transcript_text = transcription_result["text"]
    except Exception as e:
        # Capture potential errors from ffmpeg or the model
        return {"error": f"Failed to process audio: {str(e)}"}

    # Perform sentiment analysis on the transcribed text
    return sentiment_analysis(transcript_text)
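
# Direct-call sketch ("sample.wav" is a hypothetical local file):
# with open("sample.wav", "rb") as f:
#     uri = "data:audio/wav;base64," + base64.b64encode(f.read()).decode("ascii")
# print(process_base64_audio(uri))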

# Create the Gradio interface with the Hugging Face theme
demo = gr.Interface(
    fn=process_base64_audio,
    # The input remains a Textbox to accept the raw Base64 string from the API client
    inputs=gr.Textbox(lines=5, placeholder="Paste your Base64 encoded audio data URI here...", label="Base64 Audio Input"),
    outputs=gr.JSON(label="Analysis Result"),
    title="🎙️ Audio Sentiment Analysis (Whisper Small)",
    description="""
Analyze the sentiment of spoken words.
This tool accepts a **Base64 encoded audio data URI**, transcribes the audio in memory using the `openai/whisper-small` model,
and performs sentiment analysis on the text with TextBlob.
""",
    examples=[
        ["data:audio/wav;base64,UklGRiQ...<placeholder_for_a_short_positive_clip>"],
        ["data:audio/wav;base64,UklGRiQ...<placeholder_for_a_short_negative_clip>"],
    ],
    article="""
### How to get a Base64 Audio URI?
You can use an online converter or a script (like the provided `test_client.py`) to convert a short audio file (e.g., .wav or .mp3) into a Base64 data URI.
The format must be `data:audio/[format];base64,[encoded_string]`.
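
For example, a minimal sketch using the Python standard library (`sample.wav` is a placeholder path):

```python
import base64

with open("sample.wav", "rb") as f:
    uri = "data:audio/wav;base64," + base64.b64encode(f.read()).decode("ascii")
```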
| """, | |
    theme='huggingface',  # apply the Hugging Face theme
)

# Launch the interface and MCP server
if __name__ == "__main__":
    # ffmpeg must be installed on your system for the audio conversion to work.
    # Install the required Python packages (the MCP server requires the gradio[mcp] extra);
    # the Whisper weights are downloaded on first run:
    # pip install "gradio[mcp]" textblob "transformers[torch]" accelerate safetensors ffmpeg-python numpy
    demo.launch(mcp_server=True)
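
# Client-side sketch (assumes the gradio_client package; "user/audio-sentiment"
# is a hypothetical Space id, so point it at your own Space or local URL):
# from gradio_client import Client
# client = Client("user/audio-sentiment")
# result = client.predict("data:audio/wav;base64,...", api_name="/predict")
# print(result)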