Spaces:

tuanhqv123
/

final_agent_course

Running

App Files Files Community

final_agent_course / utils /audio_tool.py

tuan3335

structure code

92d2175 6 months ago

raw

history blame

6.67 kB

	"""
	Audio Tool - Transcribe audio with the Groq Whisper API
	"""

	import os
	import tempfile
	import requests
	from typing import Optional

	def download_audio_file(task_id: str) -> Optional[str]:
	    """Download the audio attachment of a task into a temporary file.

	    Requests ``<api_url>/files/<task_id>`` from the scoring API and writes the
	    response body to a named temporary file. The file suffix is derived from
	    the response ``Content-Type`` header so that downstream consumers that
	    look at the file name see the matching audio format.

	    Args:
	        task_id: Identifier of the task whose file should be fetched.

	    Returns:
	        Path of the temporary file on success, or ``None`` when the server
	        answers with a non-200 status or any error occurs. The file is
	        created with ``delete=False``; the caller must remove it.
	    """
	    api_url = "https://agents-course-unit4-scoring.hf.space"
	    file_url = f"{api_url}/files/{task_id}"

	    # Ordered (marker, suffix) pairs matched as substrings of the content
	    # type; the first match wins. "mpeg" covers audio/mpeg (the registered
	    # MP3 type) and "mp4" covers audio/mp4 (the container used by .m4a).
	    suffix_by_marker = (
	        ("mp3", ".mp3"),
	        ("mpeg", ".mp3"),
	        ("wav", ".wav"),
	        ("ogg", ".ogg"),
	        ("m4a", ".m4a"),
	        ("mp4", ".m4a"),
	        ("flac", ".flac"),
	        ("webm", ".webm"),
	    )
	    default_suffix = ".mp3"

	    tmp_path = None
	    try:
	        response = requests.get(file_url, timeout=30)
	        if response.status_code != 200:
	            return None

	        # Media types are case-insensitive, so normalise before matching.
	        content_type = response.headers.get("content-type", "").lower()
	        suffix = default_suffix
	        if "audio" in content_type:
	            suffix = next(
	                (ext for marker, ext in suffix_by_marker if marker in content_type),
	                default_suffix,
	            )

	        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
	            tmp_path = tmp_file.name
	            tmp_file.write(response.content)
	        return tmp_path
	    except Exception as e:
	        # Boundary handler: callers rely on None (never an exception) on failure.
	        print(f"Error downloading audio: {e}")
	        # Do not leave a partially written temporary file behind.
	        if tmp_path:
	            try:
	                os.unlink(tmp_path)
	            except OSError:
	                pass
	        return None

	def transcribe_audio_groq(task_id: str = "", audio_path: str = "", language: str = "en") -> str:
	    """Transcribe an audio file with the Groq Whisper API (whisper-large-v3).

	    The audio source is resolved in this order: an existing local
	    ``audio_path`` is used as-is; otherwise, if ``task_id`` is given, the file
	    is downloaded via ``download_audio_file``. A downloaded file is removed
	    again before returning, whether transcription succeeded or not; a
	    caller-supplied ``audio_path`` is never deleted.

	    Args:
	        task_id: Task identifier used to download the audio file from the API.
	        audio_path: Path of a local audio file (takes precedence if it exists).
	        language: Language code passed to the transcription request
	            (default ``"en"``).

	    Returns:
	        The stripped transcription text. Failures are reported as text rather
	        than raised: validation problems start with ``"Error:"`` and any
	        exception is returned as ``"Audio transcription error: <message>"``.
	    """
	    target_audio_path = None

	    try:
	        # Imported lazily so a missing groq package is reported as an error
	        # string by the handler below instead of failing at module import.
	        from groq import Groq

	        groq_api_key = os.environ.get("GROQ_API_KEY")
	        if not groq_api_key:
	            return "Error: GROQ_API_KEY not found in environment variables"

	        groq_client = Groq(api_key=groq_api_key)

	        # Resolve which audio file to transcribe.
	        if audio_path and os.path.exists(audio_path):
	            target_audio_path = audio_path
	        elif task_id:
	            target_audio_path = download_audio_file(task_id)
	            if not target_audio_path:
	                return "Error: Could not download audio file"
	        else:
	            return "Error: No audio path or task_id provided"

	        # Guard against the file disappearing between resolution and use.
	        if not os.path.exists(target_audio_path):
	            return "Error: Audio file not found"

	        with open(target_audio_path, "rb") as audio_file:
	            transcription = groq_client.audio.transcriptions.create(
	                file=(os.path.basename(target_audio_path), audio_file.read()),
	                model="whisper-large-v3",
	                response_format="text",
	                language=language,
	                temperature=0.0,  # Deterministic results
	            )

	        # The response may be an object exposing ``.text`` or a plain value.
	        if hasattr(transcription, "text"):
	            result = transcription.text
	        else:
	            result = str(transcription)

	        return result.strip()

	    except Exception as e:
	        return f"Audio transcription error: {str(e)}"

	    finally:
	        # Single cleanup point for every exit path: remove only a file this
	        # call downloaded, never a path supplied by the caller.
	        if task_id and target_audio_path and target_audio_path != audio_path:
	            try:
	                os.unlink(target_audio_path)
	            except OSError:
	                pass

	def transcribe_audio_with_details(task_id: str = "", audio_path: str = "", language: str = "en") -> dict:
	    """Transcribe audio and return the text together with basic metadata.

	    Delegates to ``transcribe_audio_groq`` and wraps its result.

	    Args:
	        task_id: Task identifier forwarded to ``transcribe_audio_groq``.
	        audio_path: Local audio file path forwarded to ``transcribe_audio_groq``.
	        language: Language code forwarded to ``transcribe_audio_groq``.

	    Returns:
	        A dict with the keys:
	          - ``"transcription"``: the text (or error message) returned by
	            ``transcribe_audio_groq``.
	          - ``"metadata"``: model, language and provider; plus ``file_size``
	            and ``file_path`` when ``audio_path`` points to an existing file.
	          - ``"success"``: ``False`` when the text is one of the failure
	            messages produced by ``transcribe_audio_groq``, else ``True``.
	    """
	    # transcribe_audio_groq reports failures as text with one of these
	    # prefixes; both must be treated as unsuccessful.
	    failure_prefixes = ("Error:", "Audio transcription error:")

	    try:
	        text = transcribe_audio_groq(task_id, audio_path, language)

	        metadata = {
	            "model": "whisper-large-v3",
	            "language": language,
	            "provider": "groq",
	        }

	        # Extra details are only available for a caller-supplied local file.
	        if audio_path and os.path.exists(audio_path):
	            metadata["file_size"] = os.path.getsize(audio_path)
	            metadata["file_path"] = audio_path

	        return {
	            "transcription": text,
	            "metadata": metadata,
	            "success": not text.startswith(failure_prefixes),
	        }

	    except Exception as e:
	        return {
	            "transcription": f"Error: {str(e)}",
	            "metadata": {},
	            "success": False,
	        }

	# Fallback function used when Groq is not available
	def fallback_audio_info(task_id: str = "", audio_path: str = "") -> str:
	    """Describe an audio file when it cannot be transcribed.

	    Resolves the audio file the same way as the transcription path (an
	    existing local ``audio_path`` first, otherwise a download by ``task_id``)
	    and reports its size. A downloaded file is removed before returning on
	    every exit path; a caller-supplied ``audio_path`` is never deleted.

	    Args:
	        task_id: Task identifier used to download the audio file.
	        audio_path: Path of a local audio file (takes precedence if it exists).

	    Returns:
	        A message containing the file size in bytes, an ``"Error: ..."``
	        message when no file could be resolved, or
	        ``"Audio processing error: <message>"`` when an exception occurs.
	    """
	    target_audio_path = None

	    try:
	        if audio_path and os.path.exists(audio_path):
	            target_audio_path = audio_path
	        elif task_id:
	            target_audio_path = download_audio_file(task_id)
	            if not target_audio_path:
	                return "Error: Could not download audio file"
	        else:
	            return "Error: No audio path or task_id provided"

	        # Basic file info
	        file_size = os.path.getsize(target_audio_path)
	        return f"Audio file detected - Size: {file_size} bytes. Groq transcription not available. Please describe the audio content."

	    except Exception as e:
	        return f"Audio processing error: {str(e)}"

	    finally:
	        # Runs on success and on failure, so a downloaded temporary file is
	        # not leaked when reading its size raises.
	        if task_id and target_audio_path and target_audio_path != audio_path:
	            try:
	                os.unlink(target_audio_path)
	            except OSError:
	                pass

	# Test function
	if __name__ == "__main__":
	    # Manual smoke test against a local audio file, if one is present.
	    test_audio = "/path/to/test/audio.mp3"
	    if not os.path.exists(test_audio):
	        print("No test audio found")
	    else:
	        print("Transcription Result:", transcribe_audio_groq(audio_path=test_audio))

	    # To test with a task_id instead (requires an API key):
	    # result = transcribe_audio_groq(task_id="some_task_id")
	    # print("Transcription Result:", result)