final_agent_course / utils /audio_tool.py
tuan3335's picture
structure code
92d2175
raw
history blame
6.67 kB
"""
Audio Tool - Transcribe audio with the Groq Whisper API
"""
import os
import tempfile
import requests
from typing import Optional
# Ordered (marker, suffix) pairs; each marker is matched as a substring of the
# lower-cased Content-Type header of an audio response.
_AUDIO_SUFFIX_MARKERS = (
    ("mp3", ".mp3"),
    ("mpeg", ".mp3"),   # audio/mpeg is the registered MIME type for MP3
    ("wav", ".wav"),    # audio/wav, audio/x-wav, audio/wave
    ("ogg", ".ogg"),
    ("m4a", ".m4a"),    # audio/x-m4a
    ("mp4", ".m4a"),    # audio/mp4 is audio-only MPEG-4, conventionally .m4a
    ("flac", ".flac"),
    ("webm", ".webm"),
)

# Suffix used when the Content-Type is missing, non-audio or unrecognised.
_DEFAULT_AUDIO_SUFFIX = ".mp3"


def _suffix_for_content_type(content_type: str) -> str:
    """Return the temp-file suffix for a Content-Type value ('.mp3' if unknown)."""
    normalized = content_type.lower()  # header values are case-insensitive
    if 'audio' in normalized:
        for marker, suffix in _AUDIO_SUFFIX_MARKERS:
            if marker in normalized:
                return suffix
    return _DEFAULT_AUDIO_SUFFIX


def download_audio_file(task_id: str) -> Optional[str]:
    """
    Download the file for a task and store it in a temporary file.

    Sends a GET request to ``{api_url}/files/{task_id}`` with a 30 second
    timeout. On an HTTP 200 response the body is written to a temporary file
    whose suffix is derived from the response Content-Type.

    Args:
        task_id: Identifier appended to the ``/files/`` endpoint URL.

    Returns:
        Path of the temporary file, or None when the status code is not 200
        or any exception occurs. The file is created with ``delete=False``,
        so the caller is responsible for removing it.
    """
    tmp_path = None
    try:
        api_url = "https://agents-course-unit4-scoring.hf.space"
        file_url = f"{api_url}/files/{task_id}"
        response = requests.get(file_url, timeout=30)
        if response.status_code != 200:
            return None

        suffix = _suffix_for_content_type(response.headers.get('content-type', ''))
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            # Remember the path first so a failed write can be cleaned up.
            tmp_path = tmp_file.name
            tmp_file.write(response.content)
        return tmp_path
    except Exception as e:
        print(f"Error downloading audio: {e}")
        if tmp_path is not None:
            # Do not leave a partially written temp file behind.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
        return None
def transcribe_audio_groq(task_id: str = "", audio_path: str = "", language: str = "en") -> str:
    """
    Transcribe audio with the Groq Whisper API (model ``whisper-large-v3``).

    The audio source is ``audio_path`` when that path exists; otherwise the
    file is downloaded via ``download_audio_file(task_id)``. A downloaded file
    is removed again before returning, whatever the outcome.

    Args:
        task_id: ID used to download the file when no usable local path is given.
        audio_path: Path to a local audio file (optional).
        language: Language passed to the transcription request (default: "en").

    Returns:
        The stripped transcription text. Failures are reported as a string
        rather than raised: either a message starting with "Error:" or, for
        any exception, "Audio transcription error: <details>".
    """
    target_audio_path = None
    downloaded = False  # True only when this call created the temp file
    try:
        # Imported lazily so a missing groq package is reported as an error
        # string by the handler below instead of failing at module import.
        from groq import Groq

        groq_api_key = os.environ.get("GROQ_API_KEY")
        if not groq_api_key:
            return "Error: GROQ_API_KEY not found in environment variables"
        groq_client = Groq(api_key=groq_api_key)

        # Resolve the audio source: an existing local file wins over task_id.
        if audio_path and os.path.exists(audio_path):
            target_audio_path = audio_path
        elif task_id:
            target_audio_path = download_audio_file(task_id)
            if not target_audio_path:
                return "Error: Could not download audio file"
            downloaded = True
        else:
            return "Error: No audio path or task_id provided"

        if not os.path.exists(target_audio_path):
            return "Error: Audio file not found"

        with open(target_audio_path, "rb") as audio_file:
            transcription = groq_client.audio.transcriptions.create(
                file=(os.path.basename(target_audio_path), audio_file.read()),
                model="whisper-large-v3",
                response_format="text",
                language=language,
                temperature=0.0  # Deterministic results
            )

        # The response may be an object exposing .text or a plain value.
        if hasattr(transcription, 'text'):
            result = transcription.text
        else:
            result = str(transcription)
        return result.strip()
    except Exception as e:
        return f"Audio transcription error: {str(e)}"
    finally:
        # Single cleanup point: runs on success, on every early return and on
        # any exception. Never delete a file supplied by the caller.
        if downloaded and target_audio_path != audio_path:
            try:
                os.unlink(target_audio_path)
            except OSError:
                pass  # best-effort: the temp file may already be gone
def transcribe_audio_with_details(task_id: str = "", audio_path: str = "", language: str = "en") -> dict:
    """
    Transcribe audio and return the text together with basic metadata.

    Args:
        task_id: Forwarded to ``transcribe_audio_groq``.
        audio_path: Forwarded to ``transcribe_audio_groq``; when the path
            exists, its size and path are added to the metadata.
        language: Forwarded to ``transcribe_audio_groq`` and recorded in the
            metadata.

    Returns:
        Dict with the keys:
            "transcription": the text returned by ``transcribe_audio_groq``
                (or "Error: <details>" if this function itself failed),
            "metadata": model/language/provider, plus file_size/file_path
                for an existing local file (empty dict on failure here),
            "success": False when the text carries one of the failure
                prefixes produced by ``transcribe_audio_groq``.
    """
    # transcribe_audio_groq signals failure only through its return string:
    # validation problems start with "Error:", caught exceptions with
    # "Audio transcription error:". Both must count as unsuccessful.
    failure_prefixes = ("Error:", "Audio transcription error:")
    try:
        text = transcribe_audio_groq(task_id, audio_path, language)

        metadata = {
            "model": "whisper-large-v3",
            "language": language,
            "provider": "groq"
        }

        # Extra details are only available for a local file.
        if audio_path and os.path.exists(audio_path):
            metadata["file_size"] = os.path.getsize(audio_path)
            metadata["file_path"] = audio_path

        return {
            "transcription": text,
            "metadata": metadata,
            "success": not text.startswith(failure_prefixes)
        }
    except Exception as e:
        return {
            "transcription": f"Error: {str(e)}",
            "metadata": {},
            "success": False
        }
# Fallback function for when Groq is unavailable
def fallback_audio_info(task_id: str = "", audio_path: str = "") -> str:
    """
    Describe an audio file when it cannot be transcribed.

    The audio source is ``audio_path`` when that path exists; otherwise the
    file is downloaded via ``download_audio_file(task_id)``. A downloaded file
    is removed again before returning, whatever the outcome.

    Args:
        task_id: ID used to download the file when no usable local path is given.
        audio_path: Path to a local audio file (optional).

    Returns:
        A message containing the file size in bytes, a message starting with
        "Error:" when no file could be obtained, or
        "Audio processing error: <details>" when an exception occurs.
    """
    target_audio_path = None
    downloaded = False  # True only when this call created the temp file
    try:
        if audio_path and os.path.exists(audio_path):
            target_audio_path = audio_path
        elif task_id:
            target_audio_path = download_audio_file(task_id)
            if not target_audio_path:
                return "Error: Could not download audio file"
            downloaded = True
        else:
            return "Error: No audio path or task_id provided"

        file_size = os.path.getsize(target_audio_path)
        return f"Audio file detected - Size: {file_size} bytes. Groq transcription not available. Please describe the audio content."
    except Exception as e:
        return f"Audio processing error: {str(e)}"
    finally:
        # Runs on every exit path, so a downloaded file is also removed when
        # reading its size fails. Never delete a file supplied by the caller.
        if downloaded and target_audio_path != audio_path:
            try:
                os.unlink(target_audio_path)
            except OSError:
                pass  # best-effort: the temp file may already be gone
# Test function
if __name__ == "__main__":
    # Manual smoke test against a local audio file, when one is present.
    sample_path = "/path/to/test/audio.mp3"
    if not os.path.exists(sample_path):
        print("No test audio found")
    else:
        print("Transcription Result:", transcribe_audio_groq(audio_path=sample_path))
    # To test with a task_id instead (requires an API key):
    # result = transcribe_audio_groq(task_id="some_task_id")
    # print("Transcription Result:", result)