Spaces:
Running
Running
| """ | |
| Audio Tool - Transcribe audio với Groq Whisper API | |
| """ | |
| import os | |
| import tempfile | |
| import requests | |
| from typing import Optional | |
def download_audio_file(
    task_id: str,
    api_url: str = "https://agents-course-unit4-scoring.hf.space",
    timeout: float = 30,
) -> Optional[str]:
    """Download the audio file attached to a task into a temporary file.

    Sends ``GET {api_url}/files/{task_id}``. On HTTP 200 the response body is
    written to a new temporary file whose suffix is derived from the
    ``Content-Type`` response header. The file is created with
    ``delete=False``, so the caller is responsible for removing it.

    Args:
        task_id: Identifier interpolated into the ``/files/{task_id}`` URL.
        api_url: Base URL of the file API; a trailing slash is tolerated.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        Path of the temporary file, or ``None`` when the response status is
        not 200 or any exception occurs (the exception is printed).
    """
    # Checked in order; the first token found in the Content-Type wins.
    # The first four entries are the original tokens; the rest cover the
    # registered MIME names (audio/mpeg, audio/mp4) and formats that used to
    # be mislabelled as ".mp3".
    suffix_by_token = (
        ("mp3", ".mp3"),
        ("wav", ".wav"),
        ("ogg", ".ogg"),
        ("m4a", ".m4a"),
        ("mpeg", ".mp3"),
        ("mp4", ".m4a"),
        ("flac", ".flac"),
        ("webm", ".webm"),
    )
    default_suffix = ".mp3"
    tmp_path = None
    try:
        file_url = f"{api_url.rstrip('/')}/files/{task_id}"
        response = requests.get(file_url, timeout=timeout)
        if response.status_code != 200:
            return None

        # Media types are case-insensitive, so normalise before matching.
        content_type = response.headers.get("content-type", "").lower()
        suffix = default_suffix
        if "audio" in content_type:
            suffix = next(
                (ext for token, ext in suffix_by_token if token in content_type),
                default_suffix,
            )

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(response.content)
        return tmp_path
    except Exception as e:
        print(f"Error downloading audio: {e}")
        # Do not leave a partially written temp file behind.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
        return None
def transcribe_audio_groq(task_id: str = "", audio_path: str = "", language: str = "en") -> str:
    """Transcribe an audio file with the Groq Whisper API (whisper-large-v3).

    The audio source is resolved in this order: ``audio_path`` if it names an
    existing file, otherwise a file downloaded via ``download_audio_file``
    when ``task_id`` is given. A file downloaded by this call is always
    removed before returning; a caller-supplied ``audio_path`` is never
    deleted.

    Args:
        task_id: ID used to download the file from the API.
        audio_path: Path to a local audio file (if available).
        language: Language passed to the transcription request
            (default: "en").

    Returns:
        The stripped transcription text. Failures never raise: validation
        problems are returned as strings starting with ``"Error:"`` and any
        exception is returned as ``"Audio transcription error: <message>"``.
    """
    target_audio_path = None
    downloaded = False  # True only when this call created the temp file
    try:
        # Imported lazily so a missing groq package becomes an error string
        # instead of breaking import of this module.
        from groq import Groq

        groq_api_key = os.environ.get("GROQ_API_KEY")
        if not groq_api_key:
            return "Error: GROQ_API_KEY not found in environment variables"
        groq_client = Groq(api_key=groq_api_key)

        # Resolve which audio file to send.
        if audio_path and os.path.exists(audio_path):
            target_audio_path = audio_path
        elif task_id:
            target_audio_path = download_audio_file(task_id)
            if not target_audio_path:
                return "Error: Could not download audio file"
            downloaded = True
        else:
            return "Error: No audio path or task_id provided"

        if not os.path.exists(target_audio_path):
            return "Error: Audio file not found"

        with open(target_audio_path, "rb") as audio_file:
            transcription = groq_client.audio.transcriptions.create(
                file=(os.path.basename(target_audio_path), audio_file.read()),
                model="whisper-large-v3",
                response_format="text",
                language=language,
                temperature=0.0,  # Deterministic results
            )

        # The response may be an object exposing ``.text`` or something that
        # stringifies to the transcript; handle both.
        if hasattr(transcription, "text"):
            result = transcription.text
        else:
            result = str(transcription)
        return result.strip()
    except Exception as e:
        return f"Audio transcription error: {str(e)}"
    finally:
        # Single cleanup path for success, early returns and errors.
        if downloaded:
            try:
                os.unlink(target_audio_path)
            except OSError:
                pass  # Best effort: the temp file may already be gone.
def transcribe_audio_with_details(task_id: str = "", audio_path: str = "", language: str = "en") -> dict:
    """Transcribe audio and return the text together with basic metadata.

    Delegates to ``transcribe_audio_groq`` and wraps its result.

    Args:
        task_id: ID used to download the file from the API.
        audio_path: Path to a local audio file (if available).
        language: Language passed to the transcription request.

    Returns:
        A dict with three keys:
            ``transcription``: the text (or error string) that was returned.
            ``metadata``: model, language and provider, plus ``file_size``
                and ``file_path`` when ``audio_path`` is an existing file.
            ``success``: ``False`` when the text starts with one of the
                failure prefixes, ``True`` otherwise.
    """
    # transcribe_audio_groq reports validation failures as "Error: ..." and
    # caught exceptions as "Audio transcription error: ...". Both must count
    # as failures; checking only "Error:" reported API errors as successes.
    # NOTE: a genuine transcript beginning with one of these prefixes is
    # still classified as a failure.
    failure_prefixes = ("Error:", "Audio transcription error:")
    try:
        text = transcribe_audio_groq(task_id, audio_path, language)

        metadata = {
            "model": "whisper-large-v3",
            "language": language,
            "provider": "groq",
        }

        # File details are only available for a caller-supplied local file.
        if audio_path and os.path.exists(audio_path):
            metadata["file_size"] = os.path.getsize(audio_path)
            metadata["file_path"] = audio_path

        return {
            "transcription": text,
            "metadata": metadata,
            "success": not text.startswith(failure_prefixes),
        }
    except Exception as e:
        return {
            "transcription": f"Error: {str(e)}",
            "metadata": {},
            "success": False,
        }
# Fallback used when Groq is not available
def fallback_audio_info(task_id: str = "", audio_path: str = "") -> str:
    """Describe the audio file when it cannot be transcribed.

    Resolves the audio file the same way as the transcription path
    (``audio_path`` if it exists, otherwise a download via
    ``download_audio_file`` when ``task_id`` is given) and reports its size.
    A file downloaded by this call is always removed before returning; a
    caller-supplied ``audio_path`` is never deleted.

    Args:
        task_id: ID used to download the file from the API.
        audio_path: Path to a local audio file (if available).

    Returns:
        A message containing the file size in bytes, an ``"Error: ..."``
        string when no file could be resolved, or
        ``"Audio processing error: <message>"`` if an exception occurs.
    """
    target_audio_path = None
    downloaded = False  # True only when this call created the temp file
    try:
        if audio_path and os.path.exists(audio_path):
            target_audio_path = audio_path
        elif task_id:
            target_audio_path = download_audio_file(task_id)
            if not target_audio_path:
                return "Error: Could not download audio file"
            downloaded = True
        else:
            return "Error: No audio path or task_id provided"

        file_size = os.path.getsize(target_audio_path)
        return f"Audio file detected - Size: {file_size} bytes. Groq transcription not available. Please describe the audio content."
    except Exception as e:
        return f"Audio processing error: {str(e)}"
    finally:
        # Runs on every exit path, so a failure in getsize no longer leaks
        # the downloaded temp file.
        if downloaded:
            try:
                os.unlink(target_audio_path)
            except OSError:
                pass  # Best effort: the temp file may already be gone.
# Manual smoke test
if __name__ == "__main__":
    # Point this at a local audio file to try a transcription.
    sample_path = "/path/to/test/audio.mp3"
    if not os.path.exists(sample_path):
        print("No test audio found")
    else:
        print("Transcription Result:", transcribe_audio_groq(audio_path=sample_path))
    # To test with a task_id instead (requires an API key):
    # result = transcribe_audio_groq(task_id="some_task_id")
    # print("Transcription Result:", result)