Spaces:

tuanhqv123
/

final_agent_course

Running

File size: 6,668 Bytes

92d2175

"""
Audio Tool - Transcribe audio với Groq Whisper API
"""

import os
import tempfile
import requests
from typing import Optional

def download_audio_file(task_id: str) -> Optional[str]:
    """
    Download the file attached to a task and save it to a temporary file.

    The file is fetched from ``{api_url}/files/{task_id}`` and written to a
    temporary file created with ``delete=False``; the caller owns that file
    and is responsible for removing it.

    The temp-file suffix is chosen from the response headers: first from
    the ``Content-Type`` header, then from the extension of the
    ``Content-Disposition`` filename, and finally ``.mp3`` as the default.

    Args:
        task_id: Identifier of the task whose file should be downloaded.

    Returns:
        Path of the temporary file, or None when ``task_id`` is empty, the
        server did not answer with HTTP 200, or an error occurred (the
        error is printed).
    """
    import re  # local import: only needed for Content-Disposition parsing

    # (marker, suffix) pairs checked in order against an audio Content-Type.
    # The first four keep the original lookup order; the rest cover MIME
    # names that do not contain the file extension itself.
    content_type_suffixes = (
        ("mp3", ".mp3"),
        ("wav", ".wav"),
        ("ogg", ".ogg"),
        ("m4a", ".m4a"),
        ("mpeg", ".mp3"),  # audio/mpeg is the registered MIME type for MP3
        ("mp4", ".m4a"),   # audio/mp4 is commonly used for M4A files
        ("flac", ".flac"),
        ("webm", ".webm"),
    )
    known_suffixes = {suffix for _, suffix in content_type_suffixes}

    if not task_id:
        # Nothing to download; avoid a pointless request to ".../files/".
        return None

    try:
        api_url = "https://agents-course-unit4-scoring.hf.space"
        file_url = f"{api_url}/files/{task_id}"

        response = requests.get(file_url, timeout=30)
        if response.status_code != 200:
            return None

        suffix = None

        # 1) Try the Content-Type header (case-insensitive).
        content_type = response.headers.get('content-type', '').lower()
        if 'audio' in content_type:
            suffix = next(
                (ext for marker, ext in content_type_suffixes if marker in content_type),
                None,
            )

        # 2) Fall back to the extension of the advertised file name, but
        #    only when it is one of the audio extensions handled above.
        if suffix is None:
            disposition = response.headers.get('content-disposition', '')
            match = re.search(
                r'filename\*?=[^;]*?(\.[A-Za-z0-9]+)"?\s*(?:;|$)',
                disposition,
            )
            if match and match.group(1).lower() in known_suffixes:
                suffix = match.group(1).lower()

        # 3) Same default as before for anything unrecognised.
        if suffix is None:
            suffix = '.mp3'

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file.write(response.content)
            return tmp_file.name
    except Exception as e:
        # Best-effort boundary: callers expect None rather than an exception.
        print(f"Error downloading audio: {e}")
        return None

def transcribe_audio_groq(task_id: str = "", audio_path: str = "", language: str = "en") -> str:
    """
    Transcribe an audio file with the Groq Whisper API (model whisper-large-v3).

    The audio comes from ``audio_path`` when that path exists; otherwise it
    is downloaded via ``download_audio_file(task_id)``. A file downloaded by
    this call is always deleted before returning, whether the transcription
    succeeded or not. A caller-supplied ``audio_path`` is never deleted.

    Args:
        task_id: ID used to download the file from the API.
        audio_path: Path to a local audio file (if available).
        language: Transcription language passed to the API (default: "en").

    Returns:
        The transcribed text with surrounding whitespace stripped. Failures
        are reported as a string rather than raised: either a message
        starting with "Error:" (missing API key, missing input, download
        failure, missing file) or "Audio transcription error: ..." for any
        exception raised along the way.
    """
    target_audio_path = None
    # True only when this call downloaded a temp file that it must delete.
    downloaded = False

    try:
        # Imported here so that a missing `groq` package is reported through
        # the error string below instead of failing at module import time.
        from groq import Groq
        groq_api_key = os.environ.get("GROQ_API_KEY")

        if not groq_api_key:
            return "Error: GROQ_API_KEY not found in environment variables"

        groq_client = Groq(api_key=groq_api_key)

        # Resolve the audio path: an existing local file wins over task_id.
        if audio_path and os.path.exists(audio_path):
            target_audio_path = audio_path
        elif task_id:
            target_audio_path = download_audio_file(task_id)
            if not target_audio_path:
                return "Error: Could not download audio file"
            downloaded = True
        else:
            return "Error: No audio path or task_id provided"

        # Make sure the audio file exists
        if not os.path.exists(target_audio_path):
            return "Error: Audio file not found"

        # Transcribe with Groq Whisper
        with open(target_audio_path, "rb") as audio_file:
            transcription = groq_client.audio.transcriptions.create(
                file=(os.path.basename(target_audio_path), audio_file.read()),
                model="whisper-large-v3",
                response_format="text",
                language=language,
                temperature=0.0  # Deterministic results
            )

        # The result may be an object exposing `.text` or something that is
        # only meaningful as a string; handle both.
        if hasattr(transcription, 'text'):
            result = transcription.text
        else:
            result = str(transcription)

        return result.strip()

    except Exception as e:
        return f"Audio transcription error: {str(e)}"

    finally:
        # Single cleanup point for every exit path (success, early return,
        # or exception). Deletion is best-effort: a failure to remove the
        # temp file must not mask the actual result.
        if downloaded:
            try:
                os.unlink(target_audio_path)
            except OSError:
                pass

def transcribe_audio_with_details(task_id: str = "", audio_path: str = "", language: str = "en") -> dict:
    """
    Transcribe audio and return the text together with basic metadata.

    Args:
        task_id: ID used to download the file from the API.
        audio_path: Path to a local audio file (if available).
        language: Transcription language (default: "en").

    Returns:
        Dict with three keys:
            "transcription": the string returned by transcribe_audio_groq
                (the transcript, or an error message);
            "metadata": model, language and provider, plus "file_size" and
                "file_path" when ``audio_path`` exists locally;
            "success": False when the transcription string is an error
                message, True otherwise.
        If an exception is raised here, "transcription" holds
        "Error: <message>", "metadata" is empty and "success" is False.
    """
    # transcribe_audio_groq reports failures as plain strings using either
    # of these prefixes; both must count as a failure.
    error_prefixes = ("Error:", "Audio transcription error:")

    try:
        # Get the transcription
        text = transcribe_audio_groq(task_id, audio_path, language)

        # Basic metadata
        metadata = {
            "model": "whisper-large-v3",
            "language": language,
            "provider": "groq"
        }

        # When a local file is available, add information about it
        if audio_path and os.path.exists(audio_path):
            metadata["file_size"] = os.path.getsize(audio_path)
            metadata["file_path"] = audio_path

        return {
            "transcription": text,
            "metadata": metadata,
            "success": not text.startswith(error_prefixes)
        }

    except Exception as e:
        return {
            "transcription": f"Error: {str(e)}",
            "metadata": {},
            "success": False
        }

# Fallback function for when Groq is unavailable
def fallback_audio_info(task_id: str = "", audio_path: str = "") -> str:
    """
    Describe an audio file when it cannot be transcribed.

    The file is taken from ``audio_path`` when that path exists; otherwise
    it is downloaded via ``download_audio_file(task_id)``. A file downloaded
    by this call is always deleted before returning. A caller-supplied
    ``audio_path`` is never deleted.

    Args:
        task_id: ID used to download the file from the API.
        audio_path: Path to a local audio file (if available).

    Returns:
        A message containing the file size in bytes, or an error string:
        "Error: ..." when no input was given or the download failed, and
        "Audio processing error: ..." for any exception raised.
    """
    target_audio_path = None
    # True only when this call downloaded a temp file that it must delete.
    downloaded = False

    try:
        if audio_path and os.path.exists(audio_path):
            target_audio_path = audio_path
        elif task_id:
            target_audio_path = download_audio_file(task_id)
            if not target_audio_path:
                return "Error: Could not download audio file"
            downloaded = True
        else:
            return "Error: No audio path or task_id provided"

        # Basic file info
        file_size = os.path.getsize(target_audio_path)
        return f"Audio file detected - Size: {file_size} bytes. Groq transcription not available. Please describe the audio content."

    except Exception as e:
        return f"Audio processing error: {str(e)}"

    finally:
        # Runs on every exit path so a downloaded temp file is not left
        # behind, even when reading its size fails. Deletion is best-effort.
        if downloaded:
            try:
                os.unlink(target_audio_path)
            except OSError:
                pass

# Test function
if __name__ == "__main__":
    # Manual smoke test against a local audio file, when one is present
    sample_path = "/path/to/test/audio.mp3"
    if not os.path.exists(sample_path):
        print("No test audio found")

        # To try a task_id instead (requires an API key):
        # result = transcribe_audio_groq(task_id="some_task_id")
        # print("Transcription Result:", result)
    else:
        transcript = transcribe_audio_groq(audio_path=sample_path)
        print("Transcription Result:", transcript)