File size: 6,668 Bytes
92d2175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
"""
Audio Tool - Transcribe audio with the Groq Whisper API
"""

import os
import tempfile
import requests
from typing import Optional

def download_audio_file(
    task_id: str,
    api_url: str = "https://agents-course-unit4-scoring.hf.space",
    timeout: float = 30,
) -> Optional[str]:
    """
    Download the audio file attached to a task into a temporary file.

    The file is fetched from ``{api_url}/files/{task_id}`` and written to a
    named temporary file that is NOT deleted automatically; the caller owns
    the returned path and is responsible for removing it.

    Args:
        task_id: Identifier appended to the ``/files/`` endpoint.
        api_url: Base URL of the file API.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        Path of the temporary file, or None when the response status is not
        200 or any error occurs (the error is printed).
    """
    # Ordered (marker, suffix) pairs matched against the Content-Type header.
    # The first four keep their original precedence; the remaining entries
    # cover audio types that previously fell through to a wrong ".mp3".
    suffix_by_marker = (
        ("mp3", ".mp3"),
        ("wav", ".wav"),
        ("ogg", ".ogg"),
        ("m4a", ".m4a"),
        ("flac", ".flac"),
        ("webm", ".webm"),
        ("mp4", ".mp4"),
        ("mpeg", ".mp3"),
    )

    tmp_path = None
    try:
        file_url = f"{api_url}/files/{task_id}"

        response = requests.get(file_url, timeout=timeout)
        if response.status_code != 200:
            return None

        # Media types are case-insensitive, so normalize before matching.
        content_type = response.headers.get('content-type', '').lower()

        suffix = '.mp3'  # Default for unknown or non-audio content types
        if 'audio' in content_type:
            for marker, candidate in suffix_by_marker:
                if marker in content_type:
                    suffix = candidate
                    break

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            # Remember the path first so a failed write can still be cleaned up.
            tmp_path = tmp_file.name
            tmp_file.write(response.content)
        return tmp_path
    except Exception as e:
        print(f"Error downloading audio: {e}")
        # delete=False means a partially written file would otherwise linger.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
        return None

def transcribe_audio_groq(task_id: str = "", audio_path: str = "", language: str = "en") -> str:
    """
    Transcribe audio with the Groq Whisper API (model ``whisper-large-v3``).

    The audio comes from ``audio_path`` when that file exists; otherwise it is
    downloaded via ``download_audio_file(task_id)``. A file downloaded by this
    function is always removed before returning, whether transcription
    succeeded or not. A caller-supplied ``audio_path`` is never deleted.

    Args:
        task_id: ID used to download the file from the API.
        audio_path: Path to a local audio file (if available).
        language: Transcription language (default: "en").

    Returns:
        The transcribed text with surrounding whitespace stripped. Failures
        are reported in-band as a string starting with "Error:" (missing API
        key or missing input) or "Audio transcription error:" (any exception).
    """
    target_audio_path = None

    try:
        # Imported lazily so a missing groq package is reported as an error
        # string by the handler below instead of failing at module import.
        from groq import Groq

        groq_api_key = os.environ.get("GROQ_API_KEY")
        if not groq_api_key:
            return "Error: GROQ_API_KEY not found in environment variables"

        groq_client = Groq(api_key=groq_api_key)

        # Resolve the audio source: an existing local file wins over a download.
        if audio_path and os.path.exists(audio_path):
            target_audio_path = audio_path
        elif task_id:
            target_audio_path = download_audio_file(task_id)
            if not target_audio_path:
                return "Error: Could not download audio file"
        else:
            return "Error: No audio path or task_id provided"

        # Make sure the resolved file is really there.
        if not os.path.exists(target_audio_path):
            return "Error: Audio file not found"

        with open(target_audio_path, "rb") as audio_file:
            transcription = groq_client.audio.transcriptions.create(
                file=(os.path.basename(target_audio_path), audio_file.read()),
                model="whisper-large-v3",
                response_format="text",
                language=language,
                temperature=0.0  # Deterministic results
            )

        # Accept either an object exposing .text or anything else, which is
        # converted with str().
        if hasattr(transcription, 'text'):
            result = transcription.text
        else:
            result = str(transcription)

        return result.strip()

    except Exception as e:
        return f"Audio transcription error: {str(e)}"

    finally:
        # Single cleanup point for every exit path: remove only a file that
        # this call downloaded, never the caller's own audio_path.
        if task_id and target_audio_path and target_audio_path != audio_path:
            try:
                os.unlink(target_audio_path)
            except OSError:
                # Best effort: the file may already be gone.
                pass

def transcribe_audio_with_details(task_id: str = "", audio_path: str = "", language: str = "en") -> dict:
    """
    Transcribe audio and return the text together with basic metadata.

    Args:
        task_id: ID forwarded to ``transcribe_audio_groq`` for downloading.
        audio_path: Path to a local audio file (if available).
        language: Transcription language (default: "en").

    Returns:
        Dict with the keys:
            "transcription": text returned by ``transcribe_audio_groq`` (or an
                "Error: ..." message if this wrapper itself failed),
            "metadata": model/language/provider, plus "file_size" and
                "file_path" when ``audio_path`` exists locally,
            "success": False when the transcription text is an error message.
    """
    # transcribe_audio_groq reports failures in-band with either of these
    # prefixes; both must count as failure, not only "Error:".
    failure_prefixes = ("Error:", "Audio transcription error:")

    try:
        text = transcribe_audio_groq(task_id, audio_path, language)

        # Basic metadata
        metadata = {
            "model": "whisper-large-v3",
            "language": language,
            "provider": "groq"
        }

        # For a local file, add size and path information.
        if audio_path and os.path.exists(audio_path):
            metadata["file_size"] = os.path.getsize(audio_path)
            metadata["file_path"] = audio_path

        return {
            "transcription": text,
            "metadata": metadata,
            "success": not text.startswith(failure_prefixes)
        }

    except Exception as e:
        return {
            "transcription": f"Error: {str(e)}",
            "metadata": {},
            "success": False
        }

# Fallback function for when Groq is not available
def fallback_audio_info(task_id: str = "", audio_path: str = "") -> str:
    """
    Describe the audio file when it cannot be transcribed.

    The file comes from ``audio_path`` when it exists, otherwise it is
    downloaded via ``download_audio_file(task_id)``. A file downloaded by this
    function is removed before returning, including when reading its size
    fails. A caller-supplied ``audio_path`` is never deleted.

    Args:
        task_id: ID used to download the file from the API.
        audio_path: Path to a local audio file (if available).

    Returns:
        A message containing the file size in bytes, or a string starting
        with "Error:" / "Audio processing error:" on failure.
    """
    target_audio_path = None

    try:
        if audio_path and os.path.exists(audio_path):
            target_audio_path = audio_path
        elif task_id:
            target_audio_path = download_audio_file(task_id)
            if not target_audio_path:
                return "Error: Could not download audio file"
        else:
            return "Error: No audio path or task_id provided"

        # Basic file info
        file_size = os.path.getsize(target_audio_path)
        return f"Audio file detected - Size: {file_size} bytes. Groq transcription not available. Please describe the audio content."

    except Exception as e:
        return f"Audio processing error: {str(e)}"

    finally:
        # Runs on every exit path so a downloaded temp file is not leaked
        # when getsize() raises.
        if task_id and target_audio_path and target_audio_path != audio_path:
            try:
                os.unlink(target_audio_path)
            except OSError:
                # Best effort: the file may already be gone.
                pass

# Test function
if __name__ == "__main__":
    # Manual smoke test against a local audio file, when one is present.
    test_audio = "/path/to/test/audio.mp3"
    if not os.path.exists(test_audio):
        print("No test audio found")

        # Test with a task_id instead (requires an API key)
        # result = transcribe_audio_groq(task_id="some_task_id")
        # print("Transcription Result:", result)
    else:
        result = transcribe_audio_groq(audio_path=test_audio)
        print("Transcription Result:", result)