File size: 3,804 Bytes
a60f448
0e33469
0047a1f
0e33469
 
a60f448
0047a1f
 
 
 
0e33469
9b9a7e2
0e33469
 
 
0047a1f
 
a60f448
0e33469
0047a1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e33469
 
 
0047a1f
 
 
 
 
 
 
 
 
 
 
 
 
0e33469
 
a60f448
0047a1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# src/api/routes/audio.py

from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
from fastapi.responses import JSONResponse

from src.core.state import AppState, get_state
from src.services.audio_transcription import (get_supported_formats,
											  transcribe_audio_bytes,
											  validate_audio_format)
from src.utils.logger import logger

# Router for the audio endpoints in this module; every route registered on it
# is served under the "/audio" prefix and tagged "Audio".
router = APIRouter(prefix="/audio", tags=["Audio"])

@router.post("/transcribe")
async def transcribe_audio(
	file: UploadFile = File(...),
	language_code: str = Form(default="en"),
	state: AppState = Depends(get_state)
) -> JSONResponse:
	"""
	Transcribe audio file to text using NVIDIA Riva API.

	The upload is accepted when its declared content type is a known audio
	type or, failing that, when its file name ends with one of the supported
	extensions. The raw bytes are then checked with ``validate_audio_format``
	before being handed to ``transcribe_audio_bytes``.

	Args:
		file: Audio file (WAV, OPUS, FLAC, or WebM format)
		language_code: Language code for transcription (default: 'en')
		state: Application state; its ``nvidia_rotator`` is passed to the
			transcription service

	Returns:
		JSON response with ``success``, ``transcribed_text``,
		``language_code`` and ``file_name``

	Raises:
		HTTPException: 400 when the format is unsupported, the file is empty
			or the bytes fail validation; 500 when transcription returns
			``None`` or an unexpected error occurs.
	"""
	try:
		# Validate file type by content-type or extension
		valid_ctypes = {
			"audio/wav", "audio/x-wav", "audio/webm", "audio/ogg", "audio/opus", "audio/flac"
		}
		# Media types are case-insensitive and may carry parameters
		# (e.g. "audio/webm;codecs=opus"); compare only the bare type.
		declared_ctype = (file.content_type or "").split(";", 1)[0].strip().lower()
		if declared_ctype not in valid_ctypes:
			# Fall back to the file-name extension when the content type is
			# missing or unrecognised.
			supported_formats = tuple(get_supported_formats())
			file_name = (file.filename or "").lower()
			if not file_name.endswith(supported_formats):
				raise HTTPException(
					status_code=400,
					detail=f"Unsupported audio format. Supported formats: {', '.join(supported_formats)}"
				)

		# Read audio data
		audio_bytes = await file.read()

		if not audio_bytes:
			raise HTTPException(status_code=400, detail="Empty audio file")

		# Validate audio format
		if not validate_audio_format(audio_bytes):
			raise HTTPException(
				status_code=400,
				detail="Invalid audio format. Please ensure the file is a valid WAV, OPUS, FLAC, or WebM file."
			)

		# Transcribe audio
		logger().info(f"Transcribing audio file: {file.filename} (language: {language_code})")
		transcribed_text = await transcribe_audio_bytes(
			audio_bytes,
			language_code,
			state.nvidia_rotator
		)

		# The service signals failure by returning None rather than raising.
		if transcribed_text is None:
			raise HTTPException(
				status_code=500,
				detail="Transcription failed. Please try again or check your audio file."
			)

		return JSONResponse(
			status_code=200,
			content={
				"success": True,
				"transcribed_text": transcribed_text,
				"language_code": language_code,
				"file_name": file.filename
			}
		)

	except HTTPException:
		# Deliberate HTTP errors raised above must reach the client unchanged.
		raise
	except Exception as e:
		logger().error(f"Audio transcription error: {e}")
		# Hide internal details from the client but keep the cause chained
		# for server-side debugging.
		raise HTTPException(
			status_code=500,
			detail="Internal server error during transcription"
		) from e

@router.get("/supported-formats")
async def get_audio_formats() -> JSONResponse:
	"""
	List the audio formats accepted for transcription.

	Returns:
		JSON response (HTTP 200) carrying the supported formats and a
		short description
	"""
	payload = {
		"supported_formats": get_supported_formats(),
		"description": "Supported audio formats for transcription",
	}
	return JSONResponse(content=payload, status_code=200)

@router.get("/health")
async def audio_health_check(state: AppState = Depends(get_state)) -> JSONResponse:
	"""
	Report whether the audio transcription service can be used.

	The service counts as available when ``state.nvidia_rotator.keys``
	contains at least one truthy entry.

	Args:
		state: Application state holding the NVIDIA key rotator

	Returns:
		JSON response with the service status (HTTP 200), or an error
		payload with HTTP 500 if the check itself fails
	"""
	try:
		# At least one non-empty NVIDIA API key must be configured
		has_keys = any(state.nvidia_rotator.keys)

		if has_keys:
			service_status = "available"
		else:
			service_status = "unavailable"

		report = {
			"service": "audio_transcription",
			"status": service_status,
			"nvidia_keys_available": has_keys,
			"supported_formats": get_supported_formats(),
		}
		return JSONResponse(content=report, status_code=200)
	except Exception as e:
		logger().error(f"Audio health check error: {e}")
		failure = {
			"service": "audio_transcription",
			"status": "error",
			"error": str(e),
		}
		return JSONResponse(content=failure, status_code=500)