Spaces:
Sleeping
Sleeping
File size: 3,804 Bytes
a60f448 0e33469 0047a1f 0e33469 a60f448 0047a1f 0e33469 9b9a7e2 0e33469 0047a1f a60f448 0e33469 0047a1f 0e33469 0047a1f 0e33469 a60f448 0047a1f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
# src/api/routes/audio.py
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from src.core.state import AppState, get_state
from src.services.audio_transcription import (get_supported_formats,
transcribe_audio_bytes,
validate_audio_format)
from src.utils.logger import logger
# Router for the audio endpoints defined in this module; every route path is
# prefixed with "/audio" and tagged "Audio".
router = APIRouter(prefix="/audio", tags=["Audio"])
# Media types accepted outright, without consulting the filename extension.
_VALID_AUDIO_CONTENT_TYPES = frozenset({
    "audio/wav", "audio/x-wav", "audio/webm", "audio/ogg", "audio/opus", "audio/flac",
})


def _is_supported_upload(content_type, filename) -> bool:
    """
    Decide whether an upload looks like a supported audio file.

    Args:
        content_type: Content-Type reported for the uploaded part (may be None)
        filename: Client-supplied file name (may be None)

    Returns:
        True when the media type is in the accepted set, or when the file
        name ends with one of the entries of get_supported_formats()
    """
    # Compare only the "type/subtype" part, case-insensitively. Clients may
    # append parameters (e.g. "audio/webm;codecs=opus"), which an exact
    # lookup against the accepted set would never match.
    media_type = (content_type or "").split(";", 1)[0].strip().lower()
    if media_type in _VALID_AUDIO_CONTENT_TYPES:
        return True
    # Content type missing or unrecognised: fall back to the file extension.
    return (filename or "").lower().endswith(tuple(get_supported_formats()))


@router.post("/transcribe")
async def transcribe_audio(
    file: UploadFile = File(...),
    language_code: str = Form(default="en"),
    state: AppState = Depends(get_state)
) -> JSONResponse:
    """
    Transcribe audio file to text using NVIDIA Riva API.

    Args:
        file: Audio file (WAV, OPUS, FLAC, or WebM format)
        language_code: Language code for transcription (default: 'en')
        state: Application state

    Returns:
        JSON response with transcribed text

    Raises:
        HTTPException: 400 when the upload has an unsupported type, is empty,
            or fails validate_audio_format; 500 when transcription returns
            None or an unexpected error occurs
    """
    try:
        # Validate file type by content-type or extension
        if not _is_supported_upload(file.content_type, file.filename):
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported audio format. Supported formats: {', '.join(get_supported_formats())}"
            )

        # Read audio data
        audio_bytes = await file.read()
        if not audio_bytes:
            raise HTTPException(status_code=400, detail="Empty audio file")

        # Validate audio format
        if not validate_audio_format(audio_bytes):
            raise HTTPException(
                status_code=400,
                detail="Invalid audio format. Please ensure the file is a valid WAV, OPUS, FLAC, or WebM file."
            )

        # Transcribe audio
        logger().info(f"Transcribing audio file: {file.filename} (language: {language_code})")
        transcribed_text = await transcribe_audio_bytes(
            audio_bytes,
            language_code,
            state.nvidia_rotator
        )

        # The code treats a None result as a failed transcription.
        if transcribed_text is None:
            raise HTTPException(
                status_code=500,
                detail="Transcription failed. Please try again or check your audio file."
            )

        return JSONResponse(
            status_code=200,
            content={
                "success": True,
                "transcribed_text": transcribed_text,
                "language_code": language_code,
                "file_name": file.filename
            }
        )
    except HTTPException:
        # Deliberate HTTP errors raised above must reach the client unchanged.
        raise
    except Exception as e:
        logger().error(f"Audio transcription error: {e}")
        # Chain the original exception so the root cause stays attached to
        # the generic 500 for debugging.
        raise HTTPException(
            status_code=500,
            detail="Internal server error during transcription"
        ) from e
@router.get("/supported-formats")
async def get_audio_formats() -> JSONResponse:
    """
    List the audio formats that can be submitted for transcription.

    Returns:
        JSON response (HTTP 200) carrying the supported formats and a
        short description
    """
    payload = {
        "supported_formats": get_supported_formats(),
        "description": "Supported audio formats for transcription",
    }
    return JSONResponse(content=payload, status_code=200)
@router.get("/health")
async def audio_health_check(state: AppState = Depends(get_state)) -> JSONResponse:
    """
    Report whether the audio transcription service can be used.

    Args:
        state: Application state

    Returns:
        JSON response with the service status: HTTP 200 with "available" or
        "unavailable" depending on whether any non-empty NVIDIA key is
        present, or HTTP 500 with the error text if the check itself fails
    """
    try:
        # The service counts as available when at least one key is truthy.
        has_keys = any(key for key in state.nvidia_rotator.keys)
        if has_keys:
            status_label = "available"
        else:
            status_label = "unavailable"

        body = {
            "service": "audio_transcription",
            "status": status_label,
            "nvidia_keys_available": has_keys,
            "supported_formats": get_supported_formats(),
        }
        return JSONResponse(content=body, status_code=200)
    except Exception as e:
        logger().error(f"Audio health check error: {e}")
        failure = {
            "service": "audio_transcription",
            "status": "error",
            "error": str(e),
        }
        return JSONResponse(content=failure, status_code=500)
|