Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """ | |
| Speech recognition (STT) module that uses the VITO API. | |
| """ | |
| import os | |
| import logging | |
| import requests | |
| import json | |
| import time # time import μΆκ° | |
| from dotenv import load_dotenv | |
| # Load environment variables | |
| load_dotenv() | |
# Logger setup. This module uses its own named logger; it could instead be
# switched to share the logger configured by app.py if that is ever needed.
logger = logging.getLogger("VitoSTT")

# Without any handler nothing would be printed, so attach a default stream
# handler (and a default level) only when none is reachable from this logger.
# NOTE(review): the flattened source does not show whether setLevel() sat
# inside this guard; it is assumed to be part of it -- confirm against the
# original file.
if not logger.hasHandlers():
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)  # default level: INFO
class VitoSTT:
    """Wrapper around the VITO speech-to-text (STT) REST API.

    Credentials are read from the ``VITO_CLIENT_ID`` and
    ``VITO_CLIENT_SECRET`` environment variables. The access token is fetched
    on demand, cached on the instance, and fetched again once its recorded
    expiry time has passed or the API answers with HTTP 401.
    """

    def __init__(self):
        """Read the API credentials from the environment and set the endpoints."""
        self.client_id = os.getenv("VITO_CLIENT_ID")
        self.client_secret = os.getenv("VITO_CLIENT_SECRET")
        if not self.client_id or not self.client_secret:
            # Only warn here: the hard failure is deferred to the point where
            # the credentials are actually needed.
            logger.warning("VITO API μΈμ¦ μ λ³΄κ° .env νμΌμ μ€μ λμ§ μμμ΅λλ€.")
            logger.warning("VITO_CLIENT_IDμ VITO_CLIENT_SECRETλ₯Ό νμΈνμΈμ.")
        else:
            logger.info("VITO STT API ν΄λΌμ΄μΈνΈ ID/Secret λ‘λ μλ£.")

        # API endpoints
        self.token_url = "https://openapi.vito.ai/v1/authenticate"
        self.stt_url = "https://openapi.vito.ai/v1/transcribe"

        # Cached access token and the local time after which it must be renewed
        self.access_token = None
        self._token_expires_at = 0

    @staticmethod
    def _token_expiry(payload, now):
        """Return the time (epoch seconds) after which the token should be renewed.

        Args:
            payload: Decoded JSON body of the authenticate response.
            now: Current time in epoch seconds; used for the relative fallback.

        Returns:
            The renewal deadline, 60 seconds before the actual expiry.
        """
        margin = 60  # renew slightly early so a request never races the expiry
        # NOTE(review): the VITO authenticate response is understood to carry
        # an absolute ``expire_at`` timestamp in epoch seconds -- confirm
        # against the API documentation.
        expire_at = payload.get("expire_at")
        if isinstance(expire_at, (int, float)):
            return expire_at - margin
        # Fallback kept from the original code: relative lifetime, default 1 hour.
        return now + payload.get("expires_in", 3600) - margin

    @staticmethod
    def _join_utterances(utterances):
        """Join the non-empty ``msg`` fields of the utterances into one string."""
        return " ".join(seg["msg"] for seg in utterances if seg.get("msg")).strip()

    @staticmethod
    def _response_text(response):
        """Return the body text of an error response, or "" when unavailable."""
        if response is None:
            return ""
        try:
            return response.text
        except Exception:
            # Best effort only: the body is used purely for diagnostics.
            return ""

    def get_access_token(self):
        """Request a new access token from the VITO API and cache it.

        Always performs a network request; ``transcribe_audio`` decides when a
        cached token can be reused.

        Returns:
            The access token string.

        Raises:
            ValueError: Credentials are missing, or the response has no token.
            TimeoutError: The token request timed out.
            ConnectionError: Any other request failure (including HTTP errors).
        """
        if not self.client_id or not self.client_secret:
            logger.error("API ν€κ° μ€μ λμ§ μμ ν ν°μ νλν μ μμ΅λλ€.")
            raise ValueError("VITO API μΈμ¦ μ λ³΄κ° μ€μ λμ§ μμμ΅λλ€.")

        logger.info("VITO API μ‘μΈμ€ ν ν° μμ² μ€...")
        try:
            response = requests.post(
                self.token_url,
                data={"client_id": self.client_id, "client_secret": self.client_secret},
                timeout=10,
            )
            response.raise_for_status()  # turn HTTP error statuses into exceptions
            payload = response.json()
        except requests.exceptions.Timeout as e:
            logger.error(f"VITO API ν ν° νλ μκ° μ΄κ³Ό: {self.token_url}")
            raise TimeoutError("VITO API ν ν° νλ μκ° μ΄κ³Ό") from e
        except requests.exceptions.RequestException as e:
            logger.error(f"VITO API ν ν° νλ μ€ν¨: {e}")
            if getattr(e, "response", None) is not None:
                logger.error(f"μλ΅ μ½λ: {e.response.status_code}, λ΄μ©: {e.response.text}")
            raise ConnectionError(f"VITO API ν ν° νλ μ€ν¨: {e}") from e

        token = payload.get("access_token")
        if not token:
            # Never leave a stale token/expiry pair behind a failed refresh.
            self.access_token = None
            self._token_expires_at = 0
            logger.error("VITO API μλ΅μμ ν ν°μ μ°Ύμ μ μμ΅λλ€.")
            raise ValueError("VITO API ν ν°μ λ°μμ€μ§ λͺ»νμ΅λλ€.")

        self.access_token = token
        self._token_expires_at = self._token_expiry(payload, time.time())
        logger.info("VITO API μ‘μΈμ€ ν ν° νλ μ±κ³΅")
        return self.access_token

    def _submit_and_poll(self, audio_bytes, language):
        """Upload the audio, then poll the job until it finishes or times out.

        Uses the currently cached access token. Request failures are not
        handled here; they propagate as ``requests`` exceptions.

        Returns:
            A result dictionary in the format documented on ``transcribe_audio``.
        """
        headers = {"Authorization": f"Bearer {self.access_token}"}
        files = {"file": ("audio_file", audio_bytes)}  # sent as a file upload
        # API options (adjust as needed)
        config = {
            "use_multi_channel": False,
            "use_itn": True,  # inverse text normalization (numbers, dates, ...)
            "use_disfluency_filter": True,  # drop filler sounds
            "use_profanity_filter": False,
            "language": language,
        }

        logger.info(f"VITO STT API ({self.stt_url}) μμ² μ μ‘ μ€...")
        response = requests.post(
            self.stt_url,
            headers=headers,
            files=files,
            data={"config": json.dumps(config)},
            timeout=20,  # upload timeout
        )
        response.raise_for_status()

        job_id = response.json().get("id")
        if not job_id:
            logger.error("VITO API μμ IDλ₯Ό λ°μμ€μ§ λͺ»νμ΅λλ€.")
            return {"success": False, "error": "VITO API μμ IDλ₯Ό λ°μμ€μ§ λͺ»νμ΅λλ€."}
        logger.info(f"VITO STT μμ ID: {job_id}, κ²°κ³Ό νμΈ μμ...")

        transcript_url = f"{self.stt_url}/{job_id}"
        max_tries = 15
        wait_time = 2  # seconds between polls
        for try_count in range(max_tries):
            time.sleep(wait_time)  # wait before every poll to limit API load
            logger.debug(f"κ²°κ³Ό νμΈ μλ ({try_count + 1}/{max_tries}) - URL: {transcript_url}")
            get_response = requests.get(transcript_url, headers=headers, timeout=10)
            get_response.raise_for_status()
            result = get_response.json()
            status = result.get("status")
            logger.debug(f"νμ¬ μν: {status}")

            if status == "completed":
                utterances = result.get("results", {}).get("utterances", [])
                if not utterances:
                    # The job succeeded but nothing was recognised.
                    logger.warning("VITO STT μλ£λμμΌλ κ²°κ³Ό utterancesκ° λΉμ΄μμ΅λλ€.")
                    return {"success": True, "text": ""}
                transcript = self._join_utterances(utterances)
                logger.info(f"VITO STT μΈμ μ±κ³΅ (μΌλΆ): {transcript[:50]}...")
                return {"success": True, "text": transcript}
            if status == "failed":
                error_msg = f"VITO API λ³ν μ€ν¨: {result.get('message', 'μ μ μλ μ€λ₯')}"
                logger.error(error_msg)
                return {"success": False, "error": error_msg, "details": result}
            if status == "transcribing":
                logger.info(f"VITO API μ²λ¦¬ μ€... ({try_count + 1}/{max_tries})")
            else:  # any other status: keep waiting
                logger.info(f"VITO API μν '{status}', λκΈ° μ€... ({try_count + 1}/{max_tries})")

        logger.error(f"VITO API μλ΅ νμμμ ({max_tries * wait_time}μ΄ μ΄κ³Ό)")
        return {"success": False, "error": "VITO API μλ΅ νμμμ"}

    def transcribe_audio(self, audio_bytes, language="ko"):
        """Convert audio bytes to text through the VITO STT API.

        The cached token is renewed first when it is missing or past its
        recorded expiry. If the API still answers 401, the token is renewed
        and the request is retried exactly once.

        Args:
            audio_bytes: Raw bytes of the audio file.
            language: Language code (default: 'ko').

        Returns:
            A dictionary, never an exception:
            {'success': True, 'text': <recognised text>}
            {'success': False, 'error': <message>, 'details': <optional detail>}
        """
        if not self.client_id or not self.client_secret:
            logger.error("API ν€κ° μ€μ λμ§ μμμ΅λλ€.")
            return {"success": False, "error": "API ν€κ° μ€μ λμ§ μμμ΅λλ€."}

        retried = False  # bounds the 401 retry to a single extra attempt
        while True:
            try:
                if not self.access_token or time.time() >= self._token_expires_at:
                    logger.info("VITO API ν ν° νλ/κ°±μ μλ...")
                    self.get_access_token()
                return self._submit_and_poll(audio_bytes, language)
            except requests.exceptions.HTTPError as e:
                response = e.response
                status_code = response.status_code if response is not None else None
                if status_code == 401:
                    # The token expired or was rejected: drop it and renew.
                    logger.warning("VITO API ν ν°μ΄ λ§λ£λμκ±°λ μ ν¨νμ§ μμ΅λλ€. ν ν° μ¬λ°κΈ μλ...")
                    self.access_token = None
                    try:
                        self.get_access_token()
                    except Exception as token_e:
                        logger.error(f"ν ν° μ¬νλ μ€ν¨: {token_e}")
                        return {"success": False, "error": f"ν ν° μ¬νλ μ€ν¨: {str(token_e)}"}
                    if not retried:
                        retried = True
                        logger.info("μ ν ν°μΌλ‘ μ¬μλν©λλ€.")
                        continue
                    # Second 401 in a row: give up and let the caller decide.
                    return {"success": False, "error": "ν ν° λ§λ£ ν μ¬μλ νμ", "details": "ν ν° μ¬λ°κΈ μ±κ³΅"}
                # Any HTTP error other than 401
                error_body = self._response_text(response)
                logger.error(f"VITO API HTTP μ€λ₯: {status_code}, μλ΅: {error_body}")
                return {
                    "success": False,
                    "error": f"API HTTP μ€λ₯: {status_code}",
                    "details": error_body,
                }
            except requests.exceptions.Timeout:
                logger.error("VITO API μμ² μκ° μ΄κ³Ό")
                return {"success": False, "error": "API μμ² μκ° μ΄κ³Ό"}
            except requests.exceptions.RequestException as e:
                logger.error(f"VITO API μμ² μ€ λ€νΈμν¬ μ€λ₯ λ°μ: {str(e)}")
                return {"success": False, "error": "API μμ² λ€νΈμν¬ μ€λ₯", "details": str(e)}
            except Exception as e:
                # Top-level boundary: callers always get a result dictionary.
                logger.error(f"μμ±μΈμ μ²λ¦¬ μ€ μμμΉ λͺ»ν μ€λ₯ λ°μ: {str(e)}", exc_info=True)
                return {
                    "success": False,
                    "error": "μμ±μΈμ λ΄λΆ μ²λ¦¬ μ€ν¨",
                    "details": str(e),
                }