Spaces:

Princeaka
/

justiceai

Running

App Files Community

Princeaka committed 3 days ago

Commit

4305795

verified ·

1 Parent(s): 9cd3579

Update voicecloner.py

Browse files

Files changed (1) hide show

voicecloner.py +50 -25

voicecloner.py CHANGED Viewed

@@ -42,34 +42,41 @@ def compute_file_sha256(path: str) -> str:
     return h.hexdigest()
 def get_tts_model():
-    """Get or load TTS model (thread-safe)"""
     global _tts_model
     if not TTS_AVAILABLE:
-        raise RuntimeError("TTS.api not available")
     with _tts_lock:
         if _tts_model is None:
-            logger.info(f"[TTS] Loading model {TTS_MODEL_NAME} on device {TTS_DEVICE}")
-            _tts_model = TTS(TTS_MODEL_NAME)
-            if TTS_DEVICE and torch:
-                if TTS_DEVICE.startswith("cuda") and torch.cuda.is_available():
-                    try:
-                        _tts_model.to(TTS_DEVICE)
-                        torch.backends.cudnn.benchmark = True
-                        if TTS_USE_HALF and hasattr(_tts_model, "model"):
-                            _tts_model.model.half()
-                    except Exception as e:
-                        logger.warning(f"[TTS] GPU optimization failed: {e}")
-            logger.info("[TTS] Model loaded successfully")
-            _tts_loaded_event.set()
     return _tts_model
 def synthesize_speech(text: str, speaker_wav: Optional[str] = None, language: Optional[str] = None, output_path: Optional[str] = None) -> str:
     """
-    Synthesize speech from text
     Args:
         text: Text to synthesize
@@ -81,24 +88,36 @@ def synthesize_speech(text: str, speaker_wav: Optional[str] = None, language: Op
         Path to generated audio file
     """
     if not text or not text.strip():
-        raise ValueError("Text is required")
-    tts = get_tts_model()
     if output_path is None:
         fd, output_path = tempfile.mkstemp(suffix=".wav", prefix="tts_")
         os.close(fd)
     kwargs = {}
-    if speaker_wav:
         kwargs["speaker_wav"] = speaker_wav
     if language:
         kwargs["language"] = language
     try:
         if torch and torch.cuda.is_available() and TTS_USE_HALF:
-            with torch.inference_mode():
-                with torch.cuda.amp.autocast():
                     tts.tts_to_file(text=text, file_path=output_path, **kwargs)
         else:
             if torch:
@@ -106,10 +125,16 @@ def synthesize_speech(text: str, speaker_wav: Optional[str] = None, language: Op
                     tts.tts_to_file(text=text, file_path=output_path, **kwargs)
             else:
                 tts.tts_to_file(text=text, file_path=output_path, **kwargs)
     except Exception as e:
         if os.path.exists(output_path):
-            os.remove(output_path)
-        raise RuntimeError(f"TTS synthesis failed: {e}")
     return output_path

     return h.hexdigest()
 def get_tts_model():
+    """Get or load TTS model (thread-safe) with better error handling"""
     global _tts_model
     if not TTS_AVAILABLE:
+        raise RuntimeError("TTS.api not available. Please install: pip install TTS")
     with _tts_lock:
         if _tts_model is None:
+            try:
+                logger.info(f"[TTS] Loading model {TTS_MODEL_NAME} on device {TTS_DEVICE}")
+                _tts_model = TTS(TTS_MODEL_NAME)
+                if TTS_DEVICE and torch:
+                    if TTS_DEVICE.startswith("cuda") and torch.cuda.is_available():
+                        try:
+                            _tts_model.to(TTS_DEVICE)
+                            torch.backends.cudnn.benchmark = True
+                            if TTS_USE_HALF and hasattr(_tts_model, "model"):
+                                _tts_model.model.half()
+                            logger.info("[TTS] GPU optimization enabled")
+                        except Exception as e:
+                            logger.warning(f"[TTS] GPU optimization failed, using CPU: {e}")
+                            _tts_model.to("cpu")
+                logger.info("[TTS] Model loaded successfully")
+                _tts_loaded_event.set()
+            except Exception as e:
+                logger.error(f"[TTS] Failed to load model: {e}")
+                _tts_model = None
+                raise RuntimeError(f"Failed to load TTS model: {str(e)}")
     return _tts_model
 def synthesize_speech(text: str, speaker_wav: Optional[str] = None, language: Optional[str] = None, output_path: Optional[str] = None) -> str:
     """
+    Synthesize speech from text with robust error handling
     Args:
         text: Text to synthesize
         Path to generated audio file
     """
     if not text or not text.strip():
+        raise ValueError("Text is required and cannot be empty")
+    try:
+        tts = get_tts_model()
+    except Exception as e:
+        logger.error(f"Failed to get TTS model: {e}")
+        raise RuntimeError(f"TTS model unavailable: {str(e)}")
     if output_path is None:
         fd, output_path = tempfile.mkstemp(suffix=".wav", prefix="tts_")
         os.close(fd)
     kwargs = {}
+    if speaker_wav and os.path.exists(speaker_wav):
         kwargs["speaker_wav"] = speaker_wav
+        logger.info(f"Using speaker sample: {speaker_wav}")
     if language:
         kwargs["language"] = language
+        logger.info(f"Using language: {language}")
     try:
+        logger.info(f"Synthesizing speech: '{text[:50]}...'")
         if torch and torch.cuda.is_available() and TTS_USE_HALF:
+            try:
+                with torch.inference_mode():
+                    with torch.cuda.amp.autocast():
+                        tts.tts_to_file(text=text, file_path=output_path, **kwargs)
+            except Exception as e:
+                logger.warning(f"GPU synthesis failed, trying CPU: {e}")
+                with torch.inference_mode():
                     tts.tts_to_file(text=text, file_path=output_path, **kwargs)
         else:
             if torch:
                     tts.tts_to_file(text=text, file_path=output_path, **kwargs)
             else:
                 tts.tts_to_file(text=text, file_path=output_path, **kwargs)
+        logger.info(f"Speech synthesis successful: {output_path}")
     except Exception as e:
+        logger.error(f"TTS synthesis failed: {e}")
         if os.path.exists(output_path):
+            try:
+                os.remove(output_path)
+            except:
+                pass
+        raise RuntimeError(f"TTS synthesis failed: {str(e)}")
     return output_path