Spaces: Running on Zero
Update whisper_cs.py
whisper_cs.py CHANGED: +25 -8
@@ -130,7 +130,7 @@ except RuntimeError as e:
 )
 '''
 
-faster_model = WhisperModel(MODEL_PATH_V2_FAST, device=DEVICE, compute_type="int8")
+#faster_model = WhisperModel(MODEL_PATH_V2_FAST, device=DEVICE, compute_type="int8")
 
 def load_whisper_model(model_path: str):
     device = "cuda" if torch.cuda.is_available() else "cpu"
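This first hunk comments out the import-time construction of faster_model. On a ZeroGPU Space (this Space reports "Running on Zero") the GPU is typically only attached while a request is being handled, so loading the model when the module is imported is fragile; the next hunk defers the load into generate(). As a minimal sketch of the same idea that also avoids rebuilding the model on every call, assuming the faster_whisper package; get_faster_model and the placeholder path are hypothetical, not names from the original file:

from functools import lru_cache

import torch
from faster_whisper import WhisperModel

# Placeholder: the real value is the MODEL_PATH_V2_FAST constant in whisper_cs.py.
MODEL_PATH_V2_FAST = "path/to/faster-whisper-model"

@lru_cache(maxsize=1)
def get_faster_model() -> WhisperModel:
    # Build the model on first use instead of at import time,
    # then reuse the cached instance on later calls.
    if torch.cuda.is_available():
        # float16 on CUDA: half the memory of float32, fast on GPU.
        return WhisperModel(MODEL_PATH_V2_FAST, device="cuda", compute_type="float16")
    # int8 quantization keeps CPU memory use and latency manageable.
    return WhisperModel(MODEL_PATH_V2_FAST, device="cpu", compute_type="int8")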
@@ -182,18 +182,35 @@ def transcribe_audio(model, audio_path: str) -> Dict:
 
 
 def generate(audio_path, use_v2_fast):
-    global faster_model
     if DEBUG_MODE: print(f"Entering generate function...")
     if DEBUG_MODE: print(f"use_v2_fast: {use_v2_fast}")
 
-
-    try:
-        faster_model.to("cuda")
-        print("[INFO] Moved faster_model to CUDA")
-    except Exception as e:
-        print(f"[WARNING] Could not move model to CUDA: {e}")
+    faster_model = None
 
     if use_v2_fast:
+        if torch.cuda.is_available():
+            try:
+                if DEBUG_MODE: print("[INFO] GPU detected. Loading model on GPU with float16...")
+                faster_model = WhisperModel(
+                    MODEL_PATH_V2_FAST,
+                    device="cuda",
+                    compute_type="float16"
+                )
+            except RuntimeError as e:
+                print(f"[WARNING] Failed to load model on GPU: {e}")
+                if DEBUG_MODE: print("[INFO] Falling back to CPU with int8...")
+                faster_model = WhisperModel(
+                    MODEL_PATH_V2_FAST,
+                    device="cpu",
+                    compute_type="int8"
+                )
+        else:
+            if DEBUG_MODE: print("[INFO] No GPU detected. Loading model on CPU with int8...")
+            faster_model = WhisperModel(
+                MODEL_PATH_V2_FAST,
+                device="cpu",
+                compute_type="int8"
+            )
         split_stereo_channels(audio_path)
         left_channel_path = "temp_mono_speaker2.wav"
         right_channel_path = "temp_mono_speaker1.wav"
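The three WhisperModel(...) constructions above differ only in device and compute_type, and the model is rebuilt on every call to generate(). A possible follow-up, sketched here only (load_faster_whisper is a hypothetical helper, not part of this commit), folds the GPU-first, CPU-fallback logic into a single function:

import torch
from faster_whisper import WhisperModel

def load_faster_whisper(model_path: str) -> WhisperModel:
    # Prefer CUDA with float16; fall back to CPU with int8 when no GPU
    # is present or the GPU load raises (e.g. out of memory).
    if torch.cuda.is_available():
        try:
            return WhisperModel(model_path, device="cuda", compute_type="float16")
        except RuntimeError as e:
            print(f"[WARNING] Failed to load model on GPU: {e}")
    return WhisperModel(model_path, device="cpu", compute_type="int8")

generate() would then start with faster_model = load_faster_whisper(MODEL_PATH_V2_FAST) if use_v2_fast else None, preserving the commit's behaviour while removing the duplication.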