Spaces: Running on Zero
Update whisper_cs.py
whisper_cs.py CHANGED: +25 -8
@@ -130,7 +130,7 @@ except RuntimeError as e:
 )
 '''
 
-faster_model = WhisperModel(MODEL_PATH_V2_FAST, device=DEVICE, compute_type="int8")
+#faster_model = WhisperModel(MODEL_PATH_V2_FAST, device=DEVICE, compute_type="int8")
 
 def load_whisper_model(model_path: str):
     device = "cuda" if torch.cuda.is_available() else "cpu"
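This first hunk comments out the import-time construction of faster_model. On a ZeroGPU Space (this Space reports "Running on Zero") the GPU is typically only attached while a request is being handled, so loading the model when the module is imported is fragile; the next hunk defers the load into generate(). As a minimal sketch of the same idea that also avoids rebuilding the model on every call, assuming the faster_whisper package; get_faster_model and the placeholder path are hypothetical, not names from the original file:

from functools import lru_cache

import torch
from faster_whisper import WhisperModel

# Placeholder: the real value is the MODEL_PATH_V2_FAST constant in whisper_cs.py.
MODEL_PATH_V2_FAST = "path/to/faster-whisper-model"

@lru_cache(maxsize=1)
def get_faster_model() -> WhisperModel:
    # Build the model on first use instead of at import time,
    # then reuse the cached instance on later calls.
    if torch.cuda.is_available():
        # float16 on CUDA: half the memory of float32, fast on GPU.
        return WhisperModel(MODEL_PATH_V2_FAST, device="cuda", compute_type="float16")
    # int8 quantization keeps CPU memory use and latency manageable.
    return WhisperModel(MODEL_PATH_V2_FAST, device="cpu", compute_type="int8")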
@@ -182,18 +182,35 @@ def transcribe_audio(model, audio_path: str) -> Dict:
 
 
 def generate(audio_path, use_v2_fast):
-    global faster_model
     if DEBUG_MODE: print(f"Entering generate function...")
     if DEBUG_MODE: print(f"use_v2_fast: {use_v2_fast}")
 
-
-    try:
-        faster_model.to("cuda")
-        print("[INFO] Moved faster_model to CUDA")
-    except Exception as e:
-        print(f"[WARNING] Could not move model to CUDA: {e}")
+    faster_model = None
 
     if use_v2_fast:
+        if torch.cuda.is_available():
+            try:
+                if DEBUG_MODE: print("[INFO] GPU detected. Loading model on GPU with float16...")
+                faster_model = WhisperModel(
+                    MODEL_PATH_V2_FAST,
+                    device="cuda",
+                    compute_type="float16"
+                )
+            except RuntimeError as e:
+                print(f"[WARNING] Failed to load model on GPU: {e}")
+                if DEBUG_MODE: print("[INFO] Falling back to CPU with int8...")
+                faster_model = WhisperModel(
+                    MODEL_PATH_V2_FAST,
+                    device="cpu",
+                    compute_type="int8"
+                )
+        else:
+            if DEBUG_MODE: print("[INFO] No GPU detected. Loading model on CPU with int8...")
+            faster_model = WhisperModel(
+                MODEL_PATH_V2_FAST,
+                device="cpu",
+                compute_type="int8"
+            )
         split_stereo_channels(audio_path)
         left_channel_path = "temp_mono_speaker2.wav"
         right_channel_path = "temp_mono_speaker1.wav"
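The three WhisperModel(...) constructions above differ only in device and compute_type, and the model is rebuilt on every call to generate(). A possible follow-up, sketched here only (load_faster_whisper is a hypothetical helper, not part of this commit), folds the GPU-first, CPU-fallback logic into a single function:

import torch
from faster_whisper import WhisperModel

def load_faster_whisper(model_path: str) -> WhisperModel:
    # Prefer CUDA with float16; fall back to CPU with int8 when no GPU
    # is present or the GPU load raises (e.g. out of memory).
    if torch.cuda.is_available():
        try:
            return WhisperModel(model_path, device="cuda", compute_type="float16")
        except RuntimeError as e:
            print(f"[WARNING] Failed to load model on GPU: {e}")
    return WhisperModel(model_path, device="cpu", compute_type="int8")

generate() would then start with faster_model = load_faster_whisper(MODEL_PATH_V2_FAST) if use_v2_fast else None, preserving the commit's behaviour while removing the duplication.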