Update whisper_cs.py (#40)

- Update whisper_cs.py (ee5993709babb4400395b6bccd3d2cf4d3152d9e)

whisper_cs.py CHANGED (+19 -2)
@@ -11,6 +11,7 @@ from faster_whisper import WhisperModel
 device = 0 if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float32
 
+DEBUG_MODE = True
 MODEL_PATH_V2 = "langtech-veu/whisper-timestamped-cs"
 MODEL_PATH_V2_FAST = "langtech-veu/faster-whisper-timestamped-cs"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -101,10 +102,18 @@ def post_merge_consecutive_segments_from_text(transcription_text: str) -> str:
     return merged_transcription.strip()
 
 def cleanup_temp_files(*file_paths):
+
+    if DEBUG_MODE: print(f"Entered cleanup_temp_files function...")
+
+    if DEBUG_MODE: print(f"file_paths: {file_paths}")
+
     for path in file_paths:
         if path and os.path.exists(path):
+            if DEBUG_MODE: print(f"Removing path: {path}")
             os.remove(path)
 
+    if DEBUG_MODE: print(f"Exited cleanup_temp_files function.")
+
 '''
 try:
     faster_model = WhisperModel(
@@ -173,6 +182,9 @@ def transcribe_audio(model, audio_path: str) -> Dict:
 
 def generate(audio_path, use_v2_fast):
 
+    if DEBUG_MODE: print(f"Entering generate function...")
+    if DEBUG_MODE: print(f"use_v2_fast: {use_v2_fast}")
+
     if use_v2_fast:
         split_stereo_channels(audio_path)
         left_channel_path = "temp_mono_speaker2.wav"
@@ -206,12 +218,13 @@ def generate(audio_path, use_v2_fast):
 
         clean_output = ""
         for start, end, speaker, text in merged_transcript:
-            clean_output += f"[{speaker}]: {text}\n"
-        print('clean_output',clean_output)
+            clean_output += f"[{speaker}]: {text}\n"
 
         # FIX Seems that post_merge_consecutive_segments_from_text returns an empty string
         #clean_output = post_merge_consecutive_segments_from_text(clean_output)
         #print('clean_output',clean_output)
+
+        if DEBUG_MODE: print(f"clean_output: {clean_output}")
 
     else:
         model = load_whisper_model(MODEL_PATH_V2)
@@ -248,9 +261,13 @@ def generate(audio_path, use_v2_fast):
 
     clean_output = output.strip()
 
+    if DEBUG_MODE: print(f"Clean output generated.")
+
     cleanup_temp_files(
         "temp_mono_speaker1.wav",
         "temp_mono_speaker2.wav"
     )
 
+    if DEBUG_MODE: print(f"Exiting generate function...")
+
     return clean_output
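Not part of the commit itself, but as a hedged illustration of the pattern it introduces: the change gates all tracing on a module-level DEBUG_MODE flag with one-line guarded prints. The sketch below reproduces that pattern around cleanup_temp_files in isolation so it can be run on its own; the __main__ driver and the throwaway files it creates are placeholders, not code from the Space.

# Standalone sketch of the DEBUG_MODE tracing pattern added in this commit.
# cleanup_temp_files mirrors the updated helper in whisper_cs.py; the demo
# driver and file names below are illustrative placeholders only.
import os
import tempfile

DEBUG_MODE = True  # set to False to silence all trace output


def cleanup_temp_files(*file_paths):
    # Trace entry, the received arguments, each removal, and exit,
    # in the same way the commit does with one-line guarded prints.
    if DEBUG_MODE: print("Entered cleanup_temp_files function...")
    if DEBUG_MODE: print(f"file_paths: {file_paths}")

    for path in file_paths:
        if path and os.path.exists(path):
            if DEBUG_MODE: print(f"Removing path: {path}")
            os.remove(path)

    if DEBUG_MODE: print("Exited cleanup_temp_files function.")


if __name__ == "__main__":
    # Create two throwaway files standing in for the temp_mono_speaker*.wav
    # channel splits, then let the helper remove them with tracing on.
    demo_paths = []
    for name in ("temp_mono_speaker1.wav", "temp_mono_speaker2.wav"):
        path = os.path.join(tempfile.gettempdir(), name)
        with open(path, "wb") as f:
            f.write(b"\x00")
        demo_paths.append(path)

    cleanup_temp_files(*demo_paths)

If the trace output keeps growing, the same gating could be obtained from Python's standard logging module (logger.debug calls plus a level set once at startup) instead of repeating the if DEBUG_MODE: conditionals.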