Spaces: Running on Zero
Update whisper_cs.py (#27)
Browse files
- Update whisper_cs.py (21d88d41c32fdb31a5e1c97551c235825afa4583)
Co-authored-by: Sarah Solito <ssolito@users.noreply.huggingface.co>
- whisper_cs.py +5 -0
whisper_cs.py
CHANGED
|
@@ -46,6 +46,7 @@ def convert_to_mono(input_path):
|
|
| 46 |
audio = AudioSegment.from_file(input_path)
|
| 47 |
base, ext = os.path.splitext(input_path)
|
| 48 |
output_path = f"{base}_merged.wav"
|
|
|
|
| 49 |
mono = audio.set_channels(1)
|
| 50 |
mono.export(output_path, format="wav")
|
| 51 |
return output_path
|
|
@@ -60,6 +61,7 @@ def format_audio(audio_path):
|
|
| 60 |
input_audio = torch.mean(input_audio, dim=0, keepdim=True)
|
| 61 |
resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
|
| 62 |
input_audio = resampler(input_audio)
|
|
|
|
| 63 |
return input_audio.squeeze(), 16000
|
| 64 |
|
| 65 |
def assign_timestamps(asr_segments, audio_path):
|
|
@@ -228,10 +230,13 @@ asr_pipe = pipeline(
|
|
| 228 |
def diarization(audio_path):
|
| 229 |
diarization_result = diarization_pipeline(audio_path)
|
| 230 |
diarized_segments = list(diarization_result.itertracks(yield_label=True))
|
|
|
|
| 231 |
return diarized_segments
|
| 232 |
|
| 233 |
def asr(audio_path):
|
|
|
|
| 234 |
asr_result = asr_pipe(audio_path, return_timestamps=True)
|
|
|
|
| 235 |
asr_segments = hf_chunks_to_whisperx_segments(asr_result['chunks'])
|
| 236 |
asr_segments = assign_timestamps(asr_segments, audio_path)
|
| 237 |
return asr_segments
|
|
|
|
| 46 |
audio = AudioSegment.from_file(input_path)
|
| 47 |
base, ext = os.path.splitext(input_path)
|
| 48 |
output_path = f"{base}_merged.wav"
|
| 49 |
+
print('output_path',output_path)
|
| 50 |
mono = audio.set_channels(1)
|
| 51 |
mono.export(output_path, format="wav")
|
| 52 |
return output_path
|
|
|
|
| 61 |
input_audio = torch.mean(input_audio, dim=0, keepdim=True)
|
| 62 |
resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
|
| 63 |
input_audio = resampler(input_audio)
|
| 64 |
+
print('resampled')
|
| 65 |
return input_audio.squeeze(), 16000
|
| 66 |
|
| 67 |
def assign_timestamps(asr_segments, audio_path):
|
|
|
|
| 230 |
def diarization(audio_path):
|
| 231 |
diarization_result = diarization_pipeline(audio_path)
|
| 232 |
diarized_segments = list(diarization_result.itertracks(yield_label=True))
|
| 233 |
+
print('diarized_segments',diarized_segments)
|
| 234 |
return diarized_segments
|
| 235 |
|
| 236 |
def asr(audio_path):
|
| 237 |
+
print(f"[DEBUG] Starting ASR on audio: {audio_path}")
|
| 238 |
asr_result = asr_pipe(audio_path, return_timestamps=True)
|
| 239 |
+
print(f"[DEBUG] Raw ASR result: {asr_result}")
|
| 240 |
asr_segments = hf_chunks_to_whisperx_segments(asr_result['chunks'])
|
| 241 |
asr_segments = assign_timestamps(asr_segments, audio_path)
|
| 242 |
return asr_segments
|