asr-inference / silence_detector.py
import librosa
import numpy as np
from settings import DEBUG_MODE, RESAMPLING_FREQ, ORIGINAL_FREQ, MIN_SIL_DURATION, SIL_THRESHOLD
from audio_utils import sec_to_hhmmss


def silence(audio_path):
    """Detect prolonged silences in an audio file using frame-level RMS energy."""
    if DEBUG_MODE:
        print("[MODEL LOADING] Loading silence model")

    # Load as mono (stereo is downmixed) at the original sampling rate,
    # then resample to the working frequency.
    y, sr = librosa.load(audio_path, sr=ORIGINAL_FREQ, mono=True)
    y = librosa.resample(y, orig_sr=ORIGINAL_FREQ, target_sr=RESAMPLING_FREQ)

    # Peak-normalize, guarding against an all-zero (fully silent) signal.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    # Frame-level RMS energy over non-overlapping 100 ms windows,
    # expressed in dB relative to the loudest frame.
    frame_length = int(0.1 * RESAMPLING_FREQ)
    hop_length = frame_length
    rms = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length)[0]
    rms_db = librosa.amplitude_to_db(rms, ref=np.max)

    # Frames below the threshold count as silent.
    silence_mask = rms_db < SIL_THRESHOLD
    frame_duration = hop_length / RESAMPLING_FREQ

    # Group consecutive silent frames into segments and keep those lasting
    # at least MIN_SIL_DURATION seconds.
    silence_segments = []
    start = None
    for i, silent in enumerate(silence_mask):
        if silent and start is None:
            start = i * frame_duration
        elif not silent and start is not None:
            end = i * frame_duration
            if end - start >= MIN_SIL_DURATION:
                silence_segments.append((start, end))
            start = None
    # Close a silence that runs to the end of the file.
    if start is not None:
        end = len(silence_mask) * frame_duration
        if end - start >= MIN_SIL_DURATION:
            silence_segments.append((start, end))

    # Build the (Spanish) event message returned to the caller.
    if silence_segments:
        events = [f"{sec_to_hhmmss(s)}-{sec_to_hhmmss(e)}" for s, e in silence_segments]
        # "Silences detected at: ..."
        event = "Silencios detectados en: " + ", ".join(events)
    else:
        # "No prolonged silences detected"
        event = "No se detectaron silencios prolongados"
    return event
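

# --- Usage sketch (not part of the original module): a minimal way to run the
# detector from the command line. The audio path is a placeholder; the actual
# thresholds and frequencies come from whatever settings.py defines, which is
# not shown in this file. Nothing here is confirmed by the repository beyond
# the silence() signature above.
if __name__ == "__main__":
    import sys

    # Expect a single argument, e.g. `python silence_detector.py clip.wav`.
    if len(sys.argv) != 2:
        print("Usage: python silence_detector.py <audio_path>")
        sys.exit(1)

    print(silence(sys.argv[1]))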