asr-inference / silence_detector.py
import librosa
import numpy as np
from settings import DEBUG_MODE, RESAMPLING_FREQ, ORIGINAL_FREQ, MIN_SIL_DURATION, SIL_THRESHOLD
from audio_utils import sec_to_hhmmss


def silence(audio_path):
    """Detect prolonged silences in an audio file using frame-level RMS energy."""
    if DEBUG_MODE:
        print("[MODEL LOADING] Loading silence model")

    # Load as mono (stereo is downmixed) at the original sampling rate,
    # then resample to the working frequency.
    y, sr = librosa.load(audio_path, sr=ORIGINAL_FREQ, mono=True)
    y = librosa.resample(y, orig_sr=ORIGINAL_FREQ, target_sr=RESAMPLING_FREQ)

    # Peak-normalize, guarding against an all-zero (fully silent) signal.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    # Frame-level RMS energy over non-overlapping 100 ms windows,
    # expressed in dB relative to the loudest frame.
    frame_length = int(0.1 * RESAMPLING_FREQ)
    hop_length = frame_length
    rms = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length)[0]
    rms_db = librosa.amplitude_to_db(rms, ref=np.max)

    # Frames below the threshold count as silent.
    silence_mask = rms_db < SIL_THRESHOLD
    frame_duration = hop_length / RESAMPLING_FREQ

    # Group consecutive silent frames into segments and keep those lasting
    # at least MIN_SIL_DURATION seconds.
    silence_segments = []
    start = None
    for i, silent in enumerate(silence_mask):
        if silent and start is None:
            start = i * frame_duration
        elif not silent and start is not None:
            end = i * frame_duration
            if end - start >= MIN_SIL_DURATION:
                silence_segments.append((start, end))
            start = None
    # Close a silence that runs to the end of the file.
    if start is not None:
        end = len(silence_mask) * frame_duration
        if end - start >= MIN_SIL_DURATION:
            silence_segments.append((start, end))

    # Build the (Spanish) event message returned to the caller.
    if silence_segments:
        events = [f"{sec_to_hhmmss(s)}-{sec_to_hhmmss(e)}" for s, e in silence_segments]
        # "Silences detected at: ..."
        event = "Silencios detectados en: " + ", ".join(events)
    else:
        # "No prolonged silences detected"
        event = "No se detectaron silencios prolongados"
    return event
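

# --- Usage sketch (not part of the original module): a minimal way to run the
# detector from the command line. The audio path is a placeholder; the actual
# thresholds and frequencies come from whatever settings.py defines, which is
# not shown in this file. Nothing here is confirmed by the repository beyond
# the silence() signature above.
if __name__ == "__main__":
    import sys

    # Expect a single argument, e.g. `python silence_detector.py clip.wav`.
    if len(sys.argv) != 2:
        print("Usage: python silence_detector.py <audio_path>")
        sys.exit(1)

    print(silence(sys.argv[1]))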