asr-inference

Running on Zero

File size: 1,599 Bytes

1619dcb

import librosa
import numpy as np
from settings import DEBUG_MODE, RESAMPLING_FREQ, ORIGINAL_FREQ, MIN_SIL_DURATION, SIL_THRESHOLD
from audio_utils import sec_to_hhmmss

def _silent_runs(mask, frame_duration, total_duration, min_duration):
    """Collect (start, end) times, in seconds, of long runs of silent frames.

    Args:
        mask: Sequence of booleans, one per analysis frame; True means silent.
        frame_duration: Time step between consecutive frames, in seconds.
        total_duration: Length of the analysed audio, in seconds; used to cap
            the end of a run that reaches the last frame.
        min_duration: Runs shorter than this many seconds are discarded.

    Returns:
        A list of (start, end) tuples in chronological order.
    """
    runs = []
    run_start = None
    for index, is_silent in enumerate(mask):
        if is_silent and run_start is None:
            run_start = index * frame_duration
        elif not is_silent and run_start is not None:
            run_end = index * frame_duration
            if run_end - run_start >= min_duration:
                runs.append((run_start, run_end))
            run_start = None

    if run_start is not None:
        # The frame count can exceed the audio length by one hop (padded
        # framing), so never report an end time past the end of the audio.
        run_end = min(len(mask) * frame_duration, total_duration)
        if run_end - run_start >= min_duration:
            runs.append((run_start, run_end))
    return runs


def silence(audio_path):
    """Report stretches of prolonged silence found in an audio file.

    The audio is loaded as mono at ORIGINAL_FREQ, resampled to
    RESAMPLING_FREQ, peak-normalised, and split into non-overlapping 100 ms
    frames. A frame is silent when its RMS level, in dB relative to the
    loudest frame, is below SIL_THRESHOLD. Runs of silent frames lasting at
    least MIN_SIL_DURATION seconds are reported.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.

    Returns:
        A Spanish-language message listing the detected silence intervals,
        or a message stating that no prolonged silence was detected.
    """
    if DEBUG_MODE:
        print("[MODEL LOADING] Loading silence model")

    # mono=True merges stereo channels into a single one.
    y, _ = librosa.load(audio_path, sr=ORIGINAL_FREQ, mono=True)
    if y.size == 0:
        # Nothing to analyse; avoids np.max() failing on an empty array.
        return "No se detectaron silencios prolongados"

    y = librosa.resample(y, orig_sr=ORIGINAL_FREQ, target_sr=RESAMPLING_FREQ)
    total_duration = y.shape[-1] / RESAMPLING_FREQ

    peak = np.max(np.abs(y))
    if peak == 0:
        # Digitally silent clip: normalising would divide by zero and the
        # relative dB scale has no reference, so treat the whole clip as one
        # silent stretch.
        if total_duration >= MIN_SIL_DURATION:
            silence_segments = [(0.0, total_duration)]
        else:
            silence_segments = []
    else:
        y = y / peak

        frame_length = int(0.1 * RESAMPLING_FREQ)
        hop_length = frame_length  # non-overlapping frames
        rms = librosa.feature.rms(
            y=y, frame_length=frame_length, hop_length=hop_length
        )[0]
        # dB relative to the loudest frame (0 dB == maximum RMS).
        rms_db = librosa.amplitude_to_db(rms, ref=np.max)

        silence_mask = rms_db < SIL_THRESHOLD
        frame_duration = hop_length / RESAMPLING_FREQ
        silence_segments = _silent_runs(
            silence_mask, frame_duration, total_duration, MIN_SIL_DURATION
        )

    if silence_segments:
        events = [f"{sec_to_hhmmss(s)} – {sec_to_hhmmss(e)}" for s, e in silence_segments]
        event = "Silencios detectados en: " + ", ".join(events)
    else:
        event = "No se detectaron silencios prolongados"

    return event