feat: fix model

language_detector.py  CHANGED  (+9 -9)
@@ -1,4 +1,4 @@
-import whisper
+import whisper
 import numpy as np
 import logging
 import io
@@ -14,7 +14,7 @@ class LanguageDetector:
         Args:
             model_name (str): Name of the Whisper model to use. Default is "tiny" which is sufficient for language detection.
         """
-        self.model =
+        self.model = whisper.load_model(model_name)
         logger.info(f"Loaded Whisper model {model_name} for language detection")
 
     def detect_language_from_file(self, audio_file_path):
@@ -31,11 +31,11 @@ class LanguageDetector:
         """
         try:
             # Load and preprocess audio
-            audio =
-            audio =
+            audio = whisper.load_audio(audio_file_path)
+            audio = whisper.pad_or_trim(audio)
 
-            # Make log-Mel spectrogram
-            mel =
+            # Make log-Mel spectrogram with correct dimensions
+            mel = whisper.log_mel_spectrogram(audio, n_mels=128).to(self.model.device)
 
             # Detect language
             _, probs = self.model.detect_language(mel)
@@ -69,10 +69,10 @@ class LanguageDetector:
             audio = (audio * 32768).astype(np.int16)
 
             # Load and preprocess audio
-            audio =
+            audio = whisper.pad_or_trim(audio)
 
-            # Make log-Mel spectrogram
-            mel =
+            # Make log-Mel spectrogram with correct dimensions
+            mel = whisper.log_mel_spectrogram(audio, n_mels=128).to(self.model.device)
 
             # Detect language
             _, probs = self.model.detect_language(mel)
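For reference, a minimal standalone sketch of the same detection flow outside the class (assuming a recent openai-whisper release where log_mel_spectrogram accepts n_mels; the audio path is a placeholder, and reading the mel-bin count from model.dims.n_mels rather than hard-coding 128 is an illustrative alternative — 128 mel bins match the large-v3 family of checkpoints, while earlier ones such as "tiny" expect 80):

import whisper

# Load a checkpoint; "tiny" is the default used by LanguageDetector.
model = whisper.load_model("tiny")

# Load the audio and pad/trim it to the 30-second window Whisper expects.
audio = whisper.load_audio("sample.wav")  # placeholder path
audio = whisper.pad_or_trim(audio)

# Build the log-Mel spectrogram with as many mel bins as the checkpoint expects.
mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(model.device)

# detect_language returns (language_tokens, language_probs); take the most likely code.
_, probs = model.detect_language(mel)
print(max(probs, key=probs.get))

Deriving n_mels from the loaded model keeps the spectrogram shape in sync with whichever checkpoint model_name selects.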