File size: 804 Bytes
c20196f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import os

import whisper
from transformers import pipeline

class Voice_Analysis:
    """Speech emotion classification and transcription for audio input.

    Holds a Hugging Face ``audio-classification`` pipeline (``classifier``)
    and a Whisper model (``modelwa``); both are loaded once in ``__init__``.
    """

    def __init__(self, emotion_model: str = "prithivMLmods/Speech-Emotion-Classification", whisper_size: str = "base"):
        """Load the emotion classifier and the Whisper model.

        Args:
            emotion_model: Identifier handed to ``pipeline`` as both the
                ``model`` and the ``feature_extractor``.
            whisper_size: Model name handed to ``whisper.load_model``.
        """
        # HF pipeline for speech emotion
        self.classifier = pipeline(
            "audio-classification",
            model=emotion_model,
            feature_extractor=emotion_model,
        )
        # Whisper for ASR
        self.modelwa = whisper.load_model(whisper_size)

    @staticmethod
    def _as_backend_input(path):
        """Return *path* as a ``str`` when it is an ``os.PathLike`` object.

        Both backends only recognise a filename when it is a ``str``, so
        ``pathlib.Path`` (and similar) objects are converted here. Every other
        input (``str``, ``bytes``, arrays, ...) is passed through untouched so
        existing callers see no change.
        """
        if isinstance(path, os.PathLike):
            # fsdecode always yields ``str``, even for a bytes-based __fspath__.
            return os.fsdecode(path)
        return path

    def detect(self, path):
        """Run emotion classification on an audio file.

        Args:
            path: Audio file location as ``str`` or ``os.PathLike``; any other
                value is forwarded to the pipeline as-is.

        Returns:
            Whatever the classifier pipeline returns for the input
            (a list of dicts with label/score).
        """
        return self.classifier(self._as_backend_input(path))

    def subtitles(self, path) -> str:
        """Transcribe audio to text using Whisper.

        Args:
            path: Audio file location as ``str`` or ``os.PathLike``; any other
                value is forwarded to ``transcribe`` as-is.

        Returns:
            The ``"text"`` entry of the transcription result with surrounding
            whitespace stripped, or ``""`` when that entry is absent.
        """
        result = self.modelwa.transcribe(self._as_backend_input(path))
        return result.get("text", "").strip()