Upload 5 files

- src/custom_interface.py +29 -0
- src/d.py +71 -0
- src/packages.txt +1 -0
- src/requirements.txt +9 -0
- src/streamlit_app.py +35 -39
src/custom_interface.py
ADDED
@@ -0,0 +1,29 @@
+# custom_interface.py for CommonAccent English Accent Classifier
+# Downloaded from: https://huggingface.co/Jzuluaga/accent-id-commonaccent_xlsr-en-english/blob/main/custom_interface.py
+# This file is required by the SpeechBrain foreign_class interface.
+
+import torch
+import torchaudio
+from speechbrain.pretrained.interfaces import Pretrained
+
+class CustomEncoderWav2vec2Classifier(Pretrained):
+    MODULES_NEEDED = ["model", "mean_var_norm", "label_encoder"]
+    HPARAMS_NEEDED = ["sample_rate"]
+
+    def classify_file(self, path):
+        # load_audio returns a single tensor, already resampled to the model rate
+        signal = self.load_audio(path)
+        return self.classify_batch(signal, self.hparams.sample_rate)
+
+    def classify_batch(self, signal, fs):
+        if fs != self.hparams.sample_rate:
+            signal = torchaudio.functional.resample(signal, fs, self.hparams.sample_rate)
+        if signal.dim() == 1:
+            signal = signal.unsqueeze(0)  # add a batch dimension
+        lengths = torch.ones(signal.shape[0], device=signal.device)
+        signal = self.modules.mean_var_norm(signal, lengths)
+        embeddings = self.modules.model.encode_batch(signal)
+        out_prob = self.modules.model.classify_batch(embeddings)
+        score, index = torch.max(out_prob, dim=1)
+        text_lab = self.hparams.label_encoder.decode_torch(index)
+        return out_prob, score, index, text_lab
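For reference, a minimal sketch of how this interface is consumed: d.py below loads it through SpeechBrain's foreign_class, which fetches custom_interface.py from the model repo and instantiates the class above. The local path "sample.wav" is a placeholder.

from speechbrain.pretrained.interfaces import foreign_class

classifier = foreign_class(
    source="Jzuluaga/accent-id-commonaccent_xlsr-en-english",
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)
# classify_file returns (out_prob, score, index, text_lab)
out_prob, score, index, text_lab = classifier.classify_file("sample.wav")
print(text_lab)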
src/d.py
ADDED
@@ -0,0 +1,71 @@
+import yt_dlp
+from pydub import AudioSegment
+import os
+import librosa
+import numpy as np
+import matplotlib.pyplot as plt
+import torchaudio
+
+def download_and_extract_audio(video_url, output_audio_path="audio.wav"):
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        'outtmpl': 'temp_audio.%(ext)s',
+        'quiet': True,
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'wav',
+            'preferredquality': '192',
+        }],
+    }
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        ydl.download([video_url])
+    # The wav postprocessor normally writes temp_audio.wav; check other extensions as a fallback
+    for ext in ['wav', 'mp3', 'm4a', 'webm']:
+        fname = f"temp_audio.{ext}"
+        if os.path.exists(fname):
+            if ext != 'wav':
+                audio = AudioSegment.from_file(fname)
+                audio.export(output_audio_path, format="wav")
+                os.remove(fname)
+            else:
+                os.rename(fname, output_audio_path)
+            return output_audio_path
+    raise FileNotFoundError("Audio extraction failed.")
+
+def debug_audio(audio_path):
+    y, sr = librosa.load(audio_path, sr=None)
+    plt.figure(figsize=(10, 2))
+    plt.plot(np.linspace(0, len(y)/sr, num=len(y)), y)
+    plt.title('Extracted Audio Waveform')
+    plt.xlabel('Time (s)')
+    plt.ylabel('Amplitude')
+    plt.show()
+
+def get_accent_classifier():
+    from speechbrain.pretrained.interfaces import foreign_class
+    # Cache the model on the function object so it is only loaded once per process
+    if not hasattr(get_accent_classifier, "model"):
+        get_accent_classifier.model = foreign_class(
+            source="Jzuluaga/accent-id-commonaccent_xlsr-en-english",
+            pymodule_file="custom_interface.py",
+            classname="CustomEncoderWav2vec2Classifier"
+        )
+    return get_accent_classifier.model
+
+def analyze_accent(audio_path):
+    accent_classifier = get_accent_classifier()
+    # The classifier expects a path to a wav file
+    out_prob, score, index, text_lab = accent_classifier.classify_file(audio_path)
+    accent = text_lab[0] if isinstance(text_lab, list) else text_lab
+    confidence = float(score[0]) if hasattr(score, '__getitem__') else float(score)
+    summary = f"Detected accent: {accent} with confidence {confidence:.2f}."
+    return accent, confidence, summary
+
+if __name__ == "__main__":
+    video_url = input("Enter public video URL: ")
+    audio_path = download_and_extract_audio(video_url)
+    # debug_audio(audio_path)  # Uncomment to plot the extracted waveform
+    accent, confidence, summary = analyze_accent(audio_path)
+    print(f"Accent: {accent}")
+    print(f"Confidence score: {confidence:.2f}")
+    print(f"Summary: {summary}")
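A quick usage sketch, mirroring what streamlit_app.py does without the interactive prompt; the URL is a placeholder for any public video link:

from d import download_and_extract_audio, analyze_accent

# Hypothetical URL; any public YouTube/Loom/direct MP4 link works
path = download_and_extract_audio("https://www.youtube.com/watch?v=VIDEO_ID")
accent, confidence, summary = analyze_accent(path)
print(summary)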
src/packages.txt
ADDED
@@ -0,0 +1 @@
+ffmpeg
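On Hugging Face Spaces (and Streamlit Community Cloud), packages.txt lists apt-level system packages to install at build time; ffmpeg is needed here by yt-dlp's FFmpegExtractAudio postprocessor and by pydub.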
src/requirements.txt
ADDED
@@ -0,0 +1,9 @@
+streamlit
+speechbrain==0.5.14
+torchaudio
+transformers
+yt-dlp
+pydub
+librosa
+matplotlib
+numpy
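The speechbrain==0.5.14 pin matters: custom_interface.py and d.py import from speechbrain.pretrained.interfaces, a module path the 1.x releases renamed to speechbrain.inference (keeping at most a deprecated alias), so an unpinned install could break those imports.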
src/streamlit_app.py
CHANGED
@@ -1,40 +1,36 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-[… 36 further removed lines of the previous template app …]
+import os
+from d import download_and_extract_audio, analyze_accent, get_accent_classifier
+
+st.title("English Accent Classifier")
+st.write("""
+Paste a public video URL (e.g., YouTube, Loom, or a direct MP4 link). The tool extracts the audio, classifies the speaker's accent, and reports a confidence score.
+""")
+
+# Show a spinner and load the model once at startup
+if "model_loaded" not in st.session_state:
+    with st.spinner("Loading models (this may take a while the first time)..."):
+        get_accent_classifier()
+    st.session_state["model_loaded"] = True
+    st.success("Model loaded!")
+
+video_url = st.text_input("Enter public video URL:")
+
+if st.button("Analyze Accent") and video_url:
+    with st.spinner("Downloading and extracting audio..."):
+        try:
+            audio_path = download_and_extract_audio(video_url)
+        except Exception as e:
+            st.error(f"Audio extraction failed: {e}")
+            st.stop()
+    st.success("Audio extracted successfully!")
+    st.audio(audio_path)
+    with st.spinner("Analyzing accent (downloading model if needed)..."):
+        try:
+            accent, confidence, summary = analyze_accent(audio_path)
+        except Exception as e:
+            st.error(f"Accent analysis failed: {e}")
+            st.stop()
+    st.markdown(f"**Accent:** {accent}")
+    st.markdown(f"**Confidence score:** {confidence:.2f}")
+    st.markdown(f"**Summary:** {summary}")
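A design note on the startup block: the model_loaded flag lives in st.session_state, which is per browser session, so every new session re-enters the loading branch (the model itself is still cached on the function object in d.py). A minimal alternative sketch, assuming Streamlit >= 1.18 where st.cache_resource is available:

import streamlit as st
from d import get_accent_classifier

@st.cache_resource
def load_classifier():
    # Runs once per server process; all sessions and reruns share the object
    return get_accent_classifier()

classifier = load_classifier()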