Spaces:

leeksang
/

Accent_Classifier

Sleeping

App Files Files Community

leeksang committed on Jun 1

Commit

9c06d0f

verified ·

1 Parent(s): 28543c7

Upload app.py

Browse files

Files changed (1) hide show

app.py +104 -0

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

import functools
import importlib
import os
import shutil
import subprocess
import sys
import tempfile
from urllib.parse import urlparse

import gradio as gr
import requests
import yt_dlp as youtube_dl
from moviepy.editor import VideoFileClip
from transformers import pipeline
# Ensure required packages are installed
def install_package(package, import_name=None):
    """Import a package, installing it with pip first if it is missing.

    Args:
        package: Name passed to ``pip install`` when the import fails.
        import_name: Module name to import. Defaults to ``package`` with
            hyphens replaced by underscores, because a hyphenated
            distribution name (e.g. ``yt-dlp``) can never be a module name.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits non-zero.
        ImportError: If the module is still not importable after installing.
    """
    module_name = import_name or package.replace("-", "_")
    try:
        importlib.import_module(module_name)
    except ImportError:
        print(f"Installing {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        # The import system caches directory listings; refresh them so the
        # freshly installed package is visible to this running process.
        importlib.invalidate_caches()
        importlib.import_module(module_name)
# Make sure the runtime dependencies are importable, installing any that are
# missing. Each name must be importable as written: "yt_dlp" is the module name
# of the yt-dlp distribution (pip accepts either spelling), whereas "yt-dlp"
# can never be imported and would make this bootstrap fail on every start.
# NOTE(review): the file-level imports above run before these calls, so a
# missing gradio/transformers/yt_dlp/moviepy fails there first - confirm
# whether this bootstrap is still wanted for anything but torch/tensorflow.
install_package("torch")
install_package("tensorflow")
install_package("transformers")
install_package("gradio")
install_package("yt_dlp")
install_package("moviepy")
# Decide whether a URL points straight at a video file
def is_direct_video_link(url: str) -> bool:
    """Return True when the URL's path ends in a known video file extension.

    Only the path component is inspected, so query strings and fragments are
    ignored, and the comparison is case-insensitive.
    """
    video_suffixes = (".mp4", ".mov", ".avi", ".mkv", ".wmv", ".flv")
    url_path = urlparse(url).path
    return url_path.lower().endswith(video_suffixes)
# Download functions
def download_video_direct(video_url: str, filename: str = "video", timeout: float = 30.0):
    """Stream a directly linked video file to disk.

    Args:
        video_url: URL whose path ends in a file extension.
        filename: Output path without extension; the URL's extension is
            appended to it.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled server cannot block the caller indefinitely.

    Returns:
        The path of the written file (``filename`` plus the URL's extension).

    Raises:
        ValueError: If the URL path has no file extension.
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
    """
    ext = os.path.splitext(urlparse(video_url).path)[1]
    if not ext:
        raise ValueError("Cannot determine file extension.")
    filename_with_ext = filename + ext
    print(f"Downloading: {video_url}")
    try:
        # The context manager releases the streamed connection even on error.
        with requests.get(video_url, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            with open(filename_with_ext, "wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
    except Exception:
        # Do not leave a truncated file behind for a later step to pick up.
        if os.path.exists(filename_with_ext):
            os.remove(filename_with_ext)
        raise
    return filename_with_ext
def download_video_via_yt_dlp(video_url: str, filename: str = "video.mp4"):
    """Download a video through yt-dlp and return the requested output path.

    Args:
        video_url: Page or stream URL handed to yt-dlp.
        filename: Output template given to yt-dlp as ``outtmpl``.

    Returns:
        ``filename`` unchanged.
    """
    print(f"Downloading (yt-dlp): {video_url}")
    # Single video only, quiet output, MP4 format selector with a fallback.
    options = dict(
        format="best[ext=mp4]/best",
        outtmpl=filename,
        noplaylist=True,
        quiet=True,
    )
    with youtube_dl.YoutubeDL(options) as downloader:
        downloader.download([video_url])
    return filename
# Extract audio via MoviePy
def extract_audio(video_filename: str, audio_filename: str = "audio.wav") -> str:
    """Write the audio track of a video file to ``audio_filename``.

    Args:
        video_filename: Path of the video to read.
        audio_filename: Path of the audio file to write.

    Returns:
        ``audio_filename``.

    Raises:
        Exception: With an "Audio extraction failed" message if the video
            cannot be opened, has no audio track, or the audio cannot be
            written. The original error is chained as the cause.
    """
    video = None
    try:
        video = VideoFileClip(video_filename)
        audio = video.audio
        if audio is None:
            # Without this check the failure surfaces as an opaque
            # "'NoneType' object has no attribute ..." message.
            raise ValueError("the video has no audio track")
        audio.write_audiofile(audio_filename)
        return audio_filename
    except Exception as e:
        raise Exception(f"Audio extraction failed: {e}") from e
    finally:
        # Close the clip so the video file is released before the caller
        # tries to delete it.
        if video is not None:
            video.close()
# Hugging Face accent classifier
@functools.lru_cache(maxsize=4)
def _load_classifier(model_name: str):
    """Build the audio-classification pipeline once per model name."""
    return pipeline("audio-classification", model=model_name)


def classify_accent(audio_file: str, model_name: str = "superb/wav2vec2-base-superb-sid") -> str:
    """Classify an audio file and describe the highest-scoring label.

    Args:
        audio_file: Path of the audio file to classify.
        model_name: Hugging Face model identifier for the pipeline.

    Returns:
        A formatted "Accent: <label> (Confidence: <pct>%)" string, or
        "No classification result." when the pipeline returns nothing.
    """
    # NOTE(review): the default checkpoint name ("...-superb-sid") looks like
    # a speaker-identification model, so the labels may be speaker IDs rather
    # than accents - confirm the intended model.
    # Reuse the cached pipeline instead of rebuilding it on every request.
    classifier = _load_classifier(model_name)
    results = classifier(audio_file)
    if results:
        # Pick the best score explicitly rather than relying on result order.
        top = max(results, key=lambda item: item["score"])
        return f"Accent: {top['label']} (Confidence: {top['score'] * 100:.2f}%)"
    return "No classification result."
# Full pipeline for Gradio
def accent_classifier(video_url: str) -> str:
    """Download a video, extract its audio, and classify it.

    Every intermediate file lives in a private temporary directory, so
    overlapping requests cannot overwrite each other's files and a failed
    download leaves nothing behind.

    Args:
        video_url: Direct video link or a page URL supported by yt-dlp.

    Returns:
        The classification text, or an "Error: ..." message describing what
        went wrong. Failures are returned as text, not raised, so the UI can
        display them.
    """
    # Pasted URLs often carry stray whitespace, which breaks extension checks.
    url = (video_url or "").strip()
    if not url:
        return "Error: Please enter a video URL."

    workdir = tempfile.mkdtemp(prefix="accent_classifier_")
    try:
        if is_direct_video_link(url):
            video_path = download_video_direct(url, os.path.join(workdir, "video"))
        else:
            video_path = download_video_via_yt_dlp(url, os.path.join(workdir, "video.mp4"))
        audio_path = extract_audio(video_path, os.path.join(workdir, "audio.wav"))
        result = classify_accent(audio_path)
    except Exception as e:
        result = f"Error: {e}"
    finally:
        # Best-effort cleanup: a leftover temp file must not mask the result.
        shutil.rmtree(workdir, ignore_errors=True)
    return result
# Gradio UI: one text box in (the video URL), one text result out.
iface = gr.Interface(
    title="🎤 Accent Classifier",
    description=(
        "Paste any video URL (MP4/MOV/AVI/MKV/WMV/FLV, YouTube, Vimeo, etc.). "
        "This will download, extract audio, and classify the speaker's accent."
    ),
    fn=accent_classifier,
    inputs=gr.Textbox(
        placeholder="Enter a direct video or streaming link",
        label="Video URL",
    ),
    outputs="text",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()