Spaces:

neuralworm
/

video_transcription

Running

App Files Files Community

neuralworm committed 7 days ago

Commit

f862cfc

verified ·

1 Parent(s): e430acc

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -67

app.py CHANGED Viewed

@@ -1,21 +1,24 @@
 #!/usr/bin/env python3
 # coding: utf-8
-"""
-Hugging Face Space (Gradio) App: Video -> Audio -> Whisper Transkript (+ Downloads SRT/TXT/VTT/JSON)
 Tab 1: Transkription
 - Video per URL (yt-dlp) oder Upload
 - Audio-Extraktion via ffmpeg
 - Transkription mit Whisper (lokal)
 - Downloads: SRT, VTT, TXT, JSON
 Tab 2: Netzwerk / DNS Diagnose
 - Testet DNS-Auflösung für mehrere Hosts
 - Testet HTTP-Requests auf Basis-URLs
 - Zeigt Version/Verfügbarkeit von yt-dlp und ffmpeg
-Hinweis: Verwende diese App nur für eigene oder freigegebene Inhalte.
-"""
 import os
 import subprocess
 import tempfile
@@ -40,9 +43,9 @@ def run_capture(cmd):
     """Run a command and return stdout; raise RuntimeError with readable stderr on failure."""
     result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
     if result.returncode != 0:
-        stderr_text = result.stderr or "" # Keep only tail to avoid massive logs
         tail = stderr_text[-2000:]
-        # KORRIGIERTE ZEILE:
         raise RuntimeError("Command failed: " + " ".join(cmd) + " " + tail)
     return result.stdout
@@ -51,11 +54,7 @@ def run_capture(cmd):
 # ---------------------------------------------------------------------------
 def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=None):
-    """Download a video with yt-dlp into out_dir and return the video path.
-    - Wenn DNS/Internet für bestimmte Hosts (z.B. Instagram) geblockt ist,
-      wird eine verständliche Fehlermeldung zurückgegeben.
-    """
     out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
     cmd = ["yt-dlp", "-o", out_template]
     if format_selector:
@@ -81,13 +80,11 @@ def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=N
         raise FileNotFoundError("Download fehlgeschlagen — keine Datei gefunden.")
     return str(files[0])
 def extract_audio_ffmpeg(video_path, out_wav):
     """Extract the audio of ``video_path`` into a WAV file at ``out_wav``.

     ffmpeg is invoked with video disabled (-vn), one audio channel (-ac 1),
     a 16000 Hz sample rate (-ar 16000) and WAV output (-f wav); -y lets it
     overwrite an existing output file. ffmpeg's stdout and stderr are
     discarded.

     Returns:
         ``out_wav``, unchanged.

     Raises:
         subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
     """
     ffmpeg_args = [
         "ffmpeg", "-y",
         "-i", video_path,
         "-vn",
         "-ac", "1",
         "-ar", "16000",
         "-f", "wav",
         out_wav,
     ]
     subprocess.run(
         ffmpeg_args,
         stdout=subprocess.DEVNULL,
         stderr=subprocess.DEVNULL,
         check=True,
     )
     return out_wav
 # ---------------------------------------------------------------------------
 # Zeit- und Format-Helfer
 # ---------------------------------------------------------------------------
@@ -99,7 +96,6 @@ def seconds_to_timestamp(s):
     ms = int(round((s - int(s)) * 1000))
     return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
 def format_timestamp_vtt(s):
     hours = int(s // 3600)
     minutes = int((s % 3600) // 60)
@@ -107,39 +103,35 @@ def format_timestamp_vtt(s):
     ms = int(round((s - int(s)) * 1000))
     return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
 def segments_to_srt(segments):
     parts = []
     for i, seg in enumerate(segments, start=1):
         start = seconds_to_timestamp(seg['start'])
         end = seconds_to_timestamp(seg['end'])
         text = seg['text'].strip()
-        parts.append(f"{i}
-{start} --> {end}
-{text}
-")
-    return "
-".join(parts)
 def segments_to_vtt(segments):
-    parts = ["WEBVTT
-"]
     for seg in segments:
         start = format_timestamp_vtt(seg['start'])
         end = format_timestamp_vtt(seg['end'])
         text = seg['text'].strip()
-        parts.append(f"{start} --> {end}
-{text}
-")
-    return "
-".join(parts)
 def segments_to_txt(segments):
-    return "
-".join([f"[{seconds_to_timestamp(seg['start'])}] {seg['text'].strip()}" for seg in segments])
 def segments_to_json(segments, language=None, metadata=None):
     data = {"language": language, "segments": segments}
@@ -147,7 +139,6 @@ def segments_to_json(segments, language=None, metadata=None):
         data["metadata"] = metadata
     return json.dumps(data, ensure_ascii=False, indent=2)
 # ---------------------------------------------------------------------------
 # Kern-Pipeline: Transkription
 # ---------------------------------------------------------------------------
@@ -160,16 +151,10 @@ def transcribe_pipeline(file_obj, url, model_size, keep_video=False, cookies_fil
     try:
         # Quelle bestimmen
         if url:
-            cookies_path = cookies_file if cookies_file and os.path.exists(cookies_file) else None
-            video_path = download_video_with_ytdlp(url, tmpdir, cookies_path=cookies_path, format_selector=format_selector)
         elif file_obj:
-            if isinstance(file_obj, str) and os.path.exists(file_obj):
-                video_path = file_obj
-            else:
-                uploaded_path = Path(tmpdir) / Path(getattr(file_obj, "name", "upload")).name
-                with open(uploaded_path, "wb") as f:
-                    f.write(file_obj.read())
-                video_path = str(uploaded_path)
         else:
             return "Kein Video angegeben.", None, None, None, None, None
@@ -184,9 +169,9 @@ def transcribe_pipeline(file_obj, url, model_size, keep_video=False, cookies_fil
         language = result.get("language", "unknown")
         # Ausgaben erzeugen
         srt_text = segments_to_srt(segments)
         vtt_text = segments_to_vtt(segments)
-        txt_text = segments_to_txt(segments)
         json_text = segments_to_json(segments, language, {"model": model_size})
         base = Path(video_path).stem
@@ -207,7 +192,6 @@ def transcribe_pipeline(file_obj, url, model_size, keep_video=False, cookies_fil
     except Exception as e:
         return f"Fehler: {e}", None, None, None, None, None
 # ---------------------------------------------------------------------------
 # Netzwerk / DNS Diagnose
 # ---------------------------------------------------------------------------
@@ -218,16 +202,15 @@ def dns_internet_diag():
     # DNS-Checks
     lines.append("=== DNS-Auflösung ===")
-    for host in ["huggingface.co", "www.google.com", "www.instagram.com"]:
         try:
             ip = socket.gethostbyname(host)
-            lines.append(f"{host} -> {ip}")
         except Exception as e:
             lines.append(f"{host} -> ERROR: {e}")
     # HTTP-Checks
-    lines.append("
-=== HTTP-Requests (GET) ===")
     for url in ["https://huggingface.co", "https://www.google.com", "https://www.instagram.com"]:
         try:
             with urllib.request.urlopen(url, timeout=5) as resp:
@@ -237,8 +220,7 @@ def dns_internet_diag():
             lines.append(f"{url} -> ERROR: {e}")
     # yt-dlp
-    lines.append("
-=== yt-dlp ===")
     try:
         out = run_capture(["yt-dlp", "--version"])
         lines.append(f"yt-dlp Version: {out.strip()}")
@@ -246,8 +228,7 @@ def dns_internet_diag():
         lines.append(f"yt-dlp Fehler: {e}")
     # ffmpeg
-    lines.append("
-=== ffmpeg ===")
     try:
         out = run_capture(["ffmpeg", "-version"])
         first = out.splitlines()[0] if out else "(keine Ausgabe)"
@@ -255,9 +236,7 @@ def dns_internet_diag():
     except Exception as e:
         lines.append(f"ffmpeg Fehler: {e}")
-    return "
-".join(lines)
 # ---------------------------------------------------------------------------
 # Gradio UI mit zwei Tabs
@@ -279,14 +258,19 @@ with gr.Blocks() as demo:
                 status = gr.Textbox(label="Status / Meta", interactive=False)
             with gr.Column():
                 transcript = gr.Textbox(label="Transkript", lines=20)
-                srt_dl = gr.File(label="SRT", visible=False)
-                vtt_dl = gr.File(label="VTT", visible=False)
-                txt_dl = gr.File(label="TXT", visible=False)
-                json_dl = gr.File(label="JSON", visible=False)
         def run_transcribe(f, u, m, k, c, fmt):
-            cookies_path = c if isinstance(c, str) and os.path.exists(c) else None
-            display, srtf, vttf, txtf, jsonf, meta = transcribe_pipeline(f, u, m, k, cookies_file=cookies_path, format_selector=(fmt or None))
             return (
                 display,
                 gr.update(value=srtf, visible=bool(srtf)),
@@ -306,16 +290,14 @@ with gr.Blocks() as demo:
         gr.Markdown(
             """Führt einfache Tests für DNS, HTTP sowie yt-dlp/ffmpeg aus.
-- Wenn z. B. `www.instagram.com` nicht auflösbar ist, liegt ein DNS-/Firewall-Problem vor.
-- Wenn Hugging Face / Google funktionieren, aber Instagram nicht, blockt vermutlich die Umgebung nur bestimmte Domains.
-            """
         )
         diag_btn = gr.Button("Diagnose starten")
         diag_out = gr.Textbox(label="Diagnose-Ausgabe", lines=25)
         diag_btn.click(dns_internet_diag, inputs=[], outputs=[diag_out])
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))

 #!/usr/bin/env python3
 # coding: utf-8
+""" Hugging Face Space (Gradio) App: Video -> Audio -> Whisper Transkript (+ Downloads SRT/TXT/VTT/JSON)
 Tab 1: Transkription
 - Video per URL (yt-dlp) oder Upload
 - Audio-Extraktion via ffmpeg
 - Transkription mit Whisper (lokal)
 - Downloads: SRT, VTT, TXT, JSON
 Tab 2: Netzwerk / DNS Diagnose
 - Testet DNS-Auflösung für mehrere Hosts
 - Testet HTTP-Requests auf Basis-URLs
 - Zeigt Version/Verfügbarkeit von yt-dlp und ffmpeg
+Hinweis: Verwende diese App nur für eigene oder freigegebene Inhalte. """
 import os
 import subprocess
 import tempfile
     """Run a command and return stdout; raise RuntimeError with readable stderr on failure."""
     result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
     if result.returncode != 0:
+        stderr_text = result.stderr or ""  # Keep only tail to avoid massive logs
         tail = stderr_text[-2000:]
+        # KORRIGIERT: Fehlermeldung korrekt zusammengebaut und abgeschlossen.
         raise RuntimeError("Command failed: " + " ".join(cmd) + " " + tail)
     return result.stdout
 # ---------------------------------------------------------------------------
 def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=None):
+    """Download a video with yt-dlp into out_dir and return the video path."""
     out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
     cmd = ["yt-dlp", "-o", out_template]
     if format_selector:
         raise FileNotFoundError("Download fehlgeschlagen — keine Datei gefunden.")
     return str(files[0])
 def extract_audio_ffmpeg(video_path, out_wav):
     """Extract the audio of ``video_path`` into a WAV file at ``out_wav``.

     ffmpeg is invoked with video disabled (-vn), one audio channel (-ac 1),
     a 16000 Hz sample rate (-ar 16000) and WAV output (-f wav); -y lets it
     overwrite an existing output file. ffmpeg's stdout and stderr are
     discarded.

     Returns:
         ``out_wav``, unchanged.

     Raises:
         subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
     """
     ffmpeg_args = [
         "ffmpeg", "-y",
         "-i", video_path,
         "-vn",
         "-ac", "1",
         "-ar", "16000",
         "-f", "wav",
         out_wav,
     ]
     subprocess.run(
         ffmpeg_args,
         stdout=subprocess.DEVNULL,
         stderr=subprocess.DEVNULL,
         check=True,
     )
     return out_wav
 # ---------------------------------------------------------------------------
 # Zeit- und Format-Helfer
 # ---------------------------------------------------------------------------
     ms = int(round((s - int(s)) * 1000))
     return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
 def format_timestamp_vtt(s):
     hours = int(s // 3600)
     minutes = int((s % 3600) // 60)
     ms = int(round((s - int(s)) * 1000))
     return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
 def segments_to_srt(segments):
+    """Format transcript segments as SRT text: numbered cues separated by blank lines."""
     parts = []
     for i, seg in enumerate(segments, start=1):
         start = seconds_to_timestamp(seg['start'])
         end = seconds_to_timestamp(seg['end'])
         text = seg['text'].strip()
+        # FIXED: build each SRT cue as index, time range and text on separate lines.
+        parts.append(f"{i}\n{start} --> {end}\n{text}")
+    # FIXED: join cues with a blank line between them; the result also ends with a blank line.
+    return "\n\n".join(parts) + "\n\n"
 def segments_to_vtt(segments):
+    """Format transcript segments as WebVTT text: a WEBVTT header followed by one cue per segment."""
+    # FIXED: correct header followed by a line break.
+    parts = ["WEBVTT\n"]
     for seg in segments:
         start = format_timestamp_vtt(seg['start'])
         end = format_timestamp_vtt(seg['end'])
         text = seg['text'].strip()
+        # FIXED: build each VTT cue as time range and text on separate lines.
+        parts.append(f"{start} --> {end}\n{text}")
+    # FIXED: join the header and cues with two line breaks.
+    return "\n\n".join(parts)
 def segments_to_txt(segments):
+    """Format segments as plain text, one '[timestamp] text' line per segment."""
+    # IMPROVED: separate segments with a line break instead of a space for better readability.
+    return "\n".join([f"[{seconds_to_timestamp(seg['start'])}] {seg['text'].strip()}" for seg in segments])
 def segments_to_json(segments, language=None, metadata=None):
     data = {"language": language, "segments": segments}
         data["metadata"] = metadata
     return json.dumps(data, ensure_ascii=False, indent=2)
 # ---------------------------------------------------------------------------
 # Kern-Pipeline: Transkription
 # ---------------------------------------------------------------------------
     try:
         # Quelle bestimmen
         if url:
+            video_path = download_video_with_ytdlp(url, tmpdir, cookies_path=cookies_file, format_selector=format_selector)
         elif file_obj:
+            # Gradio übergibt ein temporäres Dateiobjekt, dessen .name Attribut der Pfad ist.
+            video_path = file_obj.name
         else:
             return "Kein Video angegeben.", None, None, None, None, None
         language = result.get("language", "unknown")
         # Ausgaben erzeugen
+        txt_text = segments_to_txt(segments)
         srt_text = segments_to_srt(segments)
         vtt_text = segments_to_vtt(segments)
         json_text = segments_to_json(segments, language, {"model": model_size})
         base = Path(video_path).stem
     except Exception as e:
         return f"Fehler: {e}", None, None, None, None, None
 # ---------------------------------------------------------------------------
 # Netzwerk / DNS Diagnose
 # ---------------------------------------------------------------------------
     # DNS-Checks
     lines.append("=== DNS-Auflösung ===")
+    for host in ["huggingface.co", "www.google.com", "www.instagram.com", "youtube.com"]:
         try:
             ip = socket.gethostbyname(host)
+            lines.append(f"{host} -> {ip} (OK)")
         except Exception as e:
             lines.append(f"{host} -> ERROR: {e}")
     # HTTP-Checks
+    lines.append("\n\n=== HTTP-Requests (GET) ===")
     for url in ["https://huggingface.co", "https://www.google.com", "https://www.instagram.com"]:
         try:
             with urllib.request.urlopen(url, timeout=5) as resp:
             lines.append(f"{url} -> ERROR: {e}")
     # yt-dlp
+    lines.append("\n\n=== yt-dlp ===")
     try:
         out = run_capture(["yt-dlp", "--version"])
         lines.append(f"yt-dlp Version: {out.strip()}")
         lines.append(f"yt-dlp Fehler: {e}")
     # ffmpeg
+    lines.append("\n\n=== ffmpeg ===")
     try:
         out = run_capture(["ffmpeg", "-version"])
         first = out.splitlines()[0] if out else "(keine Ausgabe)"
     except Exception as e:
         lines.append(f"ffmpeg Fehler: {e}")
+    return "\n".join(lines)
 # ---------------------------------------------------------------------------
 # Gradio UI mit zwei Tabs
                 status = gr.Textbox(label="Status / Meta", interactive=False)
             with gr.Column():
                 transcript = gr.Textbox(label="Transkript", lines=20)
+                srt_dl = gr.File(label="SRT")
+                vtt_dl = gr.File(label="VTT")
+                txt_dl = gr.File(label="TXT")
+                json_dl = gr.File(label="JSON")
         def run_transcribe(f, u, m, k, c, fmt):
+            # KORRIGIERT: Korrekte Handhabung des Gradio-Dateiobjekts für Cookies.
+            # Wir holen den Pfad über das .name Attribut.
+            cookies_path = c.name if c else None
+            display, srtf, vttf, txtf, jsonf, meta = transcribe_pipeline(
+                f, u, m, k, cookies_file=cookies_path, format_selector=(fmt or None)
+            )
             return (
                 display,
                 gr.update(value=srtf, visible=bool(srtf)),
         gr.Markdown(
             """Führt einfache Tests für DNS, HTTP sowie yt-dlp/ffmpeg aus.
+            Wenn z. B. www.instagram.com nicht auflösbar ist, liegt ein DNS-/Firewall-Problem vor.
+            Wenn Hugging Face / Google funktionieren, aber Instagram nicht, blockt vermutlich die Umgebung nur bestimmte Domains."""
         )
         diag_btn = gr.Button("Diagnose starten")
         diag_out = gr.Textbox(label="Diagnose-Ausgabe", lines=25)
         diag_btn.click(dns_internet_diag, inputs=[], outputs=[diag_out])
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))