# Scraped page header (not part of the program): neuralworm, "Update app.py",
# commit 11c876d (verified), 8.41 kB.
#!/usr/bin/env python3
# coding: utf-8
""" Hugging Face Space (Gradio) App: Video -> Audio -> Whisper Transkript (+ Downloads SRT/TXT/VTT/JSON)
Videos given by URL are fetched with yt-dlp (run as a Python module); audio is
extracted with ffmpeg and transcribed with Whisper.
"""
import json
import os
import shutil
import socket
import subprocess
import sys
import tempfile
import urllib.request
from datetime import timedelta
from pathlib import Path
from urllib.parse import urlparse

import gradio as gr

try:
    import whisper
except ImportError:
    whisper = None

try:
    from dns import resolver as dns_resolver
except ImportError:
    dns_resolver = None
# ---------------------------------------------------------------------------
# Invocation method: run yt-dlp as a module of the current interpreter
# ---------------------------------------------------------------------------
# Base argv for every yt-dlp call: "<this python executable> -m yt_dlp".
YT_DLP_COMMAND = [sys.executable, "-m", "yt_dlp"]
# Executable name used as argv[0] when extracting audio with ffmpeg.
FFMPEG_PATH = "ffmpeg"
# ---------------------------------------------------------------------------
# Helper: Shell
# ---------------------------------------------------------------------------
def run_capture(cmd):
    """Execute *cmd* and hand back everything it wrote to stdout.

    Both output streams are captured as text. A non-zero exit status raises
    RuntimeError whose message holds the command line followed by the last
    2000 characters of stderr.
    """
    completed = subprocess.run(cmd, capture_output=True, text=True)
    if completed.returncode == 0:
        return completed.stdout

    # Only the end of stderr is kept so the error stays readable.
    stderr_tail = (completed.stderr or "")[-2000:]
    command_line = " ".join(str(part) for part in cmd)
    raise RuntimeError(f"Command failed: {command_line} {stderr_tail}")
# Helper: DNS resolution (dnspython with public resolvers, falling back to the system resolver)
def resolve_hostname_with_dns_python(hostname):
    """Return an IPv4 address string for *hostname*.

    When dnspython is importable, an A-record lookup is made against the
    nameservers 8.8.8.8 and 1.1.1.1; an empty answer yields None. If
    dnspython is missing, or the lookup raises for any reason, the result of
    ``socket.gethostbyname`` is returned instead (which may itself raise).
    """
    if not dns_resolver:
        return socket.gethostbyname(hostname)

    try:
        public_resolver = dns_resolver.Resolver()
        public_resolver.nameservers = ['8.8.8.8', '1.1.1.1']
        records = public_resolver.resolve(hostname, 'A')
        if not records:
            return None
        return records[0].to_text()
    except Exception:
        # Any resolver problem falls back to the system's own lookup.
        return socket.gethostbyname(hostname)
# ---------------------------------------------------------------------------
# Download: fetch the video with yt-dlp
# ---------------------------------------------------------------------------
def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=None):
    """Download *url* into *out_dir* with yt-dlp and return the resulting file path.

    Args:
        url: Address of the video page/stream to download.
        out_dir: Directory that receives the download (named "<title>.<ext>").
        cookies_path: Optional cookies file passed to yt-dlp via ``--cookies``.
        format_selector: Optional yt-dlp format expression passed via ``-f``.

    Returns:
        Path (as ``str``) of the most recently modified entry in *out_dir*.

    Raises:
        RuntimeError: yt-dlp exited with a non-zero status (from ``run_capture``).
        FileNotFoundError: yt-dlp succeeded but *out_dir* is empty.
    """
    out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
    cmd = YT_DLP_COMMAND + ["-o", out_template]

    # Best-effort DNS pre-check, logged for diagnostics only. The address is
    # NOT handed to yt-dlp: yt-dlp has no "--force-ip <addr>" option (it only
    # offers the --force-ipv4 / --force-ipv6 flags and --source-address), so
    # passing it made yt-dlp reject the command line during argument parsing.
    try:
        hostname = urlparse(url).hostname
        if hostname:
            ip_address = resolve_hostname_with_dns_python(hostname)
            if ip_address:
                print(f"Resolved {hostname} to {ip_address}.")
    except Exception as e:
        print(f"Custom DNS resolution failed, proceeding without it. Error: {e}")

    if format_selector:
        cmd += ["-f", format_selector]
    if cookies_path:
        cmd += ["--cookies", cookies_path]

    # "--" ends option parsing, so a user-supplied URL that starts with "-"
    # can never be interpreted as a yt-dlp option.
    cmd += ["--", url]

    print(f"Running command: {' '.join(map(str, cmd))}")
    run_capture(cmd)

    # The newest entry in the output directory is taken to be the download.
    downloaded = sorted(Path(out_dir).glob("*"), key=lambda p: p.stat().st_mtime, reverse=True)
    if not downloaded:
        raise FileNotFoundError("Download fehlgeschlagen — keine Datei gefunden.")
    return str(downloaded[0])
def extract_audio_ffmpeg(video_path, out_wav):
    """Convert the audio track of *video_path* into a WAV file at *out_wav*.

    ffmpeg is asked to overwrite an existing output (-y), drop video (-vn),
    downmix to one channel (-ac 1) and resample to 16000 Hz (-ar 16000).
    ffmpeg's own output is discarded; a non-zero exit status raises
    ``subprocess.CalledProcessError``. Returns *out_wav*.
    """
    source_args = ["-y", "-i", video_path]
    audio_args = ["-vn", "-ac", "1", "-ar", "16000", "-f", "wav"]
    ffmpeg_cmd = [FFMPEG_PATH] + source_args + audio_args + [out_wav]
    subprocess.check_call(ffmpeg_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return out_wav
# Helpers: timestamp formatting and transcript serialisation (SRT / VTT / TXT / JSON)
def seconds_to_timestamp(s):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds first and then split into
    fields, so a fraction that rounds up carries into the seconds (and
    minutes/hours) instead of producing a four-digit millisecond field such
    as ``00:00:01,1000``. Negative inputs are clamped to zero.
    """
    total_ms = max(0, int(round(s * 1000)))
    total_seconds, millis = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
def format_timestamp_vtt(s):
    """Format a time offset in seconds as a WebVTT timestamp ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds first and then split into
    fields, so a fraction that rounds up carries into the seconds (and
    minutes/hours) instead of producing a four-digit millisecond field such
    as ``00:00:01.1000``. Negative inputs are clamped to zero.
    """
    total_ms = max(0, int(round(s * 1000)))
    total_seconds, millis = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"
def segments_to_srt(segments):
    """Render *segments* as SubRip (SRT) text.

    Each segment is a mapping with 'start', 'end' and 'text' keys. Cues are
    numbered from 1, separated by a blank line, and the result always ends
    with a blank line.
    """
    cues = []
    for number, segment in enumerate(segments, start=1):
        begin = seconds_to_timestamp(segment['start'])
        finish = seconds_to_timestamp(segment['end'])
        caption = segment['text'].strip()
        cues.append(f"{number}\n{begin} --> {finish}\n{caption}")
    return "\n\n".join(cues) + "\n\n"
def segments_to_vtt(segments):
    """Render *segments* as WebVTT text.

    Output starts with the "WEBVTT" header line; every segment (a mapping
    with 'start', 'end' and 'text' keys) becomes one un-numbered cue, and all
    parts are joined with a blank line.
    """
    blocks = ["WEBVTT\n"]
    for segment in segments:
        begin = format_timestamp_vtt(segment['start'])
        finish = format_timestamp_vtt(segment['end'])
        caption = segment['text'].strip()
        blocks.append(f"{begin} --> {finish}\n{caption}")
    return "\n\n".join(blocks)
def segments_to_txt(segments):
    """Render *segments* as plain text, one "[HH:MM:SS,mmm] text" line each.

    The bracketed prefix is the segment's start time; the segment text is
    stripped of surrounding whitespace.
    """
    rendered = []
    for segment in segments:
        stamp = seconds_to_timestamp(segment['start'])
        rendered.append(f"[{stamp}] {segment['text'].strip()}")
    return "\n".join(rendered)
def segments_to_json(segments, lang=None, meta=None):
    """Serialise *segments* plus language and optional metadata as a JSON string.

    Args:
        segments: JSON-serialisable list of segment mappings, stored as-is.
        lang: Language value stored under "language" (``None`` becomes null).
        meta: Optional mapping stored under "metadata"; omitted when falsy.

    Returns:
        Pretty-printed JSON (2-space indent) with non-ASCII characters kept
        verbatim rather than escaped.
    """
    data = {"language": lang, "segments": segments}
    # Plain conditional instead of a throw-away list built only for its side effect.
    if meta:
        data["metadata"] = meta
    return json.dumps(data, ensure_ascii=False, indent=2)
def transcribe_pipeline(f, u, m, k, c, fmt):
    """Run the full pipeline: obtain video -> extract audio -> Whisper -> write files.

    Args:
        f: Uploaded video, either an object exposing its path as ``.name`` or
            a plain path string. Only used when *u* is empty.
        u: Video URL to download with yt-dlp; takes precedence over *f*.
        m: Whisper model name passed to ``whisper.load_model``.
        k: If false, a video downloaded from *u* is deleted after transcription.
        c: Path of a cookies file for yt-dlp, or None.
        fmt: yt-dlp format selector, or None.

    Returns:
        A 6-tuple ``(text, srt_path, vtt_path, txt_path, json_path, meta)``.
        On any failure the first element is an error message and the other
        five are None; no exception escapes.
    """
    failure_tail = (None,) * 5
    if whisper is None:
        return ("Fehler: whisper ist nicht installiert.", *failure_tail)

    # Validate the inputs before touching the filesystem. Previously a call
    # without URL and without upload crashed on ``None.name`` and the
    # "Kein Video angegeben." message could never be returned.
    url = (u or "").strip()
    upload_path = getattr(f, "name", f) if f else None
    if not url and not upload_path:
        return ("Kein Video angegeben.", *failure_tail)

    tmpdir = tempfile.mkdtemp(prefix="whisper_space_")
    try:
        if url:
            video_path = download_video_with_ytdlp(url, tmpdir, c, fmt)
        else:
            video_path = str(upload_path)

        audio_wav = str(Path(tmpdir) / "audio.wav")
        try:
            extract_audio_ffmpeg(video_path, audio_wav)
            # NOTE: the model is loaded on every call.
            model = whisper.load_model(m)
            result = model.transcribe(audio_wav, verbose=False)
        finally:
            # The intermediate WAV is not part of the result; drop it either way.
            if os.path.exists(audio_wav):
                os.remove(audio_wav)

        segs = result.get("segments", [])
        lang = result.get("language", "unknown")
        contents = {
            "srt": segments_to_srt(segs),
            "vtt": segments_to_vtt(segs),
            "txt": segments_to_txt(segs),
            "json": segments_to_json(segs, lang, {"model": m}),
        }

        base = Path(video_path).stem
        files = {}
        for ext, content in contents.items():
            target = Path(tmpdir) / f"{base}.{ext}"
            target.write_text(content, encoding="utf-8")
            files[ext] = str(target)

        # Only a video we downloaded ourselves is removed, never an upload.
        if url and not k and os.path.exists(video_path):
            os.remove(video_path)

        meta = f"Model: {m}, Sprache: {lang}"
        return contents["txt"], files["srt"], files["vtt"], files["txt"], files["json"], meta
    except Exception as e:
        # Nothing in the working directory is returned on failure, so remove it
        # instead of leaking a (possibly large) download per failed request.
        shutil.rmtree(tmpdir, ignore_errors=True)
        return (f"Fehler: {e}", *failure_tail)
def dns_internet_diag():
    """Build a plain-text diagnostic report and return it as one string.

    The report lists the Python executable, the version printed by the yt-dlp
    module (or the error raised while asking for it), and the outcome of
    resolving a fixed set of hostnames. Failures are written into the report
    instead of being raised.
    """
    report = [
        "=== Python & Version Info ===",
        f"Python Executable: {sys.executable}",
    ]

    try:
        version_cmd = YT_DLP_COMMAND + ["--version"]
        reported = run_capture(version_cmd).strip()
        report.append(f"Version via '{' '.join(version_cmd)}': {reported}")
    except Exception as err:
        report.append(f"Fehler bei der Prüfung der yt-dlp Modul-Version: {err}")

    report.append("\n\n=== DNS-Auflösung (via dnspython mit 8.8.8.8) ===")
    for host in ("huggingface.co", "www.instagram.com", "youtube.com"):
        try:
            address = resolve_hostname_with_dns_python(host)
        except Exception as err:
            entry = f"{host} -> ERROR: {err}"
        else:
            entry = f"{host} -> {address} (OK)"
        report.append(entry)

    return "\n".join(report)
# Gradio UI: one tab for transcription, one for network diagnostics.
with gr.Blocks() as demo:
    gr.Markdown("# Video → Whisper Transkript (SRT/TXT/VTT/JSON)")

    with gr.Tab("Transkription"):
        with gr.Row():
            # Left column: inputs, start button and status line.
            with gr.Column():
                url_in = gr.Textbox(label="Video URL", placeholder="https://...")
                file_in = gr.File(label="Oder Videodatei hochladen")
                cookies_in = gr.File(label="Cookies.txt (optional, für yt-dlp)")
                fmt_in = gr.Textbox(
                    label="Format (optional, yt-dlp -f)",
                    placeholder="z.B. bestvideo+bestaudio/best",
                )
                model_sel = gr.Radio(
                    ["tiny", "base", "small", "medium", "large"],
                    value="small",
                    label="Whisper-Modell",
                )
                keep_chk = gr.Checkbox(label="Video behalten (bei URL-Download)", value=False)
                btn = gr.Button("Transkribieren")
                status = gr.Textbox(label="Status / Meta", interactive=False)
            # Right column: transcript text and the four download slots.
            with gr.Column():
                transcript = gr.Textbox(label="Transkript", lines=20)
                srt_dl = gr.File(label="SRT")
                vtt_dl = gr.File(label="VTT")
                txt_dl = gr.File(label="TXT")
                json_dl = gr.File(label="JSON")

        def run_transcribe(f, u, m, k, c, fmt):
            """Adapt the UI inputs for transcribe_pipeline and wrap its file outputs."""

            def as_download(path):
                # A download slot is only shown when there is a file for it.
                return gr.update(value=path, visible=bool(path))

            cookies_path = None
            if c:
                cookies_path = c.name
            text, srt_path, vtt_path, txt_path, json_path, meta = transcribe_pipeline(
                f, u, m, k, cookies_path, (fmt or None)
            )
            return (
                text,
                as_download(srt_path),
                as_download(vtt_path),
                as_download(txt_path),
                as_download(json_path),
                meta,
            )

        btn.click(
            run_transcribe,
            [file_in, url_in, model_sel, keep_chk, cookies_in, fmt_in],
            [transcript, srt_dl, vtt_dl, txt_dl, json_dl, status],
        )

    with gr.Tab("Netzwerk / DNS Diagnose"):
        gr.Markdown("""Prüft die Version von yt-dlp, wie sie von Python als Modul ausgeführt wird.""")
        diag_btn = gr.Button("Diagnose starten")
        diag_out = gr.Textbox(label="Diagnose-Ausgabe", lines=25)
        diag_btn.click(dns_internet_diag, inputs=[], outputs=[diag_out])
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT when set, else 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)