Spaces:
Sleeping
Sleeping
File size: 10,924 Bytes
11c876d 6c588c4 f862cfc f137403 eb37fc2 6c588c4 1bac39b abecc06 eb37fc2 1bac39b a9d392f d926f18 6c588c4 abecc06 6c588c4 abecc06 6c588c4 eb37fc2 f137403 eb37fc2 79ee34b 11c876d eb37fc2 1bac39b 6c588c4 1ae567a 6c588c4 abecc06 1ae567a eb37fc2 6c588c4 79ee34b f137403 abecc06 f137403 abecc06 f137403 eb37fc2 f137403 6c588c4 1bac39b f137403 1bac39b f137403 6c588c4 79ee34b abecc06 79ee34b abecc06 f137403 abecc06 79ee34b abecc06 79ee34b 6c588c4 abecc06 eb37fc2 1bac39b 6c588c4 79ee34b 6c588c4 f137403 eb37fc2 6c588c4 f137403 79ee34b f137403 eb37fc2 6c588c4 f137403 1bac39b 79ee34b eb37fc2 79ee34b f137403 79ee34b 11c876d 79ee34b f137403 79ee34b f137403 f862cfc f137403 6c588c4 f137403 1bac39b f137403 1bac39b f137403 1bac39b f137403 6c588c4 f862cfc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 |
#!/usr/bin/env python3
# coding: utf-8
""" Hugging Face Space (Gradio) App: Video -> Audio -> Whisper Transkript (+ Downloads SRT/TXT/VTT/JSON)
DNS workaround: yt-dlp is invoked with its --force-ipv4 option to sidestep DNS problems.
"""
import os
import subprocess
import tempfile
import json
from pathlib import Path
from datetime import timedelta
import socket
import urllib.request
from urllib.parse import urlparse
import sys
import gradio as gr
try:
import whisper
except ImportError:
whisper = None
try:
from dns import resolver as dns_resolver
except ImportError:
dns_resolver = None
# ---------------------------------------------------------------------------
# Invocation method: run yt-dlp as a module to avoid PATH conflicts
# ---------------------------------------------------------------------------
# yt-dlp is started as "<current interpreter> -m yt_dlp", so the copy installed
# for this Python is used rather than whatever "yt-dlp" script is on PATH.
YT_DLP_COMMAND = [sys.executable, "-m", "yt_dlp"]
# Used as the first argv element of the ffmpeg subprocess call; as a bare name
# it is presumably resolved through PATH by the operating system.
FFMPEG_PATH = "ffmpeg"
# ---------------------------------------------------------------------------
# Helper: Shell
# ---------------------------------------------------------------------------
def run_capture(cmd):
    """Execute *cmd* and hand back everything it wrote to standard output.

    Args:
        cmd: Argument vector given to ``subprocess.run`` (no shell is involved).

    Returns:
        The decoded stdout text of the finished process.

    Raises:
        RuntimeError: When the exit status is non-zero. The message contains
            the command line followed by at most the last 2000 characters of
            stderr.
    """
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode == 0:
        return proc.stdout
    # Keep only the end of stderr so the error message stays readable.
    err_tail = (proc.stderr or "")[-2000:]
    command_line = " ".join(str(part) for part in cmd)
    raise RuntimeError("Command failed: " + command_line + " " + err_tail)
# ---------------------------------------------------------------------------
# DNS helpers
# ---------------------------------------------------------------------------
def resolve_hostname_with_dns_python(hostname):
    """Look up an IPv4 address for *hostname*, preferring public DNS servers.

    When dnspython is available, an A-record query is sent through a resolver
    configured with 8.8.8.8 and 1.1.1.1. If dnspython is missing, or the query
    raises for any reason, the system resolver (``socket.gethostbyname``) is
    used instead.

    Args:
        hostname: Host name to resolve.

    Returns:
        The textual form of the first A record, ``None`` if the answer is
        empty, or whatever ``socket.gethostbyname`` returns on the fallback
        path.

    Raises:
        Whatever ``socket.gethostbyname`` raises when the fallback lookup fails.
    """
    if dns_resolver:
        try:
            public_dns = dns_resolver.Resolver()
            public_dns.nameservers = ['8.8.8.8', '1.1.1.1']  # Google & Cloudflare DNS
            # Ask explicitly for A records (IPv4), matching --force-ipv4.
            records = public_dns.resolve(hostname, 'A')
            if not records:
                return None
            return records[0].to_text()
        except Exception:
            # Last resort when the external DNS query fails: system DNS.
            return socket.gethostbyname(hostname)
    # dnspython is not installed: fall back to the system DNS.
    return socket.gethostbyname(hostname)
# ---------------------------------------------------------------------------
# Download & audio extraction
# ---------------------------------------------------------------------------
def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=None):
    """Download *url* into *out_dir* with yt-dlp and return the downloaded file.

    Args:
        url: Address of the video page or media file.
        out_dir: Directory that receives the download.
        cookies_path: Optional path of a cookies file, passed via ``--cookies``.
        format_selector: Optional yt-dlp format expression, passed via ``-f``.

    Returns:
        Path (as ``str``) of the most recently modified regular file found in
        *out_dir* after yt-dlp has finished.

    Raises:
        RuntimeError: Raised by ``run_capture`` when yt-dlp exits non-zero.
        FileNotFoundError: If *out_dir* contains no file after the run.
    """
    out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
    cmd = YT_DLP_COMMAND + ["-o", out_template]

    # Best-effort pre-check: if the host resolves to an IPv4 address, tell
    # yt-dlp to use IPv4 only. Any failure here must not block the download.
    try:
        hostname = urlparse(url).hostname
        if hostname:
            ip_address = resolve_hostname_with_dns_python(hostname)
            if ip_address:
                print(f"Resolved {hostname} to IPv4 {ip_address}. Using --force-ipv4.")
                # --force-ipv4 is a bare switch. Passing the IP after it made
                # yt-dlp treat the address as a second URL to download.
                cmd.append("--force-ipv4")
    except Exception as e:
        print(f"Custom DNS resolution failed, proceeding without it. Error: {e}")

    if format_selector:
        cmd += ["-f", format_selector]
    if cookies_path:
        cmd += ["--cookies", cookies_path]
    # "--" ends option parsing, so a user-supplied URL that starts with "-"
    # cannot be interpreted as a yt-dlp option.
    cmd += ["--", url]

    print(f"Running command: {' '.join(map(str, cmd))}")
    run_capture(cmd)

    # Only regular files qualify as the download result.
    candidates = [p for p in Path(out_dir).glob("*") if p.is_file()]
    if not candidates:
        raise FileNotFoundError("Download fehlgeschlagen — keine Datei gefunden.")
    newest = max(candidates, key=lambda p: p.stat().st_mtime)
    return str(newest)
def extract_audio_ffmpeg(video_path, out_wav):
    """Write the audio of *video_path* to *out_wav* as a mono 16 kHz WAV file.

    Args:
        video_path: Input media file handed to ffmpeg via ``-i``.
        out_wav: Destination path of the WAV file.

    Returns:
        *out_wav*, unchanged.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
            (its output is discarded).
    """
    ffmpeg_args = [
        FFMPEG_PATH,
        "-y",
        "-i", video_path,
        "-vn",
        "-ac", "1",
        "-ar", "16000",
        "-f", "wav",
        out_wav,
    ]
    subprocess.run(
        ffmpeg_args,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=True,
    )
    return out_wav
# ---------------------------------------------------------------------------
# Time and subtitle formatters
# ---------------------------------------------------------------------------
def seconds_to_timestamp(s):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds first and split into fields
    afterwards, so a fraction that rounds up (e.g. 1.9996 s) carries into the
    seconds field instead of producing a four-digit millisecond part.

    Args:
        s: Offset in seconds (int or float). Negative values are clamped to 0.

    Returns:
        The formatted timestamp string.
    """
    total_ms = max(0, int(round(s * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, sec = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{sec:02d},{ms:03d}"
def format_timestamp_vtt(s):
    """Format a time offset in seconds as a WebVTT timestamp ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds first and split into fields
    afterwards, so a fraction that rounds up (e.g. 1.9996 s) carries into the
    seconds field instead of producing a four-digit millisecond part.

    Args:
        s: Offset in seconds (int or float). Negative values are clamped to 0.

    Returns:
        The formatted timestamp string.
    """
    total_ms = max(0, int(round(s * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, sec = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{sec:02d}.{ms:03d}"
def segments_to_srt(segments):
    """Render *segments* as SRT text.

    Each segment becomes a cue made of a 1-based counter, a
    ``start --> end`` line built with ``seconds_to_timestamp`` and the
    stripped segment text. Cues are separated by a blank line and the result
    always ends with two newlines.

    Args:
        segments: Iterable of mappings with ``'start'``, ``'end'`` and
            ``'text'`` entries.

    Returns:
        The SRT document as one string.
    """
    cues = []
    for index, seg in enumerate(segments, start=1):
        begin = seconds_to_timestamp(seg['start'])
        finish = seconds_to_timestamp(seg['end'])
        body = seg['text'].strip()
        cues.append(f"{index}\n{begin} --> {finish}\n{body}")
    return "\n\n".join(cues) + "\n\n"
def segments_to_vtt(segments):
    """Render *segments* as WebVTT text.

    The output starts with the ``WEBVTT`` header line, followed by one cue per
    segment: a ``start --> end`` line built with ``format_timestamp_vtt`` and
    the stripped segment text.

    Args:
        segments: Iterable of mappings with ``'start'``, ``'end'`` and
            ``'text'`` entries.

    Returns:
        The WebVTT document as one string.
    """
    chunks = ["WEBVTT\n"]
    for seg in segments:
        begin = format_timestamp_vtt(seg['start'])
        finish = format_timestamp_vtt(seg['end'])
        body = seg['text'].strip()
        chunks.append(f"{begin} --> {finish}\n{body}")
    return "\n\n".join(chunks)
def segments_to_txt(segments):
    """Render *segments* as plain text, one ``[timestamp] text`` line each.

    The timestamp is the segment start formatted by ``seconds_to_timestamp``;
    the text is stripped of surrounding whitespace.

    Args:
        segments: Iterable of mappings with ``'start'`` and ``'text'`` entries.

    Returns:
        The lines joined with newlines (empty string for no segments).
    """
    rendered = []
    for seg in segments:
        stamp = seconds_to_timestamp(seg['start'])
        spoken = seg['text'].strip()
        rendered.append(f"[{stamp}] {spoken}")
    return "\n".join(rendered)
def segments_to_json(segments, lang=None, meta=None):
    """Serialize the transcript to a pretty-printed JSON string.

    Args:
        segments: Segment list stored under the ``"segments"`` key.
        lang: Language value stored under the ``"language"`` key.
        meta: Optional metadata; stored under ``"metadata"`` only when truthy.

    Returns:
        JSON text indented by two spaces, with non-ASCII characters kept as-is.
    """
    payload = {"language": lang, "segments": segments}
    if meta:
        payload["metadata"] = meta
    return json.dumps(payload, ensure_ascii=False, indent=2)
# ---------------------------------------------------------------------------
# Core pipeline: transcription
# ---------------------------------------------------------------------------
def transcribe_pipeline(file_obj, url, model_size, keep_video, cookies_file, format_selector):
    """Run the pipeline: obtain video, extract audio, transcribe, write files.

    Args:
        file_obj: Uploaded file object; its ``.name`` attribute is used as the
            video path when no URL is given.
        url: Video URL; takes precedence over *file_obj* when non-empty.
        model_size: Whisper model name passed to ``whisper.load_model``.
        keep_video: When false, a video downloaded from *url* is deleted after
            the transcript files have been written.
        cookies_file: Optional cookies file path forwarded to the downloader.
        format_selector: Optional yt-dlp format expression forwarded to the
            downloader.

    Returns:
        A 6-tuple ``(transcript_text, srt_path, vtt_path, txt_path, json_path,
        meta)``. On any failure the first element is an error message and the
        remaining five are ``None``; no exception escapes once the working
        directory exists.
    """
    # Function-scope import: shutil is only needed for cleanup on failure.
    import shutil

    no_results = (None,) * 5
    if whisper is None:
        return ("Fehler: whisper ist nicht installiert.",) + no_results

    # Validate the input before creating a working directory, so a request
    # without input does not leave an empty temp directory behind.
    url = (url or "").strip()
    if not url and not file_obj:
        return ("Kein Video angegeben.",) + no_results

    tmpdir = tempfile.mkdtemp(prefix="whisper_space_")
    try:
        if url:
            video_path = download_video_with_ytdlp(url, tmpdir, cookies_file, format_selector)
        else:
            video_path = file_obj.name

        audio_wav = str(Path(tmpdir) / "audio.wav")
        extract_audio_ffmpeg(video_path, audio_wav)

        model = whisper.load_model(model_size)
        result = model.transcribe(audio_wav, verbose=False)
        segs = result.get("segments", [])
        lang = result.get("language", "unknown")

        txt = segments_to_txt(segs)
        contents = {
            "srt": segments_to_srt(segs),
            "vtt": segments_to_vtt(segs),
            "txt": txt,
            "json": segments_to_json(segs, lang, {"model": model_size}),
        }

        # Output files share the video's base name and live in the temp dir.
        # The directory is kept on success because the returned paths point
        # into it.
        base = Path(video_path).stem
        files = {}
        for ext, content in contents.items():
            p = Path(tmpdir) / f"{base}.{ext}"
            p.write_text(content, encoding="utf-8")
            files[ext] = str(p)

        if not keep_video and url:
            # Best effort: a video that cannot be removed is not an error.
            try:
                os.remove(video_path)
            except OSError:
                pass

        meta = f"Model: {model_size}, Sprache: {lang}"
        return txt, files["srt"], files["vtt"], files["txt"], files["json"], meta
    except Exception as e:
        # Nothing in the temp dir is handed back on failure, so remove it
        # instead of leaking downloads and audio on every failed request.
        shutil.rmtree(tmpdir, ignore_errors=True)
        return (f"Fehler: {e}",) + no_results
# ---------------------------------------------------------------------------
# Network diagnostics tab
# ---------------------------------------------------------------------------
def dns_internet_diag():
    """Build a plain-text diagnostics report.

    The report lists the Python executable, the version printed by yt-dlp when
    run as a module, and the result of resolving a fixed set of host names via
    ``resolve_hostname_with_dns_python``. Failures are written into the report
    instead of being raised.

    Returns:
        The report lines joined with newlines.
    """
    report = [
        "=== Python & Version Info ===",
        f"Python Executable: {sys.executable}",
    ]

    try:
        # Run yt-dlp as a module to see the version actually installed by pip.
        version_cmd = YT_DLP_COMMAND + ["--version"]
        reported_version = run_capture(version_cmd).strip()
        shown_cmd = ' '.join(version_cmd)
        report.append(f"Version via '{shown_cmd}': {reported_version}")
    except Exception as exc:
        report.append(f"Fehler bei der Prüfung der yt-dlp Modul-Version: {exc}")

    report.append("\n\n=== DNS-Auflösung (via dnspython mit 8.8.8.8) ===")
    for host in ("huggingface.co", "www.instagram.com", "youtube.com"):
        try:
            address = resolve_hostname_with_dns_python(host)
        except Exception as exc:
            entry = f"{host} -> ERROR: {exc}"
        else:
            entry = f"{host} -> {address} (OK)"
        report.append(entry)

    return "\n".join(report)
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
# NOTE(review): the nesting below was reconstructed from a copy of the file
# whose indentation was lost; the split between the two columns and the
# placement of the wrapper/click wiring inside the first tab are assumptions
# to confirm against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown("# Video → Whisper Transkript (SRT/TXT/VTT/JSON)")
    # First tab: transcription form and results.
    with gr.Tab("Transkription"):
        with gr.Row():
            # Input widgets.
            with gr.Column():
                url_in=gr.Textbox(label="Video URL",placeholder="https://...")
                file_in=gr.File(label="Oder Videodatei hochladen")
                cookies_in=gr.File(label="Cookies.txt (optional, für yt-dlp)")
                fmt_in=gr.Textbox(label="Format (optional, yt-dlp -f)",placeholder="z.B. bestvideo+bestaudio/best")
                model_sel=gr.Radio(["tiny","base","small","medium","large"],value="small",label="Whisper-Modell")
                keep_chk=gr.Checkbox(label="Video behalten (bei URL-Download)",value=False)
                btn=gr.Button("Transkribieren")
                status=gr.Textbox(label="Status / Meta",interactive=False)
            # Output widgets: transcript text plus one file slot per format.
            with gr.Column():
                transcript=gr.Textbox(label="Transkript",lines=20)
                srt_dl=gr.File(label="SRT")
                vtt_dl=gr.File(label="VTT")
                txt_dl=gr.File(label="TXT")
                json_dl=gr.File(label="JSON")
        def run_transcribe_wrapper(f, u, m, k, c, fmt):
            """Adapt the widget values to transcribe_pipeline and its result to the outputs.

            Args are, in order: uploaded video file, URL, model name,
            keep-video flag, uploaded cookies file, format string.
            Returns the transcript text, four updates for the file widgets
            and the status text.
            """
            # The pipeline expects a path (or None), not the upload object.
            cookies_path = c.name if c else None
            # An empty format string is passed on as None.
            display, srt, vtt, txt, jsn, meta = transcribe_pipeline(f, u, m, k, cookies_path, (fmt or None))
            # Each file widget is shown only when a path was produced.
            return (
                display,
                gr.update(value=srt, visible=bool(srt)),
                gr.update(value=vtt, visible=bool(vtt)),
                gr.update(value=txt, visible=bool(txt)),
                gr.update(value=jsn, visible=bool(jsn)),
                meta,
            )
        # Input order must match the wrapper's parameters; output order must
        # match the tuple it returns.
        btn.click(
            run_transcribe_wrapper,
            [file_in, url_in, model_sel, keep_chk, cookies_in, fmt_in],
            [transcript, srt_dl, vtt_dl, txt_dl, json_dl, status]
        )
    # Second tab: yt-dlp version and DNS resolution diagnostics.
    with gr.Tab("Netzwerk / DNS Diagnose"):
        gr.Markdown("""Prüft die Version von yt-dlp, wie sie von Python als Modul ausgeführt wird, und testet die DNS-Auflösung.""")
        diag_btn=gr.Button("Diagnose starten")
        diag_out=gr.Textbox(label="Diagnose-Ausgabe",lines=25)
        diag_btn.click(dns_internet_diag, inputs=[], outputs=[diag_out])
if __name__ == "__main__":
    # Listen on all interfaces; the port is taken from the PORT environment
    # variable and defaults to 7860 when it is not set.
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))