Spaces:

neuralworm
/

video_transcription

Sleeping

App Files Files Community

video_transcription / app.py

neuralworm

Update app.py

11c876d verified 19 days ago

raw

history blame

8.41 kB

	#!/usr/bin/env python3
	# coding: utf-8

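	""" Hugging Face Space (Gradio) app: video -> audio -> Whisper transcript (+ SRT/TXT/VTT/JSON downloads).
	Videos are fetched with yt-dlp, which is run as a module of the current interpreter.
	NOTE: yt-dlp has no ``--force-ip`` option; only ``--force-ipv4`` and ``--force-ipv6`` exist.
	"""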
	import os
	import subprocess
	import tempfile
	import json
	from pathlib import Path
	from datetime import timedelta
	import socket
	import urllib.request
	from urllib.parse import urlparse
	import sys

	import gradio as gr

	try:
	import whisper
	except ImportError:
	whisper = None

	try:
	from dns import resolver as dns_resolver
	except ImportError:
	dns_resolver = None

	# ---------------------------------------------------------------------------
	# External tools: yt-dlp is invoked as a module, ffmpeg as an executable
	# ---------------------------------------------------------------------------
	# Command prefix that runs yt-dlp through the interpreter executing this app
	# (``<python> -m yt_dlp``); callers append options and the URL to this list.
	YT_DLP_COMMAND = [sys.executable, "-m", "yt_dlp"]
	# Program name used as the first element of every ffmpeg command line.
	FFMPEG_PATH = "ffmpeg"

	# ---------------------------------------------------------------------------
	# Helper: Shell
	# ---------------------------------------------------------------------------
	def run_capture(cmd):
	    """Execute *cmd* and hand back its captured standard output.

	    Args:
	        cmd: Argument sequence handed to ``subprocess.run`` (no shell).

	    Returns:
	        The process's stdout as text.

	    Raises:
	        RuntimeError: If the process exits with a non-zero status. The
	            message contains the space-joined command followed by the
	            last 2000 characters of stderr.
	    """
	    completed = subprocess.run(cmd, capture_output=True, text=True)
	    if completed.returncode == 0:
	        return completed.stdout
	    # Only the tail of stderr is kept so the message stays readable.
	    err_tail = (completed.stderr or "")[-2000:]
	    command_line = " ".join(str(part) for part in cmd)
	    raise RuntimeError(f"Command failed: {command_line} {err_tail}")

	# DNS lookup helper: custom nameservers first, system resolver as fallback.
	def resolve_hostname_with_dns_python(hostname):
	    """Resolve *hostname* to an address string.

	    When the optional ``dns_resolver`` module is available, the ``A`` record
	    is queried through the nameservers 8.8.8.8 and 1.1.1.1. If the module is
	    missing, or that lookup raises, ``socket.gethostbyname`` is used instead.

	    Returns:
	        The text form of the first answer, ``None`` when the answer set is
	        empty, or the result of ``socket.gethostbyname`` on the fallback path.
	    """
	    if dns_resolver:
	        try:
	            custom = dns_resolver.Resolver()
	            custom.nameservers = ['8.8.8.8', '1.1.1.1']
	            records = custom.resolve(hostname, 'A')
	            if not records:
	                return None
	            return records[0].to_text()
	        except Exception:
	            # Any failure of the custom lookup falls back to the system resolver.
	            return socket.gethostbyname(hostname)
	    return socket.gethostbyname(hostname)

	# ---------------------------------------------------------------------------
	# Download via yt-dlp
	# ---------------------------------------------------------------------------
	def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=None):
	    """Download *url* into *out_dir* with yt-dlp and return the resulting path.

	    Args:
	        url: Address of the video to download.
	        out_dir: Directory that receives the download.
	        cookies_path: Optional cookies file forwarded via ``--cookies``.
	        format_selector: Optional format expression forwarded via ``-f``.

	    Returns:
	        Path (as ``str``) of the most recently modified file in *out_dir*.

	    Raises:
	        RuntimeError: If yt-dlp exits with an error (raised by ``run_capture``).
	        FileNotFoundError: If *out_dir* contains no file after the download.
	    """
	    out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
	    cmd = YT_DLP_COMMAND + ["-o", out_template]

	    # The lookup is diagnostic only. yt-dlp has no option to pin the
	    # destination address: "--force-ip" is not a valid flag (it is an
	    # ambiguous prefix of --force-ipv4 / --force-ipv6 and aborts the run),
	    # so the resolved address is logged but never passed on.
	    try:
	        hostname = urlparse(url).hostname
	        if hostname:
	            ip_address = resolve_hostname_with_dns_python(hostname)
	            if ip_address:
	                print(f"Resolved {hostname} to {ip_address}.")
	    except Exception as e:
	        print(f"Custom DNS resolution failed, proceeding without it. Error: {e}")

	    if format_selector:
	        cmd += ["-f", format_selector]
	    if cookies_path:
	        cmd += ["--cookies", cookies_path]
	    # "--" ends option parsing so a URL starting with "-" cannot be read as a flag.
	    cmd += ["--", url]

	    print(f"Running command: {' '.join(map(str, cmd))}")
	    run_capture(cmd)

	    # The newest file in the output directory is taken to be the download.
	    candidates = [p for p in Path(out_dir).glob("*") if p.is_file()]
	    if not candidates:
	        raise FileNotFoundError("Download fehlgeschlagen — keine Datei gefunden.")
	    return str(max(candidates, key=lambda p: p.stat().st_mtime))

	def extract_audio_ffmpeg(video_path, out_wav):
	    """Write the audio of *video_path* to *out_wav* by running ffmpeg.

	    The command disables video (``-vn``), requests one channel (``-ac 1``),
	    a 16000 Hz sample rate (``-ar 16000``) and WAV output (``-f wav``), and
	    overwrites an existing target (``-y``). ffmpeg's stdout and stderr are
	    discarded.

	    Returns:
	        *out_wav*, unchanged.

	    Raises:
	        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
	    """
	    ffmpeg_args = [
	        FFMPEG_PATH, "-y",
	        "-i", video_path,
	        "-vn",
	        "-ac", "1",
	        "-ar", "16000",
	        "-f", "wav",
	        out_wav,
	    ]
	    subprocess.run(
	        ffmpeg_args,
	        stdout=subprocess.DEVNULL,
	        stderr=subprocess.DEVNULL,
	        check=True,
	    )
	    return out_wav

	# ---------------------------------------------------------------------------
	# Timestamp and subtitle formatting
	# ---------------------------------------------------------------------------
	def seconds_to_timestamp(s):
	    """Format *s* seconds as an SRT timestamp ``HH:MM:SS,mmm``.

	    The value is rounded to whole milliseconds first and split afterwards,
	    so a fraction that rounds up carries into the seconds instead of
	    producing a four-digit millisecond field (e.g. 1.9996 -> 00:00:02,000).
	    Negative values are clamped to zero.
	    """
	    total_ms = max(0, int(round(s * 1000)))
	    total_seconds, millis = divmod(total_ms, 1000)
	    total_minutes, seconds = divmod(total_seconds, 60)
	    hours, minutes = divmod(total_minutes, 60)
	    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
	def format_timestamp_vtt(s):
	    """Format *s* seconds as a WebVTT timestamp ``HH:MM:SS.mmm``.

	    The value is rounded to whole milliseconds first and split afterwards,
	    so a fraction that rounds up carries into the seconds instead of
	    producing a four-digit millisecond field (e.g. 1.9996 -> 00:00:02.000).
	    Negative values are clamped to zero.
	    """
	    total_ms = max(0, int(round(s * 1000)))
	    total_seconds, millis = divmod(total_ms, 1000)
	    total_minutes, seconds = divmod(total_seconds, 60)
	    hours, minutes = divmod(total_minutes, 60)
	    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"
	def segments_to_srt(segments):
	    """Render *segments* as SubRip (SRT) text.

	    Each segment is read through its ``'start'``, ``'end'`` and ``'text'``
	    keys and becomes a cue numbered from 1. Cues are separated by a blank
	    line and the result always ends with two newlines.
	    """
	    cues = []
	    for index, seg in enumerate(segments, start=1):
	        begin = seconds_to_timestamp(seg['start'])
	        finish = seconds_to_timestamp(seg['end'])
	        cues.append(f"{index}\n{begin} --> {finish}\n{seg['text'].strip()}")
	    return "\n\n".join(cues) + "\n\n"
	def segments_to_vtt(segments):
	    """Render *segments* as WebVTT text.

	    The output starts with the ``WEBVTT`` header line; every segment (read
	    through its ``'start'``, ``'end'`` and ``'text'`` keys) follows as an
	    unnumbered cue, with entries joined by a blank line.
	    """
	    cues = ["WEBVTT\n"]
	    for seg in segments:
	        begin = format_timestamp_vtt(seg['start'])
	        finish = format_timestamp_vtt(seg['end'])
	        cues.append(f"{begin} --> {finish}\n{seg['text'].strip()}")
	    return "\n\n".join(cues)
	def segments_to_txt(segments):
	    """Render *segments* as plain text, one ``[timestamp] text`` line each.

	    The timestamp is the segment's ``'start'`` formatted by
	    ``seconds_to_timestamp``; the text is stripped of surrounding whitespace.
	    """
	    return "\n".join(
	        f"[{seconds_to_timestamp(seg['start'])}] {seg['text'].strip()}"
	        for seg in segments
	    )
	def segments_to_json(segments, lang=None, meta=None):
	    """Serialize *segments* together with language and optional metadata.

	    Returns:
	        A JSON document (2-space indent, non-ASCII characters kept as is)
	        with the keys ``language`` and ``segments``, plus ``metadata`` when
	        *meta* is truthy.
	    """
	    payload = {"language": lang, "segments": segments}
	    if meta:
	        payload["metadata"] = meta
	    return json.dumps(payload, ensure_ascii=False, indent=2)
	def transcribe_pipeline(f, u, m, k, c, fmt):
	    """Transcribe a downloaded or uploaded video with Whisper.

	    Args:
	        f: Uploaded file (object exposing ``.name``, or a path), used when
	            *u* is empty. May be ``None``.
	        u: Video URL; when truthy the video is downloaded with yt-dlp.
	        m: Model name passed to ``whisper.load_model``.
	        k: When falsy, a video downloaded from *u* is deleted afterwards.
	        c: Optional cookies file path forwarded to yt-dlp.
	        fmt: Optional yt-dlp format selector.

	    Returns:
	        A 6-tuple ``(text, srt_path, vtt_path, txt_path, json_path, meta)``.
	        On failure the first item is an error message and the other five
	        items are ``None``.
	    """
	    if whisper is None:
	        return ("Fehler: whisper ist nicht installiert.", None, None, None, None, None)
	    # Reject missing input before any temporary directory is created.
	    if not u and not f:
	        return ("Kein Video angegeben.", None, None, None, None, None)

	    # The directory must outlive this call: the returned paths point into it.
	    tmpdir = tempfile.mkdtemp(prefix="whisper_space_")
	    try:
	        if u:
	            video_path = download_video_with_ytdlp(u, tmpdir, c, fmt)
	        else:
	            # Accept both file objects with ``.name`` and plain path strings.
	            video_path = getattr(f, "name", f)
	        if not video_path:
	            return ("Kein Video angegeben.", None, None, None, None, None)

	        audio_wav = str(Path(tmpdir) / "audio.wav")
	        extract_audio_ffmpeg(video_path, audio_wav)

	        model = whisper.load_model(m)
	        result = model.transcribe(audio_wav, verbose=False)
	        segs = result.get("segments", [])
	        lang = result.get("language", "unknown")

	        txt = segments_to_txt(segs)
	        outputs = {
	            "srt": segments_to_srt(segs),
	            "vtt": segments_to_vtt(segs),
	            "txt": txt,
	            "json": segments_to_json(segs, lang, {"model": m}),
	        }

	        # One export file per format, named after the video.
	        base = Path(video_path).stem
	        files = {}
	        for ext, content in outputs.items():
	            target = Path(tmpdir) / f"{base}.{ext}"
	            target.write_text(content, encoding="utf-8")
	            files[ext] = str(target)

	        # Only downloaded videos are removed; uploads are left untouched.
	        if not k and u and os.path.exists(video_path):
	            os.remove(video_path)

	        meta = f"Model: {m}, Sprache: {lang}"
	        return txt, files["srt"], files["vtt"], files["txt"], files["json"], meta
	    except Exception as e:
	        # Nothing inside tmpdir is handed out on failure, so it can be removed.
	        import shutil
	        shutil.rmtree(tmpdir, ignore_errors=True)
	        return (f"Fehler: {e}", None, None, None, None, None)
	def dns_internet_diag():
	    """Build a text report with interpreter, yt-dlp version and DNS results.

	    The report lists the Python executable, the output of running the
	    yt-dlp command with ``--version`` (or the error that occurred), and one
	    line per probed hostname showing the resolved address or the error.

	    Returns:
	        The report lines joined with newlines.
	    """
	    report = [
	        "=== Python & Version Info ===",
	        f"Python Executable: {sys.executable}",
	    ]
	    try:
	        version_cmd = YT_DLP_COMMAND + ["--version"]
	        reported = run_capture(version_cmd).strip()
	        report.append(f"Version via '{' '.join(version_cmd)}': {reported}")
	    except Exception as e:
	        report.append(f"Fehler bei der Prüfung der yt-dlp Modul-Version: {e}")

	    report.append("\n\n=== DNS-Auflösung (via dnspython mit 8.8.8.8) ===")
	    for host in ("huggingface.co", "www.instagram.com", "youtube.com"):
	        try:
	            address = resolve_hostname_with_dns_python(host)
	        except Exception as e:
	            report.append(f"{host} -> ERROR: {e}")
	        else:
	            report.append(f"{host} -> {address} (OK)")
	    return "\n".join(report)
	# Gradio UI: one tab for transcription, one for network diagnostics.
	with gr.Blocks() as demo:
	    gr.Markdown("# Video → Whisper Transkript (SRT/TXT/VTT/JSON)")

	    with gr.Tab("Transkription"):
	        with gr.Row():
	            # Left column: inputs, options, trigger button and status line.
	            with gr.Column():
	                url_in = gr.Textbox(label="Video URL", placeholder="https://...")
	                file_in = gr.File(label="Oder Videodatei hochladen")
	                cookies_in = gr.File(label="Cookies.txt (optional, für yt-dlp)")
	                fmt_in = gr.Textbox(
	                    label="Format (optional, yt-dlp -f)",
	                    placeholder="z.B. bestvideo+bestaudio/best",
	                )
	                model_sel = gr.Radio(
	                    ["tiny", "base", "small", "medium", "large"],
	                    value="small",
	                    label="Whisper-Modell",
	                )
	                keep_chk = gr.Checkbox(label="Video behalten (bei URL-Download)", value=False)
	                btn = gr.Button("Transkribieren")
	                status = gr.Textbox(label="Status / Meta", interactive=False)
	            # Right column: transcript text and the four download slots.
	            with gr.Column():
	                transcript = gr.Textbox(label="Transkript", lines=20)
	                srt_dl = gr.File(label="SRT")
	                vtt_dl = gr.File(label="VTT")
	                txt_dl = gr.File(label="TXT")
	                json_dl = gr.File(label="JSON")

	        def run_transcribe(f, u, m, k, c, fmt):
	            """Run the pipeline and map its result onto the output components.

	            Each download slot is shown only when a path was produced for it.
	            """
	            cookies_path = c.name if c else None
	            text, srt_path, vtt_path, txt_path, json_path, meta = transcribe_pipeline(
	                f, u, m, k, cookies_path, (fmt or None)
	            )
	            downloads = [
	                gr.update(value=path, visible=bool(path))
	                for path in (srt_path, vtt_path, txt_path, json_path)
	            ]
	            return (text, *downloads, meta)

	        btn.click(
	            run_transcribe,
	            [file_in, url_in, model_sel, keep_chk, cookies_in, fmt_in],
	            [transcript, srt_dl, vtt_dl, txt_dl, json_dl, status],
	        )

	    with gr.Tab("Netzwerk / DNS Diagnose"):
	        gr.Markdown("""Prüft die Version von yt-dlp, wie sie von Python als Modul ausgeführt wird.""")
	        diag_btn = gr.Button("Diagnose starten")
	        diag_out = gr.Textbox(label="Diagnose-Ausgabe", lines=25)
	        diag_btn.click(dns_internet_diag, inputs=[], outputs=[diag_out])
	# Script entry point: serve on all interfaces; the port comes from the
	# PORT environment variable and falls back to 7860.
	if __name__ == "__main__":
	    demo.launch(
	        server_port=int(os.environ.get("PORT", 7860)),
	        server_name="0.0.0.0",
	    )