Spaces:
Sleeping
Sleeping
File size: 7,240 Bytes
a9d392f 6c588c4 a9d392f d926f18 6c588c4 8278e48 6c588c4 8278e48 6c588c4 a9d392f 6c588c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
#!/usr/bin/env python3
# coding: utf-8
"""
Hugging Face Space (Gradio) App: Video -> Audio -> Whisper Transkript (+ Downloads SRT/TXT/VTT/JSON)
Hinweis: Verwende diese App nur für eigene oder freigegebene Inhalte.
"""
import os
import subprocess
import tempfile
import json
from pathlib import Path
from datetime import timedelta
import gradio as gr
# Whisper is treated as an optional dependency at import time: if the import
# fails for any reason the name is bound to None, and transcribe_pipeline
# reports the problem to the user instead of the module failing to load.
try:
    import whisper
except Exception:
    whisper = None
def run_capture(cmd):
    """Run *cmd* without a shell and return its captured stdout as text.

    Args:
        cmd: Argument list passed straight to ``subprocess.run``.

    Returns:
        The text the process wrote to standard output.

    Raises:
        RuntimeError: If the process exits with a non-zero status. The
            message holds the space-joined command and at most the last
            1000 characters of stderr.
    """
    proc = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    if proc.returncode == 0:
        return proc.stdout

    # Keep only the tail of stderr so the error message stays readable.
    stderr_tail = ""
    if proc.stderr:
        stderr_tail = proc.stderr[-1000:]
    raise RuntimeError(f"Command failed: {' '.join(cmd)}\n{stderr_tail}")
def download_video_with_ytdlp(url: str, out_dir: str, cookies_path=None, format_selector=None) -> str:
    """Download *url* with the ``yt-dlp`` CLI into *out_dir*.

    Args:
        url: Address of the video to download (user supplied).
        out_dir: Existing directory that receives the download.
        cookies_path: Optional path passed to ``yt-dlp --cookies``.
        format_selector: Optional value passed to ``yt-dlp -f``.

    Returns:
        Path (as ``str``) of the most recently modified regular file the
        download left in *out_dir*.

    Raises:
        RuntimeError: If yt-dlp fails. DNS/connectivity failures are mapped
            to a dedicated user-facing message.
        FileNotFoundError: If *out_dir* holds no file after the download.
    """
    target_dir = Path(out_dir)
    # Remember what was already there so pre-existing files are not mistaken
    # for the download result.
    already_present = set(target_dir.glob("*"))

    cmd = ["yt-dlp", "-o", str(target_dir / "%(title)s.%(ext)s")]
    if format_selector:
        cmd += ["-f", format_selector]
    if cookies_path:
        cmd += ["--cookies", cookies_path]
    # "--" ends option parsing: a user-supplied URL that starts with "-" must
    # never be interpreted as a yt-dlp option.
    cmd += ["--", url]

    try:
        run_capture(cmd)
    except RuntimeError as e:
        msg = str(e)
        if "Failed to resolve" in msg or "Name or service not known" in msg:
            raise RuntimeError("Kein DNS/Internet im Space: URL-Download nicht möglich. Bitte Videodatei hochladen oder in einer Umgebung mit Internet ausführen.") from e
        raise

    # Only regular files qualify; prefer the ones this run created and fall
    # back to everything (e.g. when yt-dlp reports "already downloaded").
    files = [p for p in target_dir.glob("*") if p.is_file()]
    created = [p for p in files if p not in already_present]
    candidates = created or files
    if not candidates:
        raise FileNotFoundError("Download fehlgeschlagen — keine Datei gefunden.")
    return str(max(candidates, key=lambda p: p.stat().st_mtime))
def extract_audio_ffmpeg(video_path, out_wav):
    """Convert the audio of *video_path* to a WAV file using the ``ffmpeg`` CLI.

    The command overwrites *out_wav* if it exists (``-y``), drops video
    (``-vn``), and requests one channel (``-ac 1``) at 16000 Hz
    (``-ar 16000``) in WAV format. ffmpeg's own output is discarded.

    Args:
        video_path: Path of the input media file.
        out_wav: Path of the WAV file to write.

    Returns:
        *out_wav*, unchanged.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    ffmpeg_args = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vn",
        "-ac", "1",
        "-ar", "16000",
        "-f", "wav",
        out_wav,
    ]
    silence = subprocess.DEVNULL
    subprocess.run(ffmpeg_args, check=True, stdout=silence, stderr=silence)
    return out_wav
def seconds_to_timestamp(s):
    """Format *s* seconds as an SRT-style timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds once and then split, so a
    fractional part that rounds up carries into the seconds (1.9996 becomes
    ``00:00:02,000`` rather than an invalid ``00:00:01,1000``). Negative
    values are clamped to zero.

    Args:
        s: Offset in seconds (int or float).

    Returns:
        The formatted timestamp string.
    """
    total_ms = max(0, int(round(s * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
def format_timestamp_vtt(s):
    """Format *s* seconds as a WebVTT-style timestamp ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds once and then split, so a
    fractional part that rounds up carries into the seconds (1.9996 becomes
    ``00:00:02.000`` rather than an invalid ``00:00:01.1000``). Negative
    values are clamped to zero.

    Args:
        s: Offset in seconds (int or float).

    Returns:
        The formatted timestamp string.
    """
    total_ms = max(0, int(round(s * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
def segments_to_srt(segments):
    """Render *segments* as SubRip (SRT) text.

    Args:
        segments: Iterable of mappings with ``'start'``, ``'end'`` (seconds)
            and ``'text'`` keys.

    Returns:
        One numbered cue per segment (numbering starts at 1), cues separated
        by a blank line; an empty string when there are no segments.
    """
    cues = []
    for number, segment in enumerate(segments, start=1):
        begin = seconds_to_timestamp(segment['start'])
        finish = seconds_to_timestamp(segment['end'])
        caption = segment['text'].strip()
        cues.append(f"{number}\n{begin} --> {finish}\n{caption}\n")
    return "\n".join(cues)
def segments_to_vtt(segments):
    """Render *segments* as WebVTT text.

    Args:
        segments: Iterable of mappings with ``'start'``, ``'end'`` (seconds)
            and ``'text'`` keys.

    Returns:
        The ``WEBVTT`` header followed by one cue per segment, each part
        separated by a blank line.
    """
    blocks = ["WEBVTT\n"]
    blocks.extend(
        f"{format_timestamp_vtt(cue['start'])} --> {format_timestamp_vtt(cue['end'])}\n{cue['text'].strip()}\n"
        for cue in segments
    )
    return "\n".join(blocks)
def segments_to_txt(segments):
    """Render *segments* as plain text, one line per segment.

    Each line has the form ``[HH:MM:SS,mmm] text`` where the timestamp is
    the segment's start time and the text is stripped of surrounding
    whitespace.

    Args:
        segments: Iterable of mappings with ``'start'`` (seconds) and
            ``'text'`` keys.

    Returns:
        The newline-joined lines; an empty string when there are no segments.
    """
    lines = []
    for segment in segments:
        stamp = seconds_to_timestamp(segment['start'])
        lines.append(f"[{stamp}] {segment['text'].strip()}")
    return "\n".join(lines)
def segments_to_json(segments, language=None, metadata=None):
    """Serialize a transcript to a pretty-printed JSON string.

    Args:
        segments: Segment data stored under the ``"segments"`` key.
        language: Value stored under the ``"language"`` key.
        metadata: Optional extra data; a ``"metadata"`` key is added only
            when this value is truthy.

    Returns:
        JSON text with a 2-space indent; non-ASCII characters are written
        as-is rather than escaped.
    """
    extra = {"metadata": metadata} if metadata else {}
    payload = {"language": language, "segments": segments, **extra}
    return json.dumps(payload, indent=2, ensure_ascii=False)
def transcribe_pipeline(file_obj, url, model_size, keep_video=False, cookies_file=None, format_selector=None):
    """Turn a video (URL or upload) into a Whisper transcript plus export files.

    Steps: obtain the video (yt-dlp download when *url* is given, otherwise
    the upload), extract a WAV with ffmpeg, transcribe it with Whisper, and
    write SRT/VTT/TXT/JSON files into a fresh temporary directory.

    Args:
        file_obj: Uploaded video: a path string, or an object exposing
            ``name`` and/or ``read``. Ignored when *url* is non-empty.
        url: Video URL; takes precedence over *file_obj*.
        model_size: Name passed to ``whisper.load_model``.
        keep_video: When false, a video downloaded from *url* is deleted
            after transcription.
        cookies_file: Optional cookies file path for yt-dlp; ignored if the
            path does not exist.
        format_selector: Optional yt-dlp ``-f`` value.

    Returns:
        A 6-tuple ``(text, srt_path, vtt_path, txt_path, json_path, status)``.
        On any failure the first element is a message and the rest are
        ``None``; exceptions are not propagated.
    """
    import shutil  # function-scope import: only this block needs it

    no_outputs = (None, None, None, None, None)
    if whisper is None:
        return ("Fehler: whisper ist nicht installiert.",) + no_outputs

    # A whitespace-only URL counts as "no URL" so an upload is still honoured.
    url = (url or "").strip()
    if not url and not file_obj:
        # Checked before the temp dir is created, so nothing is left behind.
        return ("Kein Video angegeben.",) + no_outputs

    tmpdir = tempfile.mkdtemp(prefix="whisper_space_")
    audio_wav = None
    succeeded = False
    try:
        if url:
            cookies_path = cookies_file if cookies_file and os.path.exists(cookies_file) else None
            video_path = download_video_with_ytdlp(url, tmpdir, cookies_path=cookies_path, format_selector=format_selector)
        elif isinstance(file_obj, str):
            if not os.path.exists(file_obj):
                raise FileNotFoundError(f"Datei nicht gefunden: {file_obj}")
            video_path = file_obj
        else:
            source_name = getattr(file_obj, "name", None)
            if isinstance(source_name, str) and os.path.isfile(source_name):
                # The object already refers to a file on disk (e.g. a
                # temp-file wrapper), so use that path instead of copying.
                video_path = source_name
            else:
                file_name = Path(str(source_name or "upload")).name or "upload"
                uploaded_path = Path(tmpdir) / file_name
                with open(uploaded_path, "wb") as f:
                    # Stream in chunks rather than loading the video into memory.
                    shutil.copyfileobj(file_obj, f)
                video_path = str(uploaded_path)

        # A unique name guarantees the WAV never collides with the input
        # video (e.g. an upload that is itself called "audio.wav").
        fd, audio_wav = tempfile.mkstemp(prefix="audio_", suffix=".wav", dir=tmpdir)
        os.close(fd)
        extract_audio_ffmpeg(video_path, audio_wav)

        model = whisper.load_model(model_size)
        result = model.transcribe(audio_wav, verbose=False)
        segments = result.get("segments", [])
        language = result.get("language", "unknown")

        outputs = {
            "srt": segments_to_srt(segments),
            "vtt": segments_to_vtt(segments),
            "txt": segments_to_txt(segments),
            "json": segments_to_json(segments, language, {"model": model_size}),
        }
        base = Path(video_path).stem
        files = {}
        for ext, content in outputs.items():
            p = Path(tmpdir) / f"{base}.{ext}"
            p.write_text(content, encoding="utf-8")
            files[ext] = str(p)

        if url and not keep_video:
            try:
                os.remove(video_path)
            except OSError:
                # Best effort: a leftover download must not fail the request.
                pass

        succeeded = True
        return outputs["txt"], files["srt"], files["vtt"], files["txt"], files["json"], f"Model: {model_size}, Sprache: {language}"
    except Exception as e:
        # Top-level boundary for the UI: report the error as text.
        return (f"Fehler: {e}",) + no_outputs
    finally:
        if succeeded:
            # The export files must survive for download; only the
            # intermediate WAV is dropped.
            try:
                os.remove(audio_wav)
            except OSError:
                pass
        else:
            # Nothing in the temp dir is handed to the caller on failure.
            shutil.rmtree(tmpdir, ignore_errors=True)
# UI definition: inputs and status in the left column, transcript and the
# (initially hidden) download slots in the right column.
with gr.Blocks() as demo:
    gr.Markdown("# Video → Whisper Transkript (SRT/TXT/VTT/JSON)")
    with gr.Row():
        with gr.Column():
            url_in = gr.Textbox(label="Video URL", placeholder="https://...")
            file_in = gr.File(label="Oder Videodatei hochladen")
            cookies_in = gr.File(label="Cookies.txt (optional)")
            fmt_in = gr.Textbox(label="Format (optional, yt-dlp -f)")
            model_sel = gr.Radio(["tiny", "base", "small", "medium", "large"], value="small", label="Whisper-Modell")
            keep_chk = gr.Checkbox(label="Video behalten", value=False)
            btn = gr.Button("Transkribieren")
            status = gr.Textbox(label="Status")
        with gr.Column():
            transcript = gr.Textbox(label="Transkript", lines=20)
            srt_dl = gr.File(label="SRT", visible=False)
            vtt_dl = gr.File(label="VTT", visible=False)
            txt_dl = gr.File(label="TXT", visible=False)
            json_dl = gr.File(label="JSON", visible=False)

    def run_transcribe(f, u, m, k, c, fmt):
        """Button callback: run the pipeline and map its result onto the outputs.

        Args:
            f: Value of the video file input.
            u: Value of the URL textbox.
            m: Selected Whisper model name.
            k: Whether a downloaded video should be kept.
            c: Value of the cookies file input (path string or an object
                with a ``name`` attribute).
            fmt: Value of the yt-dlp format textbox.

        Returns:
            Transcript text, four updates for the download slots (each made
            visible only when a file path is available), and the status text.
        """
        # The file input may deliver a path string or a file-like object, so
        # accept both rather than silently dropping the cookies for objects.
        cookie_candidate = c if isinstance(c, str) else getattr(c, "name", None)
        cookies_path = None
        if isinstance(cookie_candidate, str) and os.path.exists(cookie_candidate):
            cookies_path = cookie_candidate
        # A blank or whitespace-only selector means "use yt-dlp's default".
        fmt = (fmt or "").strip() or None

        display, srtf, vttf, txtf, jsonf, meta = transcribe_pipeline(f, u, m, k, cookies_file=cookies_path, format_selector=fmt)
        return (
            display,
            gr.update(value=srtf, visible=bool(srtf)),
            gr.update(value=vttf, visible=bool(vttf)),
            gr.update(value=txtf, visible=bool(txtf)),
            gr.update(value=jsonf, visible=bool(jsonf)),
            meta,
        )

    btn.click(run_transcribe, [file_in, url_in, model_sel, keep_chk, cookies_in, fmt_in], [transcript, srt_dl, vtt_dl, txt_dl, json_dl, status])
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and falls back to 7860 when it is not set.
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))