#!/usr/bin/env python3
# coding: utf-8
"""
Hugging Face Space (Gradio) app: video -> audio -> Whisper transcript (+ SRT/TXT/VTT/JSON downloads).
Note: use this app only for your own content or content you are permitted to process.
"""
import os
import subprocess
import tempfile
import json
from pathlib import Path

import gradio as gr

try:
    import whisper
except Exception:
    whisper = None
def run_capture(cmd):
    """Run a command, return its stdout; raise with the tail of stderr on failure."""
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0:
        err_tail = result.stderr[-1000:] if result.stderr else ""
        raise RuntimeError(f"Command failed: {' '.join(cmd)}\n{err_tail}")
    return result.stdout
def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=None):
    """Download a video with yt-dlp and return the path of the newest file in out_dir."""
    out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
    cmd = ["yt-dlp", "-o", out_template]
    if format_selector:
        cmd += ["-f", format_selector]
    if cookies_path:
        cmd += ["--cookies", cookies_path]
    cmd.append(url)
    run_capture(cmd)
    files = sorted(Path(out_dir).glob("*"), key=lambda p: p.stat().st_mtime, reverse=True)
    if not files:
        raise FileNotFoundError("Download failed: no file found in the output directory.")
    return str(files[0])
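# Example of a format_selector value in yt-dlp's "-f" syntax (illustrative only;
# the app leaves the choice entirely to the user):
#   "bestvideo[height<=720]+bestaudio/best"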
def extract_audio_ffmpeg(video_path, out_wav):
    # Mono, 16 kHz WAV: a compact intermediate format that Whisper handles well.
    cmd = ["ffmpeg", "-y", "-i", video_path, "-vn", "-ac", "1", "-ar", "16000", "-f", "wav", out_wav]
    subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return out_wav
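# The extraction step above corresponds to this shell command (placeholder paths):
#   ffmpeg -y -i input.mp4 -vn -ac 1 -ar 16000 -f wav audio.wav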
def _split_timestamp(s):
    """Split seconds into (hours, minutes, seconds, milliseconds) without the
    millisecond part ever rounding up to 1000."""
    total_ms = int(round(s * 1000))
    hours, rest = divmod(total_ms, 3_600_000)
    minutes, rest = divmod(rest, 60_000)
    seconds, ms = divmod(rest, 1000)
    return hours, minutes, seconds, ms

def seconds_to_timestamp(s):
    """SRT timestamp: HH:MM:SS,mmm"""
    hours, minutes, seconds, ms = _split_timestamp(s)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"

def format_timestamp_vtt(s):
    """WebVTT timestamp: HH:MM:SS.mmm"""
    hours, minutes, seconds, ms = _split_timestamp(s)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
def segments_to_srt(segments):
    parts = []
    for i, seg in enumerate(segments, start=1):
        start = seconds_to_timestamp(seg['start'])
        end = seconds_to_timestamp(seg['end'])
        text = seg['text'].strip()
        parts.append(f"{i}\n{start} --> {end}\n{text}\n")
    return "\n".join(parts)

def segments_to_vtt(segments):
    parts = ["WEBVTT\n"]
    for seg in segments:
        start = format_timestamp_vtt(seg['start'])
        end = format_timestamp_vtt(seg['end'])
        text = seg['text'].strip()
        parts.append(f"{start} --> {end}\n{text}\n")
    return "\n".join(parts)

def segments_to_txt(segments):
    return "\n".join(f"[{seconds_to_timestamp(seg['start'])}] {seg['text'].strip()}" for seg in segments)

def segments_to_json(segments, language=None, metadata=None):
    data = {"language": language, "segments": segments}
    if metadata:
        data["metadata"] = metadata
    return json.dumps(data, ensure_ascii=False, indent=2)
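# Shape of the JSON export (segments are Whisper's segment dicts, passed through unchanged):
#   {"language": "en", "segments": [...], "metadata": {"model": "small"}}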
def transcribe_pipeline(file_obj, url, model_size, keep_video=False, cookies_file=None, format_selector=None):
    """Download or accept a video, extract audio, run Whisper, and write SRT/VTT/TXT/JSON files."""
    if whisper is None:
        return "Error: whisper is not installed.", None, None, None, None, None
    # The temp directory is deliberately left in place so Gradio can serve the output files.
    tmpdir = tempfile.mkdtemp(prefix="whisper_space_")
    try:
        if url:
            cookies_path = cookies_file if cookies_file and os.path.exists(cookies_file) else None
            video_path = download_video_with_ytdlp(url, tmpdir, cookies_path=cookies_path, format_selector=format_selector)
        elif file_obj:
            if isinstance(file_obj, str) and os.path.exists(file_obj):
                video_path = file_obj
            else:
                uploaded_path = Path(tmpdir) / Path(getattr(file_obj, "name", "upload")).name
                with open(uploaded_path, "wb") as f:
                    f.write(file_obj.read())
                video_path = str(uploaded_path)
        else:
            return "No video provided.", None, None, None, None, None
        audio_wav = str(Path(tmpdir) / "audio.wav")
        extract_audio_ffmpeg(video_path, audio_wav)
        model = whisper.load_model(model_size)
        result = model.transcribe(audio_wav, verbose=False)
        segments = result.get("segments", [])
        language = result.get("language", "unknown")
        srt_text = segments_to_srt(segments)
        vtt_text = segments_to_vtt(segments)
        txt_text = segments_to_txt(segments)
        json_text = segments_to_json(segments, language, {"model": model_size})
        base = Path(video_path).stem
        files = {}
        for ext, content in {"srt": srt_text, "vtt": vtt_text, "txt": txt_text, "json": json_text}.items():
            p = Path(tmpdir) / f"{base}.{ext}"
            p.write_text(content, encoding="utf-8")
            files[ext] = str(p)
        if not keep_video and url:
            try:
                os.remove(video_path)
            except Exception:
                pass
        return txt_text, files["srt"], files["vtt"], files["txt"], files["json"], f"Model: {model_size}, language: {language}"
    except Exception as e:
        return f"Error: {e}", None, None, None, None, None
with gr.Blocks() as demo:
    gr.Markdown("# Video → Whisper transcript (SRT/TXT/VTT/JSON)")
    with gr.Row():
        with gr.Column():
            url_in = gr.Textbox(label="Video URL", placeholder="https://...")
            file_in = gr.File(label="Or upload a video file")
            cookies_in = gr.File(label="cookies.txt (optional)")
            fmt_in = gr.Textbox(label="Format (optional, yt-dlp -f)")
            model_sel = gr.Radio(["tiny", "base", "small", "medium", "large"], value="small", label="Whisper model")
            keep_chk = gr.Checkbox(label="Keep video", value=False)
            btn = gr.Button("Transcribe")
            status = gr.Textbox(label="Status")
        with gr.Column():
            transcript = gr.Textbox(label="Transcript", lines=20)
            srt_dl = gr.File(label="SRT", visible=False)
            vtt_dl = gr.File(label="VTT", visible=False)
            txt_dl = gr.File(label="TXT", visible=False)
            json_dl = gr.File(label="JSON", visible=False)

    def run_transcribe(f, u, m, k, c, fmt):
        # Gradio may hand the cookies file over as a path string; pass it on only if it exists.
        cookies_path = c if isinstance(c, str) and os.path.exists(c) else None
        display, srtf, vttf, txtf, jsonf, meta = transcribe_pipeline(f, u, m, k, cookies_file=cookies_path, format_selector=fmt)
        return (display,
                gr.update(value=srtf, visible=bool(srtf)),
                gr.update(value=vttf, visible=bool(vttf)),
                gr.update(value=txtf, visible=bool(txtf)),
                gr.update(value=jsonf, visible=bool(jsonf)),
                meta)

    btn.click(run_transcribe,
              [file_in, url_in, model_sel, keep_chk, cookies_in, fmt_in],
              [transcript, srt_dl, vtt_dl, txt_dl, json_dl, status])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))