neuralworm committed on
Commit
1bac39b
·
verified ·
1 Parent(s): 8278e48

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +191 -34
app.py CHANGED
@@ -3,6 +3,17 @@
3
  """
4
  Hugging Face Space (Gradio) App: Video -> Audio -> Whisper Transkript (+ Downloads SRT/TXT/VTT/JSON)
5
 
 
 
 
 
 
 
 
 
 
 
 
6
  Hinweis: Verwende diese App nur für eigene oder freigegebene Inhalte.
7
  """
8
  import os
@@ -11,6 +22,9 @@ import tempfile
11
  import json
12
  from pathlib import Path
13
  from datetime import timedelta
 
 
 
14
  import gradio as gr
15
 
16
  try:
@@ -18,14 +32,30 @@ try:
18
  except Exception:
19
  whisper = None
20
 
 
 
 
 
21
  def run_capture(cmd):
 
22
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
23
  if result.returncode != 0:
24
  err_tail = result.stderr[-1000:] if result.stderr else ""
25
- raise RuntimeError(f"Command failed: {' '.join(cmd)}\n{err_tail}")
 
26
  return result.stdout
27
 
28
- def download_video_with_ytdlp(url: str, out_dir: str, cookies_path=None, format_selector=None) -> str:
 
 
 
 
 
 
 
 
 
 
29
  out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
30
  cmd = ["yt-dlp", "-o", out_template]
31
  if format_selector:
@@ -33,23 +63,35 @@ def download_video_with_ytdlp(url: str, out_dir: str, cookies_path=None, format_
33
  if cookies_path:
34
  cmd += ["--cookies", cookies_path]
35
  cmd.append(url)
 
36
  try:
37
  run_capture(cmd)
38
  except RuntimeError as e:
39
  msg = str(e)
40
  if "Failed to resolve" in msg or "Name or service not known" in msg:
41
- raise RuntimeError("Kein DNS/Internet im Space: URL-Download nicht möglich. Bitte Videodatei hochladen oder in einer Umgebung mit Internet ausführen.")
 
 
 
 
42
  raise
 
43
  files = sorted(Path(out_dir).glob("*"), key=lambda p: p.stat().st_mtime, reverse=True)
44
  if not files:
45
  raise FileNotFoundError("Download fehlgeschlagen — keine Datei gefunden.")
46
  return str(files[0])
47
 
 
48
  def extract_audio_ffmpeg(video_path, out_wav):
49
  cmd = ["ffmpeg", "-y", "-i", video_path, "-vn", "-ac", "1", "-ar", "16000", "-f", "wav", out_wav]
50
  subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
51
  return out_wav
52
 
 
 
 
 
 
53
  def seconds_to_timestamp(s):
54
  hours = int(s // 3600)
55
  minutes = int((s % 3600) // 60)
@@ -57,6 +99,7 @@ def seconds_to_timestamp(s):
57
  ms = int(round((s - int(s)) * 1000))
58
  return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
59
 
 
60
  def format_timestamp_vtt(s):
61
  hours = int(s // 3600)
62
  minutes = int((s % 3600) // 60)
@@ -64,26 +107,39 @@ def format_timestamp_vtt(s):
64
  ms = int(round((s - int(s)) * 1000))
65
  return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
66
 
 
67
  def segments_to_srt(segments):
68
  parts = []
69
  for i, seg in enumerate(segments, start=1):
70
  start = seconds_to_timestamp(seg['start'])
71
  end = seconds_to_timestamp(seg['end'])
72
  text = seg['text'].strip()
73
- parts.append(f"{i}\n{start} --> {end}\n{text}\n")
74
- return "\n".join(parts)
 
 
 
 
 
75
 
76
  def segments_to_vtt(segments):
77
- parts = ["WEBVTT\n"]
 
78
  for seg in segments:
79
  start = format_timestamp_vtt(seg['start'])
80
  end = format_timestamp_vtt(seg['end'])
81
  text = seg['text'].strip()
82
- parts.append(f"{start} --> {end}\n{text}\n")
83
- return "\n".join(parts)
 
 
 
 
84
 
85
  def segments_to_txt(segments):
86
- return "\n".join([f"[{seconds_to_timestamp(seg['start'])}] {seg['text'].strip()}" for seg in segments])
 
 
87
 
88
  def segments_to_json(segments, language=None, metadata=None):
89
  data = {"language": language, "segments": segments}
@@ -91,11 +147,18 @@ def segments_to_json(segments, language=None, metadata=None):
91
  data["metadata"] = metadata
92
  return json.dumps(data, ensure_ascii=False, indent=2)
93
 
 
 
 
 
 
94
  def transcribe_pipeline(file_obj, url, model_size, keep_video=False, cookies_file=None, format_selector=None):
95
  if whisper is None:
96
  return "Fehler: whisper ist nicht installiert.", None, None, None, None, None
 
97
  tmpdir = tempfile.mkdtemp(prefix="whisper_space_")
98
  try:
 
99
  if url:
100
  cookies_path = cookies_file if cookies_file and os.path.exists(cookies_file) else None
101
  video_path = download_video_with_ytdlp(url, tmpdir, cookies_path=cookies_path, format_selector=format_selector)
@@ -109,56 +172,150 @@ def transcribe_pipeline(file_obj, url, model_size, keep_video=False, cookies_fil
109
  video_path = str(uploaded_path)
110
  else:
111
  return "Kein Video angegeben.", None, None, None, None, None
 
 
112
  audio_wav = str(Path(tmpdir) / "audio.wav")
113
  extract_audio_ffmpeg(video_path, audio_wav)
 
 
114
  model = whisper.load_model(model_size)
115
  result = model.transcribe(audio_wav, verbose=False)
116
  segments = result.get("segments", [])
117
  language = result.get("language", "unknown")
 
 
118
  srt_text = segments_to_srt(segments)
119
  vtt_text = segments_to_vtt(segments)
120
  txt_text = segments_to_txt(segments)
121
  json_text = segments_to_json(segments, language, {"model": model_size})
 
122
  base = Path(video_path).stem
123
  files = {}
124
  for ext, content in {"srt": srt_text, "vtt": vtt_text, "txt": txt_text, "json": json_text}.items():
125
  p = Path(tmpdir) / f"{base}.{ext}"
126
  p.write_text(content, encoding="utf-8")
127
  files[ext] = str(p)
 
128
  if not keep_video and url:
129
  try:
130
  os.remove(video_path)
131
  except Exception:
132
  pass
133
- return txt_text, files["srt"], files["vtt"], files["txt"], files["json"], f"Model: {model_size}, Sprache: {language}"
 
 
134
  except Exception as e:
135
  return f"Fehler: {e}", None, None, None, None, None
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  with gr.Blocks() as demo:
138
  gr.Markdown("# Video → Whisper Transkript (SRT/TXT/VTT/JSON)")
139
- with gr.Row():
140
- with gr.Column():
141
- url_in = gr.Textbox(label="Video URL", placeholder="https://...")
142
- file_in = gr.File(label="Oder Videodatei hochladen")
143
- cookies_in = gr.File(label="Cookies.txt (optional)")
144
- fmt_in = gr.Textbox(label="Format (optional, yt-dlp -f)")
145
- model_sel = gr.Radio(["tiny", "base", "small", "medium", "large"], value="small", label="Whisper-Modell")
146
- keep_chk = gr.Checkbox(label="Video behalten", value=False)
147
- btn = gr.Button("Transkribieren")
148
- status = gr.Textbox(label="Status")
149
- with gr.Column():
150
- transcript = gr.Textbox(label="Transkript", lines=20)
151
- srt_dl = gr.File(label="SRT", visible=False)
152
- vtt_dl = gr.File(label="VTT", visible=False)
153
- txt_dl = gr.File(label="TXT", visible=False)
154
- json_dl = gr.File(label="JSON", visible=False)
155
-
156
- def run_transcribe(f, u, m, k, c, fmt):
157
- cookies_path = c if isinstance(c, str) and os.path.exists(c) else None
158
- display, srtf, vttf, txtf, jsonf, meta = transcribe_pipeline(f, u, m, k, cookies_file=cookies_path, format_selector=fmt)
159
- return display, gr.update(value=srtf, visible=bool(srtf)), gr.update(value=vttf, visible=bool(vttf)), gr.update(value=txtf, visible=bool(txtf)), gr.update(value=jsonf, visible=bool(jsonf)), meta
160
-
161
- btn.click(run_transcribe, [file_in, url_in, model_sel, keep_chk, cookies_in, fmt_in], [transcript, srt_dl, vtt_dl, txt_dl, json_dl, status])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  if __name__ == "__main__":
164
- demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
3
  """
4
  Hugging Face Space (Gradio) App: Video -> Audio -> Whisper Transkript (+ Downloads SRT/TXT/VTT/JSON)
5
 
6
+ Tab 1: Transkription
7
+ - Video per URL (yt-dlp) oder Upload
8
+ - Audio-Extraktion via ffmpeg
9
+ - Transkription mit Whisper (lokal)
10
+ - Downloads: SRT, VTT, TXT, JSON
11
+
12
+ Tab 2: Netzwerk / DNS Diagnose
13
+ - Testet DNS-Auflösung für mehrere Hosts
14
+ - Testet HTTP-Requests auf Basis-URLs
15
+ - Zeigt Version/Verfügbarkeit von yt-dlp und ffmpeg
16
+
17
  Hinweis: Verwende diese App nur für eigene oder freigegebene Inhalte.
18
  """
19
  import os
 
22
  import json
23
  from pathlib import Path
24
  from datetime import timedelta
25
+ import socket
26
+ import urllib.request
27
+
28
  import gradio as gr
29
 
30
  try:
 
32
  except Exception:
33
  whisper = None
34
 
35
+ # ---------------------------------------------------------------------------
36
+ # Helper: Shell
37
+ # ---------------------------------------------------------------------------
38
+
39
  def run_capture(cmd):
40
+ """Run a command and return stdout, raise RuntimeError with stderr on failure."""
41
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
42
  if result.returncode != 0:
43
  err_tail = result.stderr[-1000:] if result.stderr else ""
44
+ raise RuntimeError(f"Command failed: {' '.join(cmd)}
45
+ {err_tail}")
46
  return result.stdout
47
 
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # Download & Audio
51
+ # ---------------------------------------------------------------------------
52
+
53
+ def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=None):
54
+ """Download a video with yt-dlp into out_dir and return the video path.
55
+
56
+ - Wenn DNS/Internet für bestimmte Hosts (z.B. Instagram) geblockt ist,
57
+ wird eine verständliche Fehlermeldung zurückgegeben.
58
+ """
59
  out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
60
  cmd = ["yt-dlp", "-o", out_template]
61
  if format_selector:
 
63
  if cookies_path:
64
  cmd += ["--cookies", cookies_path]
65
  cmd.append(url)
66
+
67
  try:
68
  run_capture(cmd)
69
  except RuntimeError as e:
70
  msg = str(e)
71
  if "Failed to resolve" in msg or "Name or service not known" in msg:
72
+ raise RuntimeError(
73
+ "DNS/Internet-Problem: Der Space kann den Host nicht auflösen. "
74
+ "URL-Download ist hier nicht möglich. Bitte Videodatei direkt hochladen "
75
+ "oder den Space in einer Umgebung mit Internet/DNS-Freigabe ausführen."
76
+ )
77
  raise
78
+
79
  files = sorted(Path(out_dir).glob("*"), key=lambda p: p.stat().st_mtime, reverse=True)
80
  if not files:
81
  raise FileNotFoundError("Download fehlgeschlagen — keine Datei gefunden.")
82
  return str(files[0])
83
 
84
+
85
  def extract_audio_ffmpeg(video_path, out_wav):
86
  cmd = ["ffmpeg", "-y", "-i", video_path, "-vn", "-ac", "1", "-ar", "16000", "-f", "wav", out_wav]
87
  subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
88
  return out_wav
89
 
90
+
91
+ # ---------------------------------------------------------------------------
92
+ # Zeit- und Format-Helfer
93
+ # ---------------------------------------------------------------------------
94
+
95
  def seconds_to_timestamp(s):
96
  hours = int(s // 3600)
97
  minutes = int((s % 3600) // 60)
 
99
  ms = int(round((s - int(s)) * 1000))
100
  return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
101
 
102
+
103
  def format_timestamp_vtt(s):
104
  hours = int(s // 3600)
105
  minutes = int((s % 3600) // 60)
 
107
  ms = int(round((s - int(s)) * 1000))
108
  return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
109
 
110
+
111
  def segments_to_srt(segments):
112
  parts = []
113
  for i, seg in enumerate(segments, start=1):
114
  start = seconds_to_timestamp(seg['start'])
115
  end = seconds_to_timestamp(seg['end'])
116
  text = seg['text'].strip()
117
+ parts.append(f"{i}
118
+ {start} --> {end}
119
+ {text}
120
+ ")
121
+ return "
122
+ ".join(parts)
123
+
124
 
125
  def segments_to_vtt(segments):
126
+ parts = ["WEBVTT
127
+ "]
128
  for seg in segments:
129
  start = format_timestamp_vtt(seg['start'])
130
  end = format_timestamp_vtt(seg['end'])
131
  text = seg['text'].strip()
132
+ parts.append(f"{start} --> {end}
133
+ {text}
134
+ ")
135
+ return "
136
+ ".join(parts)
137
+
138
 
139
  def segments_to_txt(segments):
140
+ return "
141
+ ".join([f"[{seconds_to_timestamp(seg['start'])}] {seg['text'].strip()}" for seg in segments])
142
+
143
 
144
  def segments_to_json(segments, language=None, metadata=None):
145
  data = {"language": language, "segments": segments}
 
147
  data["metadata"] = metadata
148
  return json.dumps(data, ensure_ascii=False, indent=2)
149
 
150
+
151
+ # ---------------------------------------------------------------------------
152
+ # Kern-Pipeline: Transkription
153
+ # ---------------------------------------------------------------------------
154
+
155
  def transcribe_pipeline(file_obj, url, model_size, keep_video=False, cookies_file=None, format_selector=None):
156
  if whisper is None:
157
  return "Fehler: whisper ist nicht installiert.", None, None, None, None, None
158
+
159
  tmpdir = tempfile.mkdtemp(prefix="whisper_space_")
160
  try:
161
+ # Quelle bestimmen
162
  if url:
163
  cookies_path = cookies_file if cookies_file and os.path.exists(cookies_file) else None
164
  video_path = download_video_with_ytdlp(url, tmpdir, cookies_path=cookies_path, format_selector=format_selector)
 
172
  video_path = str(uploaded_path)
173
  else:
174
  return "Kein Video angegeben.", None, None, None, None, None
175
+
176
+ # Audio extrahieren
177
  audio_wav = str(Path(tmpdir) / "audio.wav")
178
  extract_audio_ffmpeg(video_path, audio_wav)
179
+
180
+ # Whisper laden & transkribieren
181
  model = whisper.load_model(model_size)
182
  result = model.transcribe(audio_wav, verbose=False)
183
  segments = result.get("segments", [])
184
  language = result.get("language", "unknown")
185
+
186
+ # Ausgaben erzeugen
187
  srt_text = segments_to_srt(segments)
188
  vtt_text = segments_to_vtt(segments)
189
  txt_text = segments_to_txt(segments)
190
  json_text = segments_to_json(segments, language, {"model": model_size})
191
+
192
  base = Path(video_path).stem
193
  files = {}
194
  for ext, content in {"srt": srt_text, "vtt": vtt_text, "txt": txt_text, "json": json_text}.items():
195
  p = Path(tmpdir) / f"{base}.{ext}"
196
  p.write_text(content, encoding="utf-8")
197
  files[ext] = str(p)
198
+
199
  if not keep_video and url:
200
  try:
201
  os.remove(video_path)
202
  except Exception:
203
  pass
204
+
205
+ meta = f"Model: {model_size}, Sprache: {language}"
206
+ return txt_text, files["srt"], files["vtt"], files["txt"], files["json"], meta
207
  except Exception as e:
208
  return f"Fehler: {e}", None, None, None, None, None
209
 
210
+
211
+ # ---------------------------------------------------------------------------
212
+ # Netzwerk / DNS Diagnose
213
+ # ---------------------------------------------------------------------------
214
+
215
def dns_internet_diag():
    """Run basic connectivity and tooling checks and return a text report.

    The report has four sections: DNS resolution of a few fixed hosts,
    HTTP GET requests against the matching base URLs (5 second timeout
    each), the yt-dlp version, and the first line of ``ffmpeg -version``.
    A failing probe is written into the report as an error line instead of
    being raised.

    Returns:
        The report lines joined by newlines.
    """
    lines = []

    # DNS checks
    lines.append("=== DNS-Auflösung ===")
    for host in ["huggingface.co", "www.google.com", "www.instagram.com"]:
        try:
            ip = socket.gethostbyname(host)
            lines.append(f"{host} -> {ip}")
        except Exception as e:  # diagnostic boundary: report, never raise
            lines.append(f"{host} -> ERROR: {e}")

    # HTTP checks
    # Section headers start with the "\n" escape to leave a blank line before
    # them; a raw newline inside a single-quoted string is a SyntaxError.
    lines.append("\n=== HTTP-Requests (GET) ===")
    for url in ["https://huggingface.co", "https://www.google.com", "https://www.instagram.com"]:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                # Prefer the `status` attribute, fall back to getcode().
                code = getattr(resp, "status", None) or resp.getcode()
            lines.append(f"{url} -> OK (Status {code})")
        except Exception as e:  # diagnostic boundary: report, never raise
            lines.append(f"{url} -> ERROR: {e}")

    # yt-dlp
    lines.append("\n=== yt-dlp ===")
    try:
        out = run_capture(["yt-dlp", "--version"])
        lines.append(f"yt-dlp Version: {out.strip()}")
    except Exception as e:  # also covers a missing executable
        lines.append(f"yt-dlp Fehler: {e}")

    # ffmpeg
    lines.append("\n=== ffmpeg ===")
    try:
        out = run_capture(["ffmpeg", "-version"])
        # Only the first line of the version banner is reported.
        first = out.splitlines()[0] if out else "(keine Ausgabe)"
        lines.append(first)
    except Exception as e:  # also covers a missing executable
        lines.append(f"ffmpeg Fehler: {e}")

    return "\n".join(lines)
260
+
261
+
262
+ # ---------------------------------------------------------------------------
263
+ # Gradio UI mit zwei Tabs
264
+ # ---------------------------------------------------------------------------
265
+
266
  with gr.Blocks() as demo:
267
  gr.Markdown("# Video → Whisper Transkript (SRT/TXT/VTT/JSON)")
268
+
269
+ with gr.Tab("Transkription"):
270
+ with gr.Row():
271
+ with gr.Column():
272
+ url_in = gr.Textbox(label="Video URL", placeholder="https://...")
273
+ file_in = gr.File(label="Oder Videodatei hochladen")
274
+ cookies_in = gr.File(label="Cookies.txt (optional, für yt-dlp)")
275
+ fmt_in = gr.Textbox(label="Format (optional, yt-dlp -f)", placeholder="z.B. bestvideo+bestaudio/best")
276
+ model_sel = gr.Radio(["tiny", "base", "small", "medium", "large"], value="small", label="Whisper-Modell")
277
+ keep_chk = gr.Checkbox(label="Video behalten (bei URL-Download)", value=False)
278
+ btn = gr.Button("Transkribieren")
279
+ status = gr.Textbox(label="Status / Meta", interactive=False)
280
+ with gr.Column():
281
+ transcript = gr.Textbox(label="Transkript", lines=20)
282
+ srt_dl = gr.File(label="SRT", visible=False)
283
+ vtt_dl = gr.File(label="VTT", visible=False)
284
+ txt_dl = gr.File(label="TXT", visible=False)
285
+ json_dl = gr.File(label="JSON", visible=False)
286
+
287
+ def run_transcribe(f, u, m, k, c, fmt):
288
+ cookies_path = c if isinstance(c, str) and os.path.exists(c) else None
289
+ display, srtf, vttf, txtf, jsonf, meta = transcribe_pipeline(f, u, m, k, cookies_file=cookies_path, format_selector=(fmt or None))
290
+ return (
291
+ display,
292
+ gr.update(value=srtf, visible=bool(srtf)),
293
+ gr.update(value=vttf, visible=bool(vttf)),
294
+ gr.update(value=txtf, visible=bool(txtf)),
295
+ gr.update(value=jsonf, visible=bool(jsonf)),
296
+ meta,
297
+ )
298
+
299
+ btn.click(
300
+ run_transcribe,
301
+ [file_in, url_in, model_sel, keep_chk, cookies_in, fmt_in],
302
+ [transcript, srt_dl, vtt_dl, txt_dl, json_dl, status],
303
+ )
304
+
305
+ with gr.Tab("Netzwerk / DNS Diagnose"):
306
+ gr.Markdown(
307
+ """Führt einfache Tests für DNS, HTTP sowie yt-dlp/ffmpeg aus.
308
+
309
+ - Wenn z. B. `www.instagram.com` nicht auflösbar ist, liegt ein DNS-/Firewall-Problem vor.
310
+
311
+ - Wenn Hugging Face / Google funktionieren, aber Instagram nicht, blockt vermutlich die Umgebung nur bestimmte Domains.
312
+ """
313
+ )
314
+ diag_btn = gr.Button("Diagnose starten")
315
+ diag_out = gr.Textbox(label="Diagnose-Ausgabe", lines=25)
316
+
317
+ diag_btn.click(dns_internet_diag, inputs=[], outputs=[diag_out])
318
+
319
 
320
  if __name__ == "__main__":
321
+ demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))