neuralworm committed on
Commit
f862cfc
·
verified ·
1 Parent(s): e430acc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -67
app.py CHANGED
@@ -1,21 +1,24 @@
1
  #!/usr/bin/env python3
2
  # coding: utf-8
3
- """
4
- Hugging Face Space (Gradio) App: Video -> Audio -> Whisper Transkript (+ Downloads SRT/TXT/VTT/JSON)
5
 
6
  Tab 1: Transkription
 
7
  - Video per URL (yt-dlp) oder Upload
8
  - Audio-Extraktion via ffmpeg
9
  - Transkription mit Whisper (lokal)
10
  - Downloads: SRT, VTT, TXT, JSON
11
 
 
12
  Tab 2: Netzwerk / DNS Diagnose
 
13
  - Testet DNS-Auflösung für mehrere Hosts
14
  - Testet HTTP-Requests auf Basis-URLs
15
  - Zeigt Version/Verfügbarkeit von yt-dlp und ffmpeg
16
 
17
- Hinweis: Verwende diese App nur für eigene oder freigegebene Inhalte.
18
- """
19
  import os
20
  import subprocess
21
  import tempfile
@@ -40,9 +43,9 @@ def run_capture(cmd):
40
  """Run a command and return stdout; raise RuntimeError with readable stderr on failure."""
41
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
42
  if result.returncode != 0:
43
- stderr_text = result.stderr or "" # Keep only tail to avoid massive logs
44
  tail = stderr_text[-2000:]
45
- # KORRIGIERTE ZEILE:
46
  raise RuntimeError("Command failed: " + " ".join(cmd) + " " + tail)
47
  return result.stdout
48
 
@@ -51,11 +54,7 @@ def run_capture(cmd):
51
  # ---------------------------------------------------------------------------
52
 
53
  def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=None):
54
- """Download a video with yt-dlp into out_dir and return the video path.
55
-
56
- - Wenn DNS/Internet für bestimmte Hosts (z.B. Instagram) geblockt ist,
57
- wird eine verständliche Fehlermeldung zurückgegeben.
58
- """
59
  out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
60
  cmd = ["yt-dlp", "-o", out_template]
61
  if format_selector:
@@ -81,13 +80,11 @@ def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=N
81
  raise FileNotFoundError("Download fehlgeschlagen — keine Datei gefunden.")
82
  return str(files[0])
83
 
84
-
85
  def extract_audio_ffmpeg(video_path, out_wav):
86
  cmd = ["ffmpeg", "-y", "-i", video_path, "-vn", "-ac", "1", "-ar", "16000", "-f", "wav", out_wav]
87
  subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
88
  return out_wav
89
 
90
-
91
  # ---------------------------------------------------------------------------
92
  # Zeit- und Format-Helfer
93
  # ---------------------------------------------------------------------------
@@ -99,7 +96,6 @@ def seconds_to_timestamp(s):
99
  ms = int(round((s - int(s)) * 1000))
100
  return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
101
 
102
-
103
  def format_timestamp_vtt(s):
104
  hours = int(s // 3600)
105
  minutes = int((s % 3600) // 60)
@@ -107,39 +103,35 @@ def format_timestamp_vtt(s):
107
  ms = int(round((s - int(s)) * 1000))
108
  return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
109
 
110
-
111
  def segments_to_srt(segments):
 
112
  parts = []
113
  for i, seg in enumerate(segments, start=1):
114
  start = seconds_to_timestamp(seg['start'])
115
  end = seconds_to_timestamp(seg['end'])
116
  text = seg['text'].strip()
117
- parts.append(f"{i}
118
- {start} --> {end}
119
- {text}
120
- ")
121
- return "
122
- ".join(parts)
123
-
124
 
125
  def segments_to_vtt(segments):
126
- parts = ["WEBVTT
127
- "]
 
128
  for seg in segments:
129
  start = format_timestamp_vtt(seg['start'])
130
  end = format_timestamp_vtt(seg['end'])
131
  text = seg['text'].strip()
132
- parts.append(f"{start} --> {end}
133
- {text}
134
- ")
135
- return "
136
- ".join(parts)
137
-
138
 
139
  def segments_to_txt(segments):
140
- return "
141
- ".join([f"[{seconds_to_timestamp(seg['start'])}] {seg['text'].strip()}" for seg in segments])
142
-
143
 
144
  def segments_to_json(segments, language=None, metadata=None):
145
  data = {"language": language, "segments": segments}
@@ -147,7 +139,6 @@ def segments_to_json(segments, language=None, metadata=None):
147
  data["metadata"] = metadata
148
  return json.dumps(data, ensure_ascii=False, indent=2)
149
 
150
-
151
  # ---------------------------------------------------------------------------
152
  # Kern-Pipeline: Transkription
153
  # ---------------------------------------------------------------------------
@@ -160,16 +151,10 @@ def transcribe_pipeline(file_obj, url, model_size, keep_video=False, cookies_fil
160
  try:
161
  # Quelle bestimmen
162
  if url:
163
- cookies_path = cookies_file if cookies_file and os.path.exists(cookies_file) else None
164
- video_path = download_video_with_ytdlp(url, tmpdir, cookies_path=cookies_path, format_selector=format_selector)
165
  elif file_obj:
166
- if isinstance(file_obj, str) and os.path.exists(file_obj):
167
- video_path = file_obj
168
- else:
169
- uploaded_path = Path(tmpdir) / Path(getattr(file_obj, "name", "upload")).name
170
- with open(uploaded_path, "wb") as f:
171
- f.write(file_obj.read())
172
- video_path = str(uploaded_path)
173
  else:
174
  return "Kein Video angegeben.", None, None, None, None, None
175
 
@@ -184,9 +169,9 @@ def transcribe_pipeline(file_obj, url, model_size, keep_video=False, cookies_fil
184
  language = result.get("language", "unknown")
185
 
186
  # Ausgaben erzeugen
 
187
  srt_text = segments_to_srt(segments)
188
  vtt_text = segments_to_vtt(segments)
189
- txt_text = segments_to_txt(segments)
190
  json_text = segments_to_json(segments, language, {"model": model_size})
191
 
192
  base = Path(video_path).stem
@@ -207,7 +192,6 @@ def transcribe_pipeline(file_obj, url, model_size, keep_video=False, cookies_fil
207
  except Exception as e:
208
  return f"Fehler: {e}", None, None, None, None, None
209
 
210
-
211
  # ---------------------------------------------------------------------------
212
  # Netzwerk / DNS Diagnose
213
  # ---------------------------------------------------------------------------
@@ -218,16 +202,15 @@ def dns_internet_diag():
218
 
219
  # DNS-Checks
220
  lines.append("=== DNS-Auflösung ===")
221
- for host in ["huggingface.co", "www.google.com", "www.instagram.com"]:
222
  try:
223
  ip = socket.gethostbyname(host)
224
- lines.append(f"{host} -> {ip}")
225
  except Exception as e:
226
  lines.append(f"{host} -> ERROR: {e}")
227
 
228
  # HTTP-Checks
229
- lines.append("
230
- === HTTP-Requests (GET) ===")
231
  for url in ["https://huggingface.co", "https://www.google.com", "https://www.instagram.com"]:
232
  try:
233
  with urllib.request.urlopen(url, timeout=5) as resp:
@@ -237,8 +220,7 @@ def dns_internet_diag():
237
  lines.append(f"{url} -> ERROR: {e}")
238
 
239
  # yt-dlp
240
- lines.append("
241
- === yt-dlp ===")
242
  try:
243
  out = run_capture(["yt-dlp", "--version"])
244
  lines.append(f"yt-dlp Version: {out.strip()}")
@@ -246,8 +228,7 @@ def dns_internet_diag():
246
  lines.append(f"yt-dlp Fehler: {e}")
247
 
248
  # ffmpeg
249
- lines.append("
250
- === ffmpeg ===")
251
  try:
252
  out = run_capture(["ffmpeg", "-version"])
253
  first = out.splitlines()[0] if out else "(keine Ausgabe)"
@@ -255,9 +236,7 @@ def dns_internet_diag():
255
  except Exception as e:
256
  lines.append(f"ffmpeg Fehler: {e}")
257
 
258
- return "
259
- ".join(lines)
260
-
261
 
262
  # ---------------------------------------------------------------------------
263
  # Gradio UI mit zwei Tabs
@@ -279,14 +258,19 @@ with gr.Blocks() as demo:
279
  status = gr.Textbox(label="Status / Meta", interactive=False)
280
  with gr.Column():
281
  transcript = gr.Textbox(label="Transkript", lines=20)
282
- srt_dl = gr.File(label="SRT", visible=False)
283
- vtt_dl = gr.File(label="VTT", visible=False)
284
- txt_dl = gr.File(label="TXT", visible=False)
285
- json_dl = gr.File(label="JSON", visible=False)
286
 
287
  def run_transcribe(f, u, m, k, c, fmt):
288
- cookies_path = c if isinstance(c, str) and os.path.exists(c) else None
289
- display, srtf, vttf, txtf, jsonf, meta = transcribe_pipeline(f, u, m, k, cookies_file=cookies_path, format_selector=(fmt or None))
 
 
 
 
 
290
  return (
291
  display,
292
  gr.update(value=srtf, visible=bool(srtf)),
@@ -306,16 +290,14 @@ with gr.Blocks() as demo:
306
  gr.Markdown(
307
  """Führt einfache Tests für DNS, HTTP sowie yt-dlp/ffmpeg aus.
308
 
309
- - Wenn z. B. `www.instagram.com` nicht auflösbar ist, liegt ein DNS-/Firewall-Problem vor.
310
 
311
- - Wenn Hugging Face / Google funktionieren, aber Instagram nicht, blockt vermutlich die Umgebung nur bestimmte Domains.
312
- """
313
  )
314
  diag_btn = gr.Button("Diagnose starten")
315
  diag_out = gr.Textbox(label="Diagnose-Ausgabe", lines=25)
316
 
317
  diag_btn.click(dns_internet_diag, inputs=[], outputs=[diag_out])
318
 
319
-
320
  if __name__ == "__main__":
321
- demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
1
  #!/usr/bin/env python3
2
  # coding: utf-8
3
+
4
+ """ Hugging Face Space (Gradio) App: Video -> Audio -> Whisper Transkript (+ Downloads SRT/TXT/VTT/JSON)
5
 
6
  Tab 1: Transkription
7
+
8
  - Video per URL (yt-dlp) oder Upload
9
  - Audio-Extraktion via ffmpeg
10
  - Transkription mit Whisper (lokal)
11
  - Downloads: SRT, VTT, TXT, JSON
12
 
13
+
14
  Tab 2: Netzwerk / DNS Diagnose
15
+
16
  - Testet DNS-Auflösung für mehrere Hosts
17
  - Testet HTTP-Requests auf Basis-URLs
18
  - Zeigt Version/Verfügbarkeit von yt-dlp und ffmpeg
19
 
20
+
21
+ Hinweis: Verwende diese App nur für eigene oder freigegebene Inhalte. """
22
  import os
23
  import subprocess
24
  import tempfile
 
43
  """Run a command and return stdout; raise RuntimeError with readable stderr on failure."""
44
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
45
  if result.returncode != 0:
46
+ stderr_text = result.stderr or "" # Keep only tail to avoid massive logs
47
  tail = stderr_text[-2000:]
48
+ # KORRIGIERT: Fehlermeldung korrekt zusammengebaut und abgeschlossen.
49
  raise RuntimeError("Command failed: " + " ".join(cmd) + " " + tail)
50
  return result.stdout
51
 
 
54
  # ---------------------------------------------------------------------------
55
 
56
  def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=None):
57
+ """Download a video with yt-dlp into out_dir and return the video path."""
 
 
 
 
58
  out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
59
  cmd = ["yt-dlp", "-o", out_template]
60
  if format_selector:
 
80
  raise FileNotFoundError("Download fehlgeschlagen — keine Datei gefunden.")
81
  return str(files[0])
82
 
 
def extract_audio_ffmpeg(video_path, out_wav):
    """Extract the audio track of a media file into a WAV file using ffmpeg.

    The command overwrites an existing output (``-y``), drops the video
    stream (``-vn``) and requests one channel (``-ac 1``) at a 16000 Hz
    sample rate (``-ar 16000``) in WAV format.

    Args:
        video_path: Path of the input file handed to ``ffmpeg -i``.
        out_wav: Path of the WAV file to write.

    Returns:
        ``out_wav``, unchanged, so the call can be chained.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status. The captured ffmpeg diagnostics are available on the
            exception as ``exc.stderr``.
    """
    cmd = ["ffmpeg", "-y", "-i", video_path, "-vn", "-ac", "1", "-ar", "16000", "-f", "wav", out_wav]
    # Capture stderr instead of discarding it: ffmpeg reports the reason for a
    # failure there, and CalledProcessError carries it along for the caller.
    # errors="replace" keeps decoding from failing on non-UTF-8 bytes.
    subprocess.run(
        cmd,
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        text=True,
        errors="replace",
    )
    return out_wav
87
 
 
88
  # ---------------------------------------------------------------------------
89
  # Zeit- und Format-Helfer
90
  # ---------------------------------------------------------------------------
 
96
  ms = int(round((s - int(s)) * 1000))
97
  return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
98
 
 
99
  def format_timestamp_vtt(s):
100
  hours = int(s // 3600)
101
  minutes = int((s % 3600) // 60)
 
103
  ms = int(round((s - int(s)) * 1000))
104
  return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
105
 
 
def segments_to_srt(segments):
    """Format transcript segments as an SRT subtitle string.

    Args:
        segments: Iterable of mappings with ``'start'``, ``'end'`` and
            ``'text'`` keys. ``start`` and ``end`` are passed to
            ``seconds_to_timestamp``; ``text`` is stripped of surrounding
            whitespace.

    Returns:
        The cue blocks, numbered from 1, separated by one blank line and
        followed by a trailing blank line.
    """
    parts = []
    for i, seg in enumerate(segments, start=1):
        start = seconds_to_timestamp(seg['start'])
        end = seconds_to_timestamp(seg['end'])
        text = seg['text'].strip()
        # One SRT cue: index line, timing line, then the cue text.
        parts.append(f"{i}\n{start} --> {end}\n{text}")
    # Cues are separated by an empty line; the output also ends with one.
    return "\n\n".join(parts) + "\n\n"
 
 
 
117
 
def segments_to_vtt(segments):
    """Format transcript segments as a WebVTT subtitle string.

    Args:
        segments: Iterable of mappings with ``'start'``, ``'end'`` and
            ``'text'`` keys. ``start`` and ``end`` are passed to
            ``format_timestamp_vtt``; ``text`` is stripped of surrounding
            whitespace.

    Returns:
        The ``WEBVTT`` header followed by one cue block per segment, with
        the pieces joined by blank lines. With no segments the result is
        just the header line.
    """
    # The header entry carries its own newline, so it is followed by that
    # newline plus the join separator before the first cue.
    parts = ["WEBVTT\n"]
    for seg in segments:
        start = format_timestamp_vtt(seg['start'])
        end = format_timestamp_vtt(seg['end'])
        text = seg['text'].strip()
        # One VTT cue: timing line, then the cue text.
        parts.append(f"{start} --> {end}\n{text}")
    return "\n\n".join(parts)
 
 
130
 
def segments_to_txt(segments):
    """Format transcript segments as plain text, one segment per line.

    Args:
        segments: Iterable of mappings with ``'start'`` and ``'text'`` keys.
            ``start`` is passed to ``seconds_to_timestamp``; ``text`` is
            stripped of surrounding whitespace.

    Returns:
        Lines of the form ``[<timestamp>] <text>`` joined with newlines
        (no trailing newline).
    """
    return "\n".join(
        f"[{seconds_to_timestamp(seg['start'])}] {seg['text'].strip()}"
        for seg in segments
    )
135
 
136
  def segments_to_json(segments, language=None, metadata=None):
137
  data = {"language": language, "segments": segments}
 
139
  data["metadata"] = metadata
140
  return json.dumps(data, ensure_ascii=False, indent=2)
141
 
 
142
  # ---------------------------------------------------------------------------
143
  # Kern-Pipeline: Transkription
144
  # ---------------------------------------------------------------------------
 
151
  try:
152
  # Quelle bestimmen
153
  if url:
154
+ video_path = download_video_with_ytdlp(url, tmpdir, cookies_path=cookies_file, format_selector=format_selector)
 
155
  elif file_obj:
156
+ # Gradio übergibt ein temporäres Dateiobjekt, dessen .name Attribut der Pfad ist.
157
+ video_path = file_obj.name
 
 
 
 
 
158
  else:
159
  return "Kein Video angegeben.", None, None, None, None, None
160
 
 
169
  language = result.get("language", "unknown")
170
 
171
  # Ausgaben erzeugen
172
+ txt_text = segments_to_txt(segments)
173
  srt_text = segments_to_srt(segments)
174
  vtt_text = segments_to_vtt(segments)
 
175
  json_text = segments_to_json(segments, language, {"model": model_size})
176
 
177
  base = Path(video_path).stem
 
192
  except Exception as e:
193
  return f"Fehler: {e}", None, None, None, None, None
194
 
 
195
  # ---------------------------------------------------------------------------
196
  # Netzwerk / DNS Diagnose
197
  # ---------------------------------------------------------------------------
 
202
 
203
  # DNS-Checks
204
  lines.append("=== DNS-Auflösung ===")
205
+ for host in ["huggingface.co", "www.google.com", "www.instagram.com", "youtube.com"]:
206
  try:
207
  ip = socket.gethostbyname(host)
208
+ lines.append(f"{host} -> {ip} (OK)")
209
  except Exception as e:
210
  lines.append(f"{host} -> ERROR: {e}")
211
 
212
  # HTTP-Checks
213
+ lines.append("\n\n=== HTTP-Requests (GET) ===")
 
214
  for url in ["https://huggingface.co", "https://www.google.com", "https://www.instagram.com"]:
215
  try:
216
  with urllib.request.urlopen(url, timeout=5) as resp:
 
220
  lines.append(f"{url} -> ERROR: {e}")
221
 
222
  # yt-dlp
223
+ lines.append("\n\n=== yt-dlp ===")
 
224
  try:
225
  out = run_capture(["yt-dlp", "--version"])
226
  lines.append(f"yt-dlp Version: {out.strip()}")
 
228
  lines.append(f"yt-dlp Fehler: {e}")
229
 
230
  # ffmpeg
231
+ lines.append("\n\n=== ffmpeg ===")
 
232
  try:
233
  out = run_capture(["ffmpeg", "-version"])
234
  first = out.splitlines()[0] if out else "(keine Ausgabe)"
 
236
  except Exception as e:
237
  lines.append(f"ffmpeg Fehler: {e}")
238
 
239
+ return "\n".join(lines)
 
 
240
 
241
  # ---------------------------------------------------------------------------
242
  # Gradio UI mit zwei Tabs
 
258
  status = gr.Textbox(label="Status / Meta", interactive=False)
259
  with gr.Column():
260
  transcript = gr.Textbox(label="Transkript", lines=20)
261
+ srt_dl = gr.File(label="SRT")
262
+ vtt_dl = gr.File(label="VTT")
263
+ txt_dl = gr.File(label="TXT")
264
+ json_dl = gr.File(label="JSON")
265
 
266
  def run_transcribe(f, u, m, k, c, fmt):
267
+ # KORRIGIERT: Korrekte Handhabung des Gradio-Dateiobjekts für Cookies.
268
+ # Wir holen den Pfad über das .name Attribut.
269
+ cookies_path = c.name if c else None
270
+
271
+ display, srtf, vttf, txtf, jsonf, meta = transcribe_pipeline(
272
+ f, u, m, k, cookies_file=cookies_path, format_selector=(fmt or None)
273
+ )
274
  return (
275
  display,
276
  gr.update(value=srtf, visible=bool(srtf)),
 
290
  gr.Markdown(
291
  """Führt einfache Tests für DNS, HTTP sowie yt-dlp/ffmpeg aus.
292
 
293
+ Wenn z. B. www.instagram.com nicht auflösbar ist, liegt ein DNS-/Firewall-Problem vor.
294
 
295
+ Wenn Hugging Face / Google funktionieren, aber Instagram nicht, blockt vermutlich die Umgebung nur bestimmte Domains."""
 
296
  )
297
  diag_btn = gr.Button("Diagnose starten")
298
  diag_out = gr.Textbox(label="Diagnose-Ausgabe", lines=25)
299
 
300
  diag_btn.click(dns_internet_diag, inputs=[], outputs=[diag_out])
301
 
 
302
  if __name__ == "__main__":
303
+ demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))