neuralworm committed on
Commit
f137403
·
verified ·
1 Parent(s): 11c876d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -37
app.py CHANGED
@@ -2,7 +2,7 @@
2
  # coding: utf-8
3
 
4
  """ Hugging Face Space (Gradio) App: Video -> Audio -> Whisper Transkript (+ Downloads SRT/TXT/VTT/JSON)
5
- FINALE, KORRIGIERTE LÖSUNG: Verwendet die korrekte yt-dlp Option --force-ip.
6
  """
7
  import os
8
  import subprocess
@@ -28,7 +28,7 @@ except ImportError:
28
  dns_resolver = None
29
 
30
  # ---------------------------------------------------------------------------
31
- # DEFINITIVE AUFRUFMETHODE: yt-dlp als Modul
32
  # ---------------------------------------------------------------------------
33
# Base argv for yt-dlp: run it as a module of the current interpreter
# ("<python> -m yt_dlp") instead of relying on an executable found on PATH.
YT_DLP_COMMAND = [sys.executable, "-m", "yt_dlp"]
# Executable name used as the first element of ffmpeg command lines.
FFMPEG_PATH = "ffmpeg"
@@ -45,19 +45,29 @@ def run_capture(cmd):
45
  raise RuntimeError("Command failed: " + " ".join(map(str, cmd)) + " " + tail)
46
  return result.stdout
47
 
48
- # ... (resolve_hostname_with_dns_python bleibt gleich)
 
 
49
def resolve_hostname_with_dns_python(hostname):
    """Return an IPv4 address for *hostname* as text.

    When the optional ``dns_resolver`` module is available, an ``A`` record is
    requested from the name servers 8.8.8.8 and 1.1.1.1. If the module is
    missing, or that lookup raises, the result of ``socket.gethostbyname`` is
    returned instead. ``None`` is returned when the lookup yields an empty
    answer.
    """
    dnspython_missing = not dns_resolver
    if dnspython_missing:
        return socket.gethostbyname(hostname)
    try:
        public_dns = dns_resolver.Resolver()
        public_dns.nameservers = ['8.8.8.8', '1.1.1.1']
        records = public_dns.resolve(hostname, 'A')
        if records:
            return records[0].to_text()
        return None
    except Exception:
        # Any failure of the explicit lookup falls back to the system resolver.
        return socket.gethostbyname(hostname)
 
 
56
 
57
  # ---------------------------------------------------------------------------
58
- # MODIFIZIERTE FUNKTION: Download & Audio mit der KORREKTEN Option
59
  # ---------------------------------------------------------------------------
60
  def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=None):
 
61
  out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
62
  cmd = YT_DLP_COMMAND + ["-o", out_template]
63
 
@@ -66,9 +76,9 @@ def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=N
66
  if hostname:
67
  ip_address = resolve_hostname_with_dns_python(hostname)
68
  if ip_address:
69
- print(f"Resolved {hostname} to {ip_address}. Using --force-ip.")
70
- # DIES IST DIE KORREKTE OPTION, KEINE HALLUZINATION
71
- cmd.extend(["--force-ip", ip_address])
72
  except Exception as e:
73
  print(f"Custom DNS resolution failed, proceeding without it. Error: {e}")
74
 
@@ -84,57 +94,155 @@ def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=N
84
  return str(files[0])
85
 
86
def extract_audio_ffmpeg(video_path, out_wav):
    """Run ffmpeg to write the audio of *video_path* to *out_wav*.

    The command drops video (``-vn``), requests one channel (``-ac 1``) at
    16000 Hz (``-ar 16000``) in WAV format and overwrites an existing output
    (``-y``). ffmpeg output is discarded; a non-zero exit status raises
    ``subprocess.CalledProcessError``. Returns *out_wav*.
    """
    ffmpeg_args = [
        FFMPEG_PATH, "-y",
        "-i", video_path,
        "-vn",
        "-ac", "1",
        "-ar", "16000",
        "-f", "wav",
        out_wav,
    ]
    subprocess.run(
        ffmpeg_args,
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return out_wav
90
 
91
- # ... (Rest des Codes bleibt identisch)
92
def seconds_to_timestamp(s):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a fraction such as 1.9996 carries into the seconds field
    ("00:00:02,000") instead of producing an invalid 4-digit millisecond
    part. Negative inputs are clamped to zero.
    """
    total_ms = max(0, int(round(s * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
93
def format_timestamp_vtt(s):
    """Format a time offset in seconds as a WebVTT timestamp ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a fraction such as 1.9996 carries into the seconds field
    ("00:00:02.000") instead of producing an invalid 4-digit millisecond
    part. Negative inputs are clamped to zero.
    """
    total_ms = max(0, int(round(s * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
94
def segments_to_srt(segments):
    """Render *segments* as SRT text.

    Each segment is a mapping with ``start``, ``end`` and ``text`` keys. Cues
    are numbered from 1, separated by a blank line, and the result always ends
    with two newlines (also for an empty input).
    """
    cues = []
    for number, seg in enumerate(segments, 1):
        begin = seconds_to_timestamp(seg['start'])
        finish = seconds_to_timestamp(seg['end'])
        cues.append(f"{number}\n{begin} --> {finish}\n{seg['text'].strip()}")
    return "\n\n".join(cues) + "\n\n"
95
def segments_to_vtt(segments):
    """Render *segments* as WebVTT text.

    The output starts with the ``WEBVTT`` header line; every segment (mapping
    with ``start``, ``end`` and ``text`` keys) becomes one cue, and all parts
    are joined with a blank line.
    """
    cues = ["WEBVTT\n"]
    for seg in segments:
        begin = format_timestamp_vtt(seg['start'])
        finish = format_timestamp_vtt(seg['end'])
        cues.append(f"{begin} --> {finish}\n{seg['text'].strip()}")
    return "\n\n".join(cues)
96
def segments_to_txt(segments):
    """Render *segments* as plain text, one ``[start-timestamp] text`` line each."""
    rendered = []
    for seg in segments:
        stamp = seconds_to_timestamp(seg['start'])
        rendered.append(f"[{stamp}] {seg['text'].strip()}")
    return "\n".join(rendered)
97
def segments_to_json(segments, lang=None, meta=None):
    """Serialize a transcription result to an indented JSON string.

    Args:
        segments: Segment list stored under the ``"segments"`` key.
        lang: Value stored under the ``"language"`` key.
        meta: Optional mapping stored under ``"metadata"``; omitted when falsy.

    Returns:
        JSON text with 2-space indentation and non-ASCII characters kept as-is.
    """
    payload = {"language": lang, "segments": segments}
    # Plain conditional instead of a throwaway list built only for its side effect.
    if meta:
        payload["metadata"] = meta
    return json.dumps(payload, ensure_ascii=False, indent=2)
98
def transcribe_pipeline(f, u, m, k, c, fmt):
    """Obtain a video, transcribe its audio with Whisper and write subtitle files.

    Args:
        f: Uploaded file; its path is read from ``.name`` (a plain path is
            accepted as well). Only used when ``u`` is empty.
        u: Video URL passed to ``download_video_with_ytdlp``; wins over ``f``.
        m: Model name passed to ``whisper.load_model``.
        k: When true, a video downloaded from ``u`` is kept on disk.
        c: Cookies file path forwarded to the downloader, or ``None``.
        fmt: Format selector forwarded to the downloader, or ``None``.

    Returns:
        A 6-tuple ``(text, srt_path, vtt_path, txt_path, json_path, meta)``.
        On any failure the first element is an error message and the other
        five are ``None``.
    """
    import shutil  # local import: only needed to clean up after a failed run

    if whisper is None:
        return "Fehler: whisper ist nicht installiert.", *[None] * 5
    # Checked up front: previously a missing upload raised AttributeError on
    # ``f.name`` and surfaced as a confusing generic error.
    if not u and not f:
        return "Kein Video angegeben.", *[None] * 5

    tmpdir = tempfile.mkdtemp(prefix="whisper_space_")
    keep_tmpdir = False  # only a successful run hands files in tmpdir to the caller
    try:
        if u:
            video_path = download_video_with_ytdlp(u, tmpdir, c, fmt)
        else:
            video_path = getattr(f, "name", f)
        if not video_path:
            return "Kein Video angegeben.", *[None] * 5

        audio_wav = str(Path(tmpdir) / "audio.wav")
        extract_audio_ffmpeg(video_path, audio_wav)

        model = whisper.load_model(m)
        result = model.transcribe(audio_wav, verbose=False)
        segs = result.get("segments", [])
        lang = result.get("language", "unknown")

        contents = {
            "srt": segments_to_srt(segs),
            "vtt": segments_to_vtt(segs),
            "txt": segments_to_txt(segs),
            "json": segments_to_json(segs, lang, {"model": m}),
        }
        base = Path(video_path).stem
        files = {}
        for ext, content in contents.items():
            target = Path(tmpdir) / f"{base}.{ext}"
            target.write_text(content, encoding="utf-8")
            files[ext] = str(target)

        # Downloaded videos are removed unless the caller asked to keep them.
        if not k and u and os.path.exists(video_path):
            os.remove(video_path)

        meta = f"Model: {m}, Sprache: {lang}"
        keep_tmpdir = True
        return contents["txt"], files["srt"], files["vtt"], files["txt"], files["json"], meta
    except Exception as e:
        return f"Fehler: {e}", *[None] * 5
    finally:
        # Failed runs return no paths, so their temporary directory is dropped.
        if not keep_tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
def dns_internet_diag():
    """Build a plain-text diagnostics report.

    The report lists the interpreter path, the output of the yt-dlp module
    invoked with ``--version`` (or the error raised while running it), and the
    result of ``resolve_hostname_with_dns_python`` for three fixed host names.
    Returns the report lines joined with newlines.
    """
    report = [
        "=== Python & Version Info ===",
        f"Python Executable: {sys.executable}",
    ]
    try:
        version_cmd = YT_DLP_COMMAND + ["--version"]
        reported = run_capture(version_cmd).strip()
        cmd_text = ' '.join(version_cmd)
        report.append(f"Version via '{cmd_text}': {reported}")
    except Exception as err:
        report.append(f"Fehler bei der Prüfung der yt-dlp Modul-Version: {err}")

    report.append("\n\n=== DNS-Auflösung (via dnspython mit 8.8.8.8) ===")
    for hostname in ("huggingface.co", "www.instagram.com", "youtube.com"):
        try:
            address = resolve_hostname_with_dns_python(hostname)
            entry = f"{hostname} -> {address} (OK)"
        except Exception as err:
            entry = f"{hostname} -> ERROR: {err}"
        report.append(entry)
    return "\n".join(report)
 
 
 
 
128
# Gradio interface definition; `demo` is the Blocks app launched at the bottom.
with gr.Blocks() as demo:
    gr.Markdown("# Video → Whisper Transkript (SRT/TXT/VTT/JSON)")

    # Tab 1: transcription. Inputs in the first column, results in the second.
    with gr.Tab("Transkription"):
        with gr.Row():
            with gr.Column(): url_in=gr.Textbox(label="Video URL",placeholder="https://..."); file_in=gr.File(label="Oder Videodatei hochladen"); cookies_in=gr.File(label="Cookies.txt (optional, für yt-dlp)"); fmt_in=gr.Textbox(label="Format (optional, yt-dlp -f)",placeholder="z.B. bestvideo+bestaudio/best"); model_sel=gr.Radio(["tiny","base","small","medium","large"],value="small",label="Whisper-Modell"); keep_chk=gr.Checkbox(label="Video behalten (bei URL-Download)",value=False); btn=gr.Button("Transkribieren"); status=gr.Textbox(label="Status / Meta",interactive=False)
            with gr.Column(): transcript=gr.Textbox(label="Transkript",lines=20); srt_dl=gr.File(label="SRT"); vtt_dl=gr.File(label="VTT"); txt_dl=gr.File(label="TXT"); json_dl=gr.File(label="JSON")
        # Click handler: reads the cookies path from the upload's `.name`, maps an
        # empty format string to None, runs the pipeline, and shows each download
        # component only when the pipeline returned a value for it.
        def run_transcribe(f, u, m, k, c, fmt): cookies_path = c.name if c else None; d, s, v, t, j, meta = transcribe_pipeline(f, u, m, k, cookies_path, (fmt or None)); return (d, gr.update(value=s,visible=bool(s)), gr.update(value=v,visible=bool(v)), gr.update(value=t,visible=bool(t)), gr.update(value=j,visible=bool(j)), meta,)
        # Input order must match the handler's parameters, output order its return tuple.
        btn.click(run_transcribe, [file_in, url_in, model_sel, keep_chk, cookies_in, fmt_in], [transcript, srt_dl, vtt_dl, txt_dl, json_dl, status])

    # Tab 2: diagnostics; the button writes the report text into the textbox.
    with gr.Tab("Netzwerk / DNS Diagnose"):
        gr.Markdown("""Prüft die Version von yt-dlp, wie sie von Python als Modul ausgeführt wird."""); diag_btn=gr.Button("Diagnose starten"); diag_out=gr.Textbox(label="Diagnose-Ausgabe",lines=25)
        diag_btn.click(dns_internet_diag, inputs=[], outputs=[diag_out])

# Script entry point: listen on all interfaces, port from $PORT or 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
2
  # coding: utf-8
3
 
4
  """ Hugging Face Space (Gradio) App: Video -> Audio -> Whisper Transkript (+ Downloads SRT/TXT/VTT/JSON)
5
+ ENDGÜLTIGE LÖSUNG: Verwendet die präzise yt-dlp Option --force-ipv4, um DNS-Probleme zu umgehen.
6
  """
7
  import os
8
  import subprocess
 
28
  dns_resolver = None
29
 
30
  # ---------------------------------------------------------------------------
31
+ # DEFINITIVE AUFRUFMETHODE: yt-dlp als Modul, um PATH-Konflikte zu vermeiden
32
  # ---------------------------------------------------------------------------
33
# Base argv for yt-dlp: run it as a module of the current interpreter
# ("<python> -m yt_dlp") instead of relying on an executable found on PATH.
YT_DLP_COMMAND = [sys.executable, "-m", "yt_dlp"]
# Executable name used as the first element of ffmpeg command lines.
FFMPEG_PATH = "ffmpeg"
 
45
  raise RuntimeError("Command failed: " + " ".join(map(str, cmd)) + " " + tail)
46
  return result.stdout
47
 
48
+ # ---------------------------------------------------------------------------
49
+ # DNS-Helfer
50
+ # ---------------------------------------------------------------------------
51
def resolve_hostname_with_dns_python(hostname):
    """Return an IPv4 address for *hostname* as text.

    When the optional ``dns_resolver`` module is available, an ``A`` record is
    requested from the name servers 8.8.8.8 and 1.1.1.1. If the module is
    missing, or that lookup raises, the result of ``socket.gethostbyname`` is
    returned instead. ``None`` is returned when the lookup yields an empty
    answer.
    """
    dnspython_missing = not dns_resolver
    if dnspython_missing:
        # No dnspython available: use the system resolver directly.
        return socket.gethostbyname(hostname)
    try:
        public_dns = dns_resolver.Resolver()
        public_dns.nameservers = ['8.8.8.8', '1.1.1.1']
        # Only A records are requested, i.e. IPv4 addresses.
        records = public_dns.resolve(hostname, 'A')
        if records:
            return records[0].to_text()
        return None
    except Exception:
        # Any failure of the explicit lookup falls back to the system resolver.
        return socket.gethostbyname(hostname)
65
 
66
  # ---------------------------------------------------------------------------
67
+ # Download & Audio Extraktion
68
  # ---------------------------------------------------------------------------
69
  def download_video_with_ytdlp(url, out_dir, cookies_path=None, format_selector=None):
70
+ """Downloads a video using yt-dlp, bypassing DNS blocks with --force-ipv4."""
71
  out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
72
  cmd = YT_DLP_COMMAND + ["-o", out_template]
73
 
 
76
  if hostname:
77
  ip_address = resolve_hostname_with_dns_python(hostname)
78
  if ip_address:
79
+ print(f"Resolved {hostname} to IPv4 {ip_address}. Using --force-ipv4.")
80
+ # DIES IST DIE KORREKTE, PRÄZISE OPTION.
81
+ cmd.extend(["--force-ipv4", ip_address])
82
  except Exception as e:
83
  print(f"Custom DNS resolution failed, proceeding without it. Error: {e}")
84
 
 
94
  return str(files[0])
95
 
96
def extract_audio_ffmpeg(video_path, out_wav):
    """Run ffmpeg to write the audio of *video_path* to *out_wav*.

    The command drops video (``-vn``), requests one channel (``-ac 1``) at
    16000 Hz (``-ar 16000``) in WAV format and overwrites an existing output
    (``-y``). ffmpeg output is discarded; a non-zero exit status raises
    ``subprocess.CalledProcessError``. Returns *out_wav*.
    """
    ffmpeg_args = [
        FFMPEG_PATH, "-y",
        "-i", video_path,
        "-vn",
        "-ac", "1",
        "-ar", "16000",
        "-f", "wav",
        out_wav,
    ]
    subprocess.run(
        ffmpeg_args,
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return out_wav
101
 
102
+ # ---------------------------------------------------------------------------
103
+ # Zeit- und Untertitel-Formatierer
104
+ # ---------------------------------------------------------------------------
105
def seconds_to_timestamp(s):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a fraction such as 1.9996 carries into the seconds field
    ("00:00:02,000") instead of producing an invalid 4-digit millisecond
    part. Negative inputs are clamped to zero.
    """
    total_ms = max(0, int(round(s * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
108
+
109
def format_timestamp_vtt(s):
    """Format a time offset in seconds as a WebVTT timestamp ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a fraction such as 1.9996 carries into the seconds field
    ("00:00:02.000") instead of producing an invalid 4-digit millisecond
    part. Negative inputs are clamped to zero.
    """
    total_ms = max(0, int(round(s * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
112
+
113
def segments_to_srt(segments):
    """Render *segments* as SRT text.

    Each segment is a mapping with ``start``, ``end`` and ``text`` keys. Cues
    are numbered from 1, separated by a blank line, and the result always ends
    with two newlines (also for an empty input).
    """
    cues = []
    for number, seg in enumerate(segments, 1):
        begin = seconds_to_timestamp(seg['start'])
        finish = seconds_to_timestamp(seg['end'])
        cues.append(f"{number}\n{begin} --> {finish}\n{seg['text'].strip()}")
    return "\n\n".join(cues) + "\n\n"
116
+
117
def segments_to_vtt(segments):
    """Render *segments* as WebVTT text.

    The output starts with the ``WEBVTT`` header line; every segment (mapping
    with ``start``, ``end`` and ``text`` keys) becomes one cue, and all parts
    are joined with a blank line.
    """
    cues = ["WEBVTT\n"]
    for seg in segments:
        begin = format_timestamp_vtt(seg['start'])
        finish = format_timestamp_vtt(seg['end'])
        cues.append(f"{begin} --> {finish}\n{seg['text'].strip()}")
    return "\n\n".join(cues)
120
+
121
def segments_to_txt(segments):
    """Render *segments* as plain text, one ``[start-timestamp] text`` line each."""
    rendered = []
    for seg in segments:
        stamp = seconds_to_timestamp(seg['start'])
        rendered.append(f"[{stamp}] {seg['text'].strip()}")
    return "\n".join(rendered)
123
+
124
def segments_to_json(segments, lang=None, meta=None):
    """Serialize a transcription result to an indented JSON string.

    ``segments`` is stored under ``"segments"`` and ``lang`` under
    ``"language"``; a truthy ``meta`` is added under ``"metadata"``. The output
    uses 2-space indentation and keeps non-ASCII characters unescaped.
    """
    payload = {"language": lang, "segments": segments}
    if meta:
        payload["metadata"] = meta
    return json.dumps(payload, ensure_ascii=False, indent=2)
128
+
129
+ # ---------------------------------------------------------------------------
130
+ # Kern-Pipeline: Transkription
131
+ # ---------------------------------------------------------------------------
132
def transcribe_pipeline(file_obj, url, model_size, keep_video, cookies_file, format_selector):
    """Obtain a video, transcribe its audio with Whisper and write subtitle files.

    Args:
        file_obj: Uploaded file; its path is read from ``.name`` (a plain path
            is accepted as well). Only used when ``url`` is empty.
        url: Video URL passed to ``download_video_with_ytdlp``; wins over
            ``file_obj``.
        model_size: Model name passed to ``whisper.load_model``.
        keep_video: When true, a video downloaded from ``url`` is kept on disk.
        cookies_file: Cookies file path forwarded to the downloader, or ``None``.
        format_selector: Format selector forwarded to the downloader, or ``None``.

    Returns:
        A 6-tuple ``(text, srt_path, vtt_path, txt_path, json_path, meta)``.
        On any failure the first element is an error message and the other
        five are ``None``.
    """
    import shutil  # local import: only needed to clean up after a failed run

    if whisper is None:
        return "Fehler: whisper ist nicht installiert.", *[None] * 5
    # Validate before creating the temporary directory so this early exit
    # leaves nothing behind on disk.
    if not url and not file_obj:
        return "Kein Video angegeben.", *[None] * 5

    tmpdir = tempfile.mkdtemp(prefix="whisper_space_")
    keep_tmpdir = False  # only a successful run hands files in tmpdir to the caller
    try:
        if url:
            video_path = download_video_with_ytdlp(url, tmpdir, cookies_file, format_selector)
        else:
            video_path = getattr(file_obj, "name", file_obj)

        audio_wav = str(Path(tmpdir) / "audio.wav")
        extract_audio_ffmpeg(video_path, audio_wav)

        model = whisper.load_model(model_size)
        result = model.transcribe(audio_wav, verbose=False)
        segs = result.get("segments", [])
        lang = result.get("language", "unknown")

        contents = {
            "srt": segments_to_srt(segs),
            "vtt": segments_to_vtt(segs),
            "txt": segments_to_txt(segs),
            "json": segments_to_json(segs, lang, {"model": model_size}),
        }
        base = Path(video_path).stem
        files = {}
        for ext, content in contents.items():
            target = Path(tmpdir) / f"{base}.{ext}"
            target.write_text(content, encoding="utf-8")
            files[ext] = str(target)

        if not keep_video and url:
            # Best effort: a video that cannot be deleted must not fail the run.
            try:
                os.remove(video_path)
            except OSError:
                pass

        meta = f"Model: {model_size}, Sprache: {lang}"
        keep_tmpdir = True
        return contents["txt"], files["srt"], files["vtt"], files["txt"], files["json"], meta
    except Exception as e:
        return f"Fehler: {e}", *[None] * 5
    finally:
        # Failed runs return no paths, so their temporary directory is dropped.
        if not keep_tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)
174
+
175
+ # ---------------------------------------------------------------------------
176
+ # Netzwerk-Diagnose-Tab
177
+ # ---------------------------------------------------------------------------
178
def dns_internet_diag():
    """Build a plain-text diagnostics report.

    The report lists the interpreter path, the output of the yt-dlp module
    invoked with ``--version`` (or the error raised while running it), and the
    result of ``resolve_hostname_with_dns_python`` for three fixed host names.
    Returns the report lines joined with newlines.
    """
    report = [
        "=== Python & Version Info ===",
        f"Python Executable: {sys.executable}",
    ]
    try:
        # Run yt-dlp as a module of this interpreter to report its version.
        version_cmd = YT_DLP_COMMAND + ["--version"]
        reported = run_capture(version_cmd).strip()
        cmd_text = ' '.join(version_cmd)
        report.append(f"Version via '{cmd_text}': {reported}")
    except Exception as err:
        report.append(f"Fehler bei der Prüfung der yt-dlp Modul-Version: {err}")

    report.append("\n\n=== DNS-Auflösung (via dnspython mit 8.8.8.8) ===")
    for hostname in ("huggingface.co", "www.instagram.com", "youtube.com"):
        try:
            address = resolve_hostname_with_dns_python(hostname)
            entry = f"{hostname} -> {address} (OK)"
        except Exception as err:
            entry = f"{hostname} -> ERROR: {err}"
        report.append(entry)
    return "\n".join(report)
198
+
199
+ # ---------------------------------------------------------------------------
200
+ # Gradio UI
201
+ # ---------------------------------------------------------------------------
202
# Gradio interface definition; `demo` is the Blocks app launched at the bottom.
with gr.Blocks() as demo:
    gr.Markdown("# Video → Whisper Transkript (SRT/TXT/VTT/JSON)")

    # Tab 1: transcription. Inputs in the first column, results in the second.
    with gr.Tab("Transkription"):
        with gr.Row():
            with gr.Column():
                url_in=gr.Textbox(label="Video URL",placeholder="https://...")
                file_in=gr.File(label="Oder Videodatei hochladen")
                cookies_in=gr.File(label="Cookies.txt (optional, für yt-dlp)")
                fmt_in=gr.Textbox(label="Format (optional, yt-dlp -f)",placeholder="z.B. bestvideo+bestaudio/best")
                model_sel=gr.Radio(["tiny","base","small","medium","large"],value="small",label="Whisper-Modell")
                keep_chk=gr.Checkbox(label="Video behalten (bei URL-Download)",value=False)
                btn=gr.Button("Transkribieren")
                status=gr.Textbox(label="Status / Meta",interactive=False)
            with gr.Column():
                transcript=gr.Textbox(label="Transkript",lines=20)
                srt_dl=gr.File(label="SRT")
                vtt_dl=gr.File(label="VTT")
                txt_dl=gr.File(label="TXT")
                json_dl=gr.File(label="JSON")

        def run_transcribe_wrapper(f, u, m, k, c, fmt):
            """Adapt the UI inputs for ``transcribe_pipeline`` and its outputs for the UI.

            Reads the cookies path from the upload's ``.name``, maps an empty
            format string to ``None``, and shows each download component only
            when the pipeline returned a value for it.
            """
            cookies_path = c.name if c else None
            display, srt, vtt, txt, jsn, meta = transcribe_pipeline(f, u, m, k, cookies_path, (fmt or None))
            return (
                display,
                gr.update(value=srt, visible=bool(srt)),
                gr.update(value=vtt, visible=bool(vtt)),
                gr.update(value=txt, visible=bool(txt)),
                gr.update(value=jsn, visible=bool(jsn)),
                meta,
            )

        # Input order must match the wrapper's parameters, output order its return tuple.
        btn.click(
            run_transcribe_wrapper,
            [file_in, url_in, model_sel, keep_chk, cookies_in, fmt_in],
            [transcript, srt_dl, vtt_dl, txt_dl, json_dl, status]
        )

    # Tab 2: diagnostics; the button writes the report text into the textbox.
    with gr.Tab("Netzwerk / DNS Diagnose"):
        gr.Markdown("""Prüft die Version von yt-dlp, wie sie von Python als Modul ausgeführt wird, und testet die DNS-Auflösung.""")
        diag_btn=gr.Button("Diagnose starten")
        diag_out=gr.Textbox(label="Diagnose-Ausgabe",lines=25)
        diag_btn.click(dns_internet_diag, inputs=[], outputs=[diag_out])

# Script entry point: listen on all interfaces, port from $PORT or 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))