neuralworm committed on
Commit
fc9db83
·
verified ·
1 Parent(s): 0f1307c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -51
app.py CHANGED
@@ -28,6 +28,7 @@ except Exception as e:
28
 
29
  # Hilfsfunktionen ----------------------------------------------------------
30
 
 
31
  def run(cmd, hide_output=False):
32
  """Run shell command, raise on error."""
33
  if hide_output:
@@ -35,6 +36,7 @@ def run(cmd, hide_output=False):
35
  else:
36
  subprocess.run(cmd, check=True)
37
 
 
38
  def download_video_with_ytdlp(url: str, out_dir: str) -> str:
39
  """Download best video using yt-dlp into out_dir, return filepath"""
40
  out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
@@ -46,6 +48,7 @@ def download_video_with_ytdlp(url: str, out_dir: str) -> str:
46
  raise FileNotFoundError("Download erfolglos — keine Datei gefunden.")
47
  return str(files[0])
48
 
 
49
  def extract_audio_ffmpeg(video_path: str, out_wav: str):
50
  """Extract mono 16k WAV for Whisper"""
51
  cmd = [
@@ -61,28 +64,24 @@ def extract_audio_ffmpeg(video_path: str, out_wav: str):
61
  run(cmd, hide_output=True)
62
  return out_wav
63
 
 
64
  def seconds_to_timestamp(s: float, always_ms: bool = True) -> str:
65
  """Convert seconds (float) to SRT/VTT time format HH:MM:SS,mmm"""
66
- td = timedelta(seconds=float(s))
67
- total_seconds = int(td.total_seconds())
68
- hours = total_seconds // 3600
69
- minutes = (total_seconds % 3600) // 60
70
- seconds = total_seconds % 60
71
- milliseconds = int(td.microseconds / 1000 + (td.seconds - int(td.seconds)) * 1000)
72
- # Better approach using fractional part:
73
- frac = s - int(s)
74
- ms = int(round((s - int(s)) * 1000)) if s >= 0 else 0
75
  return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
76
 
 
77
  def format_timestamp_vtt(s: float) -> str:
78
- td = timedelta(seconds=float(s))
79
- total_seconds = int(td.total_seconds())
80
- hours = total_seconds // 3600
81
- minutes = (total_seconds % 3600) // 60
82
- seconds = total_seconds % 60
83
  ms = int(round((s - int(s)) * 1000))
84
  return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
85
 
 
86
  def segments_to_srt(segments):
87
  """Create SRT string from whisper segments"""
88
  parts = []
@@ -93,6 +92,7 @@ def segments_to_srt(segments):
93
  parts.append(f"{i}\n{start} --> {end}\n{text}\n")
94
  return "\n".join(parts)
95
 
 
96
  def segments_to_vtt(segments):
97
  """Create VTT string from whisper segments"""
98
  parts = ["WEBVTT\n"]
@@ -103,6 +103,7 @@ def segments_to_vtt(segments):
103
  parts.append(f"{start} --> {end}\n{text}\n")
104
  return "\n".join(parts)
105
 
 
106
  def segments_to_txt(segments):
107
  """Create plain TXT with timestamps per segment"""
108
  lines = []
@@ -112,6 +113,7 @@ def segments_to_txt(segments):
112
  lines.append(f"[{start}] {text}")
113
  return "\n".join(lines)
114
 
 
115
  def segments_to_json(segments, language=None, metadata=None):
116
  obj = {
117
  "language": language,
@@ -165,40 +167,4 @@ def transcribe_pipeline(file_obj, url, model_size, keep_video=False):
165
 
166
  # 4) Create output strings
167
  srt_text = segments_to_srt(segments)
168
- vtt_text = segments_to_vtt(segments)
169
- txt_text = segments_to_txt(segments)
170
- json_text = segments_to_json(segments, language=language, metadata={"model": model_size})
171
-
172
- # 5) Save files to tmpdir for download via Gradio
173
- out_files = {}
174
- base_name = Path(video_path).stem
175
- files_map = {
176
- f"{base_name}.srt": srt_text,
177
- f"{base_name}.vtt": vtt_text,
178
- f"{base_name}.txt": txt_text,
179
- f"{base_name}.json": json_text
180
- }
181
- for fname, content in files_map.items():
182
- path = Path(tmpdir) / fname
183
- path.write_text(content, encoding="utf-8")
184
- out_files[fname] = str(path)
185
-
186
- # 6) prepare display text with timestamps for UI (simple combined view)
187
- display_lines = []
188
- for seg in segments:
189
- start = seconds_to_timestamp(seg['start'])
190
- display_lines.append(f"[{start}] {seg['text'].strip()}")
191
- display_text = "\n".join(display_lines)
192
-
193
- # Optionally remove video to save space
194
- if not keep_video and url:
195
- try:
196
- os.remove(video_path)
197
- except Exception:
198
- pass
199
-
200
- return display_text, out_files[f"{base_name}.srt"], out_files[f"{base_name}.vtt"], out_files[f"{base_name}.txt"], out_files[f"{base_name}.json"], f"Model: {model_size}, Language: {language}"
201
- except Exception as e:
202
- return f"Fehler während Verarbeitung: {e}", None, None, None, None, None
203
- finally:
204
- # Do not delete tmpdir immediately if the user wants to download
 
28
 
29
  # Hilfsfunktionen ----------------------------------------------------------
30
 
31
+
32
  def run(cmd, hide_output=False):
33
  """Run shell command, raise on error."""
34
  if hide_output:
 
36
  else:
37
  subprocess.run(cmd, check=True)
38
 
39
+
40
  def download_video_with_ytdlp(url: str, out_dir: str) -> str:
41
  """Download best video using yt-dlp into out_dir, return filepath"""
42
  out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
 
48
  raise FileNotFoundError("Download erfolglos — keine Datei gefunden.")
49
  return str(files[0])
50
 
51
+
52
  def extract_audio_ffmpeg(video_path: str, out_wav: str):
53
  """Extract mono 16k WAV for Whisper"""
54
  cmd = [
 
64
  run(cmd, hide_output=True)
65
  return out_wav
66
 
67
+
68
  def seconds_to_timestamp(s: float, always_ms: bool = True) -> str:
69
  """Convert seconds (float) to SRT/VTT time format HH:MM:SS,mmm"""
70
+ hours = int(s // 3600)
71
+ minutes = int((s % 3600) // 60)
72
+ seconds = int(s % 60)
73
+ ms = int(round((s - int(s)) * 1000))
 
 
 
 
 
74
  return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
75
 
76
+
77
  def format_timestamp_vtt(s: float) -> str:
78
+ hours = int(s // 3600)
79
+ minutes = int((s % 3600) // 60)
80
+ seconds = int(s % 60)
 
 
81
  ms = int(round((s - int(s)) * 1000))
82
  return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
83
 
84
+
85
  def segments_to_srt(segments):
86
  """Create SRT string from whisper segments"""
87
  parts = []
 
92
  parts.append(f"{i}\n{start} --> {end}\n{text}\n")
93
  return "\n".join(parts)
94
 
95
+
96
  def segments_to_vtt(segments):
97
  """Create VTT string from whisper segments"""
98
  parts = ["WEBVTT\n"]
 
103
  parts.append(f"{start} --> {end}\n{text}\n")
104
  return "\n".join(parts)
105
 
106
+
107
  def segments_to_txt(segments):
108
  """Create plain TXT with timestamps per segment"""
109
  lines = []
 
113
  lines.append(f"[{start}] {text}")
114
  return "\n".join(lines)
115
 
116
+
117
  def segments_to_json(segments, language=None, metadata=None):
118
  obj = {
119
  "language": language,
 
167
 
168
  # 4) Create output strings
169
  srt_text = segments_to_srt(segments)
170
+ vtt_text = segments