Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -28,6 +28,7 @@ except Exception as e:
|
|
| 28 |
|
| 29 |
# Hilfsfunktionen ----------------------------------------------------------
|
| 30 |
|
|
|
|
| 31 |
def run(cmd, hide_output=False):
|
| 32 |
"""Run shell command, raise on error."""
|
| 33 |
if hide_output:
|
|
@@ -35,6 +36,7 @@ def run(cmd, hide_output=False):
|
|
| 35 |
else:
|
| 36 |
subprocess.run(cmd, check=True)
|
| 37 |
|
|
|
|
| 38 |
def download_video_with_ytdlp(url: str, out_dir: str) -> str:
|
| 39 |
"""Download best video using yt-dlp into out_dir, return filepath"""
|
| 40 |
out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
|
|
@@ -46,6 +48,7 @@ def download_video_with_ytdlp(url: str, out_dir: str) -> str:
|
|
| 46 |
raise FileNotFoundError("Download erfolglos — keine Datei gefunden.")
|
| 47 |
return str(files[0])
|
| 48 |
|
|
|
|
| 49 |
def extract_audio_ffmpeg(video_path: str, out_wav: str):
|
| 50 |
"""Extract mono 16k WAV for Whisper"""
|
| 51 |
cmd = [
|
|
@@ -61,28 +64,24 @@ def extract_audio_ffmpeg(video_path: str, out_wav: str):
|
|
| 61 |
run(cmd, hide_output=True)
|
| 62 |
return out_wav
|
| 63 |
|
|
|
|
| 64 |
def seconds_to_timestamp(s: float, always_ms: bool = True) -> str:
|
| 65 |
"""Convert seconds (float) to SRT/VTT time format HH:MM:SS,mmm"""
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
seconds = total_seconds % 60
|
| 71 |
-
milliseconds = int(td.microseconds / 1000 + (td.seconds - int(td.seconds)) * 1000)
|
| 72 |
-
# Better approach using fractional part:
|
| 73 |
-
frac = s - int(s)
|
| 74 |
-
ms = int(round((s - int(s)) * 1000)) if s >= 0 else 0
|
| 75 |
return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
|
| 76 |
|
|
|
|
| 77 |
def format_timestamp_vtt(s: float) -> str:
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
minutes = (total_seconds % 3600) // 60
|
| 82 |
-
seconds = total_seconds % 60
|
| 83 |
ms = int(round((s - int(s)) * 1000))
|
| 84 |
return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
|
| 85 |
|
|
|
|
| 86 |
def segments_to_srt(segments):
|
| 87 |
"""Create SRT string from whisper segments"""
|
| 88 |
parts = []
|
|
@@ -93,6 +92,7 @@ def segments_to_srt(segments):
|
|
| 93 |
parts.append(f"{i}\n{start} --> {end}\n{text}\n")
|
| 94 |
return "\n".join(parts)
|
| 95 |
|
|
|
|
| 96 |
def segments_to_vtt(segments):
|
| 97 |
"""Create VTT string from whisper segments"""
|
| 98 |
parts = ["WEBVTT\n"]
|
|
@@ -103,6 +103,7 @@ def segments_to_vtt(segments):
|
|
| 103 |
parts.append(f"{start} --> {end}\n{text}\n")
|
| 104 |
return "\n".join(parts)
|
| 105 |
|
|
|
|
| 106 |
def segments_to_txt(segments):
|
| 107 |
"""Create plain TXT with timestamps per segment"""
|
| 108 |
lines = []
|
|
@@ -112,6 +113,7 @@ def segments_to_txt(segments):
|
|
| 112 |
lines.append(f"[{start}] {text}")
|
| 113 |
return "\n".join(lines)
|
| 114 |
|
|
|
|
| 115 |
def segments_to_json(segments, language=None, metadata=None):
|
| 116 |
obj = {
|
| 117 |
"language": language,
|
|
@@ -165,40 +167,4 @@ def transcribe_pipeline(file_obj, url, model_size, keep_video=False):
|
|
| 165 |
|
| 166 |
# 4) Create output strings
|
| 167 |
srt_text = segments_to_srt(segments)
|
| 168 |
-
vtt_text =
|
| 169 |
-
txt_text = segments_to_txt(segments)
|
| 170 |
-
json_text = segments_to_json(segments, language=language, metadata={"model": model_size})
|
| 171 |
-
|
| 172 |
-
# 5) Save files to tmpdir for download via Gradio
|
| 173 |
-
out_files = {}
|
| 174 |
-
base_name = Path(video_path).stem
|
| 175 |
-
files_map = {
|
| 176 |
-
f"{base_name}.srt": srt_text,
|
| 177 |
-
f"{base_name}.vtt": vtt_text,
|
| 178 |
-
f"{base_name}.txt": txt_text,
|
| 179 |
-
f"{base_name}.json": json_text
|
| 180 |
-
}
|
| 181 |
-
for fname, content in files_map.items():
|
| 182 |
-
path = Path(tmpdir) / fname
|
| 183 |
-
path.write_text(content, encoding="utf-8")
|
| 184 |
-
out_files[fname] = str(path)
|
| 185 |
-
|
| 186 |
-
# 6) prepare display text with timestamps for UI (simple combined view)
|
| 187 |
-
display_lines = []
|
| 188 |
-
for seg in segments:
|
| 189 |
-
start = seconds_to_timestamp(seg['start'])
|
| 190 |
-
display_lines.append(f"[{start}] {seg['text'].strip()}")
|
| 191 |
-
display_text = "\n".join(display_lines)
|
| 192 |
-
|
| 193 |
-
# Optionally remove video to save space
|
| 194 |
-
if not keep_video and url:
|
| 195 |
-
try:
|
| 196 |
-
os.remove(video_path)
|
| 197 |
-
except Exception:
|
| 198 |
-
pass
|
| 199 |
-
|
| 200 |
-
return display_text, out_files[f"{base_name}.srt"], out_files[f"{base_name}.vtt"], out_files[f"{base_name}.txt"], out_files[f"{base_name}.json"], f"Model: {model_size}, Language: {language}"
|
| 201 |
-
except Exception as e:
|
| 202 |
-
return f"Fehler während Verarbeitung: {e}", None, None, None, None, None
|
| 203 |
-
finally:
|
| 204 |
-
# Do not delete tmpdir immediately if the user wants to download
|
|
|
|
| 28 |
|
| 29 |
# Hilfsfunktionen ----------------------------------------------------------
|
| 30 |
|
| 31 |
+
|
| 32 |
def run(cmd, hide_output=False):
|
| 33 |
"""Run shell command, raise on error."""
|
| 34 |
if hide_output:
|
|
|
|
| 36 |
else:
|
| 37 |
subprocess.run(cmd, check=True)
|
| 38 |
|
| 39 |
+
|
| 40 |
def download_video_with_ytdlp(url: str, out_dir: str) -> str:
|
| 41 |
"""Download best video using yt-dlp into out_dir, return filepath"""
|
| 42 |
out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
|
|
|
|
| 48 |
raise FileNotFoundError("Download erfolglos — keine Datei gefunden.")
|
| 49 |
return str(files[0])
|
| 50 |
|
| 51 |
+
|
| 52 |
def extract_audio_ffmpeg(video_path: str, out_wav: str):
|
| 53 |
"""Extract mono 16k WAV for Whisper"""
|
| 54 |
cmd = [
|
|
|
|
| 64 |
run(cmd, hide_output=True)
|
| 65 |
return out_wav
|
| 66 |
|
| 67 |
+
|
| 68 |
def seconds_to_timestamp(s: float, always_ms: bool = True) -> str:
    """Convert seconds to an SRT-style timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a fractional part such as 0.9996 carries into the seconds
    (and, if needed, minutes/hours) instead of producing an invalid
    four-digit millisecond field like ``,1000``.

    Args:
        s: Offset in seconds. Negative values are clamped to zero.
        always_ms: Currently unused by this function; kept so existing
            callers that pass it keep working.

    Returns:
        The formatted timestamp with a comma before the milliseconds.
    """
    # Work in integer milliseconds to avoid float drift between the fields.
    total_ms = max(0, int(round(s * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
|
| 75 |
|
| 76 |
+
|
| 77 |
def format_timestamp_vtt(s: float) -> str:
    """Convert seconds to a WebVTT-style timestamp ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a fractional part such as 0.9996 carries into the seconds
    (and, if needed, minutes/hours) instead of producing an invalid
    four-digit millisecond field like ``.1000``.

    Args:
        s: Offset in seconds. Negative values are clamped to zero.

    Returns:
        The formatted timestamp with a dot before the milliseconds.
    """
    # Work in integer milliseconds to avoid float drift between the fields.
    total_ms = max(0, int(round(s * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
|
| 83 |
|
| 84 |
+
|
| 85 |
def segments_to_srt(segments):
|
| 86 |
"""Create SRT string from whisper segments"""
|
| 87 |
parts = []
|
|
|
|
| 92 |
parts.append(f"{i}\n{start} --> {end}\n{text}\n")
|
| 93 |
return "\n".join(parts)
|
| 94 |
|
| 95 |
+
|
| 96 |
def segments_to_vtt(segments):
|
| 97 |
"""Create VTT string from whisper segments"""
|
| 98 |
parts = ["WEBVTT\n"]
|
|
|
|
| 103 |
parts.append(f"{start} --> {end}\n{text}\n")
|
| 104 |
return "\n".join(parts)
|
| 105 |
|
| 106 |
+
|
| 107 |
def segments_to_txt(segments):
|
| 108 |
"""Create plain TXT with timestamps per segment"""
|
| 109 |
lines = []
|
|
|
|
| 113 |
lines.append(f"[{start}] {text}")
|
| 114 |
return "\n".join(lines)
|
| 115 |
|
| 116 |
+
|
| 117 |
def segments_to_json(segments, language=None, metadata=None):
|
| 118 |
obj = {
|
| 119 |
"language": language,
|
|
|
|
| 167 |
|
| 168 |
# 4) Create output strings
|
| 169 |
srt_text = segments_to_srt(segments)
|
| 170 |
+
vtt_text = segments
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|