Spaces:

neuralworm
/

video_transcription

Sleeping

App Files Files Community

neuralworm committed 28 days ago

Commit

fc9db83

verified ·

1 Parent(s): 0f1307c

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -51

app.py CHANGED Viewed

@@ -28,6 +28,7 @@ except Exception as e:
 # Hilfsfunktionen ----------------------------------------------------------
 def run(cmd, hide_output=False):
     """Run shell command, raise on error."""
     if hide_output:
@@ -35,6 +36,7 @@ def run(cmd, hide_output=False):
     else:
         subprocess.run(cmd, check=True)
 def download_video_with_ytdlp(url: str, out_dir: str) -> str:
     """Download best video using yt-dlp into out_dir, return filepath"""
     out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
@@ -46,6 +48,7 @@ def download_video_with_ytdlp(url: str, out_dir: str) -> str:
         raise FileNotFoundError("Download erfolglos — keine Datei gefunden.")
     return str(files[0])
 def extract_audio_ffmpeg(video_path: str, out_wav: str):
     """Extract mono 16k WAV for Whisper"""
     cmd = [
@@ -61,28 +64,24 @@ def extract_audio_ffmpeg(video_path: str, out_wav: str):
     run(cmd, hide_output=True)
     return out_wav
 def seconds_to_timestamp(s: float, always_ms: bool = True) -> str:
     """Convert seconds (float) to SRT/VTT time format HH:MM:SS,mmm"""
-    td = timedelta(seconds=float(s))
-    total_seconds = int(td.total_seconds())
-    hours = total_seconds // 3600
-    minutes = (total_seconds % 3600) // 60
-    seconds = total_seconds % 60
-    milliseconds = int(td.microseconds / 1000 + (td.seconds - int(td.seconds)) * 1000)
-    # Better approach using fractional part:
-    frac = s - int(s)
-    ms = int(round((s - int(s)) * 1000)) if s >= 0 else 0
     return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
 def format_timestamp_vtt(s: float) -> str:
-    td = timedelta(seconds=float(s))
-    total_seconds = int(td.total_seconds())
-    hours = total_seconds // 3600
-    minutes = (total_seconds % 3600) // 60
-    seconds = total_seconds % 60
     ms = int(round((s - int(s)) * 1000))
     return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
 def segments_to_srt(segments):
     """Create SRT string from whisper segments"""
     parts = []
@@ -93,6 +92,7 @@ def segments_to_srt(segments):
         parts.append(f"{i}\n{start} --> {end}\n{text}\n")
     return "\n".join(parts)
 def segments_to_vtt(segments):
     """Create VTT string from whisper segments"""
     parts = ["WEBVTT\n"]
@@ -103,6 +103,7 @@ def segments_to_vtt(segments):
         parts.append(f"{start} --> {end}\n{text}\n")
     return "\n".join(parts)
 def segments_to_txt(segments):
     """Create plain TXT with timestamps per segment"""
     lines = []
@@ -112,6 +113,7 @@ def segments_to_txt(segments):
         lines.append(f"[{start}] {text}")
     return "\n".join(lines)
 def segments_to_json(segments, language=None, metadata=None):
     obj = {
         "language": language,
@@ -165,40 +167,4 @@ def transcribe_pipeline(file_obj, url, model_size, keep_video=False):
         # 4) Create output strings
         srt_text = segments_to_srt(segments)
-        vtt_text = segments_to_vtt(segments)
-        txt_text = segments_to_txt(segments)
-        json_text = segments_to_json(segments, language=language, metadata={"model": model_size})
-        # 5) Save files to tmpdir for download via Gradio
-        out_files = {}
-        base_name = Path(video_path).stem
-        files_map = {
-            f"{base_name}.srt": srt_text,
-            f"{base_name}.vtt": vtt_text,
-            f"{base_name}.txt": txt_text,
-            f"{base_name}.json": json_text
-        }
-        for fname, content in files_map.items():
-            path = Path(tmpdir) / fname
-            path.write_text(content, encoding="utf-8")
-            out_files[fname] = str(path)
-        # 6) prepare display text with timestamps for UI (simple combined view)
-        display_lines = []
-        for seg in segments:
-            start = seconds_to_timestamp(seg['start'])
-            display_lines.append(f"[{start}] {seg['text'].strip()}")
-        display_text = "\n".join(display_lines)
-        # Optionally remove video to save space
-        if not keep_video and url:
-            try:
-                os.remove(video_path)
-            except Exception:
-                pass
-        return display_text, out_files[f"{base_name}.srt"], out_files[f"{base_name}.vtt"], out_files[f"{base_name}.txt"], out_files[f"{base_name}.json"], f"Model: {model_size}, Language: {language}"
-    except Exception as e:
-        return f"Fehler während Verarbeitung: {e}", None, None, None, None, None
-    finally:
-        # Do not delete tmpdir immediately if the user wants to download

 # Hilfsfunktionen ----------------------------------------------------------
 def run(cmd, hide_output=False):
     """Run shell command, raise on error."""
     if hide_output:
     else:
         subprocess.run(cmd, check=True)
 def download_video_with_ytdlp(url: str, out_dir: str) -> str:
     """Download best video using yt-dlp into out_dir, return filepath"""
     out_template = str(Path(out_dir) / "%(title)s.%(ext)s")
         raise FileNotFoundError("Download erfolglos — keine Datei gefunden.")
     return str(files[0])
 def extract_audio_ffmpeg(video_path: str, out_wav: str):
     """Extract mono 16k WAV for Whisper"""
     cmd = [
     run(cmd, hide_output=True)
     return out_wav
 def seconds_to_timestamp(s: float, always_ms: bool = True) -> str:
     """Convert seconds (float) to SRT/VTT time format HH:MM:SS,mmm.

     The value is rounded once to whole milliseconds and then split into
     fields, so a fractional part that rounds up (e.g. 1.9996 s) carries
     into the seconds/minutes/hours instead of yielding a four-digit
     millisecond field such as ``00:00:01,1000``.

     Args:
         s: Offset in seconds. Negative values are clamped to zero because
             a negative offset cannot be expressed in this format.
         always_ms: Accepted for backward compatibility; the millisecond
             field is always emitted regardless of its value.

     Returns:
         The timestamp as ``HH:MM:SS,mmm`` (hours grow beyond two digits
         if needed).
     """
     # Round on the total, not on the fractional part alone, so that the
     # carry propagates through every field.
     total_ms = max(0, int(round(float(s) * 1000)))
     total_seconds, ms = divmod(total_ms, 1000)
     total_minutes, seconds = divmod(total_seconds, 60)
     hours, minutes = divmod(total_minutes, 60)
     return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
 def format_timestamp_vtt(s: float) -> str:
     """Convert seconds (float) to the WebVTT time format HH:MM:SS.mmm.

     The value is rounded once to whole milliseconds and then split into
     fields, so a fractional part that rounds up (e.g. 1.9996 s) carries
     into the seconds/minutes/hours instead of yielding a four-digit
     millisecond field such as ``00:00:01.1000``.

     Args:
         s: Offset in seconds. Negative values are clamped to zero because
             a negative offset cannot be expressed in this format.

     Returns:
         The timestamp as ``HH:MM:SS.mmm`` (hours grow beyond two digits
         if needed).
     """
     # Round on the total, not on the fractional part alone, so that the
     # carry propagates through every field.
     total_ms = max(0, int(round(float(s) * 1000)))
     total_seconds, ms = divmod(total_ms, 1000)
     total_minutes, seconds = divmod(total_seconds, 60)
     hours, minutes = divmod(total_minutes, 60)
     return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{ms:03d}"
 def segments_to_srt(segments):
     """Create SRT string from whisper segments"""
     parts = []
         parts.append(f"{i}\n{start} --> {end}\n{text}\n")
     return "\n".join(parts)
 def segments_to_vtt(segments):
     """Create VTT string from whisper segments"""
     parts = ["WEBVTT\n"]
         parts.append(f"{start} --> {end}\n{text}\n")
     return "\n".join(parts)
 def segments_to_txt(segments):
     """Create plain TXT with timestamps per segment"""
     lines = []
         lines.append(f"[{start}] {text}")
     return "\n".join(lines)
 def segments_to_json(segments, language=None, metadata=None):
     obj = {
         "language": language,
         # 4) Create output strings
         srt_text = segments_to_srt(segments)
+        vtt_text = segments