Spaces:

CK-Explorer
/

DuoSubs

Running

App Files Files Community

CK-Explorer committed on Aug 9

Commit

25c5a9d

1 Parent(s): 13d6531

feat: add web ui code

Browse files

Files changed (7) hide show

app.py +5 -0
assets/title.html +26 -0
ui/__init__.py +0 -0
ui/constants.py +25 -0
ui/events.py +224 -0
ui/layout.py +355 -0
ui/utils.py +44 -0

app.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from ui.layout import create_main_gr_blocks_ui as duosubs_gr_blocks
+# Script entry point: build the Gradio Blocks UI and start serving it.
+if __name__ == "__main__":
+    duosubs_gr_blocks().launch()

assets/title.html ADDED Viewed

	@@ -0,0 +1,26 @@

+<style>
+.hover-badge:hover {
+    transform: scale(1.075);
+    opacity: 0.9;
+    transition: 0.15s;
+}
+</style>
+<div style="text-align: center;">
+    <h1>DuoSubs</h1>
+    <p>A semantic subtitle aligner and merger for bilingual subtitle syncing</p>
+    <div style="display: flex; justify-content: center; gap: 5px; flex-wrap: wrap; margin-top: 1em;">
+        <a href="https://github.com/CK-Explorer/DuoSubs" target="_blank">
+            <img class="hover-badge" src="https://img.shields.io/github/stars/CK-Explorer/DuoSubs?style=social" />
+        </a>
+        <a href="https://pypi.org/project/duosubs/" target="_blank">
+            <img class="hover-badge" src="https://img.shields.io/pypi/v/duosubs.svg" />
+        </a>
+        <a href="https://duosubs.readthedocs.io/en/latest/" target="_blank">
+            <img class="hover-badge" src="https://img.shields.io/badge/docs-available-blue?logo=readthedocs" />
+        </a>
+        <a href="https://github.com/CK-Explorer/DuoSubs/blob/main/LICENSE" target="_blank">
+            <img class="hover-badge" src="https://img.shields.io/badge/license-Apache--2.0-blueviolet.svg" />
+        </a>
+    </div>
+</div>

ui/__init__.py ADDED Viewed

File without changes

ui/constants.py ADDED Viewed

	@@ -0,0 +1,25 @@

+"""
+Constants used in the DuoSubs Hugging Face Web UI.
+This module defines file paths, supported subtitle formats, model precision options,
+and default values for use in the UI and other components.
+"""
+from pathlib import Path
+from duosubs import ModelPrecision, SubtitleFormat
+# HTML snippet shown at the top of the page: <package parent>/assets/title.html.
+TITLE_HTML = Path(__file__).parent.parent.joinpath("assets", "title.html")
+# Values of the SubtitleFormat enum, first bare and then with a leading dot.
+SUB_EXT_LIST: list[str] = [fmt.value for fmt in SubtitleFormat]
+SUB_EXT_LIST_WITH_DOT: list[str] = [f".{extension}" for extension in SUB_EXT_LIST]
+# Values of the ModelPrecision enum offered in the precision dropdown.
+PRECISION_LIST: list[str] = [precision.value for precision in ModelPrecision]
+# Values preselected in the UI.
+DEFAULT_PRECISION = ModelPrecision.FLOAT32.value
+DEFAULT_SUB_EXT = SubtitleFormat.ASS.value
+# Sentence-transformer model identifiers offered in the model dropdown.
+MODEL_NAME_LIST = [
+    "sentence-transformers/LaBSE",
+    "BAAI/bge-m3",
+    "Qwen/Qwen3-Embedding-0.6B",
+]

ui/events.py ADDED Viewed

	@@ -0,0 +1,224 @@

+"""
+Event handlers and UI logic for subtitle merging in DuoSubs Hugging Face Web UI.
+This module contains functions to handle merging of subtitles, and update UI elements
+based on user interactions.
+"""
+from pathlib import Path
+import gradio as gr
+from duosubs import (
+    LoadSubsError,
+    MergeArgs,
+    MergeSubsError,
+    ModelPrecision,
+    OmitFile,
+    SaveSubsError,
+    SubtitleFormat,
+    load_subtitles,
+    merge_subtitles,
+    save_subtitles_in_zip,
+)
+from sentence_transformers import SentenceTransformer
+def start_merging(
+        model_pool: dict[str, SentenceTransformer],
+        primary_subtitles: str,
+        secondary_subtitles: str,
+        model_name: str,
+        batch_size: int,
+        model_precision: str,
+        ignore_non_overlap: bool,
+        retain_newline: bool,
+        secondary_above_primary: bool,
+        omit_subtitles: list[str],
+        combined_format: str,
+        primary_format:str,
+        secondary_format: str,
+        cancel_state: list[bool],
+        progress: gr.Progress | None = None
+    ) -> str | None:
+    """
+    Run the whole merge pipeline: load both subtitle files, merge them with the
+    selected model, and save the results into a ZIP file next to the primary file.
+    Each stage is skipped once ``cancel_state[0]`` has become True.
+    Args:
+        model_pool (dict[str, SentenceTransformer]): Mapping of model names to shared
+            SentenceTransformer instances.
+        primary_subtitles (str): Path to primary subtitle file.
+        secondary_subtitles (str): Path to secondary subtitle file.
+        model_name (str): Key into ``model_pool`` selecting the model to use.
+        batch_size (int): Batch size for inference.
+        model_precision (str): Precision mode for inference (a ModelPrecision value).
+        ignore_non_overlap (bool): Whether to ignore non-overlapping subtitles.
+        retain_newline (bool): Whether to retain newlines in output.
+        secondary_above_primary (bool): Whether to place secondary subtitle above
+            primary.
+        omit_subtitles (list[str]): Labels ("Combined", "Primary", "Secondary") of the
+            subtitle files to leave out of the ZIP.
+        combined_format (str): Format for combined subtitles.
+        primary_format (str): Format for primary subtitles.
+        secondary_format (str): Format for secondary subtitles.
+        cancel_state (list[bool]): One-element list; index 0 is the cancellation flag.
+        progress (gradio.Progress) : Gradio progress object (optional).
+            Defaults to None, in which case a new gradio.Progress is created.
+    Returns:
+        str | None: Path to the output ZIP file, or None if the run was cancelled or
+            every output file was excluded.
+    Raises:
+        gradio.Error: If any error occurs during loading, merging, or saving subtitles.
+    """
+    tracker = gr.Progress() if progress is None else progress
+    args = MergeArgs(
+        primary=primary_subtitles,
+        secondary=secondary_subtitles,
+        model_precision=ModelPrecision(model_precision),
+        batch_size=int(batch_size),
+        ignore_non_overlap_filter=ignore_non_overlap,
+        retain_newline=retain_newline,
+        secondary_above=secondary_above_primary,
+        omit=[OmitFile.EDIT],
+        format_combined=SubtitleFormat(combined_format),
+        format_primary=SubtitleFormat(primary_format),
+        format_secondary=SubtitleFormat(secondary_format)
+    )
+    # Translate the UI labels into omit flags, keeping the Combined/Primary/Secondary
+    # order.
+    omit_by_label = {
+        "Combined": OmitFile.COMBINED,
+        "Primary": OmitFile.PRIMARY,
+        "Secondary": OmitFile.SECONDARY,
+    }
+    for label, omit_flag in omit_by_label.items():
+        if label in omit_subtitles:
+            args.omit.append(omit_flag)
+    # EDIT plus all three selectable outputs omitted: nothing would be written.
+    if len(args.omit) == 4:
+        gr.Warning(
+            (
+                "Nothing to merge — Please adjust "
+                "<strong><em>Excluded Subtitle Files</em></strong> options "
+                "in <strong><em>File Exports</em></strong>"
+            ),
+            duration=7
+        )
+        return None
+    zip_name_with_path: str | None = None
+    loading_desc = "Stage 1 → Loading subtitles"
+    merging_desc = f"Stage 2 → Merging subtitles using {model_name}"
+    try:
+        if not cancel_state[0]:
+            tracker(progress=0, desc=loading_desc, total=1)
+            primary_subs_data, secondary_subs_data = load_subtitles(args)
+            tracker(progress=1, desc=loading_desc, total=1)
+        if not cancel_state[0]:
+            def report_merge_progress(current: int) -> None:
+                # The callback value is scaled as a percentage (divided by 100).
+                tracker(progress=current/100, desc=merging_desc, total=100)
+            merged_subs = merge_subtitles(
+                args,
+                model_pool[model_name],
+                primary_subs_data,
+                secondary_subs_data,
+                cancel_state,
+                progress_callback=report_merge_progress
+            )
+        if not cancel_state[0]:
+            # The ZIP path is the primary subtitle path with its suffix swapped.
+            primary_without_ext = Path(args.primary).with_suffix("")
+            zip_name_with_path = f"{primary_without_ext}.zip"
+            zip_name = Path(zip_name_with_path).name
+            compressing_desc = f"Stage 3 → Compressing files into {zip_name}"
+            tracker(progress=0, desc=compressing_desc, total=1)
+            save_subtitles_in_zip(
+                args,
+                merged_subs,
+                primary_subs_data.styles,
+                secondary_subs_data.styles
+            )
+            tracker(progress=1, desc=compressing_desc, total=1)
+        if cancel_state[0]:
+            gr.Info("The merging process is stopped.", duration=7)
+    except (LoadSubsError, MergeSubsError, SaveSubsError) as err:
+        # Surface library failures to the user as a Gradio error.
+        raise gr.Error(str(err)) from err
+    return zip_name_with_path
+def cancel_merging(cancel_state: list[bool]) -> gr.Button:
+    """
+    Raise the cancellation flag, notify the user, and disable the Cancel button.
+    Args:
+        cancel_state (list[bool]): One-element list; index 0 is set to True here.
+    Returns:
+        gradio.Button: A non-interactive Cancel button.
+    """
+    cancel_state[0] = True
+    gr.Info("Cancelling merge process...", duration=7)
+    disabled_cancel = gr.Button("Cancel", interactive=False)
+    return disabled_cancel
+def states_during_merging(cancel_state: list[bool]) -> tuple[gr.Button, gr.Button]:
+    """
+    Prepare the buttons for a running merge: Merge is disabled, Cancel is enabled,
+    and the cancellation flag is cleared.
+    Args:
+        cancel_state (list[bool]): One-element list; index 0 is reset to False here.
+    Returns:
+        tuple[gradio.Button, gradio.Button]: Updated Merge and Cancel buttons.
+    """
+    cancel_state[0] = False
+    merge_update = gr.Button("Merge", interactive=False)
+    cancel_update = gr.Button("Cancel", interactive=True)
+    return merge_update, cancel_update
+def states_after_merging(cancel_state: list[bool]) -> tuple[gr.Button, gr.Button]:
+    """
+    Restore the buttons once a merge has ended: Merge is enabled, Cancel is disabled,
+    and the cancellation flag is cleared.
+    Args:
+        cancel_state (list[bool]): One-element list; index 0 is reset to False here.
+    Returns:
+        tuple[gradio.Button, gradio.Button]: Updated Merge and Cancel buttons.
+    """
+    cancel_state[0] = False
+    merge_update = gr.Button("Merge", interactive=True)
+    cancel_update = gr.Button("Cancel", interactive=False)
+    return merge_update, cancel_update
+def validate_excluded_subtitle_file(selected: list[str]) -> None:
+    """
+    Warn the user when every subtitle file has been excluded from the output.
+    A warning is shown only when exactly three options are selected; otherwise the
+    function does nothing.
+    Args:
+        selected (list[str]): List of selected options.
+    """
+    if len(selected) != 3:
+        return
+    message = (
+        "Nothing to merge — Please adjust "
+        "<strong><em>Excluded Subtitle Files</em></strong> options "
+        "in <strong><em>File Exports</em></strong>"
+    )
+    gr.Warning(message, duration=7)

ui/layout.py ADDED Viewed

	@@ -0,0 +1,355 @@

+"""
+Defines the main Gradio UI layout and configuration for DuoSubs subtitle merging
+Hugging Face web app.
+This module sets up the UI components, event handlers, and manages the model loading
+and merging process. It includes device selection, model configuration, and subtitle
+file handling.
+"""
+import gradio as gr
+from .constants import (
+    DEFAULT_PRECISION,
+    DEFAULT_SUB_EXT,
+    MODEL_NAME_LIST,
+    PRECISION_LIST,
+    SUB_EXT_LIST,
+    SUB_EXT_LIST_WITH_DOT,
+    TITLE_HTML,
+)
+from .events import (
+    cancel_merging,
+    start_merging,
+    states_after_merging,
+    states_during_merging,
+    validate_excluded_subtitle_file,
+)
+from .utils import create_model_pools, open_html
+# Built once when this module is imported; wrapped_start_merging hands this same
+# mapping to start_merging on every merge request.
+model_pool = create_model_pools(MODEL_NAME_LIST)
+def create_main_gr_blocks_ui(
+        cache_delete_frequency: int = 3600,
+        cache_delete_age: int = 7200
+    ) -> gr.Blocks:
+    """
+    Builds and returns the main Gradio Blocks UI for DuoSubs.
+    The left column holds the subtitle inputs, the output file and the Merge/Cancel
+    buttons; the right column holds the configuration tabs. The event handlers for
+    merging, cancelling and validating the export options are wired here.
+    Args:
+        cache_delete_frequency (int): How often to delete cache (seconds).
+        cache_delete_age (int): Age threshold for cache deletion (seconds).
+    Returns:
+        gradio.Blocks: The constructed Gradio UI.
+    """
+    main_block = gr.Blocks(
+        title="DuoSubs",
+        theme=gr.themes.Ocean(),
+        delete_cache=(cache_delete_frequency, cache_delete_age)
+    )
+    ui: gr.Blocks
+    with main_block as ui:
+        # One-element list used as a mutable cancellation flag by the event handlers.
+        cancel_state = gr.State([False])
+        title_content = open_html(TITLE_HTML)
+        gr.HTML(title_content)
+        with gr.Row():
+            with gr.Column():
+                (
+                    primary_file,
+                    secondary_file,
+                    merged_file,
+                    merge_button,
+                    cancel_button
+                ) = _create_subtitles_io_block()
+            with gr.Column():
+                gr.Markdown("### 🔧 Configurations")
+                with gr.Tab("Model & Device"):
+                    (
+                        model_name,
+                        batch_size,
+                        model_precision
+                    ) = _create_model_configurations_block()
+                with gr.Tab("Alignment Behavior"):
+                    ignore_non_overlap = _create_alignment_behaviour_block()
+                with gr.Tab("Output Styling"):
+                    (
+                        retain_newline,
+                        secondary_above_primary
+                    ) = _create_output_styling_block()
+                with gr.Tab("File Exports"):
+                    (
+                        omit_subtitles,
+                        combined_format,
+                        primary_format,
+                        secondary_format
+                    ) = _create_file_exports_block()
+        # Warn as soon as the user excludes every output file.
+        omit_subtitles.change(
+            fn=validate_excluded_subtitle_file,
+            inputs=omit_subtitles
+        )
+        # Merge click: lock the buttons, run the merge, then restore the buttons.
+        merge_button.click(
+            fn=states_during_merging,
+            inputs=cancel_state,
+            outputs=[merge_button, cancel_button]
+        ).then(
+            fn=wrapped_start_merging,
+            inputs=[
+                primary_file,
+                secondary_file,
+                model_name,
+                batch_size,
+                model_precision,
+                ignore_non_overlap,
+                retain_newline,
+                secondary_above_primary,
+                omit_subtitles,
+                combined_format,
+                primary_format,
+                secondary_format,
+                cancel_state
+            ],
+            outputs=merged_file
+        ).then(
+            fn=states_after_merging,
+            inputs=cancel_state,
+            outputs=[merge_button, cancel_button]
+        )
+        # NOTE(review): concurrency_limit=None presumably lets the cancel handler
+        # run while a merge is still in progress; confirm against the Gradio docs.
+        cancel_button.click(
+            fn=cancel_merging,
+            inputs=cancel_state,
+            outputs=cancel_button,
+            concurrency_limit=None
+        )
+    return ui
+def _create_subtitles_io_block(
+    ) -> tuple[gr.File, gr.File, gr.File, gr.Button, gr.Button]:
+    """
+    Creates subtitle file input/output UI components.
+    This function sets up the UI for uploading primary and secondary subtitle files,
+    a file component for the processed ZIP output, and the buttons to initiate and
+    cancel the merging process. Components are created in display order.
+    Returns:
+        tuple[gradio.File, gradio.File, gradio.File, gradio.Button, gradio.Button]:
+        - primary_file
+        - secondary_file
+        - merged_file
+        - merge_button
+        - cancel_button
+    """
+    gr.Markdown("### 📄 Input Subtitles")
+    with gr.Row():
+        # Both uploads are limited to the extensions in SUB_EXT_LIST_WITH_DOT.
+        primary_file = gr.File(
+            label="Primary Subtitle File",
+            file_types=SUB_EXT_LIST_WITH_DOT
+        )
+        secondary_file = gr.File(
+            label="Secondary Subtitle File",
+            file_types=SUB_EXT_LIST_WITH_DOT
+        )
+    gr.Markdown("### 📦 Output Zip")
+    merged_file = gr.File(label="Processed Subtitles (in zip)")
+    with gr.Row():
+        merge_button = gr.Button("Merge")
+        # Cancel starts out non-interactive.
+        cancel_button = gr.Button("Cancel", interactive=False)
+    return primary_file, secondary_file, merged_file, merge_button, cancel_button
+def _create_model_configurations_block() -> tuple[gr.Dropdown, gr.Slider, gr.Dropdown]:
+    """
+    Creates model and device configuration UI components.
+    This function sets up the UI for selecting the model name, batch size, and model
+    precision. The model dropdown sits in its own column; the batch size slider and
+    precision dropdown share a row.
+    Returns:
+        tuple[gradio.Dropdown, gradio.Slider, gradio.Dropdown]:
+        - model_name
+        - batch_size
+        - model_precision
+    """
+    with gr.Column():
+        # The first entry of MODEL_NAME_LIST is preselected.
+        model_name = gr.Dropdown(
+            choices=MODEL_NAME_LIST,
+            label="Sentence Transformer Model",
+            value=MODEL_NAME_LIST[0],
+            info="Model for computing subtitle sentence similarity."
+        )
+    with gr.Row():
+        # The slider starts at its maximum value (256).
+        batch_size = gr.Slider(
+            label="Batch Size",
+            minimum=8,
+            maximum=256,
+            value=256,
+            step=1,
+            info="Number of sentences to process in parallel"
+        )
+        model_precision = gr.Dropdown(
+            choices=PRECISION_LIST,
+            label="Model Precision",
+            value=DEFAULT_PRECISION,
+            info="Precision mode for inference"
+        )
+    return model_name, batch_size, model_precision
+def _create_alignment_behaviour_block() -> gr.Checkbox:
+    """
+    Creates alignment behavior UI components.
+    This function sets up a single checkbox, unchecked by default, for ignoring the
+    non-overlap filter in the merging process.
+    Returns:
+        gradio.Checkbox: Checkbox for alignment behavior.
+    """
+    with gr.Column():
+        ignore_non_overlap = gr.Checkbox(
+            label="Ignore Non-Overlap Filter",
+            value=False,
+            info=(
+                "💡 Use only if both subtitles are **semantically identical** "
+                "and contain **no added scenes or annotations**"
+            )
+        )
+    return ignore_non_overlap
+def _create_output_styling_block() -> tuple[gr.Checkbox, gr.Checkbox]:
+    """
+    Creates output styling UI components.
+    This function sets up checkboxes for retaining newlines in the original subtitles
+    and placing secondary subtitles above primary subtitles in the merged output.
+    Both checkboxes are unchecked by default.
+    Returns:
+        tuple[gradio.Checkbox, gradio.Checkbox]:
+        - retain_newline
+        - secondary_above_primary
+    """
+    with gr.Column():
+        retain_newline = gr.Checkbox(
+            label="Retain Newlines",
+            value=False,
+            info="**Retain line breaks** from the original subtitles"
+        )
+        secondary_above_primary = gr.Checkbox(
+            label="Secondary subtitle above primary subtitle",
+            value=False,
+            info="Place **secondary** subtitle **above** the **primary**"
+        )
+    return retain_newline, secondary_above_primary
+def _create_file_exports_block(
+    ) -> tuple[gr.CheckboxGroup, gr.Dropdown, gr.Dropdown, gr.Dropdown]:
+    """
+    Creates file export UI components.
+    This function sets up checkboxes for excluding certain subtitle files from the ZIP
+    output, and dropdowns for selecting the format of combined, primary, and secondary
+    subtitles. Every format dropdown offers SUB_EXT_LIST and defaults to
+    DEFAULT_SUB_EXT.
+    Returns:
+        tuple[gradio.CheckboxGroup, gradio.Dropdown, gradio.Dropdown, gradio.Dropdown]:
+        - omit_subtitles
+        - combined_format
+        - primary_format
+        - secondary_format
+    """
+    with gr.Column():
+        # These labels are the strings start_merging matches to build its omit list.
+        omit_subtitles = gr.CheckboxGroup(
+            ["Combined", "Primary", "Secondary"],
+            type="value",
+            label="Excluded Subtitle Files from ZIP"
+        )
+    with gr.Column():
+        gr.Markdown("Subtitle Format")
+        combined_format = gr.Dropdown(
+            choices=SUB_EXT_LIST,
+            value=DEFAULT_SUB_EXT,
+            label="Combined"
+        )
+        primary_format = gr.Dropdown(
+            choices=SUB_EXT_LIST,
+            value=DEFAULT_SUB_EXT,
+            label="Primary"
+        )
+        secondary_format = gr.Dropdown(
+            choices=SUB_EXT_LIST,
+            value=DEFAULT_SUB_EXT,
+            label="Secondary"
+        )
+    return omit_subtitles, combined_format, primary_format, secondary_format
+def wrapped_start_merging(
+        primary_subtitles: str,
+        secondary_subtitles: str,
+        model_name: str,
+        batch_size: int,
+        model_precision: str,
+        ignore_non_overlap: bool,
+        retain_newline: bool,
+        secondary_above_primary: bool,
+        omit_subtitles: list[str],
+        combined_format: str,
+        primary_format:str,
+        secondary_format: str,
+        cancel_state: list[bool],
+        progress: gr.Progress | None = None
+    ) -> str | None:
+    """
+    Wrapper that forwards the UI inputs to start_merging together with the
+    module-level model pool.
+    The Merge click handler only supplies component values, so the shared
+    ``model_pool`` is injected here rather than passed as an input.
+    Args:
+        primary_subtitles (str): Path to primary subtitle file.
+        secondary_subtitles (str): Path to secondary subtitle file.
+        model_name (str): Name of the model to use.
+        batch_size (int): Batch size for inference.
+        model_precision (str): Precision mode for inference.
+        ignore_non_overlap (bool): Whether to ignore non-overlapping subtitles.
+        retain_newline (bool): Whether to retain newlines in output.
+        secondary_above_primary (bool): Whether to place secondary subtitle above
+            primary.
+        omit_subtitles (list[str]): List of subtitle types to omit from output.
+        combined_format (str): Format for combined subtitles.
+        primary_format (str): Format for primary subtitles.
+        secondary_format (str): Format for secondary subtitles.
+        cancel_state (list[bool]): List tracking cancellation state.
+        progress (gradio.Progress) : Gradio progress object (optional).
+            Defaults to None.
+    Returns:
+        str | None: The value returned by start_merging: path to the output ZIP
+            file, or None when no file was produced.
+    """
+    # Forward by keyword so a reordering on either side cannot mis-bind arguments.
+    return start_merging(
+        model_pool=model_pool,
+        primary_subtitles=primary_subtitles,
+        secondary_subtitles=secondary_subtitles,
+        model_name=model_name,
+        batch_size=batch_size,
+        model_precision=model_precision,
+        ignore_non_overlap=ignore_non_overlap,
+        retain_newline=retain_newline,
+        secondary_above_primary=secondary_above_primary,
+        omit_subtitles=omit_subtitles,
+        combined_format=combined_format,
+        primary_format=primary_format,
+        secondary_format=secondary_format,
+        cancel_state=cancel_state,
+        progress=progress
+    )

ui/utils.py ADDED Viewed

	@@ -0,0 +1,44 @@

+"""
+Utility functions for reading HTML files and creating the model pool for the web
+UI.
+These functions help in reading HTML content for the UI and in creating the
+SentenceTransformer instances used for model inference.
+"""
+from pathlib import Path
+from sentence_transformers import SentenceTransformer
+def open_html(file: str | Path) -> str:
+    """
+    Return the full text of an HTML file, decoded as UTF-8.
+    Args:
+        file (str | Path): Path to the HTML file (str or Path).
+    Returns:
+        str: The content of the HTML file as a string.
+    """
+    return Path(file).read_text(encoding="utf-8")
+def create_model_pools(
+        name_list: list[str],
+        device: str = "cuda"
+    ) -> dict[str, SentenceTransformer]:
+    """
+    Creates a pool of SentenceTransformer models based on the provided model names.
+    One instance is constructed per name, all on the same device.
+    Args:
+        name_list (list[str]): List of model names to create SentenceTransformer
+            instances.
+        device (str): Device string passed to every SentenceTransformer. Defaults to
+            "cuda", the value that was previously hard-coded.
+    Returns:
+        dict[str, SentenceTransformer]: A dictionary mapping model names to their
+            corresponding SentenceTransformer instances.
+    """
+    return {name: SentenceTransformer(name, device=device) for name in name_list}