# Spaces: Carl3x232 / Vince (Paused)
# File size: 7,044 Bytes
# 7b27e12

#!/usr/bin/env python3
"""
VINCIE Service UI (Gradio)
- Automatic setup runs on app load (no manual setup button).
- Multi-turn editing and multi-concept composition front-end.
- Designed for NVIDIA L40S (SM 8.9) environments aligned with CUDA 12.x.
- Functional reference: ByteDance-Seed/VINCIE.
- Space and Docker developed by Carlex (contact below).
"""

import os
from pathlib import Path
from typing import List, Tuple, Optional

import gradio as gr

# Adapt this import to the project layout.
# Provide a VincieService with:
#  - ensure_repo(): clones/updates upstream repo if missing
#  - ensure_model(): downloads/validates checkpoints to /app/ckpt/VINCIE-3B
#  - multi_turn_edit(image_path: str, turns: List[str]) -> str (output dir)
#  - multi_concept_compose(files: List[str], descs: List[str], final_prompt: str) -> str (output dir)
from services.vincie import VincieService  # change path if needed

# Module-level service instance, created once at import time and shared by
# every UI callback in this file (setup_auto, ui_multi_turn, ui_multi_concept).
# Constructed with no arguments; per the original note it defaults to
# /app/VINCIE and /app/ckpt/VINCIE-3B (the defaults live in VincieService,
# which is not shown here — confirm against that class).
svc = VincieService()


def setup_auto() -> str:
    """
    Prepare the backing service and report the outcome as text.

    Calls ``svc.ensure_repo()`` and then ``svc.ensure_model()``, in that
    order. Any exception raised by either call is caught here and folded
    into the returned message, so this function itself never raises for
    setup failures.

    Returns:
        A status message for the UI: a fixed success sentence when both
        calls complete, otherwise the error text prefixed with
        "Setup encountered an error: ".
    """
    try:
        svc.ensure_repo()
        svc.ensure_model()
    except Exception as err:
        # Surface the failure in the status box instead of breaking the page.
        message = f"Setup encountered an error: {err}"
    else:
        message = (
            "Setup completed successfully: repository and checkpoint are ready "
            "for inference on an NVIDIA L40S environment."
        )
    return message


def _list_media(out_dir: Path, max_images: int = 24) -> Tuple[List[str], Optional[str]]:
    """
    Collect result images and the most recent video from an output directory.

    The directory tree under ``out_dir`` is walked once, recursively. Only
    regular files are considered; a directory whose name happens to end in
    an image or video extension is ignored. Extensions are matched
    case-insensitively (``.PNG`` counts as an image). Entries that cannot be
    inspected (for example, removed while listing) are skipped.

    Args:
        out_dir: Directory where the service wrote its results. A missing
            path, or a path that is not a directory, yields no media.
        max_images: Upper bound on how many images to return. The newest
            ``max_images`` images are kept. Values <= 0 return no images.

    Returns:
        A tuple ``(images, video)`` where:
          - ``images`` is a list of image file paths ordered from oldest to
            newest modification time (ties broken by path),
          - ``video`` is the path of the most recently modified ``.mp4``
            file, or ``None`` when there is none.
    """
    image_exts = (".png", ".jpg", ".jpeg", ".webp")
    video_exts = (".mp4",)

    root = Path(out_dir)
    if not root.is_dir():
        return [], None

    # (mtime, path) pairs so plain tuple ordering sorts by time, then by path.
    images: List[Tuple[float, str]] = []
    videos: List[Tuple[float, str]] = []

    for entry in root.rglob("*"):
        lowered = entry.name.lower()
        if lowered.endswith(image_exts):
            bucket = images
        elif lowered.endswith(video_exts):
            bucket = videos
        else:
            continue
        try:
            if not entry.is_file():
                continue
            modified = entry.stat().st_mtime
        except OSError:
            # The entry vanished or is unreadable; listing stays best-effort.
            continue
        bucket.append((modified, str(entry)))

    images.sort()
    # Guard the slice: a bare ``images[-0:]`` would return every image.
    newest = images[-max_images:] if max_images > 0 else []
    image_paths = [path for _, path in newest]
    video_path = max(videos)[1] if videos else None
    return image_paths, video_path


def ui_multi_turn(input_image: Optional[str], turns_text: Optional[str]):
    """
    Handle a "Run" request from the multi-turn editing tab.

    Validates both inputs, splits ``turns_text`` into non-empty, stripped
    lines, hands them to ``svc.multi_turn_edit`` together with the image
    path, and then lists the media found in the directory that call returns.

    Args:
        input_image: Path to the input image on disk.
        turns_text: Editing instructions, one per line; blank lines are dropped.

    Returns:
        A ``(gallery, video, status)`` tuple for the Gradio outputs. On a
        validation failure or a service exception the gallery is empty, the
        video is ``None`` and the status carries the message.
    """
    if not (input_image and str(input_image).strip()):
        return [], None, "Please provide an input image."
    if not (turns_text and turns_text.strip()):
        return [], None, "Please provide edit turns (one per line)."

    # One instruction per non-blank line, surrounding whitespace removed.
    instructions = [line for line in map(str.strip, turns_text.splitlines()) if line]

    try:
        result_dir = svc.multi_turn_edit(input_image, instructions)
    except Exception as err:
        return [], None, f"Generation error: {err}"

    gallery_items, latest_video = _list_media(Path(result_dir))
    return gallery_items, latest_video, f"Outputs saved to: {result_dir}"


def ui_multi_concept(files: Optional[List[str]], descs_text: Optional[str], final_prompt: Optional[str]):
    """
    Handle a "Run" request from the multi-concept composition tab.

    Checks, in order, that images, descriptions and a final prompt were
    supplied and that there is exactly one non-blank description line per
    image. It then calls ``svc.multi_concept_compose`` and lists the media
    found in the directory that call returns.

    Args:
        files: Paths to the concept images on disk.
        descs_text: Descriptions, one per line, in the same order as ``files``.
        final_prompt: Prompt describing the final composition.

    Returns:
        A ``(gallery, video, status)`` tuple for the Gradio outputs. On a
        validation failure or a service exception the gallery is empty, the
        video is ``None`` and the status carries the message.
    """

    def reject(reason: str):
        # Uniform "nothing to show" result carrying only a status message.
        return [], None, reason

    if not files:
        return reject("Please upload concept images.")
    if not (descs_text and descs_text.strip()):
        return reject("Please provide descriptions (one per line).")
    if not (final_prompt and final_prompt.strip()):
        return reject("Please provide a final prompt.")

    descriptions = [line for line in map(str.strip, descs_text.splitlines()) if line]
    n_descs, n_files = len(descriptions), len(files)
    if n_descs != n_files:
        return reject(f"Descriptions count ({n_descs}) must match images count ({n_files}).")

    try:
        result_dir = svc.multi_concept_compose(files, descriptions, final_prompt)
    except Exception as err:
        return reject(f"Generation error: {err}")

    gallery_items, latest_video = _list_media(Path(result_dir))
    return gallery_items, latest_video, f"Outputs saved to: {result_dir}"


# Build the Gradio UI. Components are declared in display order inside the
# Blocks context, so the statement order below is also the page layout:
# header, setup status row, then one tab per workflow.
with gr.Blocks(title="VINCIE Service") as demo:
    # Header and credits, rendered as a single Markdown block (one bullet per line).
    gr.Markdown(
        "\n".join(
            [
                "# VINCIE Service — Multi-turn Editing and Multi-concept Composition",
                "- Automatic setup runs at startup; setup status appears below.",
                "- Hardware requirement: NVIDIA L40S (SM 8.9) is recommended for this build.",
                "- Functional upstream model: ByteDance-Seed/VINCIE (see project repository).",
                "- Space and Docker were developed by Carlex.",
                "- Contact: Email: Carlex22@gmail.com | GitHub: carlex22",
            ]
        )
    )

    # Read-only box that receives the string returned by setup_auto().
    with gr.Row():
        setup_out = gr.Textbox(label="Setup Status", interactive=False)

    # Tab 1: one input image plus free-text edit turns -> ui_multi_turn.
    with gr.Tab("Multi-turn Editing"):
        with gr.Row():
            # type="filepath" so the callback receives a path string, matching
            # ui_multi_turn's documented input_image argument.
            img = gr.Image(type="filepath", label="Input image")
            turns = gr.Textbox(lines=8, label="Turns (one per line)")
        run1 = gr.Button("Run")
        out_gallery = gr.Gallery(label="Images", columns=4, height="auto")
        out_video = gr.Video(label="Video (if available)")
        out_status = gr.Textbox(label="Output", interactive=False)
        # Output order must match the (gallery, video, status) tuple returned by ui_multi_turn.
        run1.click(ui_multi_turn, inputs=[img, turns], outputs=[out_gallery, out_video, out_status])

    # Tab 2: several concept images, their descriptions and a final prompt -> ui_multi_concept.
    with gr.Tab("Multi-concept Composition"):
        files = gr.File(file_count="multiple", type="filepath", label="Concept images")
        descs = gr.Textbox(lines=8, label="Descriptions (one per line, same order as images)")
        final_prompt = gr.Textbox(lines=2, label="Final prompt")
        run2 = gr.Button("Run")
        out_gallery2 = gr.Gallery(label="Images", columns=4, height="auto")
        out_video2 = gr.Video(label="Video (if available)")
        out_status2 = gr.Textbox(label="Output", interactive=False)
        # Output order must match the (gallery, video, status) tuple returned by ui_multi_concept.
        run2.click(
            ui_multi_concept,
            inputs=[files, descs, final_prompt],
            outputs=[out_gallery2, out_video2, out_status2],
        )

    # Auto-setup on load (no manual button): setup_auto's return value fills setup_out.
    # NOTE(review): Gradio's load event appears to fire on each page load in the
    # browser rather than once per process, so setup_auto may run repeatedly —
    # confirm svc.ensure_repo()/ensure_model() are cheap to call again.
    demo.load(fn=setup_auto, outputs=setup_out)

if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, falling back to 7860.
    listen_port = int(os.getenv("PORT", "7860"))
    # Directories Gradio is allowed to serve files from.
    servable_dirs = ["/app/outputs", "/app/ckpt"]
    demo.launch(server_name="0.0.0.0", server_port=listen_port, allowed_paths=servable_dirs)