File size: 7,044 Bytes
7b27e12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env python3
"""
VINCIE Service UI (Gradio)
- Automatic setup runs on app load (no manual setup button).
- Multi-turn editing and multi-concept composition front-end.
- Designed for NVIDIA L40S (SM 8.9) environments aligned with CUDA 12.x.
- Functional reference: ByteDance-Seed/VINCIE.
- Space and Docker developed by Carlex (contact below).
"""

import os
from pathlib import Path
from typing import List, Tuple, Optional

import gradio as gr

# Adapt this import to the project layout.
# Provide a VincieService with:
#  - ensure_repo(): clones/updates upstream repo if missing
#  - ensure_model(): downloads/validates checkpoints to /app/ckpt/VINCIE-3B
#  - multi_turn_edit(image_path: str, turns: List[str]) -> str (output dir)
#  - multi_concept_compose(files: List[str], descs: List[str], final_prompt: str) -> str (output dir)
from services.vincie import VincieService  # change path if needed

# Module-level service instance used by every UI callback in this file
# (setup_auto, ui_multi_turn, ui_multi_concept). It is created at import time
# with no arguments; per the project notes the defaults are /app/VINCIE for
# the repository and /app/ckpt/VINCIE-3B for the checkpoint.
svc = VincieService()


def setup_auto() -> str:
    """
    Prepare the backend when the interface loads and report the outcome.

    Two service steps run in order: ``svc.ensure_repo()`` followed by
    ``svc.ensure_model()``. Any exception raised by either step is caught
    here so the UI always receives a printable message instead of a traceback.

    Returns:
        A human-readable status line: a success message when both steps
        finish, otherwise an error message that embeds the exception text.
    """
    try:
        # Order matters: the repository is ensured before the checkpoint.
        svc.ensure_repo()
        svc.ensure_model()
    except Exception as exc:
        # UI boundary: surface the failure as text rather than propagating.
        return f"Setup encountered an error: {exc}"

    success_message = (
        "Setup completed successfully: repository and checkpoint are ready "
        "for inference on an NVIDIA L40S environment."
    )
    return success_message


def _list_media(out_dir: Path, max_images: int = 24) -> Tuple[List[str], Optional[str]]:
    """
    Enumerate resulting images and the most recent video from an output directory.

    The directory is searched recursively. Only regular files are reported;
    entries that cannot be inspected (for example because they vanished
    between discovery and ``stat``) are skipped instead of raising.

    Args:
        out_dir: Path to the directory where the service wrote its results.
        max_images: Upper bound on how many images to surface in the gallery.
            The most recently modified images are kept. Zero or a negative
            value yields an empty image list.

    Returns:
        A tuple (images, video) where:
          - images is a list of file paths to images sorted by modified time
            (oldest first),
          - video is the path to the latest .mp4 if found, otherwise None.
    """

    def _files_by_mtime(patterns: Tuple[str, ...]) -> List[Path]:
        """Return regular files matching any pattern, oldest first."""
        stamped: List[Tuple[float, Path]] = []
        for pattern in patterns:
            for candidate in out_dir.rglob(pattern):
                try:
                    # Skip directories that merely look like media files, and
                    # read the mtime once so sorting cannot raise later.
                    if candidate.is_file():
                        stamped.append((candidate.stat().st_mtime, candidate))
                except OSError:
                    # File disappeared or is unreadable: ignore it.
                    continue
        # Sort on the timestamp only; list.sort is stable, so ties keep
        # their discovery order.
        stamped.sort(key=lambda pair: pair[0])
        return [path for _, path in stamped]

    images = _files_by_mtime(("*.png", "*.jpg", "*.jpeg", "*.webp"))
    videos = _files_by_mtime(("*.mp4",))

    # Guard the slice explicitly: images[-0:] would return every image.
    image_paths = [str(p) for p in images[-max_images:]] if max_images > 0 else []
    video_path = str(videos[-1]) if videos else None
    return image_paths, video_path


def ui_multi_turn(input_image: Optional[str], turns_text: Optional[str]):
    """
    Handle a "multi-turn editing" request coming from the UI.

    Validation problems and service failures are reported through the status
    string instead of being raised, so the caller always gets a 3-tuple.

    Args:
        input_image: Path to a single input image on disk.
        turns_text: Editing instructions typed by the user, one per line.
            Blank lines are ignored and each line is whitespace-trimmed.

    Returns:
        (gallery, video, status) for Gradio components. On validation or
        generation failure, gallery is an empty list and video is None.
    """
    # Guard clauses: both inputs must be present and non-blank.
    if not (input_image and str(input_image).strip()):
        return [], None, "Please provide an input image."
    if not (turns_text and turns_text.strip()):
        return [], None, "Please provide edit turns (one per line)."

    # Build the ordered instruction list from the non-empty lines.
    trimmed_lines = (raw_line.strip() for raw_line in turns_text.splitlines())
    instructions = [line for line in trimmed_lines if line]

    try:
        result_dir = svc.multi_turn_edit(input_image, instructions)
    except Exception as exc:
        return [], None, f"Generation error: {exc}"

    # Collect whatever media the service left in its output directory.
    gallery_items, video_file = _list_media(Path(result_dir))
    return gallery_items, video_file, f"Outputs saved to: {result_dir}"


def ui_multi_concept(files: Optional[List[str]], descs_text: Optional[str], final_prompt: Optional[str]):
    """
    Handle a "multi-concept composition" request coming from the UI.

    Validation problems and service failures are reported through the status
    string instead of being raised, so the caller always gets a 3-tuple.

    Args:
        files: List of paths to concept images on disk.
        descs_text: Per-image descriptions, one line per image in the same
            order as ``files``. Blank lines are ignored.
        final_prompt: A final composition prompt that aggregates the concepts.

    Returns:
        (gallery, video, status) for Gradio components. On validation or
        generation failure, gallery is an empty list and video is None.
    """
    # Guard clauses: all three inputs are mandatory.
    if not files:
        return [], None, "Please upload concept images."
    if not (descs_text and descs_text.strip()):
        return [], None, "Please provide descriptions (one per line)."
    if not (final_prompt and final_prompt.strip()):
        return [], None, "Please provide a final prompt."

    # One description per non-blank line, whitespace-trimmed.
    trimmed_lines = (raw_line.strip() for raw_line in descs_text.splitlines())
    descriptions = [line for line in trimmed_lines if line]

    # Descriptions pair up positionally with images, so counts must agree.
    desc_count = len(descriptions)
    image_count = len(files)
    if desc_count != image_count:
        return [], None, f"Descriptions count ({desc_count}) must match images count ({image_count})."

    try:
        result_dir = svc.multi_concept_compose(files, descriptions, final_prompt)
    except Exception as exc:
        return [], None, f"Generation error: {exc}"

    # Collect whatever media the service left in its output directory.
    gallery_items, video_file = _list_media(Path(result_dir))
    return gallery_items, video_file, f"Outputs saved to: {result_dir}"


# Build the Gradio interface. Components are declared in display order inside
# the Blocks context, and the resulting app object is bound to `demo`, which
# the __main__ guard at the bottom of the file launches.
with gr.Blocks(title="VINCIE Service") as demo:
    # Header and credits, rendered as a single Markdown block (one bullet per
    # list entry, joined with newlines).
    gr.Markdown(
        "\n".join(
            [
                "# VINCIE Service — Multi-turn Editing and Multi-concept Composition",
                "- Automatic setup runs at startup; setup status appears below.",
                "- Hardware requirement: NVIDIA L40S (SM 8.9) is recommended for this build.",
                "- Functional upstream model: ByteDance-Seed/VINCIE (see project repository).",
                "- Space and Docker were developed by Carlex.",
                "- Contact: Email: Carlex22@gmail.com | GitHub: carlex22",
            ]
        )
    )

    # Read-only box that receives the string returned by setup_auto().
    with gr.Row():
        setup_out = gr.Textbox(label="Setup Status", interactive=False)

    # Tab 1: one input image plus a list of edit instructions.
    with gr.Tab("Multi-turn Editing"):
        with gr.Row():
            # type="filepath": ui_multi_turn documents its first argument as a
            # path on disk.
            img = gr.Image(type="filepath", label="Input image")
            turns = gr.Textbox(lines=8, label="Turns (one per line)")
        run1 = gr.Button("Run")
        out_gallery = gr.Gallery(label="Images", columns=4, height="auto")
        out_video = gr.Video(label="Video (if available)")
        out_status = gr.Textbox(label="Output", interactive=False)
        # The outputs list matches ui_multi_turn's (gallery, video, status) tuple.
        run1.click(ui_multi_turn, inputs=[img, turns], outputs=[out_gallery, out_video, out_status])

    # Tab 2: several concept images, one description per image, and a final prompt.
    with gr.Tab("Multi-concept Composition"):
        # Multiple uploads delivered as file paths; ui_multi_concept requires
        # the number of descriptions to equal the number of uploaded files.
        files = gr.File(file_count="multiple", type="filepath", label="Concept images")
        descs = gr.Textbox(lines=8, label="Descriptions (one per line, same order as images)")
        final_prompt = gr.Textbox(lines=2, label="Final prompt")
        run2 = gr.Button("Run")
        out_gallery2 = gr.Gallery(label="Images", columns=4, height="auto")
        out_video2 = gr.Video(label="Video (if available)")
        out_status2 = gr.Textbox(label="Output", interactive=False)
        # The outputs list matches ui_multi_concept's (gallery, video, status) tuple.
        run2.click(
            ui_multi_concept,
            inputs=[files, descs, final_prompt],
            outputs=[out_gallery2, out_video2, out_status2],
        )

    # Auto-setup on load (no manual button): setup_auto's status string is
    # written into the "Setup Status" textbox.
    # NOTE(review): presumably this fires on every page load, so it relies on
    # the service's ensure_* calls being idempotent — confirm.
    demo.load(fn=setup_auto, outputs=setup_out)

if __name__ == "__main__":
    # Script entry point: start the Gradio server for `demo`.
    # The port comes from the PORT environment variable, falling back to 7860.
    listen_port = int(os.getenv("PORT", "7860"))
    # Directories handed to Gradio via `allowed_paths`.
    served_dirs = ["/app/outputs", "/app/ckpt"]
    demo.launch(
        server_name="0.0.0.0",
        server_port=listen_port,
        allowed_paths=served_dirs,
    )