Spaces: Running on Zero
yuhangzang committed · babd02b
1 Parent(s): d173683
Gallery: pair image+prompt examples; load prompt on selection; add bottom citation bib section
Browse files:
- app.py +76 -18
- requirements.txt +3 -0
app.py CHANGED

```diff
@@ -27,7 +27,8 @@ def _load_model_and_processor():
     try:
         model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
             MODEL_ID,
-            torch_dtype=DTYPE,
+            # `torch_dtype` was deprecated in Transformers; use `dtype` instead.
+            dtype=DTYPE,
             attn_implementation=attn_impl,
             device_map="auto",
         )
```
```diff
@@ -36,7 +37,8 @@ def _load_model_and_processor():
         # Fallback for environments without flash-attn
         model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
             MODEL_ID,
-            torch_dtype=DTYPE,
+            # Use the new `dtype` kwarg for consistency with deprecations
+            dtype=DTYPE,
             attn_implementation="eager",
             device_map="auto",
         )
```
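Both loader hunks make the same `torch_dtype` → `dtype` substitution. As a standalone reference, here is a minimal sketch of the try-flash-attn-then-eager pattern they implement; `MODEL_ID`, `DTYPE`, and the concrete `attn_impl` value are placeholders, since the diff does not show how app.py defines them:

```python
# Standalone sketch of the "flash-attn first, eager fallback" loader above.
# MODEL_ID and DTYPE are placeholders -- app.py defines its own values.
import torch
from transformers import Qwen2_5_VLForConditionalGeneration

MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"  # placeholder
DTYPE = torch.bfloat16                    # placeholder

def load_model():
    try:
        # Fast path: FlashAttention 2 kernels when flash-attn is installed.
        return Qwen2_5_VLForConditionalGeneration.from_pretrained(
            MODEL_ID,
            dtype=DTYPE,  # `torch_dtype` is deprecated in recent transformers
            attn_implementation="flash_attention_2",
            device_map="auto",
        )
    except Exception:
        # Fallback path: eager attention works without extra dependencies.
        return Qwen2_5_VLForConditionalGeneration.from_pretrained(
            MODEL_ID,
            dtype=DTYPE,
            attn_implementation="eager",
            device_map="auto",
        )
```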
```diff
@@ -142,17 +144,32 @@ def generate(image, prompt, max_new_tokens, temperature, top_p, top_k):
 
 def build_ui():
     with gr.Blocks() as demo:
-        gr.Markdown("# Spark-VL ZeroGPU Demo\nUpload an image or choose from the example gallery, then enter a prompt.")
-
-        # Build an image gallery from ./examples
-        …
+        gr.Markdown("# Spark-VL ZeroGPU Demo\nUpload an image or choose from the example gallery (image + prompt), then enter a prompt.")
+
+        # Build an image+prompt gallery from ./examples
+        # Each example is an image file with an optional sidecar .txt containing the prompt.
+        # If a .txt is present (same basename), we will display a caption and load the
+        # prompt alongside the image when the thumbnail is selected.
+        def _gather_examples() -> List[tuple]:
+            pairs = []  # (image_path, prompt_text)
             imgs = []
             for ext in ("jpg", "jpeg", "png", "webp"):
                 imgs.extend(glob.glob(os.path.join("examples", f"*.{ext}")))
             # Deduplicate while keeping order
-            …
-            …
-            …
+            for img_path in list(dict.fromkeys(sorted(imgs))):
+                stem, _ = os.path.splitext(img_path)
+                prompt_path = stem + ".txt"
+                prompt_text = None
+                if os.path.exists(prompt_path):
+                    try:
+                        with open(prompt_path, "r", encoding="utf-8") as fh:
+                            prompt_text = fh.read().strip()
+                    except Exception:
+                        prompt_text = None
+                pairs.append((img_path, prompt_text))
+            return pairs
+
+        example_pairs = _gather_examples()
 
         # Load default image if exists
         default_path = os.path.join("examples", "example_0.png")
```
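The sidecar convention introduced here is: `examples/<name>.<ext>` plus an optional `examples/<name>.txt` sharing the basename. A small hypothetical sketch of adding one such pair (the prompt text is illustrative only):

```python
# Hypothetical example pair; _gather_examples() above picks it up automatically.
#   examples/example_0.png  -> thumbnail shown in the gallery
#   examples/example_0.txt  -> prompt loaded when that thumbnail is selected
from pathlib import Path

examples = Path("examples")
examples.mkdir(exist_ok=True)
# The prompt text below is illustrative only.
(examples / "example_0.txt").write_text(
    "Describe this image in one sentence.", encoding="utf-8"
)
```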
```diff
@@ -161,26 +178,42 @@ def build_ui():
         with gr.Row():
             with gr.Column(scale=1):
                 image = gr.Image(type="pil", label="Image", value=default_image)
+                # Prepare gallery items as (image, caption) so users can see
+                # that a prompt is associated with each example.
+                def _gallery_items():
+                    items = []
+                    for img_path, prompt_text in example_pairs:
+                        caption = (prompt_text or "").strip()
+                        # Keep captions compact to avoid tall tiles
+                        if len(caption) > 120:
+                            caption = caption[:117] + "..."
+                        items.append((img_path, caption))
+                    return items
+
                 gallery = gr.Gallery(
-                    value=…
-                    label="…
+                    value=_gallery_items(),
+                    label="Examples (Image + Prompt)",
                     show_label=True,
                     columns=4,
-                    height=…
+                    height=260,
                     allow_preview=True,
                 )
 
                 # When a thumbnail is clicked, load it into the image input
-                def _on_gallery_select(evt: gr.SelectData):
+                def _on_gallery_select(evt: gr.SelectData, cur_prompt: str = ""):
+                    # Load both the example image and its paired prompt
                     idx = evt.index
-                    if 0 <= idx < len(…
+                    if 0 <= idx < len(example_pairs):
+                        img_path, prompt_text = example_pairs[idx]
                         try:
-                            …
+                            img_val = Image.open(img_path)
                         except Exception:
-                            …
-                            …
+                            img_val = None
+                        # If no prompt sidecar, preserve the user's current prompt
+                        return img_val, (prompt_text if prompt_text is not None else cur_prompt)
+                    return None, cur_prompt
 
-                …
+                # Defer wiring the select handler until after the prompt component is created
 
             with gr.Column(scale=1):
                 prompt = gr.Textbox(
```
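For reference, the gallery-to-inputs pattern in isolation. This is a self-contained sketch, separate from app.py: `gr.Gallery` accepts `(image, caption)` tuples, and a `.select` handler receives a `gr.SelectData` whose `.index` identifies the clicked tile. File paths and prompts here are hypothetical:

```python
# Self-contained sketch of the Gallery -> (Image, Textbox) selection pattern.
import gradio as gr

pairs = [
    ("examples/a.png", "Prompt for image A"),  # hypothetical files
    ("examples/b.png", "Prompt for image B"),
]

with gr.Blocks() as demo:
    image = gr.Image(type="filepath", label="Image")
    prompt = gr.Textbox(label="Prompt")
    # Gallery tiles accept (image, caption) tuples, as in the hunk above.
    gallery = gr.Gallery(value=pairs, columns=4, label="Examples")

    def on_select(evt: gr.SelectData, cur_prompt: str):
        # evt.index is the position of the clicked thumbnail.
        path, text = pairs[evt.index]
        # Keep the user's current prompt when the example has none.
        return path, (text or cur_prompt)

    # Bind only after every referenced component exists -- the same reason
    # app.py defers its wiring past the prompt Textbox (next hunk).
    gallery.select(on_select, inputs=[prompt], outputs=[image, prompt])

if __name__ == "__main__":
    demo.launch()
```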
```diff
@@ -198,6 +231,14 @@ def build_ui():
                 top_k = gr.Slider(1, 200, value=50, step=1, label="top_k")
                 run = gr.Button("Generate")
 
+        # Now that both components exist, wire the gallery->(image,prompt) binding
+        try:
+            gallery.select(fn=_on_gallery_select, inputs=[prompt], outputs=[image, prompt])
+        except Exception:
+            # If the event cannot be bound (e.g., running in a limited environment),
+            # just skip wiring without breaking the app.
+            pass
+
         output = gr.Textbox(label="Model Output", lines=8)
 
         run.click(
```
````diff
@@ -207,6 +248,23 @@ def build_ui():
             show_progress=True,
         )
 
+        # Citation section at the bottom
+        gr.Markdown(
+            """
+            ---
+            If you find this project useful, please kindly cite:
+
+            ```bibtex
+            @article{liu2025spark,
+                title={SPARK: Synergistic Policy And Reward Co-Evolving Framework},
+                author={Liu, Ziyu and Zang, Yuhang and Ding, Shengyuan and Cao, Yuhang and Dong, Xiaoyi and Duan, Haodong and Lin, Dahua and Wang, Jiaqi},
+                journal={arXiv preprint arXiv:2509.22624},
+                year={2025}
+            }
+            ```
+            """
+        )
+
     demo.queue(max_size=10).launch()
     return demo
````
requirements.txt CHANGED

```diff
@@ -5,3 +5,6 @@ gradio>=5.49.1
 spaces>=0.24.0
 pillow
 torchvision
+# Optional: FlashAttention v2 for faster attention on compatible Linux CUDA GPUs.
+# This installs only on 64-bit Linux. It will be skipped on macOS/Windows/ARM.
+flash-attn; platform_system == "Linux" and platform_machine == "x86_64"
```