Spaces:
				
			
			
	
			
			
		Running
		
			on 
			
			Zero
	
	
	
			
			
	
	
	
	
		
		
		Running
		
			on 
			
			Zero
	
		yuhangzang
		
	committed on
		
		
					Commit 
							
							·
						
						fd68401
	
1
								Parent(s):
							
							babd02b
								
update
Browse files
- app.py +138 -44
- examples/example_0.txt +1 -0
- examples/example_1.png +3 -0
- examples/example_1.txt +21 -0
    	
        app.py
    CHANGED
    
    | @@ -144,7 +144,30 @@ def generate(image, prompt, max_new_tokens, temperature, top_p, top_k): | |
| 144 |  | 
| 145 | 
             
            def build_ui():
         | 
| 146 | 
             
                with gr.Blocks() as demo:
         | 
| 147 | 
            -
                    gr.Markdown( | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 148 |  | 
| 149 | 
             
                    # Build an image+prompt gallery from ./examples
         | 
| 150 | 
             
                    # Each example is an image file with an optional sidecar .txt containing the prompt.
         | 
| @@ -178,42 +201,6 @@ def build_ui(): | |
| 178 | 
             
                    with gr.Row():
         | 
| 179 | 
             
                        with gr.Column(scale=1):
         | 
| 180 | 
             
                            image = gr.Image(type="pil", label="Image", value=default_image)
         | 
| 181 | 
            -
                            # Prepare gallery items as (image, caption) so users can see
         | 
| 182 | 
            -
                            # that a prompt is associated with each example.
         | 
| 183 | 
            -
                            def _gallery_items():
         | 
| 184 | 
            -
                                items = []
         | 
| 185 | 
            -
                                for img_path, prompt_text in example_pairs:
         | 
| 186 | 
            -
                                    caption = (prompt_text or "").strip()
         | 
| 187 | 
            -
                                    # Keep captions compact to avoid tall tiles
         | 
| 188 | 
            -
                                    if len(caption) > 120:
         | 
| 189 | 
            -
                                        caption = caption[:117] + "..."
         | 
| 190 | 
            -
                                    items.append((img_path, caption))
         | 
| 191 | 
            -
                                return items
         | 
| 192 | 
            -
             | 
| 193 | 
            -
                            gallery = gr.Gallery(
         | 
| 194 | 
            -
                                value=_gallery_items(),
         | 
| 195 | 
            -
                                label="Examples (Image + Prompt)",
         | 
| 196 | 
            -
                                show_label=True,
         | 
| 197 | 
            -
                                columns=4,
         | 
| 198 | 
            -
                                height=260,
         | 
| 199 | 
            -
                                allow_preview=True,
         | 
| 200 | 
            -
                            )
         | 
| 201 | 
            -
             | 
| 202 | 
            -
                            # When a thumbnail is clicked, load it into the image input
         | 
| 203 | 
            -
                            def _on_gallery_select(evt: gr.SelectData, cur_prompt: str = ""):
         | 
| 204 | 
            -
                                # Load both the example image and its paired prompt
         | 
| 205 | 
            -
                                idx = evt.index
         | 
| 206 | 
            -
                                if 0 <= idx < len(example_pairs):
         | 
| 207 | 
            -
                                    img_path, prompt_text = example_pairs[idx]
         | 
| 208 | 
            -
                                    try:
         | 
| 209 | 
            -
                                        img_val = Image.open(img_path)
         | 
| 210 | 
            -
                                    except Exception:
         | 
| 211 | 
            -
                                        img_val = None
         | 
| 212 | 
            -
                                    # If no prompt sidecar, preserve the user's current prompt
         | 
| 213 | 
            -
                                    return img_val, (prompt_text if prompt_text is not None else cur_prompt)
         | 
| 214 | 
            -
                                return None, cur_prompt
         | 
| 215 | 
            -
             | 
| 216 | 
            -
                            # Defer wiring the select handler until after the prompt component is created
         | 
| 217 |  | 
| 218 | 
             
                        with gr.Column(scale=1):
         | 
| 219 | 
             
                            prompt = gr.Textbox(
         | 
| @@ -231,13 +218,120 @@ def build_ui(): | |
| 231 | 
             
                            top_k = gr.Slider(1, 200, value=50, step=1, label="top_k")
         | 
| 232 | 
             
                            run = gr.Button("Generate")
         | 
| 233 |  | 
| 234 | 
            -
                    #  | 
| 235 | 
            -
                     | 
| 236 | 
            -
             | 
| 237 | 
            -
                     | 
| 238 | 
            -
             | 
| 239 | 
            -
             | 
| 240 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 241 |  | 
| 242 | 
             
                    output = gr.Textbox(label="Model Output", lines=8)
         | 
| 243 |  | 
|  | |
| 144 |  | 
| 145 | 
             
            def build_ui():
         | 
| 146 | 
             
                with gr.Blocks() as demo:
         | 
| 147 | 
            +
                    gr.Markdown(
         | 
| 148 | 
            +
                        """
         | 
| 149 | 
            +
                        # Spark: Synergistic Policy And Reward Co-Evolving Framework
         | 
| 150 | 
            +
             | 
| 151 | 
            +
                        <h3 align="center">
         | 
| 152 | 
            +
                          π<a href="https://arxiv.org/abs/2509.22624">Paper</a> 
         | 
| 153 | 
            +
                        | 🤗<a href="https://huggingface.co/internlm/Spark-VL-7B">Models</a> 
         | 
| 154 | 
            +
                        | 🤗<a href="https://huggingface.co/datasets/internlm/Spark-Data">Datasets</a>
         | 
| 155 | 
            +
                        | 🤗<a href="https://huggingface.co/papers/2509.22624">Daily Paper</a>
         | 
| 156 | 
            +
                        </h3>
         | 
| 157 | 
            +
             | 
| 158 | 
            +
                        **π Introduction:** We propose SPARK, <strong>a unified framework that integrates policy and reward into a single model for joint and synchronous training</strong>. SPARK can automatically derive reward and reflection data from verifiable reward, enabling <strong>self-learning and self-evolution</strong>.
         | 
| 159 | 
            +
             | 
| 160 | 
            +
                        **🤗 Models:** We release the checkpoints at [internlm/Spark-VL-7B](https://huggingface.co/internlm/Spark-VL-7B).
         | 
| 161 | 
            +
             | 
| 162 | 
            +
                        **🤗 Datasets:** Training data is available at [internlm/Spark-Data](https://huggingface.co/datasets/internlm/Spark-Data).
         | 
| 163 | 
            +
             | 
| 164 | 
            +
                        **💻 Training Code:** The training code and implementation details can be found at [InternLM/Spark](https://github.com/InternLM/Spark).
         | 
| 165 | 
            +
             | 
| 166 | 
            +
                        ---
         | 
| 167 | 
            +
             | 
| 168 | 
            +
                        📸 **Upload an image and enter a prompt** or 🖼️ **choose the input from the example gallery** (image + prompt).
         | 
| 169 | 
            +
                        """
         | 
| 170 | 
            +
                    )
         | 
| 171 |  | 
| 172 | 
             
                    # Build an image+prompt gallery from ./examples
         | 
| 173 | 
             
                    # Each example is an image file with an optional sidecar .txt containing the prompt.
         | 
|  | |
| 201 | 
             
                    with gr.Row():
         | 
| 202 | 
             
                        with gr.Column(scale=1):
         | 
| 203 | 
             
                            image = gr.Image(type="pil", label="Image", value=default_image)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 204 |  | 
| 205 | 
             
                        with gr.Column(scale=1):
         | 
| 206 | 
             
                            prompt = gr.Textbox(
         | 
|  | |
| 218 | 
             
                            top_k = gr.Slider(1, 200, value=50, step=1, label="top_k")
         | 
| 219 | 
             
                            run = gr.Button("Generate")
         | 
| 220 |  | 
| 221 | 
            +
                    # Clear prompt when image is removed
         | 
| 222 | 
            +
                    image.clear(fn=lambda: "", outputs=prompt)
         | 
| 223 | 
            +
             | 
| 224 | 
            +
                    # Examples section: table-like layout with image and prompt columns
         | 
| 225 | 
            +
                    gr.Markdown("## Examples")
         | 
| 226 | 
            +
             | 
| 227 | 
            +
                    # Handler for clicking on example images
         | 
| 228 | 
            +
                    def _on_example_click(img_path, prompt_text):
         | 
| 229 | 
            +
                        try:
         | 
| 230 | 
            +
                            img_val = Image.open(img_path)
         | 
| 231 | 
            +
                        except Exception:
         | 
| 232 | 
            +
                            img_val = None
         | 
| 233 | 
            +
                        return img_val, prompt_text
         | 
| 234 | 
            +
             | 
| 235 | 
            +
                    # Categorize examples by type
         | 
| 236 | 
            +
                    math_examples = []
         | 
| 237 | 
            +
                    reward_examples = []
         | 
| 238 | 
            +
                    other_examples = []
         | 
| 239 | 
            +
             | 
| 240 | 
            +
                    for img_path, prompt_text in example_pairs:
         | 
| 241 | 
            +
                        basename = os.path.basename(img_path)
         | 
| 242 | 
            +
                        if basename.startswith("example_0"):
         | 
| 243 | 
            +
                            math_examples.append((img_path, prompt_text))
         | 
| 244 | 
            +
                        elif basename.startswith("example_1"):
         | 
| 245 | 
            +
                            reward_examples.append((img_path, prompt_text))
         | 
| 246 | 
            +
                        else:
         | 
| 247 | 
            +
                            other_examples.append((img_path, prompt_text))
         | 
| 248 | 
            +
             | 
| 249 | 
            +
                    # Display math reasoning examples
         | 
| 250 | 
            +
                    if math_examples:
         | 
| 251 | 
            +
                        gr.Markdown("### π Math Reasoning Examples")
         | 
| 252 | 
            +
                        for idx, (img_path, prompt_text) in enumerate(math_examples):
         | 
| 253 | 
            +
                            with gr.Row():
         | 
| 254 | 
            +
                                with gr.Column(scale=1):
         | 
| 255 | 
            +
                                    ex_img = gr.Image(
         | 
| 256 | 
            +
                                        value=img_path,
         | 
| 257 | 
            +
                                        type="filepath",
         | 
| 258 | 
            +
                                        label=f"Math Example {idx}",
         | 
| 259 | 
            +
                                        interactive=False,
         | 
| 260 | 
            +
                                        show_label=True,
         | 
| 261 | 
            +
                                        height=200,
         | 
| 262 | 
            +
                                    )
         | 
| 263 | 
            +
                                    # Wire click event to load the example
         | 
| 264 | 
            +
                                    ex_img.select(
         | 
| 265 | 
            +
                                        fn=lambda ip=img_path, pt=prompt_text: _on_example_click(ip, pt),
         | 
| 266 | 
            +
                                        outputs=[image, prompt],
         | 
| 267 | 
            +
                                    )
         | 
| 268 | 
            +
                                with gr.Column(scale=3):
         | 
| 269 | 
            +
                                    ex_text = gr.Textbox(
         | 
| 270 | 
            +
                                        value=prompt_text or "",
         | 
| 271 | 
            +
                                        label="Prompt",
         | 
| 272 | 
            +
                                        lines=8,
         | 
| 273 | 
            +
                                        max_lines=8,
         | 
| 274 | 
            +
                                        interactive=False,
         | 
| 275 | 
            +
                                        show_label=True,
         | 
| 276 | 
            +
                                    )
         | 
| 277 | 
            +
             | 
| 278 | 
            +
                    # Display reward model examples
         | 
| 279 | 
            +
                    if reward_examples:
         | 
| 280 | 
            +
                        gr.Markdown("### 🎯 Reward Model Examples")
         | 
| 281 | 
            +
                        for idx, (img_path, prompt_text) in enumerate(reward_examples):
         | 
| 282 | 
            +
                            with gr.Row():
         | 
| 283 | 
            +
                                with gr.Column(scale=1):
         | 
| 284 | 
            +
                                    ex_img = gr.Image(
         | 
| 285 | 
            +
                                        value=img_path,
         | 
| 286 | 
            +
                                        type="filepath",
         | 
| 287 | 
            +
                                        label=f"Reward Example {idx}",
         | 
| 288 | 
            +
                                        interactive=False,
         | 
| 289 | 
            +
                                        show_label=True,
         | 
| 290 | 
            +
                                        height=200,
         | 
| 291 | 
            +
                                    )
         | 
| 292 | 
            +
                                    # Wire click event to load the example
         | 
| 293 | 
            +
                                    ex_img.select(
         | 
| 294 | 
            +
                                        fn=lambda ip=img_path, pt=prompt_text: _on_example_click(ip, pt),
         | 
| 295 | 
            +
                                        outputs=[image, prompt],
         | 
| 296 | 
            +
                                    )
         | 
| 297 | 
            +
                                with gr.Column(scale=3):
         | 
| 298 | 
            +
                                    ex_text = gr.Textbox(
         | 
| 299 | 
            +
                                        value=prompt_text or "",
         | 
| 300 | 
            +
                                        label="Prompt",
         | 
| 301 | 
            +
                                        lines=8,
         | 
| 302 | 
            +
                                        max_lines=8,
         | 
| 303 | 
            +
                                        interactive=False,
         | 
| 304 | 
            +
                                        show_label=True,
         | 
| 305 | 
            +
                                    )
         | 
| 306 | 
            +
             | 
| 307 | 
            +
                    # Display other examples if any
         | 
| 308 | 
            +
                    if other_examples:
         | 
| 309 | 
            +
                        gr.Markdown("### π Other Examples")
         | 
| 310 | 
            +
                        for idx, (img_path, prompt_text) in enumerate(other_examples):
         | 
| 311 | 
            +
                            with gr.Row():
         | 
| 312 | 
            +
                                with gr.Column(scale=1):
         | 
| 313 | 
            +
                                    ex_img = gr.Image(
         | 
| 314 | 
            +
                                        value=img_path,
         | 
| 315 | 
            +
                                        type="filepath",
         | 
| 316 | 
            +
                                        label=f"Example {idx}",
         | 
| 317 | 
            +
                                        interactive=False,
         | 
| 318 | 
            +
                                        show_label=True,
         | 
| 319 | 
            +
                                        height=200,
         | 
| 320 | 
            +
                                    )
         | 
| 321 | 
            +
                                    # Wire click event to load the example
         | 
| 322 | 
            +
                                    ex_img.select(
         | 
| 323 | 
            +
                                        fn=lambda ip=img_path, pt=prompt_text: _on_example_click(ip, pt),
         | 
| 324 | 
            +
                                        outputs=[image, prompt],
         | 
| 325 | 
            +
                                    )
         | 
| 326 | 
            +
                                with gr.Column(scale=3):
         | 
| 327 | 
            +
                                    ex_text = gr.Textbox(
         | 
| 328 | 
            +
                                        value=prompt_text or "",
         | 
| 329 | 
            +
                                        label="Prompt",
         | 
| 330 | 
            +
                                        lines=8,
         | 
| 331 | 
            +
                                        max_lines=8,
         | 
| 332 | 
            +
                                        interactive=False,
         | 
| 333 | 
            +
                                        show_label=True,
         | 
| 334 | 
            +
                                    )
         | 
| 335 |  | 
| 336 | 
             
                    output = gr.Textbox(label="Model Output", lines=8)
         | 
| 337 |  | 
    	
        examples/example_0.txt
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            As seen in the diagram, three darts are thrown at nine fixed balloons. If a balloon is hit it will burst and the dart continues in the same direction it had beforehand. How many balloons will not be hit by a dart?
         | 
    	
        examples/example_1.png
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        examples/example_1.txt
    ADDED
    
    | @@ -0,0 +1,21 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            You are a highly capable multimodal AI assistant tasked with evaluating answers to visual questions. Please analyze the following image and question, then determine which of the two provided answers is better.
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            Question:
         | 
| 4 | 
            +
            Create a detailed description describing the elements of the given worldwide landmark image. Identify the name of the landmark and describe the objects, colors, perspectives, etc.
         | 
| 5 | 
            +
            Answer 1: 
         | 
| 6 | 
            +
            "The image showcases a breathtaking view of a mountainous region with a deep, green lake nestled amidst the rocky terrain. The perspective is from a high vantage point, providing a panoramic view of the landscape. The landmark, identified as \"sa_1563\", is a testament to the natural beauty and diversity of our world. The combination of the rugged mountains, the clear blue sky, and the vibrant green lake creates a serene and awe-inspiring scene."
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            Answer 2:
         | 
| 9 | 
            +
            "The image showcases a serene mountain lake, nestled amidst rocky terrains. The deep green color of the water contrasts with the surrounding landscape, creating a tranquil scene. This landmark is a natural beauty, located in a hilly or mountainous region. The perspective of the image gives a sense of being high above the lake, looking down upon it. However, without specific identifiable features, it's challenging to determine the exact landmark from the description."
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            Please evaluate both answers based on the following criteria:
         | 
| 12 | 
            +
            1. Accuracy: How well does the answer align with the visual information in the image?
         | 
| 13 | 
            +
            2. Completeness: Does the answer fully address all aspects of the question?
         | 
| 14 | 
            +
            3. Clarity: Is the answer easy to understand and well-articulated?
         | 
| 15 | 
            +
            4. Relevance: Does the answer directly relate to the question and the image?
         | 
| 16 | 
            +
              
         | 
| 17 | 
            +
            After your evaluation, please:
         | 
| 18 | 
            +
            1. Explain your reasoning for each criterion.
         | 
| 19 | 
            +
            2. Provide an overall judgment on which answer is better (Answer 1 or Answer 2). For example: Overall Judgment: Answer X is better.
         | 
| 20 | 
            +
              
         | 
| 21 | 
            +
            Your response should be structured and detailed, demonstrating your understanding of both the visual and textual elements of the task.
         | 
