yuhangzang committed
Commit fd68401 · 1 Parent(s): babd02b
app.py CHANGED
@@ -144,7 +144,30 @@ def generate(image, prompt, max_new_tokens, temperature, top_p, top_k):
 
 def build_ui():
     with gr.Blocks() as demo:
-        gr.Markdown("# Spark-VL ZeroGPU Demo\nUpload an image or choose from the example gallery (image + prompt), then enter a prompt.")
+        gr.Markdown(
+            """
+            # Spark: Synergistic Policy And Reward Co-Evolving Framework
+
+            <h3 align="center">
+            📖<a href="https://arxiv.org/abs/2509.22624">Paper</a>
+            | 🤗<a href="https://huggingface.co/internlm/Spark-VL-7B">Models</a>
+            | 🤗<a href="https://huggingface.co/datasets/internlm/Spark-Data">Datasets</a>
+            | 🤗<a href="https://huggingface.co/papers/2509.22624">Daily Paper</a>
+            </h3>
+
+            **🌈 Introduction:** We propose SPARK, <strong>a unified framework that integrates policy and reward into a single model for joint and synchronous training</strong>. SPARK can automatically derive reward and reflection data from verifiable reward, enabling <strong>self-learning and self-evolution</strong>.
+
+            **🤗 Models:** We release the checkpoints at [internlm/Spark-VL-7B](https://huggingface.co/internlm/Spark-VL-7B).
+
+            **🤗 Datasets:** Training data is available at [internlm/Spark-Data](https://huggingface.co/datasets/internlm/Spark-Data).
+
+            **💻 Training Code:** The training code and implementation details can be found at [InternLM/Spark](https://github.com/InternLM/Spark).
+
+            ---
+
+            📸 **Upload an image and enter a prompt** or 🖼️ **choose the input from the example gallery** (image + prompt).
+            """
+        )
 
         # Build an image+prompt gallery from ./examples
         # Each example is an image file with an optional sidecar .txt containing the prompt.
@@ -178,42 +201,6 @@ def build_ui():
         with gr.Row():
             with gr.Column(scale=1):
                 image = gr.Image(type="pil", label="Image", value=default_image)
-                # Prepare gallery items as (image, caption) so users can see
-                # that a prompt is associated with each example.
-                def _gallery_items():
-                    items = []
-                    for img_path, prompt_text in example_pairs:
-                        caption = (prompt_text or "").strip()
-                        # Keep captions compact to avoid tall tiles
-                        if len(caption) > 120:
-                            caption = caption[:117] + "..."
-                        items.append((img_path, caption))
-                    return items
-
-                gallery = gr.Gallery(
-                    value=_gallery_items(),
-                    label="Examples (Image + Prompt)",
-                    show_label=True,
-                    columns=4,
-                    height=260,
-                    allow_preview=True,
-                )
-
-                # When a thumbnail is clicked, load it into the image input
-                def _on_gallery_select(evt: gr.SelectData, cur_prompt: str = ""):
-                    # Load both the example image and its paired prompt
-                    idx = evt.index
-                    if 0 <= idx < len(example_pairs):
-                        img_path, prompt_text = example_pairs[idx]
-                        try:
-                            img_val = Image.open(img_path)
-                        except Exception:
-                            img_val = None
-                        # If no prompt sidecar, preserve the user's current prompt
-                        return img_val, (prompt_text if prompt_text is not None else cur_prompt)
-                    return None, cur_prompt
-
-                # Defer wiring the select handler until after the prompt component is created
 
             with gr.Column(scale=1):
                 prompt = gr.Textbox(
@@ -231,13 +218,120 @@ def build_ui():
                 top_k = gr.Slider(1, 200, value=50, step=1, label="top_k")
                 run = gr.Button("Generate")
 
-        # Now that both components exist, wire the gallery->(image,prompt) binding
-        try:
-            gallery.select(fn=_on_gallery_select, inputs=[prompt], outputs=[image, prompt])
-        except Exception:
-            # If the event cannot be bound (e.g., running in a limited environment),
-            # just skip wiring without breaking the app.
-            pass
+        # Clear prompt when image is removed
+        image.clear(fn=lambda: "", outputs=prompt)
+
+        # Examples section: table-like layout with image and prompt columns
+        gr.Markdown("## Examples")
+
+        # Handler for clicking on example images
+        def _on_example_click(img_path, prompt_text):
+            try:
+                img_val = Image.open(img_path)
+            except Exception:
+                img_val = None
+            return img_val, prompt_text
+
+        # Categorize examples by type
+        math_examples = []
+        reward_examples = []
+        other_examples = []
+
+        for img_path, prompt_text in example_pairs:
+            basename = os.path.basename(img_path)
+            if basename.startswith("example_0"):
+                math_examples.append((img_path, prompt_text))
+            elif basename.startswith("example_1"):
+                reward_examples.append((img_path, prompt_text))
+            else:
+                other_examples.append((img_path, prompt_text))
+
+        # Display math reasoning examples
+        if math_examples:
+            gr.Markdown("### 📐 Math Reasoning Examples")
+            for idx, (img_path, prompt_text) in enumerate(math_examples):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        ex_img = gr.Image(
+                            value=img_path,
+                            type="filepath",
+                            label=f"Math Example {idx}",
+                            interactive=False,
+                            show_label=True,
+                            height=200,
+                        )
+                        # Wire click event to load the example
+                        ex_img.select(
+                            fn=lambda ip=img_path, pt=prompt_text: _on_example_click(ip, pt),
+                            outputs=[image, prompt],
+                        )
+                    with gr.Column(scale=3):
+                        ex_text = gr.Textbox(
+                            value=prompt_text or "",
+                            label="Prompt",
+                            lines=8,
+                            max_lines=8,
+                            interactive=False,
+                            show_label=True,
+                        )
+
+        # Display reward model examples
+        if reward_examples:
+            gr.Markdown("### 🎯 Reward Model Examples")
+            for idx, (img_path, prompt_text) in enumerate(reward_examples):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        ex_img = gr.Image(
+                            value=img_path,
+                            type="filepath",
+                            label=f"Reward Example {idx}",
+                            interactive=False,
+                            show_label=True,
+                            height=200,
+                        )
+                        # Wire click event to load the example
+                        ex_img.select(
+                            fn=lambda ip=img_path, pt=prompt_text: _on_example_click(ip, pt),
+                            outputs=[image, prompt],
+                        )
+                    with gr.Column(scale=3):
+                        ex_text = gr.Textbox(
+                            value=prompt_text or "",
+                            label="Prompt",
+                            lines=8,
+                            max_lines=8,
+                            interactive=False,
+                            show_label=True,
+                        )
+
+        # Display other examples if any
+        if other_examples:
+            gr.Markdown("### 📋 Other Examples")
+            for idx, (img_path, prompt_text) in enumerate(other_examples):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        ex_img = gr.Image(
+                            value=img_path,
+                            type="filepath",
+                            label=f"Example {idx}",
+                            interactive=False,
+                            show_label=True,
+                            height=200,
+                        )
+                        # Wire click event to load the example
+                        ex_img.select(
+                            fn=lambda ip=img_path, pt=prompt_text: _on_example_click(ip, pt),
+                            outputs=[image, prompt],
+                        )
+                    with gr.Column(scale=3):
+                        ex_text = gr.Textbox(
+                            value=prompt_text or "",
+                            label="Prompt",
+                            lines=8,
+                            max_lines=8,
+                            interactive=False,
+                            show_label=True,
+                        )
 
         output = gr.Textbox(label="Model Output", lines=8)
 
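Note: the hunks above consume an `example_pairs` list that is built outside the changed lines ("Build an image+prompt gallery from ./examples", with an optional sidecar .txt per image). The loader itself is not part of this diff; the following is only a minimal sketch of what such a loader might look like, where the function name `load_example_pairs`, the `IMAGE_EXTS` set, and the `examples` directory default are assumptions rather than code from this commit.

import os

# Hypothetical sketch (not from this commit): pair each image in ./examples
# with an optional sidecar .txt prompt, matching how example_pairs is used above.
IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".webp"}

def load_example_pairs(examples_dir="examples"):
    pairs = []
    if not os.path.isdir(examples_dir):
        return pairs
    for name in sorted(os.listdir(examples_dir)):
        stem, ext = os.path.splitext(name)
        if ext.lower() not in IMAGE_EXTS:
            continue
        img_path = os.path.join(examples_dir, name)
        txt_path = os.path.join(examples_dir, stem + ".txt")
        prompt_text = None
        if os.path.exists(txt_path):
            with open(txt_path, "r", encoding="utf-8") as f:
                prompt_text = f.read().strip()
        pairs.append((img_path, prompt_text))  # (image path, prompt or None)
    return pairs

example_pairs = load_example_pairs()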
 
examples/example_0.txt ADDED
@@ -0,0 +1 @@
+As seen in the diagram, three darts are thrown at nine fixed balloons. If a balloon is hit it will burst and the dart continues in the same direction it had beforehand. How many balloons will not be hit by a dart?
examples/example_1.png ADDED
Git LFS Details
  • SHA256: 8c9d09de84a0680c43759c4359d45697937271879c92b65f9cfd83d98f54723d
  • Pointer size: 131 Bytes
  • Size of remote file: 587 kB
examples/example_1.txt ADDED
@@ -0,0 +1,21 @@
+You are a highly capable multimodal AI assistant tasked with evaluating answers to visual questions. Please analyze the following image and question, then determine which of the two provided answers is better.
+
+Question:
+Create a detailed description describing the elements of the given worldwide landmark image. Identify the name of the landmark and describe the objects, colors, perspectives, etc.
+Answer 1:
+"The image showcases a breathtaking view of a mountainous region with a deep, green lake nestled amidst the rocky terrain. The perspective is from a high vantage point, providing a panoramic view of the landscape. The landmark, identified as \"sa_1563\", is a testament to the natural beauty and diversity of our world. The combination of the rugged mountains, the clear blue sky, and the vibrant green lake creates a serene and awe-inspiring scene."
+
+Answer 2:
+"The image showcases a serene mountain lake, nestled amidst rocky terrains. The deep green color of the water contrasts with the surrounding landscape, creating a tranquil scene. This landmark is a natural beauty, located in a hilly or mountainous region. The perspective of the image gives a sense of being high above the lake, looking down upon it. However, without specific identifiable features, it's challenging to determine the exact landmark from the description."
+
+Please evaluate both answers based on the following criteria:
+1. Accuracy: How well does the answer align with the visual information in the image?
+2. Completeness: Does the answer fully address all aspects of the question?
+3. Clarity: Is the answer easy to understand and well-articulated?
+4. Relevance: Does the answer directly relate to the question and the image?
+
+After your evaluation, please:
+1. Explain your reasoning for each criterion.
+2. Provide an overall judgment on which answer is better (Answer 1 or Answer 2). For example: Overall Judgment: Answer X is better.
+
+Your response should be structured and detailed, demonstrating your understanding of both the visual and textual elements of the task.