yeliudev committed on
Commit
214dc2c
·
verified ·
1 Parent(s): 41e934b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -82
app.py CHANGED
@@ -27,7 +27,7 @@ MODEL = 'PolyU-ChenLab/UniPixel-3B'
27
  TITLE = 'UniPixel: Unified Object Referring and Segmentation for Pixel-Level Visual Reasoning'
28
 
29
  HEADER = """
30
- <p align="center" style="margin: 1em 0 2em;"><img width="280" src="https://raw.githubusercontent.com/PolyU-ChenLab/UniPixel/refs/heads/main/.github/logo.png"></p>
31
  <h3 align="center">Unified Object Referring and Segmentation for Pixel-Level Visual Reasoning</h3>
32
  <div style="display: flex; justify-content: center; gap: 5px;">
33
  <a href="https://arxiv.org/abs/2509.18094" target="_blank"><img src="https://img.shields.io/badge/arXiv-2509.18094-red"></a>
@@ -46,6 +46,11 @@ function init() {
46
  if (window.innerWidth >= 1536) {
47
  document.querySelector('main').style.maxWidth = '1536px'
48
  }
 
 
 
 
 
49
  }
50
  """
51
 
@@ -91,12 +96,12 @@ def update_region(blob):
91
 
92
  def update_video(video, prompt_idx):
93
  if video is None:
94
- return
95
 
96
  _, images = load_video(video, sample_frames=16)
97
- path = images[prompt_idx - 1]
98
 
99
- return path
100
 
101
 
102
  @spaces.GPU
@@ -283,75 +288,7 @@ def infer_reg(blob, query, prompt_idx=1, video=None):
283
 
284
 
285
  def build_demo():
286
- apple_theme = gr.themes.Base(
287
- primary_hue=gr.themes.colors.blue,
288
- secondary_hue=gr.themes.colors.gray,
289
- neutral_hue=gr.themes.colors.gray,
290
- spacing_size=gr.themes.sizes.spacing_md,
291
- radius_size=gr.themes.sizes.radius_md,
292
- text_size=gr.themes.sizes.text_md,
293
- font=["-apple-system", "BlinkMacSystemFont", "Segoe UI", "Helvetica Neue", "Arial", "sans-serif"],
294
- font_mono=["SF Mono", "Monaco", "Inconsolata", "Roboto Mono", "monospace"]).set(
295
- body_background_fill="white",
296
- body_background_fill_dark="#000000",
297
- block_background_fill="#ffffff",
298
- block_background_fill_dark="#1c1c1e",
299
- block_border_color="#d1d1d6",
300
- block_border_color_dark="#38383a",
301
- block_border_width="1px",
302
- block_label_background_fill="transparent",
303
- block_label_background_fill_dark="transparent",
304
- block_label_text_color="#1d1d1f",
305
- block_label_text_color_dark="#f5f5f7",
306
- block_label_text_weight="600",
307
- block_label_text_size="*text_sm",
308
- block_title_text_weight="600",
309
- block_title_text_color="#1d1d1f",
310
- block_title_text_color_dark="#f5f5f7",
311
- button_primary_background_fill="#007aff",
312
- button_primary_background_fill_hover="#0051d5",
313
- button_primary_background_fill_dark="#0a84ff",
314
- button_primary_background_fill_hover_dark="#409cff",
315
- button_primary_text_color="white",
316
- button_primary_border_color="transparent",
317
- button_secondary_background_fill="#f5f5f7",
318
- button_secondary_background_fill_hover="#e8e8ed",
319
- button_secondary_background_fill_dark="#2c2c2e",
320
- button_secondary_background_fill_hover_dark="#3a3a3c",
321
- button_secondary_text_color="#1d1d1f",
322
- button_secondary_text_color_dark="#f5f5f7",
323
- button_secondary_border_color="transparent",
324
- button_cancel_background_fill="#ff3b30",
325
- button_cancel_background_fill_hover="#ff453a",
326
- button_cancel_text_color="white",
327
- input_background_fill="#ffffff",
328
- input_background_fill_dark="#1c1c1e",
329
- input_border_color="#d1d1d6",
330
- input_border_color_dark="#38383a",
331
- input_border_color_focus="#007aff",
332
- input_border_color_focus_dark="#0a84ff",
333
- input_placeholder_color="#8e8e93",
334
- input_placeholder_color_dark="#98989d",
335
- slider_color="#007aff",
336
- slider_color_dark="#0a84ff",
337
- checkbox_background_color="#007aff",
338
- checkbox_background_color_dark="#0a84ff",
339
- checkbox_background_color_selected="#007aff",
340
- checkbox_background_color_selected_dark="#0a84ff",
341
- checkbox_border_color="#d1d1d6",
342
- checkbox_border_color_dark="#38383a",
343
- checkbox_border_color_selected="#007aff",
344
- checkbox_border_color_selected_dark="#0a84ff",
345
- panel_background_fill="#f5f5f7",
346
- panel_background_fill_dark="#1c1c1e",
347
- panel_border_color="#d1d1d6",
348
- panel_border_color_dark="#38383a",
349
- shadow_drop="0px 1px 3px 0px rgba(0,0,0,0.1)",
350
- shadow_drop_lg="0px 10px 30px 0px rgba(0,0,0,0.15)",
351
- loader_color="#007aff",
352
- loader_color_dark="#0a84ff")
353
-
354
- with gr.Blocks(title=TITLE, js=JS, theme=apple_theme) as demo:
355
  gr.HTML(HEADER)
356
 
357
  with gr.Tab('Image Segmentation'):
@@ -366,7 +303,7 @@ def build_demo():
366
 
367
  sample_frames_1 = gr.Slider(1, 32, value=16, step=1, visible=False)
368
 
369
- query_1 = gr.Textbox(label='Text Prompt', placeholder='Please segment the...')
370
 
371
  with gr.Row():
372
  random_btn_1 = gr.Button(value='🔮 Random', visible=False)
@@ -376,7 +313,8 @@ def build_demo():
376
 
377
  download_btn_1.render()
378
 
379
- submit_btn_1 = gr.Button(value='🚀 Submit', variant='primary')
 
380
  with gr.Column():
381
  msk_1.render()
382
  ans_1.render()
@@ -405,7 +343,7 @@ def build_demo():
405
  label='Sample Frames',
406
  info='The number of frames to sample from a video (Default: 16)')
407
 
408
- query_2 = gr.Textbox(label='Text Prompt', placeholder='Please segment the...')
409
 
410
  with gr.Row():
411
  random_btn_2 = gr.Button(value='🔮 Random', visible=False)
@@ -415,7 +353,8 @@ def build_demo():
415
 
416
  download_btn_2.render()
417
 
418
- submit_btn_2 = gr.Button(value='🚀 Submit', variant='primary')
 
419
  with gr.Column():
420
  msk_2.render()
421
  ans_2.render()
@@ -440,7 +379,8 @@ def build_demo():
440
 
441
  prompt_frame_index_3 = gr.Slider(1, 16, value=1, step=1, visible=False)
442
 
443
- query_3 = gr.Textbox(label='Text Prompt', placeholder='Please describe the highlighted region...')
 
444
 
445
  with gr.Row():
446
  random_btn_3 = gr.Button(value='🔮 Random', visible=False)
@@ -448,7 +388,8 @@ def build_demo():
448
  reset_btn_3 = gr.ClearButton([media_3, query_3, msk_3, ans_3], value='🗑️ Reset')
449
  reset_btn_3.click(reset_reg, None, [prompt_frame_index_3, download_btn_3])
450
 
451
- submit_btn_3 = gr.Button(value='🚀 Submit', variant='primary')
 
452
  with gr.Column():
453
  msk_3.render()
454
  ans_3.render()
@@ -466,13 +407,14 @@ def build_demo():
466
  step=1,
467
  interactive=True,
468
  label='Prompt Frame Index',
469
- info='The index of the frame that includes mask prompts (Default: 1)',
470
  render=False)
471
  msk_4 = gr.ImageEditor(
472
  label='Mask Prompt',
473
  brush=gr.Brush(colors=['#ff000080'], color_mode='fixed'),
474
  transforms=None,
475
  layers=False,
 
476
  render=False)
477
  ans_4 = gr.HighlightedText(label='Model Response', show_inline_category=False, render=False)
478
 
@@ -485,7 +427,8 @@ def build_demo():
485
  prompt_frame_index_4.render()
486
  prompt_frame_index_4.change(update_video, [media_4, prompt_frame_index_4], msk_4)
487
 
488
- query_4 = gr.Textbox(label='Text Prompt', placeholder='Please describe the highlighted region...')
 
489
 
490
  with gr.Row():
491
  random_btn_4 = gr.Button(value='🔮 Random', visible=False)
@@ -493,7 +436,8 @@ def build_demo():
493
  reset_btn_4 = gr.ClearButton([media_4, query_4, msk_4, ans_4], value='🗑️ Reset')
494
  reset_btn_4.click(reset_reg, None, [prompt_frame_index_4, download_btn_4])
495
 
496
- submit_btn_4 = gr.Button(value='🚀 Submit', variant='primary')
 
497
  with gr.Column():
498
  msk_4.render()
499
  ans_4.render()
 
27
  TITLE = 'UniPixel: Unified Object Referring and Segmentation for Pixel-Level Visual Reasoning'
28
 
29
  HEADER = """
30
+ <p align="center" style="margin: 1em 0 2em;"><img width="260" src="https://raw.githubusercontent.com/PolyU-ChenLab/UniPixel/refs/heads/main/.github/logo.png"></p>
31
  <h3 align="center">Unified Object Referring and Segmentation for Pixel-Level Visual Reasoning</h3>
32
  <div style="display: flex; justify-content: center; gap: 5px;">
33
  <a href="https://arxiv.org/abs/2509.18094" target="_blank"><img src="https://img.shields.io/badge/arXiv-2509.18094-red"></a>
 
46
  if (window.innerWidth >= 1536) {
47
  document.querySelector('main').style.maxWidth = '1536px'
48
  }
49
+
50
+ document.getElementById('query_1').addEventListener('keydown', function f1(e) { if (e.key === 'Enter') { document.getElementById('submit_1').click() } })
51
+ document.getElementById('query_2').addEventListener('keydown', function f2(e) { if (e.key === 'Enter') { document.getElementById('submit_2').click() } })
52
+ document.getElementById('query_3').addEventListener('keydown', function f3(e) { if (e.key === 'Enter') { document.getElementById('submit_3').click() } })
53
+ document.getElementById('query_4').addEventListener('keydown', function f4(e) { if (e.key === 'Enter') { document.getElementById('submit_4').click() } })
54
  }
55
  """
56
 
 
96
 
97
  def update_video(video, prompt_idx):
98
  if video is None:
99
+ return gr.ImageEditor(value=None, interactive=False)
100
 
101
  _, images = load_video(video, sample_frames=16)
102
+ component = gr.ImageEditor(value=images[prompt_idx - 1], interactive=True)
103
 
104
+ return component
105
 
106
 
107
  @spaces.GPU
 
288
 
289
 
290
  def build_demo():
291
+ with gr.Blocks(title=TITLE, js=JS, theme=gr.themes.Soft()) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  gr.HTML(HEADER)
293
 
294
  with gr.Tab('Image Segmentation'):
 
303
 
304
  sample_frames_1 = gr.Slider(1, 32, value=16, step=1, visible=False)
305
 
306
+ query_1 = gr.Textbox(label='Text Prompt', placeholder='Please segment the...', elem_id='query_1')
307
 
308
  with gr.Row():
309
  random_btn_1 = gr.Button(value='🔮 Random', visible=False)
 
313
 
314
  download_btn_1.render()
315
 
316
+ submit_btn_1 = gr.Button(value='🚀 Submit', variant='primary', elem_id='submit_1')
317
+
318
  with gr.Column():
319
  msk_1.render()
320
  ans_1.render()
 
343
  label='Sample Frames',
344
  info='The number of frames to sample from a video (Default: 16)')
345
 
346
+ query_2 = gr.Textbox(label='Text Prompt', placeholder='Please segment the...', elem_id='query_2')
347
 
348
  with gr.Row():
349
  random_btn_2 = gr.Button(value='🔮 Random', visible=False)
 
353
 
354
  download_btn_2.render()
355
 
356
+ submit_btn_2 = gr.Button(value='🚀 Submit', variant='primary', elem_id='submit_2')
357
+
358
  with gr.Column():
359
  msk_2.render()
360
  ans_2.render()
 
379
 
380
  prompt_frame_index_3 = gr.Slider(1, 16, value=1, step=1, visible=False)
381
 
382
+ query_3 = gr.Textbox(
383
+ label='Text Prompt', placeholder='Please describe the highlighted region...', elem_id='query_3')
384
 
385
  with gr.Row():
386
  random_btn_3 = gr.Button(value='🔮 Random', visible=False)
 
388
  reset_btn_3 = gr.ClearButton([media_3, query_3, msk_3, ans_3], value='🗑️ Reset')
389
  reset_btn_3.click(reset_reg, None, [prompt_frame_index_3, download_btn_3])
390
 
391
+ submit_btn_3 = gr.Button(value='🚀 Submit', variant='primary', elem_id='submit_3')
392
+
393
  with gr.Column():
394
  msk_3.render()
395
  ans_3.render()
 
407
  step=1,
408
  interactive=True,
409
  label='Prompt Frame Index',
410
+ info='The index of the frame to apply mask prompts (Default: 1)',
411
  render=False)
412
  msk_4 = gr.ImageEditor(
413
  label='Mask Prompt',
414
  brush=gr.Brush(colors=['#ff000080'], color_mode='fixed'),
415
  transforms=None,
416
  layers=False,
417
+ interactive=False,
418
  render=False)
419
  ans_4 = gr.HighlightedText(label='Model Response', show_inline_category=False, render=False)
420
 
 
427
  prompt_frame_index_4.render()
428
  prompt_frame_index_4.change(update_video, [media_4, prompt_frame_index_4], msk_4)
429
 
430
+ query_4 = gr.Textbox(
431
+ label='Text Prompt', placeholder='Please describe the highlighted region...', elem_id='query_4')
432
 
433
  with gr.Row():
434
  random_btn_4 = gr.Button(value='🔮 Random', visible=False)
 
436
  reset_btn_4 = gr.ClearButton([media_4, query_4, msk_4, ans_4], value='🗑️ Reset')
437
  reset_btn_4.click(reset_reg, None, [prompt_frame_index_4, download_btn_4])
438
 
439
+ submit_btn_4 = gr.Button(value='🚀 Submit', variant='primary', elem_id='submit_4')
440
+
441
  with gr.Column():
442
  msk_4.render()
443
  ans_4.render()