Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -27,7 +27,7 @@ MODEL = 'PolyU-ChenLab/UniPixel-3B'
|
|
| 27 |
TITLE = 'UniPixel: Unified Object Referring and Segmentation for Pixel-Level Visual Reasoning'
|
| 28 |
|
| 29 |
HEADER = """
|
| 30 |
-
<p align="center" style="margin: 1em 0 2em;"><img width="
|
| 31 |
<h3 align="center">Unified Object Referring and Segmentation for Pixel-Level Visual Reasoning</h3>
|
| 32 |
<div style="display: flex; justify-content: center; gap: 5px;">
|
| 33 |
<a href="https://arxiv.org/abs/2509.18094" target="_blank"><img src="https://img.shields.io/badge/arXiv-2509.18094-red"></a>
|
|
@@ -46,6 +46,11 @@ function init() {
|
|
| 46 |
if (window.innerWidth >= 1536) {
|
| 47 |
document.querySelector('main').style.maxWidth = '1536px'
|
| 48 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
}
|
| 50 |
"""
|
| 51 |
|
|
@@ -91,12 +96,12 @@ def update_region(blob):
|
|
| 91 |
|
| 92 |
def update_video(video, prompt_idx):
|
| 93 |
if video is None:
|
| 94 |
-
return
|
| 95 |
|
| 96 |
_, images = load_video(video, sample_frames=16)
|
| 97 |
-
|
| 98 |
|
| 99 |
-
return
|
| 100 |
|
| 101 |
|
| 102 |
@spaces.GPU
|
|
@@ -283,75 +288,7 @@ def infer_reg(blob, query, prompt_idx=1, video=None):
|
|
| 283 |
|
| 284 |
|
| 285 |
def build_demo():
|
| 286 |
-
|
| 287 |
-
primary_hue=gr.themes.colors.blue,
|
| 288 |
-
secondary_hue=gr.themes.colors.gray,
|
| 289 |
-
neutral_hue=gr.themes.colors.gray,
|
| 290 |
-
spacing_size=gr.themes.sizes.spacing_md,
|
| 291 |
-
radius_size=gr.themes.sizes.radius_md,
|
| 292 |
-
text_size=gr.themes.sizes.text_md,
|
| 293 |
-
font=["-apple-system", "BlinkMacSystemFont", "Segoe UI", "Helvetica Neue", "Arial", "sans-serif"],
|
| 294 |
-
font_mono=["SF Mono", "Monaco", "Inconsolata", "Roboto Mono", "monospace"]).set(
|
| 295 |
-
body_background_fill="white",
|
| 296 |
-
body_background_fill_dark="#000000",
|
| 297 |
-
block_background_fill="#ffffff",
|
| 298 |
-
block_background_fill_dark="#1c1c1e",
|
| 299 |
-
block_border_color="#d1d1d6",
|
| 300 |
-
block_border_color_dark="#38383a",
|
| 301 |
-
block_border_width="1px",
|
| 302 |
-
block_label_background_fill="transparent",
|
| 303 |
-
block_label_background_fill_dark="transparent",
|
| 304 |
-
block_label_text_color="#1d1d1f",
|
| 305 |
-
block_label_text_color_dark="#f5f5f7",
|
| 306 |
-
block_label_text_weight="600",
|
| 307 |
-
block_label_text_size="*text_sm",
|
| 308 |
-
block_title_text_weight="600",
|
| 309 |
-
block_title_text_color="#1d1d1f",
|
| 310 |
-
block_title_text_color_dark="#f5f5f7",
|
| 311 |
-
button_primary_background_fill="#007aff",
|
| 312 |
-
button_primary_background_fill_hover="#0051d5",
|
| 313 |
-
button_primary_background_fill_dark="#0a84ff",
|
| 314 |
-
button_primary_background_fill_hover_dark="#409cff",
|
| 315 |
-
button_primary_text_color="white",
|
| 316 |
-
button_primary_border_color="transparent",
|
| 317 |
-
button_secondary_background_fill="#f5f5f7",
|
| 318 |
-
button_secondary_background_fill_hover="#e8e8ed",
|
| 319 |
-
button_secondary_background_fill_dark="#2c2c2e",
|
| 320 |
-
button_secondary_background_fill_hover_dark="#3a3a3c",
|
| 321 |
-
button_secondary_text_color="#1d1d1f",
|
| 322 |
-
button_secondary_text_color_dark="#f5f5f7",
|
| 323 |
-
button_secondary_border_color="transparent",
|
| 324 |
-
button_cancel_background_fill="#ff3b30",
|
| 325 |
-
button_cancel_background_fill_hover="#ff453a",
|
| 326 |
-
button_cancel_text_color="white",
|
| 327 |
-
input_background_fill="#ffffff",
|
| 328 |
-
input_background_fill_dark="#1c1c1e",
|
| 329 |
-
input_border_color="#d1d1d6",
|
| 330 |
-
input_border_color_dark="#38383a",
|
| 331 |
-
input_border_color_focus="#007aff",
|
| 332 |
-
input_border_color_focus_dark="#0a84ff",
|
| 333 |
-
input_placeholder_color="#8e8e93",
|
| 334 |
-
input_placeholder_color_dark="#98989d",
|
| 335 |
-
slider_color="#007aff",
|
| 336 |
-
slider_color_dark="#0a84ff",
|
| 337 |
-
checkbox_background_color="#007aff",
|
| 338 |
-
checkbox_background_color_dark="#0a84ff",
|
| 339 |
-
checkbox_background_color_selected="#007aff",
|
| 340 |
-
checkbox_background_color_selected_dark="#0a84ff",
|
| 341 |
-
checkbox_border_color="#d1d1d6",
|
| 342 |
-
checkbox_border_color_dark="#38383a",
|
| 343 |
-
checkbox_border_color_selected="#007aff",
|
| 344 |
-
checkbox_border_color_selected_dark="#0a84ff",
|
| 345 |
-
panel_background_fill="#f5f5f7",
|
| 346 |
-
panel_background_fill_dark="#1c1c1e",
|
| 347 |
-
panel_border_color="#d1d1d6",
|
| 348 |
-
panel_border_color_dark="#38383a",
|
| 349 |
-
shadow_drop="0px 1px 3px 0px rgba(0,0,0,0.1)",
|
| 350 |
-
shadow_drop_lg="0px 10px 30px 0px rgba(0,0,0,0.15)",
|
| 351 |
-
loader_color="#007aff",
|
| 352 |
-
loader_color_dark="#0a84ff")
|
| 353 |
-
|
| 354 |
-
with gr.Blocks(title=TITLE, js=JS, theme=apple_theme) as demo:
|
| 355 |
gr.HTML(HEADER)
|
| 356 |
|
| 357 |
with gr.Tab('Image Segmentation'):
|
|
@@ -366,7 +303,7 @@ def build_demo():
|
|
| 366 |
|
| 367 |
sample_frames_1 = gr.Slider(1, 32, value=16, step=1, visible=False)
|
| 368 |
|
| 369 |
-
query_1 = gr.Textbox(label='Text Prompt', placeholder='Please segment the...')
|
| 370 |
|
| 371 |
with gr.Row():
|
| 372 |
random_btn_1 = gr.Button(value='🔮 Random', visible=False)
|
|
@@ -376,7 +313,8 @@ def build_demo():
|
|
| 376 |
|
| 377 |
download_btn_1.render()
|
| 378 |
|
| 379 |
-
submit_btn_1 = gr.Button(value='🚀 Submit', variant='primary')
|
|
|
|
| 380 |
with gr.Column():
|
| 381 |
msk_1.render()
|
| 382 |
ans_1.render()
|
|
@@ -405,7 +343,7 @@ def build_demo():
|
|
| 405 |
label='Sample Frames',
|
| 406 |
info='The number of frames to sample from a video (Default: 16)')
|
| 407 |
|
| 408 |
-
query_2 = gr.Textbox(label='Text Prompt', placeholder='Please segment the...')
|
| 409 |
|
| 410 |
with gr.Row():
|
| 411 |
random_btn_2 = gr.Button(value='🔮 Random', visible=False)
|
|
@@ -415,7 +353,8 @@ def build_demo():
|
|
| 415 |
|
| 416 |
download_btn_2.render()
|
| 417 |
|
| 418 |
-
submit_btn_2 = gr.Button(value='🚀 Submit', variant='primary')
|
|
|
|
| 419 |
with gr.Column():
|
| 420 |
msk_2.render()
|
| 421 |
ans_2.render()
|
|
@@ -440,7 +379,8 @@ def build_demo():
|
|
| 440 |
|
| 441 |
prompt_frame_index_3 = gr.Slider(1, 16, value=1, step=1, visible=False)
|
| 442 |
|
| 443 |
-
query_3 = gr.Textbox(
|
|
|
|
| 444 |
|
| 445 |
with gr.Row():
|
| 446 |
random_btn_3 = gr.Button(value='🔮 Random', visible=False)
|
|
@@ -448,7 +388,8 @@ def build_demo():
|
|
| 448 |
reset_btn_3 = gr.ClearButton([media_3, query_3, msk_3, ans_3], value='🗑️ Reset')
|
| 449 |
reset_btn_3.click(reset_reg, None, [prompt_frame_index_3, download_btn_3])
|
| 450 |
|
| 451 |
-
submit_btn_3 = gr.Button(value='🚀 Submit', variant='primary')
|
|
|
|
| 452 |
with gr.Column():
|
| 453 |
msk_3.render()
|
| 454 |
ans_3.render()
|
|
@@ -466,13 +407,14 @@ def build_demo():
|
|
| 466 |
step=1,
|
| 467 |
interactive=True,
|
| 468 |
label='Prompt Frame Index',
|
| 469 |
-
info='The index of the frame
|
| 470 |
render=False)
|
| 471 |
msk_4 = gr.ImageEditor(
|
| 472 |
label='Mask Prompt',
|
| 473 |
brush=gr.Brush(colors=['#ff000080'], color_mode='fixed'),
|
| 474 |
transforms=None,
|
| 475 |
layers=False,
|
|
|
|
| 476 |
render=False)
|
| 477 |
ans_4 = gr.HighlightedText(label='Model Response', show_inline_category=False, render=False)
|
| 478 |
|
|
@@ -485,7 +427,8 @@ def build_demo():
|
|
| 485 |
prompt_frame_index_4.render()
|
| 486 |
prompt_frame_index_4.change(update_video, [media_4, prompt_frame_index_4], msk_4)
|
| 487 |
|
| 488 |
-
query_4 = gr.Textbox(
|
|
|
|
| 489 |
|
| 490 |
with gr.Row():
|
| 491 |
random_btn_4 = gr.Button(value='🔮 Random', visible=False)
|
|
@@ -493,7 +436,8 @@ def build_demo():
|
|
| 493 |
reset_btn_4 = gr.ClearButton([media_4, query_4, msk_4, ans_4], value='🗑️ Reset')
|
| 494 |
reset_btn_4.click(reset_reg, None, [prompt_frame_index_4, download_btn_4])
|
| 495 |
|
| 496 |
-
submit_btn_4 = gr.Button(value='🚀 Submit', variant='primary')
|
|
|
|
| 497 |
with gr.Column():
|
| 498 |
msk_4.render()
|
| 499 |
ans_4.render()
|
|
|
|
| 27 |
TITLE = 'UniPixel: Unified Object Referring and Segmentation for Pixel-Level Visual Reasoning'
|
| 28 |
|
| 29 |
HEADER = """
|
| 30 |
+
<p align="center" style="margin: 1em 0 2em;"><img width="260" src="https://raw.githubusercontent.com/PolyU-ChenLab/UniPixel/refs/heads/main/.github/logo.png"></p>
|
| 31 |
<h3 align="center">Unified Object Referring and Segmentation for Pixel-Level Visual Reasoning</h3>
|
| 32 |
<div style="display: flex; justify-content: center; gap: 5px;">
|
| 33 |
<a href="https://arxiv.org/abs/2509.18094" target="_blank"><img src="https://img.shields.io/badge/arXiv-2509.18094-red"></a>
|
|
|
|
| 46 |
if (window.innerWidth >= 1536) {
|
| 47 |
document.querySelector('main').style.maxWidth = '1536px'
|
| 48 |
}
|
| 49 |
+
|
| 50 |
+
document.getElementById('query_1').addEventListener('keydown', function f1(e) { if (e.key === 'Enter') { document.getElementById('submit_1').click() } })
|
| 51 |
+
document.getElementById('query_2').addEventListener('keydown', function f2(e) { if (e.key === 'Enter') { document.getElementById('submit_2').click() } })
|
| 52 |
+
document.getElementById('query_3').addEventListener('keydown', function f3(e) { if (e.key === 'Enter') { document.getElementById('submit_3').click() } })
|
| 53 |
+
document.getElementById('query_4').addEventListener('keydown', function f4(e) { if (e.key === 'Enter') { document.getElementById('submit_4').click() } })
|
| 54 |
}
|
| 55 |
"""
|
| 56 |
|
|
|
|
| 96 |
|
| 97 |
def update_video(video, prompt_idx):
|
| 98 |
if video is None:
|
| 99 |
+
return gr.ImageEditor(value=None, interactive=False)
|
| 100 |
|
| 101 |
_, images = load_video(video, sample_frames=16)
|
| 102 |
+
component = gr.ImageEditor(value=images[prompt_idx - 1], interactive=True)
|
| 103 |
|
| 104 |
+
return component
|
| 105 |
|
| 106 |
|
| 107 |
@spaces.GPU
|
|
|
|
| 288 |
|
| 289 |
|
| 290 |
def build_demo():
|
| 291 |
+
with gr.Blocks(title=TITLE, js=JS, theme=gr.themes.Soft()) as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
gr.HTML(HEADER)
|
| 293 |
|
| 294 |
with gr.Tab('Image Segmentation'):
|
|
|
|
| 303 |
|
| 304 |
sample_frames_1 = gr.Slider(1, 32, value=16, step=1, visible=False)
|
| 305 |
|
| 306 |
+
query_1 = gr.Textbox(label='Text Prompt', placeholder='Please segment the...', elem_id='query_1')
|
| 307 |
|
| 308 |
with gr.Row():
|
| 309 |
random_btn_1 = gr.Button(value='🔮 Random', visible=False)
|
|
|
|
| 313 |
|
| 314 |
download_btn_1.render()
|
| 315 |
|
| 316 |
+
submit_btn_1 = gr.Button(value='🚀 Submit', variant='primary', elem_id='submit_1')
|
| 317 |
+
|
| 318 |
with gr.Column():
|
| 319 |
msk_1.render()
|
| 320 |
ans_1.render()
|
|
|
|
| 343 |
label='Sample Frames',
|
| 344 |
info='The number of frames to sample from a video (Default: 16)')
|
| 345 |
|
| 346 |
+
query_2 = gr.Textbox(label='Text Prompt', placeholder='Please segment the...', elem_id='query_2')
|
| 347 |
|
| 348 |
with gr.Row():
|
| 349 |
random_btn_2 = gr.Button(value='🔮 Random', visible=False)
|
|
|
|
| 353 |
|
| 354 |
download_btn_2.render()
|
| 355 |
|
| 356 |
+
submit_btn_2 = gr.Button(value='🚀 Submit', variant='primary', elem_id='submit_2')
|
| 357 |
+
|
| 358 |
with gr.Column():
|
| 359 |
msk_2.render()
|
| 360 |
ans_2.render()
|
|
|
|
| 379 |
|
| 380 |
prompt_frame_index_3 = gr.Slider(1, 16, value=1, step=1, visible=False)
|
| 381 |
|
| 382 |
+
query_3 = gr.Textbox(
|
| 383 |
+
label='Text Prompt', placeholder='Please describe the highlighted region...', elem_id='query_3')
|
| 384 |
|
| 385 |
with gr.Row():
|
| 386 |
random_btn_3 = gr.Button(value='🔮 Random', visible=False)
|
|
|
|
| 388 |
reset_btn_3 = gr.ClearButton([media_3, query_3, msk_3, ans_3], value='🗑️ Reset')
|
| 389 |
reset_btn_3.click(reset_reg, None, [prompt_frame_index_3, download_btn_3])
|
| 390 |
|
| 391 |
+
submit_btn_3 = gr.Button(value='🚀 Submit', variant='primary', elem_id='submit_3')
|
| 392 |
+
|
| 393 |
with gr.Column():
|
| 394 |
msk_3.render()
|
| 395 |
ans_3.render()
|
|
|
|
| 407 |
step=1,
|
| 408 |
interactive=True,
|
| 409 |
label='Prompt Frame Index',
|
| 410 |
+
info='The index of the frame to apply mask prompts (Default: 1)',
|
| 411 |
render=False)
|
| 412 |
msk_4 = gr.ImageEditor(
|
| 413 |
label='Mask Prompt',
|
| 414 |
brush=gr.Brush(colors=['#ff000080'], color_mode='fixed'),
|
| 415 |
transforms=None,
|
| 416 |
layers=False,
|
| 417 |
+
interactive=False,
|
| 418 |
render=False)
|
| 419 |
ans_4 = gr.HighlightedText(label='Model Response', show_inline_category=False, render=False)
|
| 420 |
|
|
|
|
| 427 |
prompt_frame_index_4.render()
|
| 428 |
prompt_frame_index_4.change(update_video, [media_4, prompt_frame_index_4], msk_4)
|
| 429 |
|
| 430 |
+
query_4 = gr.Textbox(
|
| 431 |
+
label='Text Prompt', placeholder='Please describe the highlighted region...', elem_id='query_4')
|
| 432 |
|
| 433 |
with gr.Row():
|
| 434 |
random_btn_4 = gr.Button(value='🔮 Random', visible=False)
|
|
|
|
| 436 |
reset_btn_4 = gr.ClearButton([media_4, query_4, msk_4, ans_4], value='🗑️ Reset')
|
| 437 |
reset_btn_4.click(reset_reg, None, [prompt_frame_index_4, download_btn_4])
|
| 438 |
|
| 439 |
+
submit_btn_4 = gr.Button(value='🚀 Submit', variant='primary', elem_id='submit_4')
|
| 440 |
+
|
| 441 |
with gr.Column():
|
| 442 |
msk_4.render()
|
| 443 |
ans_4.render()
|