Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,7 @@ import math
|
|
| 4 |
import os
|
| 5 |
import traceback
|
| 6 |
from io import BytesIO
|
| 7 |
-
from typing import Any, Dict, List, Optional, Tuple
|
| 8 |
import re
|
| 9 |
import time
|
| 10 |
from threading import Thread
|
|
@@ -27,6 +27,73 @@ from transformers import (
|
|
| 27 |
AutoProcessor,
|
| 28 |
TextIteratorStreamer,
|
| 29 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
# --- Constants and Model Setup ---
|
| 32 |
MAX_INPUT_TOKEN_LENGTH = 4096
|
|
@@ -271,12 +338,8 @@ def create_gradio_interface():
|
|
| 271 |
"""Builds and returns the Gradio web interface."""
|
| 272 |
css = """
|
| 273 |
.main-container { max-width: 1400px; margin: 0 auto; }
|
| 274 |
-
.process-button { border: none !important; color: white !important; font-weight: bold !important; background-color: blue !important;}
|
| 275 |
-
.process-button:hover { background-color: darkblue !important; transform: translateY(-2px) !important; box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important; }
|
| 276 |
-
.processr-button { border: none !important; color: white !important; font-weight: bold !important; background-color: blue !important;}
|
| 277 |
-
.processr-button:hover { background-color: darkblue !important; transform: translateY(-2px) !important; box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important; }
|
| 278 |
"""
|
| 279 |
-
with gr.Blocks(theme=
|
| 280 |
gr.Markdown("# Multimodal VLM v1.0 ⚡")
|
| 281 |
gr.Markdown("Explore the capabilities of various Vision Language Models for tasks like OCR, VQA, and Object Detection.")
|
| 282 |
|
|
@@ -300,8 +363,8 @@ def create_gradio_interface():
|
|
| 300 |
top_k = gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=40)
|
| 301 |
repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1)
|
| 302 |
|
| 303 |
-
process_btn = gr.Button("
|
| 304 |
-
clear_btn = gr.Button("
|
| 305 |
|
| 306 |
with gr.Column(scale=2):
|
| 307 |
#gr.Markdown("### 2. View Output")
|
|
@@ -334,7 +397,7 @@ def create_gradio_interface():
|
|
| 334 |
label="Max Objects (for Object Detection only)",
|
| 335 |
value=10, minimum=1, maximum=50, step=1, visible=True
|
| 336 |
)
|
| 337 |
-
md3_generate_btn = gr.Button(value="
|
| 338 |
with gr.Column(scale=1):
|
| 339 |
md3_output_image = gr.Image(type="pil", label="Result", height=400)
|
| 340 |
md3_output_textbox = gr.Textbox(label="Model Response", lines=10, show_copy_button=True)
|
|
@@ -343,7 +406,7 @@ def create_gradio_interface():
|
|
| 343 |
gr.Examples(
|
| 344 |
examples=[
|
| 345 |
["md3/1.jpg", "Object Detection", "boats", 7],
|
| 346 |
-
["
|
| 347 |
["md3/3.png", "Caption", "", 5],
|
| 348 |
["md3/4.jpeg", "Visual Question Answering", "Analyze the GDP trend over the years.", 5],
|
| 349 |
],
|
|
@@ -377,4 +440,4 @@ def create_gradio_interface():
|
|
| 377 |
|
| 378 |
if __name__ == "__main__":
|
| 379 |
demo = create_gradio_interface()
|
| 380 |
-
demo.queue(max_size=50).launch(
|
|
|
|
| 4 |
import os
|
| 5 |
import traceback
|
| 6 |
from io import BytesIO
|
| 7 |
+
from typing import Any, Dict, List, Optional, Tuple, Iterable
|
| 8 |
import re
|
| 9 |
import time
|
| 10 |
from threading import Thread
|
|
|
|
| 27 |
AutoProcessor,
|
| 28 |
TextIteratorStreamer,
|
| 29 |
)
|
| 30 |
+
from gradio.themes import Soft
|
| 31 |
+
from gradio.themes.utils import colors, fonts, sizes
|
| 32 |
+
|
| 33 |
+
# --- Theme Definition ---
|
| 34 |
+
|
| 35 |
+
# Define a new color palette for Blue
|
| 36 |
+
colors.blue_theme_color = colors.Color(
|
| 37 |
+
name="blue_theme_color",
|
| 38 |
+
c50="#E6E6FF",
|
| 39 |
+
c100="#CCCCFF",
|
| 40 |
+
c200="#9999FF",
|
| 41 |
+
c300="#6666FF",
|
| 42 |
+
c400="#3333FF",
|
| 43 |
+
c500="#0000FF", # Base Blue color
|
| 44 |
+
c600="#0000D9",
|
| 45 |
+
c700="#0000B3",
|
| 46 |
+
c800="#000080",
|
| 47 |
+
c900="#000066",
|
| 48 |
+
c950="#000033",
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
class BlueTheme(Soft):
|
| 52 |
+
def __init__(
|
| 53 |
+
self,
|
| 54 |
+
*,
|
| 55 |
+
primary_hue: colors.Color | str = colors.gray,
|
| 56 |
+
secondary_hue: colors.Color | str = colors.blue_theme_color,
|
| 57 |
+
neutral_hue: colors.Color | str = colors.slate,
|
| 58 |
+
text_size: sizes.Size | str = sizes.text_lg,
|
| 59 |
+
font: fonts.Font | str | Iterable[fonts.Font | str] = (
|
| 60 |
+
fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
|
| 61 |
+
),
|
| 62 |
+
font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
|
| 63 |
+
fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
|
| 64 |
+
),
|
| 65 |
+
):
|
| 66 |
+
super().__init__(
|
| 67 |
+
primary_hue=primary_hue,
|
| 68 |
+
secondary_hue=secondary_hue,
|
| 69 |
+
neutral_hue=neutral_hue,
|
| 70 |
+
text_size=text_size,
|
| 71 |
+
font=font,
|
| 72 |
+
font_mono=font_mono,
|
| 73 |
+
)
|
| 74 |
+
super().set(
|
| 75 |
+
background_fill_primary="*primary_50",
|
| 76 |
+
background_fill_primary_dark="*primary_900",
|
| 77 |
+
body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
|
| 78 |
+
body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
|
| 79 |
+
button_primary_text_color="white",
|
| 80 |
+
button_primary_text_color_hover="white",
|
| 81 |
+
button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
|
| 82 |
+
button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
|
| 83 |
+
button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_700)",
|
| 84 |
+
button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_600)",
|
| 85 |
+
slider_color="*secondary_500",
|
| 86 |
+
slider_color_dark="*secondary_600",
|
| 87 |
+
block_title_text_weight="600",
|
| 88 |
+
block_border_width="2px",
|
| 89 |
+
block_shadow="*shadow_drop_lg",
|
| 90 |
+
button_primary_shadow="*shadow_drop_lg",
|
| 91 |
+
button_large_padding="12px",
|
| 92 |
+
block_label_background_fill="*primary_200",
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
# Instantiate the theme
|
| 96 |
+
blue_theme = BlueTheme()
|
| 97 |
|
| 98 |
# --- Constants and Model Setup ---
|
| 99 |
MAX_INPUT_TOKEN_LENGTH = 4096
|
|
|
|
| 338 |
"""Builds and returns the Gradio web interface."""
|
| 339 |
css = """
|
| 340 |
.main-container { max-width: 1400px; margin: 0 auto; }
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
"""
|
| 342 |
+
with gr.Blocks(theme=blue_theme, css=css) as demo:
|
| 343 |
gr.Markdown("# Multimodal VLM v1.0 ⚡")
|
| 344 |
gr.Markdown("Explore the capabilities of various Vision Language Models for tasks like OCR, VQA, and Object Detection.")
|
| 345 |
|
|
|
|
| 363 |
top_k = gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=40)
|
| 364 |
repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1)
|
| 365 |
|
| 366 |
+
process_btn = gr.Button("Submit", variant="primary")
|
| 367 |
+
clear_btn = gr.Button("Clear", variant="secondary")
|
| 368 |
|
| 369 |
with gr.Column(scale=2):
|
| 370 |
#gr.Markdown("### 2. View Output")
|
|
|
|
| 397 |
label="Max Objects (for Object Detection only)",
|
| 398 |
value=10, minimum=1, maximum=50, step=1, visible=True
|
| 399 |
)
|
| 400 |
+
md3_generate_btn = gr.Button(value="Submit", variant="primary")
|
| 401 |
with gr.Column(scale=1):
|
| 402 |
md3_output_image = gr.Image(type="pil", label="Result", height=400)
|
| 403 |
md3_output_textbox = gr.Textbox(label="Model Response", lines=10, show_copy_button=True)
|
|
|
|
| 406 |
gr.Examples(
|
| 407 |
examples=[
|
| 408 |
["md3/1.jpg", "Object Detection", "boats", 7],
|
| 409 |
+
["md2.jpg", "Point Detection", "children", 7],
|
| 410 |
["md3/3.png", "Caption", "", 5],
|
| 411 |
["md3/4.jpeg", "Visual Question Answering", "Analyze the GDP trend over the years.", 5],
|
| 412 |
],
|
|
|
|
| 440 |
|
| 441 |
if __name__ == "__main__":
|
| 442 |
demo = create_gradio_interface()
|
| 443 |
+
demo.queue(max_size=50).launch(ssr_mode=False, mcp_server=True, show_error=True)
|