Spaces:

prithivMLmods
/

Multimodal-VLM-v1.0

Running on Zero

App Files Files Community

prithivMLmods committed 25 days ago

Commit

4ebf926

verified ·

1 Parent(s): 942b435

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -11

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import math
 import os
 import traceback
 from io import BytesIO
-from typing import Any, Dict, List, Optional, Tuple
 import re
 import time
 from threading import Thread
@@ -27,6 +27,73 @@ from transformers import (
     AutoProcessor,
     TextIteratorStreamer,
 )
 # --- Constants and Model Setup ---
 MAX_INPUT_TOKEN_LENGTH = 4096
@@ -271,12 +338,8 @@ def create_gradio_interface():
     """Builds and returns the Gradio web interface."""
     css = """
     .main-container { max-width: 1400px; margin: 0 auto; }
-    .process-button { border: none !important; color: white !important; font-weight: bold !important; background-color: blue !important;}
-    .process-button:hover { background-color: darkblue !important; transform: translateY(-2px) !important; box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important; }
-    .processr-button { border: none !important; color: white !important; font-weight: bold !important; background-color: blue !important;}
-    .processr-button:hover { background-color: darkblue !important; transform: translateY(-2px) !important; box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important; }
     """
-    with gr.Blocks(theme="bethecloud/storj_theme", css=css) as demo:
         gr.Markdown("# Multimodal VLM v1.0 ⚡")
         gr.Markdown("Explore the capabilities of various Vision Language Models for tasks like OCR, VQA, and Object Detection.")
@@ -300,8 +363,8 @@ def create_gradio_interface():
                             top_k = gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=40)
                             repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1)
-                        process_btn = gr.Button("🚀 Process", variant="primary", elem_classes=["process-button"])
-                        clear_btn = gr.Button("🗑️ Clear", variant="secondary")
                     with gr.Column(scale=2):
                         #gr.Markdown("### 2. View Output")
@@ -334,7 +397,7 @@ def create_gradio_interface():
                             label="Max Objects (for Object Detection only)",
                             value=10, minimum=1, maximum=50, step=1, visible=True
                         )
-                        md3_generate_btn = gr.Button(value="🚀 Process", variant="primary", elem_classes=["processr-button"])
                     with gr.Column(scale=1):
                         md3_output_image = gr.Image(type="pil", label="Result", height=400)
                         md3_output_textbox = gr.Textbox(label="Model Response", lines=10, show_copy_button=True)
@@ -343,7 +406,7 @@ def create_gradio_interface():
                 gr.Examples(
                     examples=[
                         ["md3/1.jpg", "Object Detection", "boats", 7],
-                        ["md3/2.jpg", "Point Detection", "children", 7],
                         ["md3/3.png", "Caption", "", 5],
                         ["md3/4.jpeg", "Visual Question Answering", "Analyze the GDP trend over the years.", 5],
                     ],
@@ -377,4 +440,4 @@ def create_gradio_interface():
 if __name__ == "__main__":
     demo = create_gradio_interface()
-    demo.queue(max_size=50).launch(share=True, ssr_mode=False, mcp_server=True, show_error=True)

 import os
 import traceback
 from io import BytesIO
+from typing import Any, Dict, List, Optional, Tuple, Iterable
 import re
 import time
 from threading import Thread
     AutoProcessor,
     TextIteratorStreamer,
 )
+from gradio.themes import Soft
+from gradio.themes.utils import colors, fonts, sizes
+# --- Theme Definition ---
+# Define a new color palette for Blue
+colors.blue_theme_color = colors.Color(
+    name="blue_theme_color",
+    c50="#E6E6FF",
+    c100="#CCCCFF",
+    c200="#9999FF",
+    c300="#6666FF",
+    c400="#3333FF",
+    c500="#0000FF",  # Base Blue color
+    c600="#0000D9",
+    c700="#0000B3",
+    c800="#000080",
+    c900="#000066",
+    c950="#000033",
+)
+# Custom Gradio theme: subclasses the built-in `Soft` theme and recolors it
+# using the module's custom blue palette as the secondary hue.
+class BlueTheme(Soft):
+    # All constructor arguments are keyword-only (note the bare `*`).
+    # Defaults: gray primary hue, the custom blue palette as secondary hue,
+    # slate neutral hue, large text, "Outfit" as the UI font and
+    # "IBM Plex Mono" as the monospace font, each with generic fallbacks.
+    # NOTE(review): the `X | Y` annotations are evaluated at definition time
+    # unless `from __future__ import annotations` is in effect, which would
+    # require Python 3.10+ -- confirm against the top of the file.
+    def __init__(
+        self,
+        *,
+        primary_hue: colors.Color | str = colors.gray,
+        secondary_hue: colors.Color | str = colors.blue_theme_color,
+        neutral_hue: colors.Color | str = colors.slate,
+        text_size: sizes.Size | str = sizes.text_lg,
+        font: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
+        ),
+        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
+        ),
+    ):
+        # Forward every argument unchanged to the parent theme's constructor.
+        super().__init__(
+            primary_hue=primary_hue,
+            secondary_hue=secondary_hue,
+            neutral_hue=neutral_hue,
+            text_size=text_size,
+            font=font,
+            font_mono=font_mono,
+        )
+        # Override individual theme variables on top of the parent's values.
+        # Values starting with `*` appear to reference other theme variables
+        # (Gradio theming convention -- verify against the Gradio docs);
+        # keys ending in `_dark` are presumably the dark-mode counterparts.
+        # This class does not override `set`, so `super().set` resolves to
+        # the inherited method.
+        super().set(
+            background_fill_primary="*primary_50",
+            background_fill_primary_dark="*primary_900",
+            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
+            body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
+            button_primary_text_color="white",
+            button_primary_text_color_hover="white",
+            # Primary buttons use a left-to-right gradient of the secondary
+            # hue; the hover state shifts one step darker (500-600 -> 600-700),
+            # and the `_dark` variants swap those two gradients.
+            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
+            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
+            button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_700)",
+            button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_600)",
+            slider_color="*secondary_500",
+            slider_color_dark="*secondary_600",
+            block_title_text_weight="600",
+            block_border_width="2px",
+            block_shadow="*shadow_drop_lg",
+            button_primary_shadow="*shadow_drop_lg",
+            button_large_padding="12px",
+            block_label_background_fill="*primary_200",
+        )
+# Instantiate the theme
+blue_theme = BlueTheme()
 # --- Constants and Model Setup ---
 MAX_INPUT_TOKEN_LENGTH = 4096
     """Builds and returns the Gradio web interface."""
     css = """
     .main-container { max-width: 1400px; margin: 0 auto; }
     """
+    with gr.Blocks(theme=blue_theme, css=css) as demo:
         gr.Markdown("# Multimodal VLM v1.0 ⚡")
         gr.Markdown("Explore the capabilities of various Vision Language Models for tasks like OCR, VQA, and Object Detection.")
                             top_k = gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=40)
                             repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1)
+                        process_btn = gr.Button("Submit", variant="primary")
+                        clear_btn = gr.Button("Clear", variant="secondary")
                     with gr.Column(scale=2):
                         #gr.Markdown("### 2. View Output")
                             label="Max Objects (for Object Detection only)",
                             value=10, minimum=1, maximum=50, step=1, visible=True
                         )
+                        md3_generate_btn = gr.Button(value="Submit", variant="primary")
                     with gr.Column(scale=1):
                         md3_output_image = gr.Image(type="pil", label="Result", height=400)
                         md3_output_textbox = gr.Textbox(label="Model Response", lines=10, show_copy_button=True)
                 gr.Examples(
                     examples=[
                         ["md3/1.jpg", "Object Detection", "boats", 7],
+                        ["md2.jpg", "Point Detection", "children", 7],
                         ["md3/3.png", "Caption", "", 5],
                         ["md3/4.jpeg", "Visual Question Answering", "Analyze the GDP trend over the years.", 5],
                     ],
 if __name__ == "__main__":
     demo = create_gradio_interface()
+    demo.queue(max_size=50).launch(ssr_mode=False, mcp_server=True, show_error=True)