# UVIS - Gradio App with Upload, URL & Video Support
"""
This script launches the UVIS (Unified Visual Intelligence System) as a Gradio Web App.
Supports image, video, and URL-based media inputs for detection, segmentation, and depth estimation.
Outputs include scene blueprint, structured JSON, and downloadable results.
"""
import time
import logging

import cv2
import gradio as gr
import spaces
import timeout_decorator
from PIL import Image
from huggingface_hub import hf_hub_download

from registry import get_model
from core.describe_scene import describe_scene
from core.process import process_image
from core.input_handler import resolve_input, validate_video, validate_image
from utils.helpers import format_error, generate_session_id

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Model mappings: UI display name -> model/checkpoint id
DETECTION_MODEL_MAP = {
    "YOLOv5-Nano": "yolov5n-seg",
    "YOLOv5-Small": "yolov5s-seg",
    "YOLOv8-Small": "yolov8s",
    "YOLOv8-Large": "yolov8l",
    "RT-DETR": "rtdetr",  # For future support
}

SEGMENTATION_MODEL_MAP = {
    "SegFormer-B0": "nvidia/segformer-b0-finetuned-ade-512-512",
    "SegFormer-B5": "nvidia/segformer-b5-finetuned-ade-512-512",
    "DeepLabV3-ResNet50": "deeplabv3_resnet50",
}

DEPTH_MODEL_MAP = {
    "MiDaS v21 Small 256": "midas_v21_small_256",
    "MiDaS v21 384": "midas_v21_384",
    "DPT Hybrid 384": "dpt_hybrid_384",
    "DPT Swin2 Large 384": "dpt_swin2_large_384",
    "DPT Beit Large 512": "dpt_beit_large_512",
}
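
# Illustrative only: the dropdowns below surface the map keys, and the handler
# passes those display names downstream, so a lookup like this one presumably
# happens inside core.process. This helper is a minimal sketch of that step,
# not part of the actual pipeline:
def _resolve_model_id(model_map, display_name):
    """Map a UI label (e.g. 'YOLOv8-Small') to its checkpoint id, failing loudly."""
    try:
        return model_map[display_name]
    except KeyError:
        raise ValueError(f"Unknown model choice: {display_name!r}")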

# Resource Limits
MAX_IMAGE_MB = 5
MAX_IMAGE_RES = (1920, 1080)  # max width x height in pixels
MAX_VIDEO_MB = 50
MAX_VIDEO_DURATION = 30  # seconds
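
# A minimal sketch of how validate_image might enforce the limits above; the
# real checks are assumed to live in core.input_handler. The on-disk size check
# (MAX_IMAGE_MB) needs the original file path, so only resolution is shown here:
def _check_image_resolution(img):
    """Return (ok, error message or None) against the configured resolution cap."""
    w, h = img.size
    if w > MAX_IMAGE_RES[0] or h > MAX_IMAGE_RES[1]:
        return False, f"Image exceeds the {MAX_IMAGE_RES[0]}x{MAX_IMAGE_RES[1]} resolution limit."
    return True, None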

@spaces.GPU
def preload_models():
    """
    Warm-up hook for ZeroGPU. Spaces requires at least one @spaces.GPU-decorated
    function to activate ZeroGPU; this one loads the lightest models into memory.
    """
    logger.info("Warming up models for ZeroGPU...")
    get_model("detection", "yolov5n-seg", device="cpu")
    get_model("segmentation", "deeplabv3_resnet50", device="cpu")
    get_model("depth", "midas_v21_small_256", device="cpu")

def log_runtime(start_time):
    """Log elapsed wall-clock time for the current request."""
    logger.info(f"Total runtime: {time.time() - start_time:.2f}s")

# Main Handler
def handle(mode, media_upload, url, run_det, det_model, det_confidence, run_seg, seg_model, run_depth, depth_model, blend):
    """
    Master handler: resolves the input, validates it, and runs the selected tasks.
    Returns (blueprint image, scene JSON, zip path) for the Gradio outputs.
    Note: only the first valid media item is processed per run.
    """
    session_id = generate_session_id()
    logger.info(f"Session ID: {session_id} | Handler activated with mode: {mode}")
    start_time = time.time()

    media = resolve_input(mode, media_upload, url)
    if not media:
        return None, format_error("No valid input provided. Please check your upload or URL."), None

    for single_media in media:
        if isinstance(single_media, str):  # Video file path: validate, then analyze the first frame
            valid, err = validate_video(single_media)
            if not valid:
                return None, format_error(err), None
            cap = cv2.VideoCapture(single_media)
            ret, frame = cap.read()
            cap.release()
            if not ret:
                return None, format_error("Failed to read video frame."), None
            single_media = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        if isinstance(single_media, Image.Image):
            valid, err = validate_image(single_media)
            if not valid:
                return None, format_error(err), None
            try:
                result = process_image(single_media, run_det, det_model, det_confidence, run_seg, seg_model, run_depth, depth_model, blend)
                log_runtime(start_time)
                return result
            except timeout_decorator.TimeoutError:
                logger.error("Image processing timed out.")
                return None, format_error("Processing timed out. Try a smaller image or simpler model."), None

    logger.warning("Unsupported media type resolved.")
    log_runtime(start_time)
    return None, format_error("Invalid input. Please check your upload or URL."), None
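
# Output contract (an assumption, inferred from the run.click wiring below):
# process_image returns a 3-tuple aligned with (img_out, json_out, zip_out).
# A hypothetical stand-in with the same shape, useful when testing the UI wiring
# without the model stack:
def _process_image_stub(img, *task_args):
    """Return (blueprint image, scene JSON dict, zip path or None)."""
    return img, {"session": "stub", "objects": []}, None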

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("## Unified Visual Intelligence System (UVIS)")
    with gr.Row():
        # Left panel
        with gr.Column(scale=2):
            # Input Mode Toggle
            mode = gr.Radio(["Upload", "URL"], value="Upload", label="Input Mode")
            # File upload: accepts 1-5 images or a single video (one or the other, not both)
            media_upload = gr.File(
                label="Upload Images (1–5) or 1 Video",
                file_types=["image", ".mp4", ".mov", ".avi"],
                file_count="multiple"
            )
            # URL input
            url = gr.Textbox(label="URL (Image/Video)", visible=False)

            # Toggle visibility
            def toggle_inputs(selected_mode):
                return [
                    gr.update(visible=(selected_mode == "Upload")),  # media_upload
                    gr.update(visible=(selected_mode == "URL")),     # url
                ]
            mode.change(toggle_inputs, inputs=mode, outputs=[media_upload, url])
            # Visibility logic for the per-task settings panels
            def toggle_visibility(checked):
                return gr.update(visible=checked)

            def toggle_det_visibility(checked):
                # Currently unused: the whole OD_Settings column is toggled instead.
                return [gr.update(visible=checked), gr.update(visible=checked)]
            run_det = gr.Checkbox(label="Object Detection")
            run_seg = gr.Checkbox(label="Semantic Segmentation")
            run_depth = gr.Checkbox(label="Depth Estimation")

            with gr.Row():
                with gr.Column(visible=False) as OD_Settings:
                    with gr.Accordion("Object Detection Settings", open=True):
                        det_model = gr.Dropdown(choices=list(DETECTION_MODEL_MAP), label="Detection Model")
                        det_confidence = gr.Slider(0.1, 1.0, 0.5, label="Detection Confidence Threshold")

                with gr.Column(visible=False) as SS_Settings:
                    with gr.Accordion("Semantic Segmentation Settings", open=True):
                        seg_model = gr.Dropdown(choices=list(SEGMENTATION_MODEL_MAP), label="Segmentation Model")

                with gr.Column(visible=False) as DE_Settings:
                    with gr.Accordion("Depth Estimation Settings", open=True):
                        depth_model = gr.Dropdown(choices=list(DEPTH_MODEL_MAP), label="Depth Model")

            # Attach visibility logic
            run_det.change(fn=toggle_visibility, inputs=[run_det], outputs=[OD_Settings])
            run_seg.change(fn=toggle_visibility, inputs=[run_seg], outputs=[SS_Settings])
            run_depth.change(fn=toggle_visibility, inputs=[run_depth], outputs=[DE_Settings])

            blend = gr.Slider(0.0, 1.0, 0.5, label="Overlay Blend")

            # Run Button
            run = gr.Button("Run Analysis")
        # Right panel
        with gr.Column(scale=1):
            # single_img_preview = gr.Image(label="Preview (Image)", visible=False)
            # gallery_preview = gr.Gallery(label="Preview (Gallery)", columns=3, height="auto", visible=False)
            # video_preview = gr.Video(label="Preview (Video)", visible=False)
            img_out = gr.Image(label="Scene Blueprint")
            json_out = gr.JSON(label="Scene JSON")
            zip_out = gr.File(label="Download Results")

            # Output Tabs (alternative layout, disabled)
            # with gr.Tab("Scene JSON"):
            #     json_out = gr.JSON()
            # with gr.Tab("Scene Blueprint"):
            #     img_out = gr.Image()
            # with gr.Tab("Download"):
            #     zip_out = gr.File()
    # Button Click Event
    run.click(
        handle,
        inputs=[mode, media_upload, url, run_det, det_model, det_confidence, run_seg, seg_model, run_depth, depth_model, blend],
        outputs=[img_out, json_out, zip_out]
    )
    # Footer Section
    gr.Markdown("---")
    gr.Markdown(
        """
        <div style='text-align: center; font-size: 14px;'>
        Built by <b>Durga Deepak Valluri</b><br>
        <a href="https://github.com/DurgaDeepakValluri" target="_blank">GitHub</a> |
        <a href="https://deecoded.io" target="_blank">Website</a> |
        <a href="https://www.linkedin.com/in/durga-deepak-valluri" target="_blank">LinkedIn</a>
        </div>
        """,
    )
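
# Optional: request queuing smooths long-running jobs on shared Spaces hardware.
# Enabling it is a deployment choice, not something this app requires:
# demo.queue()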
# Launch the Gradio App
demo.launch()