Spaces:

thinkPy
/

Docling-Layout-Analysis

Running

App Files Community

enpaiva committed on Sep 12

Commit

fc2ea23

verified ·

1 Parent(s): 62ca522

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -156

app.py CHANGED Viewed

@@ -54,6 +54,7 @@ classes_map = {
 current_model = None
 current_processor = None
 current_model_name = None
 def colormap(N=256, normalized=False):
     """Generate dynamic colormap."""
@@ -110,12 +111,12 @@ def nms_custom(boxes, scores, iou_threshold=0.5):
     return torch.tensor(keep, dtype=torch.long)
-def load_model(model_name):
-    """Load the selected model automatically."""
     global current_model, current_processor, current_model_name
-    if current_model_name == model_name:
-        return current_model, current_processor
     try:
         model_info = MODELS[model_name]
@@ -133,11 +134,11 @@ def load_model(model_name):
         current_model = model
         current_model_name = model_name
-        return model, processor
     except Exception as e:
         print(f"Error loading model: {e}")
-        return None, None
 def visualize_bbox(image_input, bboxes, classes, scores, id_to_names, alpha=0.3, show_labels=True):
     """Visualize bounding boxes with OpenCV."""
@@ -199,15 +200,32 @@ def visualize_bbox(image_input, bboxes, classes, scores, id_to_names, alpha=0.3,
     return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 def process_image(input_img, model_name, conf_threshold, iou_threshold, nms_method, alpha, show_labels):
     """Process image with document layout detection."""
     if input_img is None:
         return None, "❌ Please upload an image first."
     # Load model if needed
-    model, processor = load_model(model_name)
-    if model is None or processor is None:
-        return None, f"❌ Error loading model {model_name}."
     try:
         # Prepare image
@@ -218,20 +236,21 @@ def process_image(input_img, model_name, conf_threshold, iou_threshold, nms_meth
             input_img = input_img.convert('RGB')
         # Process with model
-        inputs = processor(images=[input_img], return_tensors="pt")
         inputs = {k: v.to(device) for k, v in inputs.items()}
         with torch.no_grad():
-            outputs = model(**inputs)
         # Post-process results
-        results = processor.post_process_object_detection(
             outputs,
             target_sizes=torch.tensor([input_img.size[::-1]]),
             threshold=conf_threshold,
         )
         if not results or len(results) == 0:
             return np.array(input_img), "ℹ️ No detections found."
         result = results[0]
@@ -240,6 +259,7 @@ def process_image(input_img, model_name, conf_threshold, iou_threshold, nms_meth
         labels = result["labels"]
         if len(boxes) == 0:
             return np.array(input_img), f"ℹ️ No detections above threshold {conf_threshold:.2f}."
         # Apply NMS
@@ -247,23 +267,26 @@ def process_image(input_img, model_name, conf_threshold, iou_threshold, nms_meth
             if nms_method == "Custom IoMin":
                 keep_indices = nms_custom(boxes=boxes, scores=scores, iou_threshold=iou_threshold)
             else:
-                # Use torchvision NMS with correct format
                 keep_indices = torchvision.ops.nms(boxes, scores, iou_threshold)
             boxes = boxes[keep_indices]
             scores = scores[keep_indices]
             labels = labels[keep_indices]
         # Visualize results
         output = visualize_bbox(input_img, boxes, labels, scores, classes_map, alpha=alpha, show_labels=show_labels)
         labels_status = "with labels" if show_labels else "without labels"
-        info = f"✅ Found {len(boxes)} detections ({labels_status}) | Model: {model_name} | Confidence: {conf_threshold:.2f}"
         return output, info
     except Exception as e:
         print(f"[ERROR] process_image failed: {e}")
         error_msg = f"❌ Processing error: {str(e)}"
         if input_img is not None:
             return np.array(input_img), error_msg
@@ -274,43 +297,27 @@ if __name__ == "__main__":
     print(f"📱 Device: {device}")
     print(f"🤖 Available models: {len(MODELS)}")
-    # Custom CSS for compact layout
     custom_css = """
     .gradio-container {
-        max-width: 1400px !important;
-        margin: 0 auto !important;
-        padding: 20px !important;
     }
-    .controls-container {
         background: #f8f9fa;
         border-radius: 12px;
-        border: 1px solid #dee2e6;
         padding: 20px;
-        margin-bottom: 20px;
     }
-    .results-container {
-        background: #ffffff;
         border-radius: 12px;
-        border: 1px solid #dee2e6;
         padding: 20px;
-    }
-    .section-divider {
-        border-top: 2px solid #e9ecef;
-        margin: 20px 0;
-        padding-top: 20px;
-    }
-    .analyze-btn {
-        background: linear-gradient(45deg, #667eea, #764ba2) !important;
-        border: none !important;
-        color: white !important;
-        font-weight: bold !important;
-        font-size: 18px !important;
-        padding: 15px 30px !important;
-        border-radius: 10px !important;
     }
     """
@@ -323,133 +330,113 @@ if __name__ == "__main__":
         # Header
         gr.HTML("""
-        <div style='text-align: center; padding: 30px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 15px; margin-bottom: 30px;'>
-            <h1 style='margin: 0; font-size: 2.5em; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);'>🔍 Document Layout Analysis</h1>
-            <p style='margin: 10px 0 0 0; font-size: 1.2em; opacity: 0.9;'>Compact interface for advanced document structure detection</p>
         </div>
         """)
-        # LEFT COLUMN - Controls and Input
-        with gr.Column(scale=1):
-            # Controls Section
-            with gr.Group(elem_classes=["controls-container"]):
-                # 1. Image Upload (First)
-                gr.HTML("<h3 style='margin-top: 0;'>📄 Upload Document</h3>")
-                input_img = gr.Image(
-                    label="Document Image",
-                    type="pil",
-                    height=300,
-                    interactive=True
-                )
-                # Divider
-                gr.HTML("<div class='section-divider'></div>")
-                # 2. Model Selection (Second)
-                gr.HTML("<h3>🤖 Model Selection</h3>")
-                model_dropdown = gr.Dropdown(
-                    choices=list(MODELS.keys()),
-                    value="Egret XLarge",
-                    label="AI Model",
-                    info="Model will load automatically when analyzing",
-                    interactive=True
-                )
-                # Divider
-                gr.HTML("<div class='section-divider'></div>")
-                # 3. Detection Parameters (Third)
-                gr.HTML("<h3>⚙️ Detection Settings</h3>")
-                with gr.Row():
-                    conf_threshold = gr.Slider(
-                        minimum=0.0,
-                        maximum=1.0,
-                        value=0.6,
-                        step=0.05,
-                        label="Confidence Threshold",
-                        info="Minimum confidence for detections"
                     )
-                    iou_threshold = gr.Slider(
-                        minimum=0.0,
-                        maximum=1.0,
-                        value=0.5,
-                        step=0.05,
-                        label="NMS IoU Threshold",
-                        info="Non-maximum suppression threshold"
                     )
-                with gr.Row():
-                    nms_method = gr.Radio(
-                        choices=["Custom IoMin", "Standard IoU"],
-                        value="Custom IoMin",
-                        label="NMS Algorithm",
-                        info="Choose suppression method"
                     )
-                    alpha_slider = gr.Slider(
-                        minimum=0.0,
-                        maximum=1.0,
-                        value=0.3,
-                        step=0.1,
-                        label="Overlay Transparency",
-                        info="Transparency of detection overlays"
                     )
-                show_labels_checkbox = gr.Checkbox(
-                    value=True,
-                    label="Show Class Labels and Confidence Scores",
-                    info="Display detection labels on the output image",
-                    interactive=True
-                )
-                # Divider
-                gr.HTML("<div class='section-divider'></div>")
-                # 4. Analyze Button (Last)
-                detect_btn = gr.Button(
-                    "🔍 Analyze Document",
-                    variant="primary",
-                    size="lg",
-                    elem_classes=["analyze-btn"]
-                )
-        # RIGHT COLUMN - Results and Output
-        with gr.Column(scale=1):
-            # Results Section
-            with gr.Group(elem_classes=["results-container"]):
-                gr.HTML("<h3 style='margin-top: 0;'>🎯 Analysis Results</h3>")
-                output_img = gr.Image(
-                    label="Analyzed Document",
-                    type="numpy",
-                    height=600,
-                    interactive=False
-                )
-                detection_info = gr.Textbox(
-                    label="Detection Summary",
-                    value="Ready for analysis. Upload an image and click 'Analyze Document'.",
-                    interactive=False,
-                    lines=2,
-                    show_copy_button=True
-                )
-        # Event Handler
-        detect_btn.click(
             fn=process_image,
-            inputs=[
-                input_img,
-                model_dropdown,
-                conf_threshold,
-                iou_threshold,
-                nms_method,
-                alpha_slider,
-                show_labels_checkbox
-            ],
             outputs=[output_img, detection_info]
         )

 current_model = None  # model object currently loaded; None until a load succeeds
 current_processor = None  # processor used by process_image for pre- and post-processing
 current_model_name = None  # name of the loaded model, compared against the requested one to skip reloads
+cached_results = None  # Stores the latest results so labels can be toggled without reprocessing
 def colormap(N=256, normalized=False):
     """Generate dynamic colormap."""
     return torch.tensor(keep, dtype=torch.long)
+def load_model_if_needed(model_name):
+    """Load the selected model if not already loaded."""
     global current_model, current_processor, current_model_name
+    if current_model_name == model_name and current_model is not None:
+        return True
     try:
         model_info = MODELS[model_name]
         current_model = model
         current_model_name = model_name
+        return True
     except Exception as e:
         print(f"Error loading model: {e}")
+        return False
 def visualize_bbox(image_input, bboxes, classes, scores, id_to_names, alpha=0.3, show_labels=True):
     """Visualize bounding boxes with OpenCV."""
     return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+def toggle_labels_visualization(show_labels, alpha):
+    """Toggle labels without reprocessing the image."""
+    global cached_results  # module-level cache filled by process_image; only read here. NOTE(review): a single global is presumably shared by all sessions of the app - confirm whether per-session state is needed
+    if cached_results is None:  # nothing analyzed yet, or the last run cleared the cache (no detections / error)
+        return None, "⚠️ No cached results. Please analyze an image first."
+    input_img, boxes, labels, scores = cached_results  # same tuple order that process_image stores
+    output = visualize_bbox(input_img, boxes, labels, scores, classes_map, alpha=alpha, show_labels=show_labels)  # redraw only; no model call happens here
+    labels_status = "with labels" if show_labels else "without labels"
+    info = f"✅ Visualization updated ({labels_status}) | {len(boxes)} detections"
+    return output, info  # (image, status text) pair, matching the two outputs wired to the checkbox and slider events
 def process_image(input_img, model_name, conf_threshold, iou_threshold, nms_method, alpha, show_labels):
     """Process image with document layout detection."""
+    global cached_results
     if input_img is None:
         return None, "❌ Please upload an image first."
     # Load model if needed
+    if not load_model_if_needed(model_name):
+        return None, f"❌ Failed to load model {model_name}."
     try:
         # Prepare image
             input_img = input_img.convert('RGB')
         # Process with model
+        inputs = current_processor(images=[input_img], return_tensors="pt")
         inputs = {k: v.to(device) for k, v in inputs.items()}
         with torch.no_grad():
+            outputs = current_model(**inputs)
         # Post-process results
+        results = current_processor.post_process_object_detection(
             outputs,
             target_sizes=torch.tensor([input_img.size[::-1]]),
             threshold=conf_threshold,
         )
         if not results or len(results) == 0:
+            cached_results = None
             return np.array(input_img), "ℹ️ No detections found."
         result = results[0]
         labels = result["labels"]
         if len(boxes) == 0:
+            cached_results = None
             return np.array(input_img), f"ℹ️ No detections above threshold {conf_threshold:.2f}."
         # Apply NMS
             if nms_method == "Custom IoMin":
                 keep_indices = nms_custom(boxes=boxes, scores=scores, iou_threshold=iou_threshold)
             else:
                 keep_indices = torchvision.ops.nms(boxes, scores, iou_threshold)
             boxes = boxes[keep_indices]
             scores = scores[keep_indices]
             labels = labels[keep_indices]
+        # Cache results for label toggling
+        cached_results = (input_img, boxes, labels, scores)
         # Visualize results
         output = visualize_bbox(input_img, boxes, labels, scores, classes_map, alpha=alpha, show_labels=show_labels)
         labels_status = "with labels" if show_labels else "without labels"
+        info = f"✅ Found {len(boxes)} detections ({labels_status}) | Model: {model_name} | NMS: {nms_method} | Conf: {conf_threshold:.2f}"
         return output, info
     except Exception as e:
         print(f"[ERROR] process_image failed: {e}")
+        cached_results = None
         error_msg = f"❌ Processing error: {str(e)}"
         if input_img is not None:
             return np.array(input_img), error_msg
     print(f"📱 Device: {device}")
     print(f"🤖 Available models: {len(MODELS)}")
+    # Custom CSS for clean layout
     custom_css = """
     .gradio-container {
+        max-width: 100% !important;
+        padding: 15px !important;
     }
+    .control-panel {
         background: #f8f9fa;
         border-radius: 12px;
+        border: 1px solid #e9ecef;
         padding: 20px;
+        margin-bottom: 15px;
     }
+    .results-panel {
+        background: #f8f9fa;
         border-radius: 12px;
+        border: 1px solid #e9ecef;
         padding: 20px;
+        min-height: 600px;
     }
     """
         # Header
         gr.HTML("""
+        <div style='text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 12px; margin-bottom: 20px;'>
+            <h1 style='margin: 0; font-size: 2.5em;'>🔍 Document Layout Analysis</h1>
+            <p style='margin: 8px 0 0 0; font-size: 1.1em; opacity: 0.9;'>Advanced document structure detection</p>
         </div>
         """)
+        # Main content in two columns
+        with gr.Row():
+            # LEFT COLUMN - Controls (more compact)
+            with gr.Column(scale=1):
+                with gr.Group(elem_classes=["control-panel"]):
+                    # 1. Image Upload (first)
+                    gr.HTML("<h3>📄 Upload Image</h3>")
+                    input_img = gr.Image(
+                        label="Document Image",
+                        type="pil",
+                        height=300,
+                        interactive=True
                     )
+                    gr.HTML("<br><h3>🤖 Model Selection</h3>")
+                    # 2. Model Selection (second, without buttons)
+                    model_dropdown = gr.Dropdown(
+                        choices=list(MODELS.keys()),
+                        value="Egret XLarge",
+                        label="AI Model",
+                        info="Model will be loaded automatically",
+                        interactive=True
                     )
+                    gr.HTML("<br><h3>⚙️ Parameters</h3>")
+                    # 3. All parameters together (third)
+                    with gr.Row():
+                        conf_threshold = gr.Slider(
+                            minimum=0.0, maximum=1.0, value=0.6, step=0.05,
+                            label="Confidence", info="Detection threshold"
+                        )
+                        iou_threshold = gr.Slider(
+                            minimum=0.0, maximum=1.0, value=0.5, step=0.05,
+                            label="NMS IoU", info="Suppression threshold"
+                        )
+                    with gr.Row():
+                        nms_method = gr.Radio(
+                            choices=["Custom IoMin", "Standard IoU"],
+                            value="Custom IoMin",
+                            label="NMS Method", scale=2
+                        )
+                        alpha_slider = gr.Slider(
+                            minimum=0.0, maximum=1.0, value=0.3, step=0.1,
+                            label="Transparency", scale=1
+                        )
+                    gr.HTML("<br>")
+                    # 4. Analyze button (last)
+                    analyze_btn = gr.Button("🔍 Analyze Document", variant="primary", size="lg")
+            # RIGHT COLUMN - Results
+            with gr.Column(scale=1):
+                with gr.Group(elem_classes=["results-panel"]):
+                    gr.HTML("<h3>🎯 Analysis Results</h3>")
+                    output_img = gr.Image(
+                        label="Detected Layout",
+                        type="numpy",
+                        height=450,
+                        interactive=False
                     )
+                    detection_info = gr.Textbox(
+                        label="Detection Summary",
+                        value="",
+                        interactive=False,
+                        lines=2,
+                        placeholder="Results will appear here..."
+                    )
+                    # Labels toggle (independent control)
+                    gr.HTML("<h4>🎨 Visualization</h4>")
+                    show_labels_checkbox = gr.Checkbox(
+                        value=True,
+                        label="Show Class Labels",
+                        info="Toggle labels without reprocessing",
+                        interactive=True
                     )
+        # Event Handlers
+        # Main analysis (full processing)
+        analyze_btn.click(
             fn=process_image,
+            inputs=[input_img, model_dropdown, conf_threshold, iou_threshold, nms_method, alpha_slider, show_labels_checkbox],
+            outputs=[output_img, detection_info]
+        )
+        # Independent label toggle (no reprocessing)
+        show_labels_checkbox.change(
+            fn=toggle_labels_visualization,
+            inputs=[show_labels_checkbox, alpha_slider],
+            outputs=[output_img, detection_info]
+        )
+        # Also update visualization when transparency changes (if we have cached results)
+        alpha_slider.change(
+            fn=toggle_labels_visualization,
+            inputs=[show_labels_checkbox, alpha_slider],
             outputs=[output_img, detection_info]
         )