Godreign committed · verified
Commit b7be9e3 · 1 Parent(s): 275c8e5

Update app.py

Files changed (1): app.py (+121 -144)
app.py CHANGED
@@ -10,36 +10,31 @@ import torchvision.transforms as T
 import urllib.request
 import json
 import cv2

-# ---------------------------
 # Model Configs
-# ---------------------------
 MODEL_CONFIGS = {
-    "DeiT-Tiny": {"type": "hf", "id": "facebook/deit-tiny-patch16-224"},
-    "DeiT-Small": {"type": "hf", "id": "facebook/deit-small-patch16-224"},
-    "ViT-Base": {"type": "hf", "id": "google/vit-base-patch16-224"},
-    "ConvNeXt-Tiny": {"type": "timm", "id": "convnext_tiny"},
-    "ConvNeXt-Nano": {"type": "timm", "id": "convnext_nano"},
-    "EfficientNet-B0": {"type": "efficientnet", "id": "efficientnet-b0"},
-    "EfficientNet-B1": {"type": "efficientnet", "id": "efficientnet-b1"},
-    "ResNet-50": {"type": "timm", "id": "resnet50"},
-    "MobileNet-V2": {"type": "timm", "id": "mobilenetv2_100"},
-    "MaxViT-Tiny": {"type": "timm", "id": "maxvit_tiny_tf_224"},
-    "MobileViT-Small": {"type": "timm", "id": "mobilevit_s"},
-    "EdgeNeXt-Small": {"type": "timm", "id": "edgenext_small"},
-    "RegNetY-002": {"type": "timm", "id": "regnety_002"}
 }

-# ---------------------------
 # ImageNet Labels
-# ---------------------------
 IMAGENET_URL = "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json"
 with urllib.request.urlopen(IMAGENET_URL) as url:
     IMAGENET_LABELS = json.load(url)

-# ---------------------------
 # Lazy Load
-# ---------------------------
 loaded_models = {}

 def load_model(model_name):
@@ -51,20 +46,17 @@ def load_model(model_name):
         extractor = AutoFeatureExtractor.from_pretrained(config["id"])
         model = AutoModelForImageClassification.from_pretrained(config["id"], output_attentions=True)
         model.eval()
-        # Enable gradients for class-specific attention
         for param in model.parameters():
             param.requires_grad = True
     elif config["type"] == "timm":
         model = timm.create_model(config["id"], pretrained=True)
         model.eval()
-        # Enable gradients for class-specific attention
         for param in model.parameters():
             param.requires_grad = True
         extractor = None
     elif config["type"] == "efficientnet":
         model = EfficientNet.from_pretrained(config["id"])
         model.eval()
-        # Enable gradients for class-specific attention
         for param in model.parameters():
             param.requires_grad = True
         extractor = None
@@ -72,21 +64,14 @@ def load_model(model_name):
     loaded_models[model_name] = (model, extractor)
     return model, extractor

-
-# ---------------------------
 # Adversarial Noise
-# ---------------------------
 def add_adversarial_noise(image, epsilon):
-    """Add random noise to image"""
     img_array = np.array(image).astype(np.float32) / 255.0
     noise = np.random.randn(*img_array.shape) * epsilon
     noisy_img = np.clip(img_array + noise, 0, 1)
     return Image.fromarray((noisy_img * 255).astype(np.uint8))

-
-# ---------------------------
 # Grad-CAM for Class-Specific Attention
-# ---------------------------
 def get_gradcam_for_class(model, image_tensor, class_idx):
     grad = None
     fmap = None
@@ -99,7 +84,6 @@ def get_gradcam_for_class(model, image_tensor, class_idx):
         nonlocal grad
         grad = grad_out[0].detach()

-    # Find last conv layer
     last_conv = None
     for name, module in reversed(list(model.named_modules())):
         if isinstance(module, torch.nn.Conv2d):
@@ -128,15 +112,10 @@ def get_gradcam_for_class(model, image_tensor, class_idx):
     cam = cam.squeeze().cpu().numpy()
     cam = cv2.resize(cam, (224, 224))
     cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
-
     return cam

-
-# ---------------------------
 # ViT Attention for Class-Specific
-# ---------------------------
 def vit_attention_for_class(model, extractor, image, class_idx):
-    """Get attention map for specific class in ViT"""
     inputs = extractor(images=image, return_tensors="pt")
     inputs['pixel_values'].requires_grad = True
     outputs = model(**inputs)
@@ -145,7 +124,6 @@ def vit_attention_for_class(model, extractor, image, class_idx):
     model.zero_grad()
     score.backward()

-    # Use last layer attention
     if hasattr(outputs, 'attentions') and outputs.attentions is not None:
         attn = outputs.attentions[-1]
         attn = attn.mean(1)
@@ -157,10 +135,7 @@ def vit_attention_for_class(model, extractor, image, class_idx):

     return np.ones((14, 14))

-
-# ---------------------------
 # Grad-CAM Helper for CNNs (Top Prediction)
-# ---------------------------
 def get_gradcam(model, image_tensor):
     grad = None
     fmap = None
@@ -202,13 +177,9 @@ def get_gradcam(model, image_tensor):
     cam = cam.squeeze().cpu().numpy()
     cam = cv2.resize(cam, (224, 224))
     cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
-
     return cam

-
-# ---------------------------
 # ViT Attention Rollout Helper
-# ---------------------------
 def vit_attention_rollout(outputs):
     if not hasattr(outputs, 'attentions') or outputs.attentions is None:
         return np.ones((14, 14))
@@ -221,18 +192,12 @@ def vit_attention_rollout(outputs):
     attn_map = (attn_map - attn_map.min()) / (attn_map.max() - attn_map.min() + 1e-8)
     return attn_map

-
-# ---------------------------
 # Create Gradient Legend
-# ---------------------------
 def create_gradient_legend():
-    """Create a gradient legend image showing attention scale"""
     width, height = 400, 60
     gradient = np.zeros((height, width, 3), dtype=np.uint8)

-    # Create gradient from blue to red (matching COLORMAP_JET)
     for i in range(width):
-        # OpenCV's COLORMAP_JET: blue (low) -> cyan -> green -> yellow -> red (high)
         value = int(255 * i / width)
         color_single = np.array([[[value]]], dtype=np.uint8)
         color_rgb = cv2.applyColorMap(color_single, cv2.COLORMAP_JET)
@@ -240,22 +205,20 @@

     gradient = cv2.cvtColor(gradient, cv2.COLOR_BGR2RGB)

-    # Convert to PIL and add text
     from PIL import ImageDraw, ImageFont
     gradient_pil = Image.fromarray(gradient)
     draw = ImageDraw.Draw(gradient_pil)

-    # Use default font
     try:
         font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 14)
     except:
         font = ImageFont.load_default()

-    # Add text labels
     draw.text((10, 20), "Low Attention", fill=(255, 255, 255), font=font)
     draw.text((width - 120, 20), "High Attention", fill=(255, 255, 255), font=font)

     return gradient_pil
 def overlay_attention(pil_img, attention_map):
     heatmap = (attention_map * 255).astype(np.uint8)
     heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
@@ -265,10 +228,7 @@ def overlay_attention(pil_img, attention_map):
     blended = Image.blend(pil_img.convert("RGB"), heatmap_pil, alpha=0.4)
     return blended

-
-# ---------------------------
 # Main Prediction Function
-# ---------------------------
 def predict(image, model_name, noise_level):
     try:
         if image is None:
@@ -277,7 +237,6 @@ def predict(image, model_name, noise_level):
         if model_name is None:
             return {"Error": "Please select a model"}, None, None

-        # Apply adversarial noise if requested
         if noise_level > 0:
             image = add_adversarial_noise(image, noise_level)
@@ -285,8 +244,7 @@
         transform = T.Compose([
             T.Resize((224, 224)),
             T.ToTensor(),
-            T.Normalize(mean=[0.485, 0.456, 0.406],
-                        std=[0.229, 0.224, 0.225])
         ])

         if MODEL_CONFIGS[model_name]["type"] == "hf":
@@ -320,10 +278,7 @@
         print(error_msg)
         return {"Error": str(e)}, None, None

-
-# ---------------------------
 # Class-Specific Attention
-# ---------------------------
 def get_class_specific_attention(image, model_name, class_query):
     try:
         if image is None:
@@ -332,7 +287,6 @@ def get_class_specific_attention(image, model_name, class_query):
         if not class_query or class_query.strip() == "":
            return None, None, "Please enter a class name"

-        # Find matching class
         class_query_lower = class_query.lower().strip()
         matching_idx = None
         matched_label = None
@@ -340,7 +294,6 @@ def get_class_specific_attention(image, model_name, class_query):
         model, extractor = load_model(model_name)

         if MODEL_CONFIGS[model_name]["type"] == "hf":
-            # Search in HF model labels
             for idx, label in model.config.id2label.items():
                 if class_query_lower in label.lower():
                     matching_idx = idx
@@ -350,11 +303,9 @@ def get_class_specific_attention(image, model_name, class_query):
             if matching_idx is None:
                 return None, None, f"Class '{class_query}' not found in model labels. Try a different class name or check sample classes."

-            # Get attention for this class
             att_map = vit_attention_for_class(model, extractor, image, matching_idx)

         else:
-            # Search in ImageNet labels
             for idx, label in enumerate(IMAGENET_LABELS):
                 if class_query_lower in label.lower():
                     matching_idx = idx
@@ -364,12 +315,10 @@ def get_class_specific_attention(image, model_name, class_query):
             if matching_idx is None:
                 return None, None, f"Class '{class_query}' not found in ImageNet labels. Try a different class name or check sample classes."

-            # Get Grad-CAM for this class
             transform = T.Compose([
                 T.Resize((224, 224)),
                 T.ToTensor(),
-                T.Normalize(mean=[0.485, 0.456, 0.406],
-                            std=[0.229, 0.224, 0.225])
             ])
             x = transform(image).unsqueeze(0)
             x.requires_grad = True
@@ -385,10 +334,7 @@ def get_class_specific_attention(image, model_name, class_query):
         print(error_trace)
         return None, None, f"Error generating attention map: {str(e)}"

-
-# ---------------------------
 # Sample Classes
-# ---------------------------
 SAMPLE_CLASSES = [
     "cat", "dog", "tiger", "lion", "elephant",
     "car", "truck", "airplane", "ship", "train",
@@ -397,91 +343,122 @@ SAMPLE_CLASSES = [
     "person", "bicycle", "building", "tree", "flower"
 ]

-# ---------------------------
-# Gradio UI
-# ---------------------------
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🧠 Enhanced Multi-Model Image Classifier")
-    gr.Markdown("### Features: Adversarial Examples | Class-Specific Attention | 13+ Models")
-
-    with gr.Row():
-        with gr.Column(scale=1):
-            input_image = gr.Image(type="pil", label="📸 Upload Image")
-            model_dropdown = gr.Dropdown(
-                choices=list(MODEL_CONFIGS.keys()),
-                label="🤖 Select Model",
-                value="DeiT-Tiny"
-            )
-
-            gr.Markdown("### 🎭 Adversarial Noise")
-            noise_slider = gr.Slider(
-                minimum=0,
-                maximum=0.3,
-                value=0,
-                step=0.01,
-                label="Noise Level (ε)",
-                info="Add random noise to test model robustness"
-            )
-
-            run_button = gr.Button("🚀 Run Model", variant="primary")
-
-        with gr.Column(scale=2):
-            output_label = gr.Label(num_top_classes=5, label="🎯 Top 5 Predictions")
-            output_image = gr.Image(label="🔍 Attention Map (Top Prediction)")
-            processed_image = gr.Image(label="🖼️ Processed Image (with noise if applied)", visible=False)
-
-    gr.Markdown("---")
-    gr.Markdown("### 🎨 Class-Specific Attention Visualization")
-    gr.Markdown("*Type any class name to see where the model looks for that specific object*")
-
-    with gr.Row():
-        with gr.Column(scale=1):
-            class_input = gr.Textbox(
-                label="🔍 Enter Class Name",
-                placeholder="e.g., cat, dog, car, pizza...",
-                info="Type any ImageNet class name"
-            )
-            class_button = gr.Button("🎯 Generate Class-Specific Attention", variant="primary")
-            gr.Markdown("**💡 Sample classes to try:**")
-            sample_buttons = gr.Radio(
-                choices=SAMPLE_CLASSES,
-                label="Click to auto-fill",
-                interactive=True
-            )
-
-        with gr.Column(scale=2):
-            class_output_image = gr.Image(label="🔍 Class-Specific Attention Map")
-            class_status = gr.Textbox(label="Status", interactive=False)
-
-    gr.Markdown("---")
     gr.Markdown("""
-    ### 💡 Tips:
-    - **Adversarial Noise**: Adjust the slider to add random noise and see how robust the model is
-    - **Class-Specific Attention**: Type any ImageNet class to visualize what the model looks for (e.g., "tiger", "sports car", "pizza")
-    - **Model Variety**: Try different architectures (ViT, CNN, Hybrid) to compare their behavior
-    """)
-
-    # Event handlers
     run_button.click(
-        predict,
         inputs=[input_image, model_dropdown, noise_slider],
-        outputs=[output_label, output_image, processed_image]
     )
-
-    # When user selects a sample class, update the text input
     sample_buttons.change(
-        lambda x: x,
         inputs=[sample_buttons],
         outputs=[class_input]
    )
-
-    # Generate attention map
     class_button.click(
-        get_class_specific_attention,
         inputs=[input_image, model_dropdown, class_input],
-        outputs=[class_output_image, gradient_legend, class_status]
     )

 if __name__ == "__main__":
     demo.launch()
 
app.py (updated)
 import urllib.request
 import json
 import cv2
+import uuid

 # Model Configs
 MODEL_CONFIGS = {
+    "DeiT-Tiny": {"type": "hf", "id": "facebook/deit-tiny-patch16-224", "desc": "Lightweight Vision Transformer"},
+    "DeiT-Small": {"type": "hf", "id": "facebook/deit-small-patch16-224", "desc": "Small Vision Transformer"},
+    "ViT-Base": {"type": "hf", "id": "google/vit-base-patch16-224", "desc": "Base Vision Transformer"},
+    "ConvNeXt-Tiny": {"type": "timm", "id": "convnext_tiny", "desc": "Tiny ConvNeXt CNN"},
+    "ConvNeXt-Nano": {"type": "timm", "id": "convnext_nano", "desc": "Nano ConvNeXt CNN"},
+    "EfficientNet-B0": {"type": "efficientnet", "id": "efficientnet-b0", "desc": "EfficientNet B0"},
+    "EfficientNet-B1": {"type": "efficientnet", "id": "efficientnet-b1", "desc": "EfficientNet B1"},
+    "ResNet-50": {"type": "timm", "id": "resnet50", "desc": "Classic ResNet-50 CNN"},
+    "MobileNet-V2": {"type": "timm", "id": "mobilenetv2_100", "desc": "Lightweight MobileNet-V2"},
+    "MaxViT-Tiny": {"type": "timm", "id": "maxvit_tiny_tf_224", "desc": "Tiny MaxViT Hybrid"},
+    "MobileViT-Small": {"type": "timm", "id": "mobilevit_s", "desc": "Small MobileViT"},
+    "EdgeNeXt-Small": {"type": "timm", "id": "edgenext_small", "desc": "Small EdgeNeXt"},
+    "RegNetY-002": {"type": "timm", "id": "regnety_002", "desc": "RegNetY-002 CNN"}
 }
 
 
32
  # ImageNet Labels
 
33
  IMAGENET_URL = "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json"
34
  with urllib.request.urlopen(IMAGENET_URL) as url:
35
  IMAGENET_LABELS = json.load(url)
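The label list is fetched over the network at import time, so the Space cannot start offline. A defensive variant would time-bound the request and fall back to placeholder labels; the timeout and the fallback below are illustrative assumptions, not part of this commit:

```python
# Sketch only: hardening the label fetch above. The timeout value and the
# numeric fallback labels are assumptions, not part of this commit.
import json
import urllib.request

IMAGENET_URL = "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json"
try:
    with urllib.request.urlopen(IMAGENET_URL, timeout=10) as url:
        IMAGENET_LABELS = json.load(url)
except OSError:
    # Network failure at startup: degrade to index-only labels.
    IMAGENET_LABELS = [f"class_{i}" for i in range(1000)]
```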

 # Lazy Load
 loaded_models = {}

 def load_model(model_name):

         extractor = AutoFeatureExtractor.from_pretrained(config["id"])
         model = AutoModelForImageClassification.from_pretrained(config["id"], output_attentions=True)
         model.eval()
         for param in model.parameters():
             param.requires_grad = True
     elif config["type"] == "timm":
         model = timm.create_model(config["id"], pretrained=True)
         model.eval()
         for param in model.parameters():
             param.requires_grad = True
         extractor = None
     elif config["type"] == "efficientnet":
         model = EfficientNet.from_pretrained(config["id"])
         model.eval()
         for param in model.parameters():
             param.requires_grad = True
         extractor = None

     loaded_models[model_name] = (model, extractor)
     return model, extractor
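Since load_model memoizes into loaded_models, repeated selections of the same model are served from the cache rather than re-downloaded. A quick check (hypothetical REPL usage of the function above):

```python
# Hypothetical usage of load_model's cache: the second call must not
# re-download, because loaded_models memoizes the (model, extractor) pair.
model_a, extractor_a = load_model("DeiT-Tiny")
model_b, extractor_b = load_model("DeiT-Tiny")
assert model_a is model_b            # same object, served from loaded_models
assert "DeiT-Tiny" in loaded_models
```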

 # Adversarial Noise
 def add_adversarial_noise(image, epsilon):
     img_array = np.array(image).astype(np.float32) / 255.0
     noise = np.random.randn(*img_array.shape) * epsilon
     noisy_img = np.clip(img_array + noise, 0, 1)
     return Image.fromarray((noisy_img * 255).astype(np.uint8))
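Because epsilon scales zero-mean Gaussian noise in [0, 1] pixel space, sweeping the UI slider's range makes the degradation directly visible. A sketch; the input path is a placeholder, not a file shipped with the Space:

```python
# Sketch: sweep the slider's 0..0.3 range over one image. "sample.jpg"
# is a placeholder path.
from PIL import Image

image = Image.open("sample.jpg").convert("RGB")
for eps in (0.0, 0.1, 0.2, 0.3):            # matches the slider's range
    noisy = add_adversarial_noise(image, eps)
    noisy.save(f"noisy_eps_{eps:.2f}.png")   # inspect how structure degrades
```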

 # Grad-CAM for Class-Specific Attention
 def get_gradcam_for_class(model, image_tensor, class_idx):
     grad = None
     fmap = None

         nonlocal grad
         grad = grad_out[0].detach()

     last_conv = None
     for name, module in reversed(list(model.named_modules())):
         if isinstance(module, torch.nn.Conv2d):

     cam = cam.squeeze().cpu().numpy()
     cam = cv2.resize(cam, (224, 224))
     cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
     return cam
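The hunks above only show fragments of get_gradcam_for_class (the hook state and the final normalization); the elided middle follows the standard Grad-CAM recipe: hook the last Conv2d, backpropagate the target class score, pool the gradients into channel weights. A self-contained sketch of that recipe, not the committed body:

```python
import torch
import torch.nn.functional as F

def gradcam_sketch(model, image_tensor, class_idx):
    """Minimal Grad-CAM over the last Conv2d; a sketch of the recipe the
    committed function follows, not a drop-in replacement for it."""
    store = {}
    last_conv = None
    for _, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            last_conv = module                       # ends on the last conv layer
    h1 = last_conv.register_forward_hook(
        lambda m, inp, out: store.__setitem__("fmap", out))
    h2 = last_conv.register_full_backward_hook(
        lambda m, gin, gout: store.__setitem__("grad", gout[0].detach()))
    logits = model(image_tensor)
    model.zero_grad()
    logits[0, class_idx].backward()                  # class-specific gradients
    h1.remove(); h2.remove()
    weights = store["grad"].mean(dim=(2, 3), keepdim=True)  # GAP over H, W
    cam = F.relu((weights * store["fmap"]).sum(dim=1))      # weighted channel sum
    cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
    return cam.squeeze().detach().cpu().numpy()
```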

 # ViT Attention for Class-Specific
 def vit_attention_for_class(model, extractor, image, class_idx):
     inputs = extractor(images=image, return_tensors="pt")
     inputs['pixel_values'].requires_grad = True
     outputs = model(**inputs)

     model.zero_grad()
     score.backward()

     if hasattr(outputs, 'attentions') and outputs.attentions is not None:
         attn = outputs.attentions[-1]
         attn = attn.mean(1)

     return np.ones((14, 14))

 # Grad-CAM Helper for CNNs (Top Prediction)
 def get_gradcam(model, image_tensor):
     grad = None
     fmap = None

     cam = cam.squeeze().cpu().numpy()
     cam = cv2.resize(cam, (224, 224))
     cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
     return cam

 # ViT Attention Rollout Helper
 def vit_attention_rollout(outputs):
     if not hasattr(outputs, 'attentions') or outputs.attentions is None:
         return np.ones((14, 14))

     attn_map = (attn_map - attn_map.min()) / (attn_map.max() - attn_map.min() + 1e-8)
     return attn_map
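Most of vit_attention_rollout's body is elided above; for orientation, the textbook rollout it is named after averages heads, adds the identity for the residual path, renormalizes, and multiplies across layers. A sketch under the file's 224-px / 14×14-patch assumption, not the committed body:

```python
import torch

def attention_rollout_sketch(attentions):
    """Textbook attention rollout: head-average, add identity for the
    residual connection, renormalize rows, multiply across layers."""
    tokens = attentions[0].size(-1)
    joint = torch.eye(tokens)
    for layer_attn in attentions:                    # each: (batch, heads, T, T)
        a = layer_attn[0].detach().mean(dim=0)       # first image, head-averaged
        a = a + torch.eye(tokens)                    # residual connection
        a = a / a.sum(dim=-1, keepdim=True)          # keep rows as distributions
        joint = a @ joint
    cls_attn = joint[0, 1:]                          # CLS row, minus the CLS column
    grid = cls_attn.reshape(14, 14)                  # assumes a 14x14 patch grid
    return ((grid - grid.min()) / (grid.max() - grid.min() + 1e-8)).numpy()
```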

 # Create Gradient Legend
 def create_gradient_legend():
     width, height = 400, 60
     gradient = np.zeros((height, width, 3), dtype=np.uint8)

     for i in range(width):
         value = int(255 * i / width)
         color_single = np.array([[[value]]], dtype=np.uint8)
         color_rgb = cv2.applyColorMap(color_single, cv2.COLORMAP_JET)

     gradient = cv2.cvtColor(gradient, cv2.COLOR_BGR2RGB)

     from PIL import ImageDraw, ImageFont
     gradient_pil = Image.fromarray(gradient)
     draw = ImageDraw.Draw(gradient_pil)

     try:
         font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 14)
     except:
         font = ImageFont.load_default()

     draw.text((10, 20), "Low Attention", fill=(255, 255, 255), font=font)
     draw.text((width - 120, 20), "High Attention", fill=(255, 255, 255), font=font)

     return gradient_pil
+
 def overlay_attention(pil_img, attention_map):
     heatmap = (attention_map * 255).astype(np.uint8)
     heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)

     blended = Image.blend(pil_img.convert("RGB"), heatmap_pil, alpha=0.4)
     return blended
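With the helpers above in place, the CNN path can be exercised outside Gradio. A sketch: the file path, the pre-resize to 224×224 (so the blend sizes line up), and the requires_grad flag mirror what the elided code appears to do, but all three are assumptions:

```python
# Sketch: run the CNN path end to end without the UI, using the same
# ImageNet normalization that predict() applies.
import torchvision.transforms as T
from PIL import Image

image = Image.open("sample.jpg").convert("RGB").resize((224, 224))  # placeholder path
model, _ = load_model("ResNet-50")
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
x = transform(image).unsqueeze(0)
x.requires_grad_(True)                             # mirrors the class-specific path
cam = get_gradcam(model, x)                        # 224x224 map in [0, 1]
overlay_attention(image, cam).save("overlay.png")  # JET heatmap at alpha=0.4
create_gradient_legend().save("legend.png")        # matching color scale
```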

 # Main Prediction Function
 def predict(image, model_name, noise_level):
     try:
         if image is None:

         if model_name is None:
             return {"Error": "Please select a model"}, None, None

         if noise_level > 0:
             image = add_adversarial_noise(image, noise_level)

         transform = T.Compose([
             T.Resize((224, 224)),
             T.ToTensor(),
+            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
         ])

         if MODEL_CONFIGS[model_name]["type"] == "hf":

         print(error_msg)
         return {"Error": str(e)}, None, None
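Only predict's guards, noise step, transform, and error path survive the elision above. Assuming the success path returns the label dict, attention overlay, and processed image that run_button wires up below (an inference, not shown in the hunks), a headless smoke test looks like:

```python
# Sketch: exercise predict() without the UI. The success-path return shape
# is inferred from the outputs wired to run_button below.
from PIL import Image

image = Image.open("sample.jpg").convert("RGB")   # placeholder path
preds, attention_vis, processed = predict(image, "ResNet-50", noise_level=0.1)
if "Error" not in preds:
    for label, prob in sorted(preds.items(), key=lambda kv: kv[1], reverse=True)[:5]:
        print(f"{label}: {prob:.3f}")
```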

 # Class-Specific Attention
 def get_class_specific_attention(image, model_name, class_query):
     try:
         if image is None:

         if not class_query or class_query.strip() == "":
             return None, None, "Please enter a class name"

         class_query_lower = class_query.lower().strip()
         matching_idx = None
         matched_label = None

         model, extractor = load_model(model_name)

         if MODEL_CONFIGS[model_name]["type"] == "hf":
             for idx, label in model.config.id2label.items():
                 if class_query_lower in label.lower():
                     matching_idx = idx

             if matching_idx is None:
                 return None, None, f"Class '{class_query}' not found in model labels. Try a different class name or check sample classes."

             att_map = vit_attention_for_class(model, extractor, image, matching_idx)

         else:
             for idx, label in enumerate(IMAGENET_LABELS):
                 if class_query_lower in label.lower():
                     matching_idx = idx

             if matching_idx is None:
                 return None, None, f"Class '{class_query}' not found in ImageNet labels. Try a different class name or check sample classes."

             transform = T.Compose([
                 T.Resize((224, 224)),
                 T.ToTensor(),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
             ])
             x = transform(image).unsqueeze(0)
             x.requires_grad = True

         print(error_trace)
         return None, None, f"Error generating attention map: {str(e)}"
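Factored out of the two branches above, the substring lookup is just a scan over a label list. A standalone sketch; the committed loops may differ in tie-breaking, since their tails are elided from the diff:

```python
# Standalone sketch of the substring lookup used in both branches above.
def find_label_index(query: str, labels) -> int | None:
    q = query.lower().strip()
    for idx, label in enumerate(labels):
        if q in label.lower():
            return idx            # first match wins in this sketch
    return None

# e.g. find_label_index("tiger", IMAGENET_LABELS) -> the index of "tiger"
```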

 # Sample Classes
 SAMPLE_CLASSES = [
     "cat", "dog", "tiger", "lion", "elephant",
     "car", "truck", "airplane", "ship", "train",

     "person", "bicycle", "building", "tree", "flower"
 ]

+# Improved Gradio UI
+with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="gray", font=["Inter", "sans-serif"])) as demo:
+    gr.Markdown("""
+    # 🧠 Advanced Image Classification Studio
+    Explore state-of-the-art image classification with multiple models, adversarial testing, and attention visualization.
+    """, elem_classes=["header-text"])
+
+    with gr.Tabs() as tabs:
+        with gr.TabItem("🔍 Predict & Analyze"):
+            with gr.Row(variant="panel"):
+                with gr.Column(scale=1, min_width=300):
+                    gr.Markdown("### 📷 Input")
+                    input_image = gr.Image(type="pil", label="Upload Image", height=300, interactive=True, tool="editor")
+                    model_dropdown = gr.Dropdown(
+                        choices=[f"{name} - {MODEL_CONFIGS[name]['desc']}" for name in MODEL_CONFIGS.keys()],
+                        label="Select Model",
+                        value="DeiT-Tiny - Lightweight Vision Transformer",
+                        interactive=True,
+                        info="Choose from various architectures (Transformers, CNNs, Hybrids)"
+                    )
+                    with gr.Group():
+                        gr.Markdown("### 🎭 Adversarial Testing")
+                        noise_slider = gr.Slider(
+                            minimum=0, maximum=0.3, value=0, step=0.01,
+                            label="Noise Level (ε)",
+                            info="Add noise to test model robustness",
+                            interactive=True
+                        )
+                    run_button = gr.Button("🚀 Run Prediction", variant="primary", scale=0)
+
+                with gr.Column(scale=2):
+                    gr.Markdown("### 📊 Results")
+                    output_label = gr.Label(num_top_classes=5, label="Top 5 Predictions", show_label=True)
+                    with gr.Row():
+                        output_image = gr.Image(label="Attention Map (Top Prediction)", height=300)
+                        processed_image = gr.Image(label="Processed Image (with noise)", height=300, visible=False)
+
+        with gr.TabItem("🎨 Class-Specific Attention"):
+            gr.Markdown("### Visualize Model Attention for Specific Classes")
+            with gr.Row(variant="panel"):
+                with gr.Column(scale=1, min_width=300):
+                    class_input = gr.Textbox(
+                        label="Enter Class Name",
+                        placeholder="e.g., cat, dog, car, pizza...",
+                        info="Type any ImageNet class name",
+                        interactive=True
+                    )
+                    class_button = gr.Button("🎯 Generate Attention Map", variant="primary")
+                    with gr.Accordion("💡 Sample Classes", open=False):
+                        sample_buttons = gr.CheckboxGroup(
+                            choices=SAMPLE_CLASSES,
+                            label="Select or click to auto-fill",
+                            interactive=True
+                        )
+
+                with gr.Column(scale=2):
+                    class_output_image = gr.Image(label="Class-Specific Attention Map", height=300)
+                    gradient_legend = gr.Image(label="Attention Scale", interactive=False)
+                    class_status = gr.Textbox(label="Status", interactive=False, lines=2)
+
+        with gr.TabItem("ℹ️ About Models"):
+            gr.Markdown("""
+            ### Available Models
+            Explore different architectures and their strengths:
+            """)
+            for model_name, config in MODEL_CONFIGS.items():
+                with gr.Accordion(f"{model_name}", open=False):
+                    gr.Markdown(f"- **Type**: {config['type'].upper()}")
+                    gr.Markdown(f"- **Description**: {config['desc']}")
+                    gr.Markdown(f"- **Model ID**: {config['id']}")
     gr.Markdown("""
+    ---
+    ### 💡 How to Use
+    - **Predict & Analyze**: Upload an image, select a model, adjust noise level, and run prediction to see top classes and attention maps.
+    - **Class-Specific Attention**: Enter a class name or select from samples to visualize where the model focuses for that class.
+    - **Adversarial Testing**: Use the noise slider to test model robustness against perturbations.
+    - **Model Info**: Check the 'About Models' tab for details on available architectures.
+    """, elem_classes=["footer-text"])
+
+    # Event Handlers
+    def update_class_input(selected_classes):
+        return selected_classes[0] if selected_classes else ""
+
     run_button.click(
+        fn=predict,
         inputs=[input_image, model_dropdown, noise_slider],
+        outputs=[output_label, output_image, processed_image],
+        show_progress=True
     )
+
     sample_buttons.change(
+        fn=update_class_input,
         inputs=[sample_buttons],
         outputs=[class_input]
     )
+
     class_button.click(
+        fn=get_class_specific_attention,
         inputs=[input_image, model_dropdown, class_input],
+        outputs=[class_output_image, gradient_legend, class_status],
+        show_progress=True
     )
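Note that the new dropdown choices are decorated with descriptions ("DeiT-Tiny - Lightweight Vision Transformer"), while predict and get_class_specific_attention still index MODEL_CONFIGS by bare key, so the handlers above presumably need a shim along these lines; no such helper appears in the hunks shown:

```python
# Sketch: strip the " - description" suffix the dropdown adds, so the
# selected value can index MODEL_CONFIGS again. Hypothetical helper.
def model_key(choice: str) -> str:
    return choice.split(" - ", 1)[0]

assert model_key("DeiT-Tiny - Lightweight Vision Transformer") == "DeiT-Tiny"
```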
 
+    # Add custom CSS for improved styling
+    gr.HTML("""
+    <style>
+    .header-text { font-size: 2rem; font-weight: bold; color: #1E3A8A; margin-bottom: 1rem; }
+    .footer-text { font-size: 0.9rem; color: #4B5563; }
+    .gr-button { transition: all 0.3s ease; }
+    .gr-button:hover { transform: scale(1.05); }
+    .gr-panel { border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); }
+    .gr-image { border-radius: 8px; }
+    .gr-accordion { margin-bottom: 1rem; }
+    </style>
+    """)
+
 if __name__ == "__main__":
     demo.launch()