Spaces:

sagar007
/

SegmentVision

Sleeping

App Files Files Community

sagar007 committed on Mar 25

Commit

eba2946

verified ·

1 Parent(s): 2cfae42

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -70

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import numpy as np
 import random
 import os
 import wget # To download weights
 # --- Configuration & Model Loading ---
@@ -30,39 +31,68 @@ def load_clip_model():
         print(f"CLIP model loaded to {DEVICE}.")
 # --- FastSAM Setup ---
-# Use a smaller model suitable for Spaces CPU/basic GPU if needed
 FASTSAM_CHECKPOINT = "FastSAM-s.pt"
-FASTSAM_CHECKPOINT_URL = f"https://huggingface.co/spaces/An-619/FastSAM/resolve/main/{FASTSAM_CHECKPOINT}" # Example URL, find official if possible
 fastsam_model = None
 def download_fastsam_weights():
     if not os.path.exists(FASTSAM_CHECKPOINT):
-        print(f"Downloading FastSAM weights: {FASTSAM_CHECKPOINT}...")
         try:
             wget.download(FASTSAM_CHECKPOINT_URL, FASTSAM_CHECKPOINT)
             print("FastSAM weights downloaded.")
         except Exception as e:
             print(f"Error downloading FastSAM weights: {e}")
             print("Please ensure the URL is correct and reachable, or manually place the weights file.")
             return False
     return os.path.exists(FASTSAM_CHECKPOINT)
 def load_fastsam_model():
     global fastsam_model
     if fastsam_model is None:
-        if download_fastsam_weights():
             try:
-                from fastsam import FastSAM, FastSAMPrompt # Import here after potential download
                 print(f"Loading FastSAM model: {FASTSAM_CHECKPOINT}...")
                 fastsam_model = FastSAM(FASTSAM_CHECKPOINT)
-                print(f"FastSAM model loaded.") # Device handled internally by FastSAM based on its setup/torch device
-            except ImportError:
-                print("Error: 'fastsam' library not found. Please install it (pip install fastsam).")
             except Exception as e:
                 print(f"Error loading FastSAM model: {e}")
         else:
-            print("FastSAM weights not found. Cannot load model.")
 # --- Processing Functions ---
@@ -74,14 +104,16 @@ def run_clip_zero_shot(image: Image.Image, text_labels: str):
         if clip_model is None:
              return "Error: CLIP Model not loaded. Check logs.", None
-    if not text_labels:
-        return "Please provide comma-separated text labels.", None
     if image is None:
-        return "Please upload an image.", None
-    labels = [label.strip() for label in text_labels.split(',')]
     if not labels:
-         return "No valid labels provided.", None
     print(f"Running CLIP zero-shot classification with labels: {labels}")
@@ -94,28 +126,36 @@ def run_clip_zero_shot(image: Image.Image, text_labels: str):
         with torch.no_grad():
             outputs = clip_model(**inputs)
-            logits_per_image = outputs.logits_per_image  # this is the image-text similarity score
-            probs = logits_per_image.softmax(dim=1)  # convert to probabilities
         print("CLIP processing complete.")
-        # Format output for Gradio Label
         confidences = {labels[i]: float(probs[0, i].item()) for i in range(len(labels))}
-        return confidences, image # Return original image for display alongside results
     except Exception as e:
         print(f"Error during CLIP processing: {e}")
-        return f"An error occurred: {e}", None
 # FastSAM Segmentation Function
 def run_fastsam_segmentation(image_pil: Image.Image, conf_threshold: float = 0.4, iou_threshold: float = 0.9):
     if fastsam_model is None:
-        load_fastsam_model() # Attempt to load if not already loaded
         if fastsam_model is None:
-             return "Error: FastSAM Model not loaded. Check logs.", None
     if image_pil is None:
-        return "Please upload an image.", None
     print("Running FastSAM segmentation...")
@@ -124,63 +164,52 @@ def run_fastsam_segmentation(image_pil: Image.Image, conf_threshold: float = 0.4
         if image_pil.mode != "RGB":
             image_pil = image_pil.convert("RGB")
-        # FastSAM expects a BGR numpy array or path usually. Let's try with RGB numpy.
-        # If it fails, uncomment the BGR conversion line.
         image_np_rgb = np.array(image_pil)
-        # image_np_bgr = image_np_rgb[:, :, ::-1] # Convert RGB to BGR if needed
         # Run FastSAM inference
-        # Adjust imgsz, conf, iou as needed. Higher imgsz = more detail, slower.
         everything_results = fastsam_model(
-            image_np_rgb, # Use image_np_bgr if conversion needed
             device=DEVICE,
             retina_masks=True,
-            imgsz=640, # Smaller size for faster inference on limited hardware
             conf=conf_threshold,
             iou=iou_threshold,
         )
-        # Process results using FastSAMPrompt
-        from fastsam import FastSAMPrompt # Make sure it's imported
         prompt_process = FastSAMPrompt(image_np_rgb, everything_results, device=DEVICE)
-        # Get all annotations (masks)
         ann = prompt_process.everything_prompt()
-        print(f"FastSAM found {len(ann[0]['masks']) if ann and ann[0] else 0} masks.")
-        # --- Plotting Masks on Image (Manual) ---
         output_image = image_pil.copy()
         if ann and ann[0] is not None and 'masks' in ann[0] and len(ann[0]['masks']) > 0:
-            masks = ann[0]['masks'].cpu().numpy() # shape (N, H, W)
-            # Create overlay image
             overlay = Image.new('RGBA', output_image.size, (0, 0, 0, 0))
             draw = ImageDraw.Draw(overlay)
             for i in range(masks.shape[0]):
-                mask = masks[i] # shape (H, W), boolean
-                # Generate random color with some transparency
-                color = (random.randint(50, 255), random.randint(50, 255), random.randint(50, 255), 128) # RGBA with alpha
-                # Create a single-channel image from the boolean mask
                 mask_image = Image.fromarray((mask * 255).astype(np.uint8), mode='L')
-                # Apply color to the mask area on the overlay
                 draw.bitmap((0,0), mask_image, fill=color)
-            # Composite the overlay onto the original image
             output_image = Image.alpha_composite(output_image.convert('RGBA'), overlay).convert('RGB')
         print("FastSAM processing and plotting complete.")
-        return output_image, image_pil # Return segmented and original images
     except Exception as e:
         print(f"Error during FastSAM processing: {e}")
-        import traceback
-        traceback.print_exc() # Print detailed traceback
-        return f"An error occurred: {e}", None
 # --- Gradio Interface ---
@@ -188,7 +217,7 @@ def run_fastsam_segmentation(image_pil: Image.Image, conf_threshold: float = 0.4
 # Pre-load models on startup (optional but good for performance)
 print("Attempting to preload models...")
 load_clip_model()
-load_fastsam_model()
 print("Preloading finished (or attempted).")
@@ -203,11 +232,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             with gr.Row():
                 with gr.Column(scale=1):
                     clip_input_image = gr.Image(type="pil", label="Input Image")
-                    clip_text_labels = gr.Textbox(label="Comma-Separated Labels", placeholder="e.g., astronaut, mountain, dog playing fetch")
                     clip_button = gr.Button("Run CLIP Classification", variant="primary")
                 with gr.Column(scale=1):
                     clip_output_label = gr.Label(label="Classification Probabilities")
-                    clip_output_image_display = gr.Image(type="pil", label="Input Image Preview") # Show input for context
             clip_button.click(
                 run_clip_zero_shot,
@@ -218,11 +247,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 examples=[
                     ["examples/astronaut.jpg", "astronaut, moon, rover, mountain"],
                     ["examples/dog_bike.jpg", "dog, bicycle, person, park, grass"],
                 ],
                 inputs=[clip_input_image, clip_text_labels],
                 outputs=[clip_output_label, clip_output_image_display],
                 fn=run_clip_zero_shot,
-                cache_examples=False, # Re-run for live demo
             )
         # --- FastSAM Tab ---
@@ -237,41 +267,51 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                     fastsam_button = gr.Button("Run FastSAM Segmentation", variant="primary")
                 with gr.Column(scale=1):
                     fastsam_output_image = gr.Image(type="pil", label="Segmented Image")
-                    # fastsam_input_display = gr.Image(type="pil", label="Original Image") # Optional: show original side-by-side
             fastsam_button.click(
                 run_fastsam_segmentation,
                 inputs=[fastsam_input_image, fastsam_conf, fastsam_iou],
-                outputs=[fastsam_output_image] # Removed the second output for simplicity, adjust if needed
             )
             gr.Examples(
                 examples=[
                     ["examples/dogs.jpg", 0.4, 0.9],
                     ["examples/fruits.jpg", 0.5, 0.8],
                 ],
                 inputs=[fastsam_input_image, fastsam_conf, fastsam_iou],
                 outputs=[fastsam_output_image],
                 fn=run_fastsam_segmentation,
-                cache_examples=False, # Re-run for live demo
             )
     # Add example images (optional, but helpful)
-    # Create an 'examples' folder and add some jpg images like 'astronaut.jpg', 'dog_bike.jpg', 'dogs.jpg', 'fruits.jpg'
     if not os.path.exists("examples"):
         os.makedirs("examples")
-        print("Created 'examples' directory. Please add some images (e.g., astronaut.jpg, dog_bike.jpg) for the examples to work.")
-        # You might need to download some sample images here too if running on a fresh env
-        try:
-            print("Downloading example images...")
-            wget.download("https://huggingface.co/spaces/gradio/image-segmentation/resolve/main/images/lion.jpg", "examples/lion.jpg")
-            wget.download("https://raw.githubusercontent.com/openai/CLIP/main/CLIP.png", "examples/clip_logo.png")
-            wget.download("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/gradio-logo.png", "examples/gradio_logo.png")
-            # Manually add the examples used above if these don't match
-            print("Example images downloaded (or attempted). Please verify.")
-        except Exception as e:
-             print(f"Could not download example images: {e}")
 # Launch the Gradio app
 if __name__ == "__main__":
-    demo.launch(debug=True) # Set debug=False for deployment

 import random
 import os
 import wget # To download weights
+import traceback # For detailed error printing
 # --- Configuration & Model Loading ---
         print(f"CLIP model loaded to {DEVICE}.")
 # --- FastSAM Setup ---
 FASTSAM_CHECKPOINT = "FastSAM-s.pt"
+# Use the official model hub repo URL
+FASTSAM_CHECKPOINT_URL = f"https://huggingface.co/CASIA-IVA-Lab/FastSAM-s/resolve/main/{FASTSAM_CHECKPOINT}"
 fastsam_model = None
+fastsam_lib_imported = False # Flag to check if import worked
+def check_and_import_fastsam():
     # Import FastSAM and FastSAMPrompt from the `fastsam` package on demand and
     # publish both classes as module-level names via globals().
     # Returns the fastsam_lib_imported flag: True once an import has succeeded,
     # False if the most recent attempt failed. Failures are printed, not raised.
+    global fastsam_lib_imported
     # Skip the import once it has succeeded; a failed attempt leaves the flag
     # False, so the next call tries the import again.
+    if not fastsam_lib_imported:
+        try:
+            from fastsam import FastSAM, FastSAMPrompt
+            globals()['FastSAM'] = FastSAM # Make classes available globally
+            globals()['FastSAMPrompt'] = FastSAMPrompt
+            fastsam_lib_imported = True
+            print("fastsam library imported successfully.")
+        except ImportError:
+            print("Error: 'fastsam' library not found or import failed.")
+            print("Please ensure 'fastsam' is installed correctly (pip install fastsam).")
+            fastsam_lib_imported = False
             # Any other exception raised while importing is reported and
             # treated as a failed import as well.
+        except Exception as e:
+            print(f"An unexpected error occurred during fastsam import: {e}")
+            fastsam_lib_imported = False
+    return fastsam_lib_imported
 def download_fastsam_weights():
     # Make sure the FASTSAM_CHECKPOINT file is present, fetching it from
     # FASTSAM_CHECKPOINT_URL with wget.download when it is missing.
     # Returns True if the file exists when the function finishes, False if the
     # download raised. Errors are printed rather than propagated.
     if not os.path.exists(FASTSAM_CHECKPOINT):
+        print(f"Downloading FastSAM weights: {FASTSAM_CHECKPOINT} from {FASTSAM_CHECKPOINT_URL}...")
         try:
             wget.download(FASTSAM_CHECKPOINT_URL, FASTSAM_CHECKPOINT)
             print("FastSAM weights downloaded.")
         except Exception as e:
             print(f"Error downloading FastSAM weights: {e}")
             print("Please ensure the URL is correct and reachable, or manually place the weights file.")
+            # Attempt to remove partially downloaded file if exists
+            if os.path.exists(FASTSAM_CHECKPOINT):
+                 try:
+                     os.remove(FASTSAM_CHECKPOINT)
+                 except OSError:
+                     pass # Ignore removal errors
             return False
     # Reached when the file already existed or the download call returned
     # without raising; the path is re-checked instead of assuming success.
     return os.path.exists(FASTSAM_CHECKPOINT)
 def load_fastsam_model():
     # Populate the module-level fastsam_model if it is still None; does nothing
     # when a model is already loaded. Has no return value: success is signalled
     # only by fastsam_model becoming non-None. Failures are printed (with a
     # traceback for load errors) instead of raised.
     global fastsam_model
     if fastsam_model is None:
+        if not check_and_import_fastsam(): # Check import first
+             print("Cannot load FastSAM model because the library couldn't be imported.")
+             return # Exit if import failed
+        if download_fastsam_weights(): # Check download/existence second
             try:
+                # FastSAM class should be available via globals() now
                 print(f"Loading FastSAM model: {FASTSAM_CHECKPOINT}...")
                 fastsam_model = FastSAM(FASTSAM_CHECKPOINT)
+                print(f"FastSAM model loaded.") # Device handled internally by FastSAM
             # A failed construction leaves fastsam_model as None, so a later
             # call to this function attempts the load again.
             except Exception as e:
                 print(f"Error loading FastSAM model: {e}")
+                traceback.print_exc()
         else:
+            print("FastSAM weights not found or download failed. Cannot load model.")
 # --- Processing Functions ---
         if clip_model is None:
              return "Error: CLIP Model not loaded. Check logs.", None
     if image is None:
+        return "Please upload an image.", None # Return None for the image display
+    if not text_labels:
+        # Return empty results but display the uploaded image
+        return {}, image
+    labels = [label.strip() for label in text_labels.split(',') if label.strip()] # Ensure non-empty labels
     if not labels:
+         # Return empty results but display the uploaded image
+         return {}, image
     print(f"Running CLIP zero-shot classification with labels: {labels}")
         with torch.no_grad():
             outputs = clip_model(**inputs)
+            logits_per_image = outputs.logits_per_image
+            probs = logits_per_image.softmax(dim=1)
         print("CLIP processing complete.")
         confidences = {labels[i]: float(probs[0, i].item()) for i in range(len(labels))}
+        # Return results and the original image used for prediction
+        return confidences, image
     except Exception as e:
         print(f"Error during CLIP processing: {e}")
+        traceback.print_exc()
+        # Return error message and the original image
+        return f"An error occurred during CLIP: {e}", image
 # FastSAM Segmentation Function
 def run_fastsam_segmentation(image_pil: Image.Image, conf_threshold: float = 0.4, iou_threshold: float = 0.9):
+    # Ensure model is loaded or attempt to load
     if fastsam_model is None:
+        load_fastsam_model()
         if fastsam_model is None:
+             # Return error message string for the image component (Gradio handles this)
+             return "Error: FastSAM Model not loaded. Check logs."
+    # Ensure library was imported
+    if not fastsam_lib_imported:
+        return "Error: FastSAM library not available. Cannot run segmentation."
     if image_pil is None:
+        return "Please upload an image."
     print("Running FastSAM segmentation...")
         if image_pil.mode != "RGB":
             image_pil = image_pil.convert("RGB")
         image_np_rgb = np.array(image_pil)
         # Run FastSAM inference
         everything_results = fastsam_model(
+            image_np_rgb,
             device=DEVICE,
             retina_masks=True,
+            imgsz=640,
             conf=conf_threshold,
             iou=iou_threshold,
         )
+        # FastSAMPrompt should be available via globals() if import succeeded
         prompt_process = FastSAMPrompt(image_np_rgb, everything_results, device=DEVICE)
         ann = prompt_process.everything_prompt()
+        print(f"FastSAM found {len(ann[0]['masks']) if ann and ann[0] and 'masks' in ann[0] else 0} masks.")
+        # --- Plotting Masks on Image ---
         output_image = image_pil.copy()
         if ann and ann[0] is not None and 'masks' in ann[0] and len(ann[0]['masks']) > 0:
+            masks = ann[0]['masks'].cpu().numpy() # (N, H, W) boolean
             overlay = Image.new('RGBA', output_image.size, (0, 0, 0, 0))
             draw = ImageDraw.Draw(overlay)
             for i in range(masks.shape[0]):
+                mask = masks[i]
+                color = (random.randint(50, 255), random.randint(50, 255), random.randint(50, 255), 128) # RGBA
                 mask_image = Image.fromarray((mask * 255).astype(np.uint8), mode='L')
                 draw.bitmap((0,0), mask_image, fill=color)
             output_image = Image.alpha_composite(output_image.convert('RGBA'), overlay).convert('RGB')
         print("FastSAM processing and plotting complete.")
+        # *** FIX: Return ONLY the output image for the single Image component ***
+        return output_image
+    except NameError as ne:
+         print(f"NameError during FastSAM processing: {ne}. Was the fastsam library imported correctly?")
+         traceback.print_exc()
+         return f"A NameError occurred: {ne}. Check library import."
     except Exception as e:
         print(f"Error during FastSAM processing: {e}")
+        traceback.print_exc()
+        return f"An error occurred during FastSAM: {e}"
 # --- Gradio Interface ---
 # Pre-load models on startup (optional but good for performance)
 print("Attempting to preload models...")
 load_clip_model()
+load_fastsam_model() # This will now also attempt download/check import
 print("Preloading finished (or attempted).")
             with gr.Row():
                 with gr.Column(scale=1):
                     clip_input_image = gr.Image(type="pil", label="Input Image")
+                    clip_text_labels = gr.Textbox(label="Comma-Separated Labels", placeholder="e.g., astronaut, moon, dog playing fetch")
                     clip_button = gr.Button("Run CLIP Classification", variant="primary")
                 with gr.Column(scale=1):
                     clip_output_label = gr.Label(label="Classification Probabilities")
+                    clip_output_image_display = gr.Image(type="pil", label="Input Image Preview")
             clip_button.click(
                 run_clip_zero_shot,
                 examples=[
                     ["examples/astronaut.jpg", "astronaut, moon, rover, mountain"],
                     ["examples/dog_bike.jpg", "dog, bicycle, person, park, grass"],
+                    ["examples/clip_logo.png", "logo, text, graphics, abstract art"], # Added another example
                 ],
                 inputs=[clip_input_image, clip_text_labels],
                 outputs=[clip_output_label, clip_output_image_display],
                 fn=run_clip_zero_shot,
+                cache_examples=False,
             )
         # --- FastSAM Tab ---
                     fastsam_button = gr.Button("Run FastSAM Segmentation", variant="primary")
                 with gr.Column(scale=1):
                     fastsam_output_image = gr.Image(type="pil", label="Segmented Image")
             fastsam_button.click(
                 run_fastsam_segmentation,
                 inputs=[fastsam_input_image, fastsam_conf, fastsam_iou],
+                # Output is now correctly mapped to the single component
+                outputs=[fastsam_output_image]
             )
             gr.Examples(
                 examples=[
                     ["examples/dogs.jpg", 0.4, 0.9],
                     ["examples/fruits.jpg", 0.5, 0.8],
+                    ["examples/lion.jpg", 0.45, 0.9], # Added another example
                 ],
                 inputs=[fastsam_input_image, fastsam_conf, fastsam_iou],
                 outputs=[fastsam_output_image],
                 fn=run_fastsam_segmentation,
+                cache_examples=False,
             )
     # Add example images (optional, but helpful)
     if not os.path.exists("examples"):
         os.makedirs("examples")
+        print("Created 'examples' directory. Attempting to download sample images...")
+        example_files = {
+            "astronaut.jpg": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/d astronaut_-_St._Jean_Bay.jpg/640px-Astronaut_-_St._Jean_Bay.jpg", # Find suitable public domain/CC image
+            "dog_bike.jpg": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/gradio/outputs_multimodal.jpg", # Using a relevant example from HF
+            "clip_logo.png": "https://raw.githubusercontent.com/openai/CLIP/main/CLIP.png",
+            "dogs.jpg": "https://raw.githubusercontent.com/ultralytics/assets/main/im/image8.jpg", # From Ultralytics assets
+            "fruits.jpg": "https://raw.githubusercontent.com/ultralytics/assets/main/im/image9.jpg", # From Ultralytics assets
+            "lion.jpg": "https://huggingface.co/spaces/gradio/image-segmentation/resolve/main/images/lion.jpg"
+        }
+        for filename, url in example_files.items():
+             filepath = os.path.join("examples", filename)
+             if not os.path.exists(filepath):
+                 try:
+                     print(f"Downloading {filename}...")
+                     wget.download(url, filepath)
+                 except Exception as e:
+                     print(f"Could not download {filename} from {url}: {e}")
+        print("Example image download attempt finished.")
 # Launch the Gradio app
 if __name__ == "__main__":
+    # share=True is primarily for local testing to get a public link.
+    # Not needed/used when deploying on Hugging Face Spaces.
+    # debug=True is helpful for development. Set to False for production.
+    demo.launch(debug=True)