Update app.py

app.py CHANGED
```diff
@@ -6,19 +6,23 @@ import numpy as np
 from transformers import CLIPProcessor, CLIPModel
 from ultralytics import FastSAM
 import supervision as sv
-
+import os
 
 # Load CLIP model
-model =
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
-#
-
-
+# Initialize FastSAM model
+FASTSAM_WEIGHTS = "FastSAM-s.pt"
+if not os.path.exists(FASTSAM_WEIGHTS):
+    os.system(f"wget https://huggingface.co/spaces/An-619/FastSAM/resolve/main/weights/{FASTSAM_WEIGHTS}")
+fast_sam = FastSAM(FASTSAM_WEIGHTS)
 
 def process_image_clip(image, text_input):
     if image is None:
         return "Please upload an image first."
+    if not text_input:
+        return "Please enter some text to check in the image."
 
     # Process image for CLIP
     inputs = processor(
```
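The hunk cuts off inside `process_image_clip` at `inputs = processor(`; the rest of the function is outside the diff. For context, a minimal sketch of how the function presumably continues with the standard `transformers` CLIP API — the prompt strings, variable names, and return message below are illustrative assumptions, not the Space's actual code:

```python
# Hypothetical continuation of process_image_clip (not shown in this diff):
# score the user's text against the image with CLIP.
inputs = processor(
    text=[f"a photo of {text_input}", "a photo of something else"],
    images=image,
    return_tensors="pt",
    padding=True,
)
outputs = model(**inputs)                        # CLIPModel forward pass
probs = outputs.logits_per_image.softmax(dim=1)  # image-text similarity as probabilities
confidence = probs[0][0].item()
return f"Confidence that the image contains '{text_input}': {confidence:.2%}"
```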
```diff
@@ -43,24 +47,27 @@ def process_image_fastsam(image):
     # Convert PIL image to numpy array
     image_np = np.array(image)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        # Run FastSAM inference
+        results = fast_sam(image_np, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
+
+        # Get detections
+        detections = sv.Detections.from_ultralytics(results[0])
+
+        # Create annotator
+        box_annotator = sv.BoxAnnotator()
+        mask_annotator = sv.MaskAnnotator()
+
+        # Annotate image
+        annotated_image = mask_annotator.annotate(scene=image_np.copy(), detections=detections)
+        annotated_image = box_annotator.annotate(scene=annotated_image, detections=detections)
+
+        return Image.fromarray(annotated_image)
+    except Exception as e:
+        return f"Error processing image: {str(e)}"
 
 # Create Gradio interface
-with gr.Blocks() as demo:
+with gr.Blocks(css="footer {visibility: hidden}") as demo:
     gr.Markdown("""
     # CLIP and FastSAM Demo
     This demo combines two powerful AI models:
```
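A side note on the weights bootstrap added in the first hunk: shelling out to `wget` via `os.system` assumes the binary exists in the Space image and ignores the exit code, so a failed download goes unnoticed until `FastSAM(...)` errors. A more robust sketch using `huggingface_hub`, assuming the file lives at `weights/FastSAM-s.pt` in the `An-619/FastSAM` Space as the wget URL suggests:

```python
# Alternative weight download via huggingface_hub instead of os.system("wget ...").
# Assumes the same Space/file layout as the URL in the diff.
from huggingface_hub import hf_hub_download
from ultralytics import FastSAM

weights_path = hf_hub_download(
    repo_id="An-619/FastSAM",
    repo_type="space",
    filename="weights/FastSAM-s.pt",
)
fast_sam = FastSAM(weights_path)
```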
```diff
@@ -73,8 +80,11 @@ with gr.Blocks() as demo:
     with gr.Tab("CLIP Zero-Shot Classification"):
         with gr.Row():
             image_input = gr.Image(type="pil", label="Input Image")
-            text_input = gr.Textbox(
-
+            text_input = gr.Textbox(
+                label="What do you want to check in the image?",
+                placeholder="e.g., 'a dog', 'sunset', 'people playing'",
+                info="Enter any concept you want to check in the image"
+            )
             output_text = gr.Textbox(label="Result")
             classify_btn = gr.Button("Classify")
             classify_btn.click(fn=process_image_clip, inputs=[image_input, text_input], outputs=output_text)
```
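The FastSAM tab itself falls outside the changed hunks, so its wiring is not visible here. Presumably it mirrors the CLIP tab and calls `process_image_fastsam`; a hypothetical sketch with made-up component names (`image_input_sam`, `image_output`, `segment_btn`):

```python
# Hypothetical wiring for the FastSAM tab (not part of this diff);
# component names are illustrative only.
with gr.Tab("FastSAM Segmentation"):
    with gr.Row():
        image_input_sam = gr.Image(type="pil", label="Input Image")
        image_output = gr.Image(type="pil", label="Segmented Image")
    segment_btn = gr.Button("Segment")
    segment_btn.click(fn=process_image_fastsam, inputs=image_input_sam, outputs=image_output)
```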
```diff
@@ -90,6 +100,10 @@ with gr.Blocks() as demo:
     ### How to use:
     1. **CLIP Classification**: Upload an image and enter text to check if that concept exists in the image
     2. **FastSAM Segmentation**: Upload an image to get automatic segmentation with bounding boxes and masks
+
+    ### Note:
+    - The models run on CPU, so processing might take a few seconds
+    - For best results, use clear images with good lighting
     """)
 
 demo.launch()
```
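`demo.launch()` is left untouched by this change. On Spaces the defaults are fine; for debugging the app locally, standard Gradio launch options like the following can help (these are not part of the diff):

```python
# Local-debugging launch options (not needed on Spaces, where defaults suffice).
demo.launch(
    server_name="0.0.0.0",  # listen on all interfaces
    server_port=7860,       # the port Spaces expects
    show_error=True,        # surface Python tracebacks in the UI
)
```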