Commit ad13250 · Parent(s): fc85de6

First commit

Files changed:
- .gitattributes +2 -0
- app.py +129 -0
- assets/desciglio.jpg +3 -0
- assets/patio.jpg +3 -0
- assets/pool.jpg +3 -0
- requirements.txt +6 -0
.gitattributes CHANGED

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
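The two added rules route PNG and JPEG files through Git LFS (the effect of running git lfs track "*.png" and git lfs track "*.jpg", presumably), so the example photos committed under assets/ below are stored as LFS pointers rather than raw binaries in the repository history.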
    	
app.py ADDED

@@ -0,0 +1,129 @@
import torch
import gradio as gr
from transformers import Owlv2Processor, Owlv2ForObjectDetection
import os
import torchvision

# --- Setup ---
os.environ["GRADIO_TEMP_DIR"] = "tmp"
os.makedirs(os.environ["GRADIO_TEMP_DIR"], exist_ok=True)

# Handle ZeroGPU safely for local debugging
try:
    import spaces
except ImportError:
    class spaces:
        def GPU(*args, **kwargs):
            def decorator(fn): return fn
            return decorator

device = "cuda" if torch.cuda.is_available() else "cpu"

# --- Load Models ---
print("Loading models...")
noctowlv2_base = Owlv2ForObjectDetection.from_pretrained(
    "lorebianchi98/NoctOWLv2-base-patch16"
).to(device)
processorv2_base = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")

noctowlv2_large = Owlv2ForObjectDetection.from_pretrained(
    "lorebianchi98/NoctOWLv2-large-patch14"
).to(device)
processorv2_large = Owlv2Processor.from_pretrained("google/owlv2-large-patch14")

MODELS = {
    "NoctOWLv2-Base": (noctowlv2_base, processorv2_base),
    "NoctOWLv2-Large": (noctowlv2_large, processorv2_large),
}


# --- Inference Function ---
@spaces.GPU(duration=120)
def query_image(img, text_queries, score_threshold, selected_model):
    if img is None:
        raise gr.Error("Please upload or select an example image first.")
    if not text_queries.strip():
        raise gr.Error("Please enter at least one text query.")
    if selected_model is None or selected_model == "":
        raise gr.Error("Please select a model before running inference.")

    model, processor = MODELS[selected_model]
    model = model.to(device)

    # Prepare text
    text_queries = [f"a {t.strip()}" for t in text_queries.split(",") if t.strip()]
    if not text_queries:
        raise gr.Error("No valid queries found. Please check your input text.")

    # Preprocess
    size = max(img.shape[:2])
    target_sizes = torch.Tensor([[size, size]])
    inputs = processor(text=text_queries, images=img, return_tensors="pt").to(device)

    # Inference
    with torch.no_grad():
        outputs = model(**inputs)

    # Postprocess
    outputs.logits = outputs.logits.cpu()
    outputs.pred_boxes = outputs.pred_boxes.cpu()
    results = processor.post_process_object_detection(
        outputs=outputs, target_sizes=target_sizes, threshold=score_threshold
    )

    boxes, scores, labels = results[0]["boxes"], results[0]["scores"], results[0]["labels"]

    # Non-Maximum Suppression
    keep = torchvision.ops.nms(boxes, scores, iou_threshold=0.5)
    boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

    # Format output
    result_labels = []
    for box, score, label in zip(boxes, scores, labels):
        if score < score_threshold:
            continue
        box = [int(i) for i in box.tolist()]
        result_labels.append((box, f"{text_queries[label.item()]} ({score:.2f})"))

    return img, result_labels


# --- Interface Description ---
description = """
# 🦉 **NoctOWLv2: Fine-Grained Open-Vocabulary Object Detection**

**NoctOWL** (***N***ot **o**nly **c**oarse-**t**ext **OWL**) extends **OWL-ViT** and **OWLv2** for **Fine-Grained Open-Vocabulary Detection (FG-OVD)**.
It can recognize subtle object differences such as **color, texture, and material**, while retaining strong coarse-grained detection abilities.

**Available Models:**
- 🧩 **NoctOWLv2-Base** – Smaller and faster.
- 🧠 **NoctOWLv2-Large** – More accurate, higher capacity.

🔗 [Training & evaluation code](https://github.com/lorebianchi98/FG-OVD/NoctOWL)
"""

# --- Gradio Interface ---
demo = gr.Interface(
    fn=query_image,
    inputs=[
        gr.Image(label="Input Image"),
        gr.Textbox(label="Text Queries (comma-separated)", placeholder="e.g., red shoes, striped shirt, yellow ball"),
        gr.Slider(0, 1, value=0.1, step=0.01, label="Score Threshold"),
        gr.Dropdown(
            choices=["NoctOWLv2-Base", "NoctOWLv2-Large"],
            label="Select Model",
            value=None,
            info="Select which model to use for detection",
        ),
    ],
    outputs=gr.AnnotatedImage(label="Detected Objects"),
    title="NoctOWLv2 – Fine-Grained Zero-Shot Object Detection",
    description=description,
    examples=[
        ["assets/desciglio.jpg", "striped football shirt, plain red football shirt, yellow shoes, red shoes", 0.07],
        ["assets/pool.jpg", "white ball, blue ball, black ball, yellow ball", 0.1],
        ["assets/patio.jpg", "ceramic mug, glass mug, pink flowers, blue flowers", 0.09],
    ],
)

demo.launch()
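The pipeline above can also be exercised without the web UI, which is handy when debugging the Space locally. Below is a minimal smoke-test sketch, assuming the example assets are present, that Pillow and numpy are available (both are pulled in by the dependencies), and that it runs in the same interpreter as app.py (for example, placed temporarily before the demo.launch() line). It passes query_image a numpy RGB array, matching what gr.Image delivers by default.

import numpy as np
from PIL import Image

# Load an example image as the RGB numpy array that gr.Image would provide.
img = np.array(Image.open("assets/pool.jpg").convert("RGB"))

annotated, detections = query_image(
    img,
    "white ball, blue ball, black ball, yellow ball",
    score_threshold=0.1,
    selected_model="NoctOWLv2-Base",
)

# Each detection is ([x0, y0, x1, y1], "query (score)").
for box, caption in detections:
    print(box, caption)

Note that the torchvision.ops.nms call in query_image is class-agnostic: boxes matched to different text queries can suppress one another, so overlapping queries on the same object yield a single label in the annotated output.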
    	
assets/desciglio.jpg ADDED (image stored via Git LFS)
    	
assets/patio.jpg ADDED (image stored via Git LFS)
    	
assets/pool.jpg ADDED (image stored via Git LFS)
    	
requirements.txt ADDED

@@ -0,0 +1,6 @@
numpy>=1.18.5
torch>=1.7.0
torchvision>=0.8.1
git+https://github.com/huggingface/transformers.git
scipy
spaces
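To reproduce the Space locally, install the dependencies with pip install -r requirements.txt and start the demo with python app.py; Gradio serves the interface on http://localhost:7860 by default. The transformers dependency is pinned to the development branch on GitHub here; any release that already ships the Owlv2 classes (v4.35.0 or later) should presumably work as well.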