lorebianchi98 committed
Commit ad13250 · 1 Parent(s): fc85de6

First commit

Files changed (6)
  1. .gitattributes +2 -0
  2. app.py +129 -0
  3. assets/desciglio.jpg +3 -0
  4. assets/patio.jpg +3 -0
  5. assets/pool.jpg +3 -0
  6. requirements.txt +6 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,129 @@
+import torch
+import gradio as gr
+from transformers import Owlv2Processor, Owlv2ForObjectDetection
+import os
+import torchvision
+
+# --- Setup ---
+os.environ["GRADIO_TEMP_DIR"] = "tmp"
+os.makedirs(os.environ["GRADIO_TEMP_DIR"], exist_ok=True)
+
+# Handle ZeroGPU safely for local debugging: if the `spaces` package is not
+# installed, fall back to a no-op decorator.
+try:
+    import spaces
+except ImportError:
+    class spaces:
+        @staticmethod
+        def GPU(*args, **kwargs):
+            def decorator(fn):
+                return fn
+            return decorator
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# --- Load Models ---
+print("Loading models...")
+noctowlv2_base = Owlv2ForObjectDetection.from_pretrained(
+    "lorebianchi98/NoctOWLv2-base-patch16"
+).to(device)
+processorv2_base = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
+
+noctowlv2_large = Owlv2ForObjectDetection.from_pretrained(
+    "lorebianchi98/NoctOWLv2-large-patch14"
+).to(device)
+processorv2_large = Owlv2Processor.from_pretrained("google/owlv2-large-patch14")
+
+MODELS = {
+    "NoctOWLv2-Base": (noctowlv2_base, processorv2_base),
+    "NoctOWLv2-Large": (noctowlv2_large, processorv2_large),
+}
+
+
+# --- Inference Function ---
+@spaces.GPU(duration=120)
+def query_image(img, text_queries, score_threshold, selected_model):
+    if img is None:
+        raise gr.Error("Please upload or select an example image first.")
+    if not text_queries.strip():
+        raise gr.Error("Please enter at least one text query.")
+    if not selected_model:
+        raise gr.Error("Please select a model before running inference.")
+
+    model, processor = MODELS[selected_model]
+    model = model.to(device)
+
+    # Prepare text: split the comma-separated queries and prepend the "a " prompt
+    text_queries = [f"a {t.strip()}" for t in text_queries.split(",") if t.strip()]
+    if not text_queries:
+        raise gr.Error("No valid queries found. Please check your input text.")
+
+    # Preprocess: OWLv2 pads the image to a square, so post-process against a
+    # square target built from the longer image side
+    size = max(img.shape[:2])
+    target_sizes = torch.Tensor([[size, size]])
+    inputs = processor(text=text_queries, images=img, return_tensors="pt").to(device)
+
+    # Inference
+    with torch.no_grad():
+        outputs = model(**inputs)
+
+    # Postprocess on CPU
+    outputs.logits = outputs.logits.cpu()
+    outputs.pred_boxes = outputs.pred_boxes.cpu()
+    results = processor.post_process_object_detection(
+        outputs=outputs, target_sizes=target_sizes, threshold=score_threshold
+    )
+
+    boxes, scores, labels = results[0]["boxes"], results[0]["scores"], results[0]["labels"]
+
+    # Class-agnostic Non-Maximum Suppression
+    keep = torchvision.ops.nms(boxes, scores, iou_threshold=0.5)
+    boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
+
+    # Format output as (box, label) pairs for gr.AnnotatedImage
+    result_labels = []
+    for box, score, label in zip(boxes, scores, labels):
+        if score < score_threshold:
+            continue
+        box = [int(i) for i in box.tolist()]
+        result_labels.append((box, f"{text_queries[label.item()]} ({score:.2f})"))
+
+    return img, result_labels
+
+
+# --- Interface Description ---
+description = """
+# 🦉 **NoctOWLv2: Fine-Grained Open-Vocabulary Object Detection**
+
+**NoctOWL** (***N***ot ***o***nly ***c***oarse-***t***ext **OWL**) extends **OWL-ViT** and **OWLv2** for **Fine-Grained Open-Vocabulary Detection (FG-OVD)**.
+It can recognize subtle object differences, such as **color, texture, and material**, while retaining strong coarse-grained detection abilities.
+
+**Available Models:**
+- 🧩 **NoctOWLv2-Base**: smaller and faster.
+- 🧠 **NoctOWLv2-Large**: more accurate, higher capacity.
+
+📘 [Training & evaluation code](https://github.com/lorebianchi98/FG-OVD/NoctOWL)
+"""
+
+# --- Gradio Interface ---
+demo = gr.Interface(
+    fn=query_image,
+    inputs=[
+        gr.Image(label="Input Image"),
+        gr.Textbox(label="Text Queries (comma-separated)", placeholder="e.g., red shoes, striped shirt, yellow ball"),
+        gr.Slider(0, 1, value=0.1, step=0.01, label="Score Threshold"),
+        gr.Dropdown(
+            choices=["NoctOWLv2-Base", "NoctOWLv2-Large"],
+            label="Select Model",
+            value=None,
+            info="Select which model to use for detection",
+        ),
+    ],
+    outputs=gr.AnnotatedImage(label="Detected Objects"),
+    title="NoctOWLv2: Fine-Grained Zero-Shot Object Detection",
+    description=description,
+    examples=[
+        # Each example must supply a value for every input component,
+        # including the model dropdown, or gr.Interface rejects it.
+        ["assets/desciglio.jpg", "striped football shirt, plain red football shirt, yellow shoes, red shoes", 0.07, "NoctOWLv2-Base"],
+        ["assets/pool.jpg", "white ball, blue ball, black ball, yellow ball", 0.1, "NoctOWLv2-Base"],
+        ["assets/patio.jpg", "ceramic mug, glass mug, pink flowers, blue flowers", 0.09, "NoctOWLv2-Base"],
+    ],
+)
+
+demo.launch()
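
The same checkpoints and post-processing can be exercised without the Gradio UI, which is handy for local testing. Below is a minimal sketch mirroring `query_image`, assuming the NoctOWLv2-Base checkpoint from `app.py`; `example.jpg` and the 0.1 threshold are illustrative:

```python
# Minimal standalone sketch of the app.py pipeline (no Gradio).
import torch
import torchvision
from PIL import Image
from transformers import Owlv2Processor, Owlv2ForObjectDetection

device = "cuda" if torch.cuda.is_available() else "cpu"
model = Owlv2ForObjectDetection.from_pretrained(
    "lorebianchi98/NoctOWLv2-base-patch16"
).to(device)
processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")

image = Image.open("example.jpg")  # placeholder path
queries = ["a striped football shirt", "a plain red football shirt"]

inputs = processor(text=queries, images=image, return_tensors="pt").to(device)
with torch.no_grad():
    outputs = model(**inputs)

# As in app.py: post-process on CPU against a square target, since OWLv2
# pads the input image to a square.
outputs.logits = outputs.logits.cpu()
outputs.pred_boxes = outputs.pred_boxes.cpu()
size = max(image.size)
results = processor.post_process_object_detection(
    outputs=outputs, target_sizes=torch.Tensor([[size, size]]), threshold=0.1
)[0]

# Class-agnostic NMS, then print the surviving detections
keep = torchvision.ops.nms(results["boxes"], results["scores"], iou_threshold=0.5)
for box, score, label in zip(
    results["boxes"][keep], results["scores"][keep], results["labels"][keep]
):
    print(queries[label.item()], f"{score:.2f}", [int(c) for c in box.tolist()])
```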
assets/desciglio.jpg ADDED

Git LFS Details

  • SHA256: 46cae508c3fb2f760b6c6c7adccd3d34aa129b8d0ddce184ea9b002a654ef281
  • Pointer size: 130 Bytes
  • Size of remote file: 62.9 kB
assets/patio.jpg ADDED

Git LFS Details

  • SHA256: bbe11a884efe04fe6e1f7a531dcbbce7fcc8b7b03931abe82282252fe7e77000
  • Pointer size: 131 Bytes
  • Size of remote file: 263 kB
assets/pool.jpg ADDED

Git LFS Details

  • SHA256: da2cd6f4a34576ad3d465f85700e9a0e82c82a194b2558cc0b7e736725c0f3ce
  • Pointer size: 131 Bytes
  • Size of remote file: 311 kB
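
All three assets are stored through Git LFS, so a checkout without the LFS objects contains only the small pointer files described by the "Pointer size" entries above. A minimal sketch for spotting such pointers locally (the path is illustrative):

```python
# Sketch: check whether a checked-out file is still a Git LFS pointer
# rather than the actual image.
from pathlib import Path

def is_lfs_pointer(path: str) -> bool:
    # LFS pointer files start with this fixed spec line
    head = Path(path).read_bytes()[:120]
    return head.startswith(b"version https://git-lfs.github.com/spec/v1")

print(is_lfs_pointer("assets/pool.jpg"))  # True until the LFS object is fetched
```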
requirements.txt ADDED
@@ -0,0 +1,6 @@
+numpy>=1.18.5
+torch>=1.7.0
+torchvision>=0.8.1
+git+https://github.com/huggingface/transformers.git
+scipy
+spaces
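
Since `transformers` is installed from its development branch, a quick import check can catch a broken environment before the Space builds; a small sketch (the printout is illustrative):

```python
# Sketch: verify the stack from requirements.txt imports cleanly.
import scipy
import torch
import torchvision
import transformers

# The demo only needs the OWLv2 classes to be importable:
from transformers import Owlv2ForObjectDetection, Owlv2Processor

print("torch", torch.__version__)
print("torchvision", torchvision.__version__)
print("transformers", transformers.__version__)
```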