Files changed (2) hide show
  1. app.py +27 -61
  2. requirements.txt +1 -2
app.py CHANGED
@@ -6,7 +6,7 @@ import gradio as gr
6
  import numpy as np
7
  import supervision as sv
8
  from PIL import Image
9
- from rfdetr import RFDETRNano, RFDETRSmall, RFDETRMedium, RFDETRBase, RFDETRLarge, RFDETRSegPreview
10
  from rfdetr.detr import RFDETR
11
  from rfdetr.util.coco_classes import COCO_CLASSES
12
 
@@ -17,24 +17,23 @@ ImageType = TypeVar("ImageType", Image.Image, np.ndarray)
17
 
18
  MARKDOWN = """
19
  # RF-DETR 🔥
 
20
  [`[code]`](https://github.com/roboflow/rf-detr)
21
  [`[blog]`](https://blog.roboflow.com/rf-detr)
22
  [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
 
23
  RF-DETR is a real-time, transformer-based object detection model architecture developed
24
  by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
25
  """
26
 
27
  IMAGE_PROCESSING_EXAMPLES = [
28
- ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 1024, "medium (object detection)"],
29
- ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 1024, "medium (object detection)"],
30
- ['https://media.roboflow.com/supervision/image-examples/motorbike.png', 0.3, 1024, "medium (object detection)"],
31
- ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 512, "nano (object detection)"],
32
- ['https://media.roboflow.com/notebooks/examples/dog-3.jpeg', 0.5, 512, "nano (object detection)"],
33
- ['https://media.roboflow.com/supervision/image-examples/basketball-1.png', 0.5, 512, "nano (object detection)"],
34
  ]
35
  VIDEO_PROCESSING_EXAMPLES = [
36
- ["videos/people-walking.mp4", 0.3, 1024, "medium (object detection)"],
37
- ["videos/vehicles.mp4", 0.3, 1024, "medium (object detection)"],
38
  ]
39
 
40
  COLOR = sv.ColorPalette.from_hex([
@@ -52,8 +51,7 @@ create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
52
  def detect_and_annotate(
53
  model: RFDETR,
54
  image: ImageType,
55
- confidence: float,
56
- checkpoint: str = "medium (object detection)"
57
  ) -> ImageType:
58
  detections = model.predict(image, threshold=confidence)
59
 
@@ -61,7 +59,6 @@ def detect_and_annotate(
61
  text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh) - 0.2
62
  thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
63
 
64
- mask_annotator = sv.MaskAnnotator(color=COLOR)
65
  bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
66
  label_annotator = sv.LabelAnnotator(
67
  color=COLOR,
@@ -74,62 +71,29 @@ def detect_and_annotate(
74
  for class_id, confidence
75
  in zip(detections.class_id, detections.confidence)
76
  ]
77
- print(detections)
78
  annotated_image = image.copy()
79
  annotated_image = bbox_annotator.annotate(annotated_image, detections)
80
  annotated_image = label_annotator.annotate(annotated_image, detections, labels)
81
- if checkpoint == "segmentation preview":
82
- annotated_image = mask_annotator.annotate(annotated_image, detections)
83
  return annotated_image
84
 
85
 
86
  def load_model(resolution: int, checkpoint: str) -> RFDETR:
87
- if checkpoint == "nano (object detection)":
88
- return RFDETRNano(resolution=resolution)
89
- if checkpoint == "small (object detection)":
90
- return RFDETRSmall(resolution=resolution)
91
- if checkpoint == "medium (object detection)":
92
- return RFDETRMedium(resolution=resolution)
93
- if checkpoint == "base (object detection)":
94
  return RFDETRBase(resolution=resolution)
95
- elif checkpoint == "large (object detection)":
96
  return RFDETRLarge(resolution=resolution)
97
- elif checkpoint == "segmentation preview":
98
- return RFDETRSegPreview(resolution=resolution)
99
  raise TypeError("Checkpoint must be a base or large.")
100
 
101
 
102
def adjust_resolution(checkpoint: str, resolution: int) -> int:
    """Snap ``resolution`` to the nearest multiple of the checkpoint's divisor.

    Args:
        checkpoint: Checkpoint label used to pick the divisor (24, 32 or 56).
        resolution: Requested inference resolution.

    Returns:
        ``resolution`` unchanged when it is already a multiple of the divisor,
        otherwise the closest multiple. An exact midpoint rounds up.

    Raises:
        ValueError: If ``checkpoint`` is not a known label.
    """
    divisors = {
        "segmentation preview": 24,
        "nano (object detection)": 32,
        "small (object detection)": 32,
        "medium (object detection)": 32,
        "base (object detection)": 56,
        "large (object detection)": 56,
    }
    step = divisors.get(checkpoint)
    if step is None:
        raise ValueError(f"Unknown checkpoint: {checkpoint}")

    # Nearest multiples at or below, and strictly above, the request.
    floor = resolution - resolution % step
    ceiling = floor + step

    # Strict "<" means a tie between the two neighbours goes to the upper one;
    # an exact multiple has distance 0 to ``floor`` and is returned as-is.
    if resolution - floor < ceiling - resolution:
        return floor
    return ceiling
122
-
123
-
124
  def image_processing_inference(
125
  input_image: Image.Image,
126
  confidence: float,
127
  resolution: int,
128
  checkpoint: str
129
  ):
130
- resolution = adjust_resolution(checkpoint=checkpoint, resolution=resolution)
131
  model = load_model(resolution=resolution, checkpoint=checkpoint)
132
- return detect_and_annotate(model=model, image=input_image, confidence=confidence, checkpoint=checkpoint)
133
 
134
 
135
  def video_processing_inference(
@@ -137,8 +101,8 @@ def video_processing_inference(
137
  confidence: float,
138
  resolution: int,
139
  checkpoint: str,
 
140
  ):
141
- resolution = adjust_resolution(checkpoint=checkpoint, resolution=resolution)
142
  model = load_model(resolution=resolution, checkpoint=checkpoint)
143
 
144
  name = generate_unique_name()
@@ -156,8 +120,7 @@ def video_processing_inference(
156
  annotated_frame = detect_and_annotate(
157
  model=model,
158
  image=frame,
159
- confidence=confidence,
160
- checkpoint=checkpoint
161
  )
162
  annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
163
  sink.write_frame(annotated_frame)
@@ -191,15 +154,15 @@ with gr.Blocks() as demo:
191
  )
192
  image_processing_resolution_slider = gr.Slider(
193
  label="Inference resolution",
194
- minimum=224,
195
- maximum=2240,
196
- step=1,
197
- value=896,
198
  )
199
  image_processing_checkpoint_dropdown = gr.Dropdown(
200
  label="Checkpoint",
201
- choices=["nano (object detection)", "small (object detection)", "medium (object detection)", "segmentation preview"],
202
- value="segmentation preview"
203
  )
204
  with gr.Column():
205
  image_processing_submit_button = gr.Button("Submit", value="primary")
@@ -214,6 +177,8 @@ with gr.Blocks() as demo:
214
  image_processing_checkpoint_dropdown
215
  ],
216
  outputs=image_processing_output_image,
 
 
217
  )
218
 
219
  image_processing_submit_button.click(
@@ -254,8 +219,8 @@ with gr.Blocks() as demo:
254
  )
255
  video_processing_checkpoint_dropdown = gr.Dropdown(
256
  label="Checkpoint",
257
- choices=["nano (object detection)", "small (object detection)", "medium (object detection)", "segmentation preview"],
258
- value="segmentation preview"
259
  )
260
  with gr.Column():
261
  video_processing_submit_button = gr.Button("Submit", value="primary")
@@ -269,7 +234,8 @@ with gr.Blocks() as demo:
269
  video_processing_resolution_slider,
270
  video_processing_checkpoint_dropdown
271
  ],
272
- outputs=video_processing_output_video
 
273
  )
274
 
275
  video_processing_submit_button.click(
 
6
  import numpy as np
7
  import supervision as sv
8
  from PIL import Image
9
+ from rfdetr import RFDETRBase, RFDETRLarge
10
  from rfdetr.detr import RFDETR
11
  from rfdetr.util.coco_classes import COCO_CLASSES
12
 
 
17
 
18
  MARKDOWN = """
19
  # RF-DETR 🔥
20
+
21
  [`[code]`](https://github.com/roboflow/rf-detr)
22
  [`[blog]`](https://blog.roboflow.com/rf-detr)
23
  [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
24
+
25
  RF-DETR is a real-time, transformer-based object detection model architecture developed
26
  by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
27
  """
28
 
29
  IMAGE_PROCESSING_EXAMPLES = [
30
+ ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 728, "large"],
31
+ ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 728, "large"],
32
+ ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 560, "base"],
 
 
 
33
  ]
34
  VIDEO_PROCESSING_EXAMPLES = [
35
+ ["videos/people-walking.mp4", 0.3, 728, "large"],
36
+ ["videos/vehicles.mp4", 0.3, 728, "large"],
37
  ]
38
 
39
  COLOR = sv.ColorPalette.from_hex([
 
51
  def detect_and_annotate(
52
  model: RFDETR,
53
  image: ImageType,
54
+ confidence: float
 
55
  ) -> ImageType:
56
  detections = model.predict(image, threshold=confidence)
57
 
 
59
  text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh) - 0.2
60
  thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
61
 
 
62
  bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
63
  label_annotator = sv.LabelAnnotator(
64
  color=COLOR,
 
71
  for class_id, confidence
72
  in zip(detections.class_id, detections.confidence)
73
  ]
74
+
75
  annotated_image = image.copy()
76
  annotated_image = bbox_annotator.annotate(annotated_image, detections)
77
  annotated_image = label_annotator.annotate(annotated_image, detections, labels)
 
 
78
  return annotated_image
79
 
80
 
81
  def load_model(resolution: int, checkpoint: str) -> RFDETR:
82
+ if checkpoint == "base":
 
 
 
 
 
 
83
  return RFDETRBase(resolution=resolution)
84
+ elif checkpoint == "large":
85
  return RFDETRLarge(resolution=resolution)
 
 
86
  raise TypeError("Checkpoint must be a base or large.")
87
 
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  def image_processing_inference(
90
  input_image: Image.Image,
91
  confidence: float,
92
  resolution: int,
93
  checkpoint: str
94
  ):
 
95
  model = load_model(resolution=resolution, checkpoint=checkpoint)
96
+ return detect_and_annotate(model=model, image=input_image, confidence=confidence)
97
 
98
 
99
  def video_processing_inference(
 
101
  confidence: float,
102
  resolution: int,
103
  checkpoint: str,
104
+ progress=gr.Progress(track_tqdm=True)
105
  ):
 
106
  model = load_model(resolution=resolution, checkpoint=checkpoint)
107
 
108
  name = generate_unique_name()
 
120
  annotated_frame = detect_and_annotate(
121
  model=model,
122
  image=frame,
123
+ confidence=confidence
 
124
  )
125
  annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
126
  sink.write_frame(annotated_frame)
 
154
  )
155
  image_processing_resolution_slider = gr.Slider(
156
  label="Inference resolution",
157
+ minimum=560,
158
+ maximum=1120,
159
+ step=56,
160
+ value=728,
161
  )
162
  image_processing_checkpoint_dropdown = gr.Dropdown(
163
  label="Checkpoint",
164
+ choices=["base", "large"],
165
+ value="base"
166
  )
167
  with gr.Column():
168
  image_processing_submit_button = gr.Button("Submit", value="primary")
 
177
  image_processing_checkpoint_dropdown
178
  ],
179
  outputs=image_processing_output_image,
180
+ cache_examples=True,
181
+ run_on_click=True
182
  )
183
 
184
  image_processing_submit_button.click(
 
219
  )
220
  video_processing_checkpoint_dropdown = gr.Dropdown(
221
  label="Checkpoint",
222
+ choices=["base", "large"],
223
+ value="base"
224
  )
225
  with gr.Column():
226
  video_processing_submit_button = gr.Button("Submit", value="primary")
 
234
  video_processing_resolution_slider,
235
  video_processing_checkpoint_dropdown
236
  ],
237
+ outputs=video_processing_output_video,
238
+ run_on_click=True
239
  )
240
 
241
  video_processing_submit_button.click(
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
  gradio
2
  spaces
3
- # rfdetr
4
- git+https://github.com/roboflow/rf-detr.git@1.3.0
5
  tqdm
 
1
  gradio
2
  spaces
3
+ rfdetr
 
4
  tqdm