Spaces:

Roboflow
/

RF-DETR

Running on T4

App Files Community

gradio-runtime-fixes

by onuralpszr - opened Jul 20

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+28

-63

Files changed (2) hide show

app.py +27 -61
requirements.txt +1 -2

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import gradio as gr
 import numpy as np
 import supervision as sv
 from PIL import Image
-from rfdetr import RFDETRNano, RFDETRSmall, RFDETRMedium, RFDETRBase, RFDETRLarge, RFDETRSegPreview
 from rfdetr.detr import RFDETR
 from rfdetr.util.coco_classes import COCO_CLASSES
@@ -17,24 +17,23 @@ ImageType = TypeVar("ImageType", Image.Image, np.ndarray)
 MARKDOWN = """
 # RF-DETR 🔥
 [`[code]`](https://github.com/roboflow/rf-detr)
 [`[blog]`](https://blog.roboflow.com/rf-detr)
 [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
 RF-DETR is a real-time, transformer-based object detection model architecture developed
 by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
 """
 IMAGE_PROCESSING_EXAMPLES = [
-    ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 1024, "medium (object detection)"],
-    ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 1024, "medium (object detection)"],
-    ['https://media.roboflow.com/supervision/image-examples/motorbike.png', 0.3, 1024, "medium (object detection)"],
-    ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 512, "nano (object detection)"],
-    ['https://media.roboflow.com/notebooks/examples/dog-3.jpeg', 0.5, 512, "nano (object detection)"],
-    ['https://media.roboflow.com/supervision/image-examples/basketball-1.png', 0.5, 512, "nano (object detection)"],
 ]
 VIDEO_PROCESSING_EXAMPLES = [
-    ["videos/people-walking.mp4", 0.3, 1024, "medium (object detection)"],
-    ["videos/vehicles.mp4", 0.3, 1024, "medium (object detection)"],
 ]
 COLOR = sv.ColorPalette.from_hex([
@@ -52,8 +51,7 @@ create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
 def detect_and_annotate(
         model: RFDETR,
         image: ImageType,
-        confidence: float,
-        checkpoint: str = "medium (object detection)"
 ) -> ImageType:
     detections = model.predict(image, threshold=confidence)
@@ -61,7 +59,6 @@ def detect_and_annotate(
     text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh) - 0.2
     thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
-    mask_annotator = sv.MaskAnnotator(color=COLOR)
     bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
     label_annotator = sv.LabelAnnotator(
         color=COLOR,
@@ -74,62 +71,29 @@ def detect_and_annotate(
         for class_id, confidence
         in zip(detections.class_id, detections.confidence)
     ]
-    print(detections)
     annotated_image = image.copy()
     annotated_image = bbox_annotator.annotate(annotated_image, detections)
     annotated_image = label_annotator.annotate(annotated_image, detections, labels)
-    if checkpoint == "segmentation preview":
-        annotated_image = mask_annotator.annotate(annotated_image, detections)
     return annotated_image
 def load_model(resolution: int, checkpoint: str) -> RFDETR:
-    if checkpoint == "nano (object detection)":
-        return RFDETRNano(resolution=resolution)
-    if checkpoint == "small (object detection)":
-        return RFDETRSmall(resolution=resolution)
-    if checkpoint == "medium (object detection)":
-        return RFDETRMedium(resolution=resolution)
-    if checkpoint == "base (object detection)":
         return RFDETRBase(resolution=resolution)
-    elif checkpoint == "large (object detection)":
         return RFDETRLarge(resolution=resolution)
-    elif checkpoint == "segmentation preview":
-        return RFDETRSegPreview(resolution=resolution)
     raise TypeError("Checkpoint must be a base or large.")
-def adjust_resolution(checkpoint: str, resolution: int) -> int:
-    if checkpoint == "segmentation preview":
-        divisor = 24
-    elif checkpoint in {"nano (object detection)", "small (object detection)", "medium (object detection)"}:
-        divisor = 32
-    elif checkpoint in {"base (object detection)", "large (object detection)"}:
-        divisor = 56
-    else:
-        raise ValueError(f"Unknown checkpoint: {checkpoint}")
-    remainder = resolution % divisor
-    if remainder == 0:
-        return resolution
-    lower = resolution - remainder
-    upper = lower + divisor
-    if resolution - lower < upper - resolution:
-        return lower
-    else:
-        return upper
 def image_processing_inference(
         input_image: Image.Image,
         confidence: float,
         resolution: int,
         checkpoint: str
 ):
-    resolution = adjust_resolution(checkpoint=checkpoint, resolution=resolution)
     model = load_model(resolution=resolution, checkpoint=checkpoint)
-    return detect_and_annotate(model=model, image=input_image, confidence=confidence, checkpoint=checkpoint)
 def video_processing_inference(
@@ -137,8 +101,8 @@ def video_processing_inference(
         confidence: float,
         resolution: int,
         checkpoint: str,
 ):
-    resolution = adjust_resolution(checkpoint=checkpoint, resolution=resolution)
     model = load_model(resolution=resolution, checkpoint=checkpoint)
     name = generate_unique_name()
@@ -156,8 +120,7 @@ def video_processing_inference(
             annotated_frame = detect_and_annotate(
                 model=model,
                 image=frame,
-                confidence=confidence,
-                checkpoint=checkpoint
             )
             annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
             sink.write_frame(annotated_frame)
@@ -191,15 +154,15 @@ with gr.Blocks() as demo:
                 )
                 image_processing_resolution_slider = gr.Slider(
                     label="Inference resolution",
-                    minimum=224,
-                    maximum=2240,
-                    step=1,
-                    value=896,
                 )
                 image_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
-                    choices=["nano (object detection)", "small (object detection)", "medium (object detection)", "segmentation preview"],
-                    value="segmentation preview"
                 )
             with gr.Column():
                 image_processing_submit_button = gr.Button("Submit", value="primary")
@@ -214,6 +177,8 @@ with gr.Blocks() as demo:
                 image_processing_checkpoint_dropdown
             ],
             outputs=image_processing_output_image,
         )
         image_processing_submit_button.click(
@@ -254,8 +219,8 @@ with gr.Blocks() as demo:
                 )
                 video_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
-                    choices=["nano (object detection)", "small (object detection)", "medium (object detection)", "segmentation preview"],
-                    value="segmentation preview"
                 )
             with gr.Column():
                 video_processing_submit_button = gr.Button("Submit", value="primary")
@@ -269,7 +234,8 @@ with gr.Blocks() as demo:
                 video_processing_resolution_slider,
                 video_processing_checkpoint_dropdown
             ],
-            outputs=video_processing_output_video
         )
         video_processing_submit_button.click(

 import numpy as np
 import supervision as sv
 from PIL import Image
+from rfdetr import RFDETRBase, RFDETRLarge
 from rfdetr.detr import RFDETR
 from rfdetr.util.coco_classes import COCO_CLASSES
 MARKDOWN = """
 # RF-DETR 🔥
 [`[code]`](https://github.com/roboflow/rf-detr)
 [`[blog]`](https://blog.roboflow.com/rf-detr)
 [`[notebook]`](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb)
 RF-DETR is a real-time, transformer-based object detection model architecture developed
 by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
 """
 IMAGE_PROCESSING_EXAMPLES = [
+    ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 728, "large"],
+    ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 728, "large"],
+    ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 560, "base"],
 ]
 VIDEO_PROCESSING_EXAMPLES = [
+    ["videos/people-walking.mp4", 0.3, 728, "large"],
+    ["videos/vehicles.mp4", 0.3, 728, "large"],
 ]
 COLOR = sv.ColorPalette.from_hex([
 def detect_and_annotate(
         model: RFDETR,
         image: ImageType,
+        confidence: float
 ) -> ImageType:
     detections = model.predict(image, threshold=confidence)
     text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh) - 0.2
     thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
     bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
     label_annotator = sv.LabelAnnotator(
         color=COLOR,
         for class_id, confidence
         in zip(detections.class_id, detections.confidence)
     ]
     annotated_image = image.copy()
     annotated_image = bbox_annotator.annotate(annotated_image, detections)
     annotated_image = label_annotator.annotate(annotated_image, detections, labels)
     return annotated_image
 def load_model(resolution: int, checkpoint: str) -> RFDETR:
+    if checkpoint == "base":
         return RFDETRBase(resolution=resolution)
+    elif checkpoint == "large":
         return RFDETRLarge(resolution=resolution)
     raise TypeError("Checkpoint must be a base or large.")
 def image_processing_inference(
         input_image: Image.Image,
         confidence: float,
         resolution: int,
         checkpoint: str
 ):
     model = load_model(resolution=resolution, checkpoint=checkpoint)
+    return detect_and_annotate(model=model, image=input_image, confidence=confidence)
 def video_processing_inference(
         confidence: float,
         resolution: int,
         checkpoint: str,
+        progress=gr.Progress(track_tqdm=True)
 ):
     model = load_model(resolution=resolution, checkpoint=checkpoint)
     name = generate_unique_name()
             annotated_frame = detect_and_annotate(
                 model=model,
                 image=frame,
+                confidence=confidence
             )
             annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
             sink.write_frame(annotated_frame)
                 )
                 image_processing_resolution_slider = gr.Slider(
                     label="Inference resolution",
+                    minimum=560,
+                    maximum=1120,
+                    step=56,
+                    value=728,
                 )
                 image_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
+                    choices=["base", "large"],
+                    value="base"
                 )
             with gr.Column():
                 image_processing_submit_button = gr.Button("Submit", value="primary")
                 image_processing_checkpoint_dropdown
             ],
             outputs=image_processing_output_image,
+            cache_examples=True,
+            run_on_click=True
         )
         image_processing_submit_button.click(
                 )
                 video_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
+                    choices=["base", "large"],
+                    value="base"
                 )
             with gr.Column():
                 video_processing_submit_button = gr.Button("Submit", value="primary")
                 video_processing_resolution_slider,
                 video_processing_checkpoint_dropdown
             ],
+            outputs=video_processing_output_video,
+            run_on_click=True
         )
         video_processing_submit_button.click(

requirements.txt CHANGED Viewed

@@ -1,5 +1,4 @@
 gradio
 spaces
-# rfdetr
-git+https://github.com/roboflow/rf-detr.git@1.3.0
 tqdm

 gradio
 spaces
+rfdetr
 tqdm