Spaces:

henry000
/

YOLO

Running

henry000 committed on Aug 23, 2024

Commit

d44dbc0

1 Parent(s): 8228669

✨ [New] Add a classification head!

Files changed (3) hide show

yolo/config/model/v9-c-cls.yaml ADDED Viewed

+# Model definition "v9-c-cls": a stack of backbone layers followed by a single
+# Classification layer whose output is marked as a model output.
+name: v9-c-cls
+# NOTE(review): the anchor settings look detection-specific; it is unclear from
+# this file whether the Classification layer uses them — confirm.
+anchor:
+  reg_max: 16
+  strides: [8, 16, 32]
+model:
+  backbone:
+    # NOTE(review): `source: 0` presumably selects the raw network input —
+    # confirm against the model builder.
+    - Conv:
+        args: {out_channels: 64, kernel_size: 3, stride: 2}
+        source: 0
+    - Conv:
+        args: {out_channels: 128, kernel_size: 3, stride: 2}
+    - RepNCSPELAN:
+        args: {out_channels: 256, part_channels: 128}
+    - ADown:
+        args: {out_channels: 256}
+    - RepNCSPELAN:
+        args: {out_channels: 512, part_channels: 256}
+    - ADown:
+        args: {out_channels: 512}
+    - RepNCSPELAN:
+        args: {out_channels: 512, part_channels: 512}
+    - ADown:
+        args: {out_channels: 512}
+    - RepNCSPELAN:
+        args: {out_channels: 512, part_channels: 512}
+  detection:
+    # NOTE(review): `source: -1` presumably means "the previous layer's output",
+    # i.e. the last backbone block — confirm against the model builder.
+    - Classification:
+        source: -1
+        tags: Main
+        output: True

yolo/model/module.py CHANGED Viewed

@@ -130,6 +130,7 @@ class MultiheadDetection(nn.Module):
         return [head(x) for x, head in zip(x_list, self.heads)]
 class Segmentation(nn.Module):
     def __init__(self, in_channels: Tuple[int], num_maskes: int):
         super().__init__()
@@ -176,6 +177,20 @@ class Anchor2Vec(nn.Module):
         return anchor_x, vector_x
 # ----------- Backbone Class ----------- #
 class RepConv(nn.Module):
     """A convolutional block that combines two convolution layers (kernel and point-wise)."""

         return [head(x) for x, head in zip(x_list, self.heads)]
+# ----------- Segmentation Class ----------- #
 class Segmentation(nn.Module):
     def __init__(self, in_channels: Tuple[int], num_maskes: int):
         super().__init__()
         return anchor_x, vector_x
+# ----------- Classification Class ----------- #
+class Classification(nn.Module):
+    def __init__(self, in_channel: int, num_classes: int, *, neck_channels=1024, **head_args):
+        super().__init__()
+        self.conv = Conv(in_channel, neck_channels, 1)
+        self.pool = nn.AdaptiveAvgPool2d(1)
+        self.head = nn.Linear(neck_channels, num_classes)
+    def forward(self, x: Tensor) -> Tuple[Tensor]:
+        x = self.pool(self.conv(x))
+        x = self.head(x.flatten(start_dim=1))
+        return x
 # ----------- Backbone Class ----------- #
 class RepConv(nn.Module):
     """A convolutional block that combines two convolution layers (kernel and point-wise)."""

yolo/model/yolo.py CHANGED Viewed

@@ -46,8 +46,11 @@ class YOLO(nn.Module):
                 # Find in channels
                 if any(module in layer_type for module in ["Conv", "ELAN", "ADown", "AConv", "CBLinear"]):
                     layer_args["in_channels"] = output_dim[source]
-                if "Detection" in layer_type or "Segmentation" in layer_type:
-                    layer_args["in_channels"] = [output_dim[idx] for idx in source]
                     layer_args["num_classes"] = self.num_classes
                     layer_args["reg_max"] = self.reg_max

                 # Find in channels
                 if any(module in layer_type for module in ["Conv", "ELAN", "ADown", "AConv", "CBLinear"]):
                     layer_args["in_channels"] = output_dim[source]
+                if any(module in layer_type for module in ["Detection", "Segmentation", "Classification"]):
+                    if isinstance(source, list):
+                        layer_args["in_channels"] = [output_dim[idx] for idx in source]
+                    else:
+                        layer_args["in_channel"] = output_dim[source]
                     layer_args["num_classes"] = self.num_classes
                     layer_args["reg_max"] = self.reg_max