✨ [New] Add a classification head!
Browse files
- yolo/config/model/v9-c-cls.yaml +36 -0
- yolo/model/module.py +15 -0
- yolo/model/yolo.py +5 -2
yolo/config/model/v9-c-cls.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: v9-c-cls
|
| 2 |
+
|
| 3 |
+
anchor:
|
| 4 |
+
reg_max: 16
|
| 5 |
+
strides: [8, 16, 32]
|
| 6 |
+
|
| 7 |
+
model:
|
| 8 |
+
backbone:
|
| 9 |
+
- Conv:
|
| 10 |
+
args: {out_channels: 64, kernel_size: 3, stride: 2}
|
| 11 |
+
source: 0
|
| 12 |
+
- Conv:
|
| 13 |
+
args: {out_channels: 128, kernel_size: 3, stride: 2}
|
| 14 |
+
- RepNCSPELAN:
|
| 15 |
+
args: {out_channels: 256, part_channels: 128}
|
| 16 |
+
|
| 17 |
+
- ADown:
|
| 18 |
+
args: {out_channels: 256}
|
| 19 |
+
- RepNCSPELAN:
|
| 20 |
+
args: {out_channels: 512, part_channels: 256}
|
| 21 |
+
|
| 22 |
+
- ADown:
|
| 23 |
+
args: {out_channels: 512}
|
| 24 |
+
- RepNCSPELAN:
|
| 25 |
+
args: {out_channels: 512, part_channels: 512}
|
| 26 |
+
|
| 27 |
+
- ADown:
|
| 28 |
+
args: {out_channels: 512}
|
| 29 |
+
- RepNCSPELAN:
|
| 30 |
+
args: {out_channels: 512, part_channels: 512}
|
| 31 |
+
|
| 32 |
+
detection:
|
| 33 |
+
- Classification:
|
| 34 |
+
source: -1
|
| 35 |
+
tags: Main
|
| 36 |
+
output: True
|
yolo/model/module.py
CHANGED
|
@@ -130,6 +130,7 @@ class MultiheadDetection(nn.Module):
|
|
| 130 |
return [head(x) for x, head in zip(x_list, self.heads)]
|
| 131 |
|
| 132 |
|
|
|
|
| 133 |
class Segmentation(nn.Module):
|
| 134 |
def __init__(self, in_channels: Tuple[int], num_maskes: int):
|
| 135 |
super().__init__()
|
|
@@ -176,6 +177,20 @@ class Anchor2Vec(nn.Module):
|
|
| 176 |
return anchor_x, vector_x
|
| 177 |
|
| 178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
# ----------- Backbone Class ----------- #
|
| 180 |
class RepConv(nn.Module):
|
| 181 |
"""A convolutional block that combines two convolution layers (kernel and point-wise)."""
|
|
|
|
| 130 |
return [head(x) for x, head in zip(x_list, self.heads)]
|
| 131 |
|
| 132 |
|
| 133 |
+
# ----------- Segmentation Class ----------- #
|
| 134 |
class Segmentation(nn.Module):
|
| 135 |
def __init__(self, in_channels: Tuple[int], num_maskes: int):
|
| 136 |
super().__init__()
|
|
|
|
| 177 |
return anchor_x, vector_x
|
| 178 |
|
| 179 |
|
| 180 |
+
# ----------- Classification Class ----------- #
|
| 181 |
+
class Classification(nn.Module):
    """Image-classification head: 1x1 conv neck -> global average pool -> linear layer.

    Args:
        in_channel: Number of channels of the single feature map fed to the head.
        num_classes: Number of output logits produced by the final linear layer.
        neck_channels: Width of the intermediate 1x1 convolution (keyword-only).
        **head_args: Extra keyword arguments; accepted and ignored. The model
            builder sets detection-oriented keys such as ``reg_max`` on every
            head type, so they must not raise here.
    """

    def __init__(self, in_channel: int, num_classes: int, *, neck_channels: int = 1024, **head_args):
        super().__init__()
        # Attribute names are kept unchanged so existing state_dict keys still load.
        self.conv = Conv(in_channel, neck_channels, 1)
        # Output size 1 collapses each channel's spatial map to a single value.
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.head = nn.Linear(neck_channels, num_classes)

    def forward(self, x: Tensor) -> Tensor:
        """Return the class logits for a feature map.

        Assumes a batched ``(N, in_channel, H, W)`` input -- TODO confirm against
        the caller. The result is a single tensor of shape ``(N, num_classes)``,
        not a tuple.
        """
        x = self.pool(self.conv(x))
        # Flatten everything after the batch dimension: (N, C, 1, 1) -> (N, C).
        x = self.head(x.flatten(start_dim=1))
        return x
|
| 192 |
+
|
| 193 |
+
|
| 194 |
# ----------- Backbone Class ----------- #
|
| 195 |
class RepConv(nn.Module):
|
| 196 |
"""A convolutional block that combines two convolution layers (kernel and point-wise)."""
|
yolo/model/yolo.py
CHANGED
|
@@ -46,8 +46,11 @@ class YOLO(nn.Module):
|
|
| 46 |
# Find in channels
|
| 47 |
if any(module in layer_type for module in ["Conv", "ELAN", "ADown", "AConv", "CBLinear"]):
|
| 48 |
layer_args["in_channels"] = output_dim[source]
|
| 49 |
-
if
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
| 51 |
layer_args["num_classes"] = self.num_classes
|
| 52 |
layer_args["reg_max"] = self.reg_max
|
| 53 |
|
|
|
|
| 46 |
# Find in channels
|
| 47 |
if any(module in layer_type for module in ["Conv", "ELAN", "ADown", "AConv", "CBLinear"]):
|
| 48 |
layer_args["in_channels"] = output_dim[source]
|
| 49 |
+
if any(module in layer_type for module in ["Detection", "Segmentation", "Classification"]):
|
| 50 |
+
if isinstance(source, list):
|
| 51 |
+
layer_args["in_channels"] = [output_dim[idx] for idx in source]
|
| 52 |
+
else:
|
| 53 |
+
layer_args["in_channel"] = output_dim[source]
|
| 54 |
layer_args["num_classes"] = self.num_classes
|
| 55 |
layer_args["reg_max"] = self.reg_max
|
| 56 |
|