Spaces:

henry000
/

YOLO

Running

App Files Files Community

henry000 committed on Jun 9, 2024

Commit

d1aee89

1 Parent(s): 78e3679

✨ [New] v9-s, v9-m model! new model arch& weight

Browse files

Files changed (3) hide show

yolo/config/model/v9-m.yaml +133 -0
yolo/config/model/v9-s.yaml +134 -0
yolo/model/module.py +45 -20

yolo/config/model/v9-m.yaml ADDED Viewed

	@@ -0,0 +1,133 @@

+anchor:
+  reg_max: 16
+model:
+  backbone:
+    - Conv:
+        args: {out_channels: 32, kernel_size: 3, stride: 2}
+        source: 0
+    - Conv:
+        args: {out_channels: 64, kernel_size: 3, stride: 2}
+    - RepNCSPELAN:
+        args: {out_channels: 128, part_channels: 128}
+    - AConv:
+        args: {out_channels: 240}
+    - RepNCSPELAN:
+        args: {out_channels: 240, part_channels: 240}
+        tags: B3
+    - AConv:
+        args: {out_channels: 360}
+    - RepNCSPELAN:
+        args: {out_channels: 360, part_channels: 360}
+        tags: B4
+    - AConv:
+        args: {out_channels: 480}
+    - RepNCSPELAN:
+        args: {out_channels: 480, part_channels: 480}
+        tags: B5
+  neck:
+    - SPPELAN:
+        args: {out_channels: 480}
+        tags: N3
+    - UpSample:
+        args: {scale_factor: 2, mode: nearest}
+    - Concat:
+        source: [-1, B4]
+    - RepNCSPELAN:
+        args: {out_channels: 360, part_channels: 360}
+        tags: N4
+    - UpSample:
+        args: {scale_factor: 2, mode: nearest}
+    - Concat:
+        source: [-1, B3]
+  head:
+    - RepNCSPELAN:
+        args: {out_channels: 240, part_channels: 240}
+        tags: P3
+    - AConv:
+        args: {out_channels: 184}
+    - Concat:
+        source: [-1, N4]
+    - RepNCSPELAN:
+        args: {out_channels: 360, part_channels: 360}
+        tags: P4
+    - AConv:
+        args: {out_channels: 240}
+    - Concat:
+        source: [-1, N3]
+    - RepNCSPELAN:
+        args: {out_channels: 480, part_channels: 480}
+        tags: P5
+  detection:
+    - MultiheadDetection:
+        source: [P3, P4, P5]
+        tags: Main
+        args:
+            reg_max: ${model.anchor.reg_max}
+        output: True
+  auxiliary:
+    - CBLinear:
+        source: B3
+        args: {out_channels: [240]}
+        tags: R3
+    - CBLinear:
+        source: B4
+        args: {out_channels: [240, 360]}
+        tags: R4
+    - CBLinear:
+        source: B5
+        args: {out_channels: [240, 360, 480]}
+        tags: R5
+    - Conv:
+        args: {out_channels: 32, kernel_size: 3, stride: 2}
+        source: 0
+    - Conv:
+        args: {out_channels: 64, kernel_size: 3, stride: 2}
+    - RepNCSPELAN:
+        args: {out_channels: 128, part_channels: 128}
+    - AConv:
+        args: {out_channels: 240}
+    - CBFuse:
+        source: [R3, R4, R5, -1]
+        args: {index: [0, 0, 0]}
+    - RepNCSPELAN:
+        args: {out_channels: 240, part_channels: 240}
+        tags: A3
+    - AConv:
+        args: {out_channels: 360}
+    - CBFuse:
+        source: [R4, R5, -1]
+        args: {index: [1, 1]}
+    - RepNCSPELAN:
+        args: {out_channels: 360, part_channels: 360}
+        tags: A4
+    - AConv:
+        args: {out_channels: 480}
+    - CBFuse:
+        source: [R5, -1]
+        args: {index: [2]}
+    - RepNCSPELAN:
+        args: {out_channels: 480, part_channels: 480}
+        tags: A5
+    - MultiheadDetection:
+        source: [A3, A4, A5]
+        tags: AUX
+        args:
+            reg_max: ${model.anchor.reg_max}
+        output: True

yolo/config/model/v9-s.yaml ADDED Viewed

	@@ -0,0 +1,134 @@

+anchor:
+  reg_max: 16
+model:
+  backbone:
+    - Conv:
+        args: {out_channels: 32, kernel_size: 3, stride: 2}
+        source: 0
+    - Conv:
+        args: {out_channels: 64, kernel_size: 3, stride: 2}
+    - ELAN:
+        args: {out_channels: 64, part_channels: 64}
+    - AConv:
+        args: {out_channels: 128}
+    - RepNCSPELAN:
+        args:
+            out_channels: 128
+            part_channels: 128
+            csp_args: {repeat_num: 3}
+        tags: B3 # 18
+    - AConv:
+        args: {out_channels: 192}
+    - RepNCSPELAN:
+        args:
+            out_channels: 192
+            part_channels: 192
+            csp_args: {repeat_num: 3}
+        tags: B4
+    - AConv:
+        args: {out_channels: 256}
+    - RepNCSPELAN:
+        args:
+            out_channels: 256
+            part_channels: 256
+            csp_args: {repeat_num: 3}
+        tags: B5
+  neck:
+    - SPPELAN:
+        args: {out_channels: 256}
+        tags: N3
+    - UpSample:
+        args: {scale_factor: 2, mode: nearest}
+    - Concat:
+        source: [-1, B4]
+    - RepNCSPELAN:
+        args:
+            out_channels: 192
+            part_channels: 192
+            csp_args: {repeat_num: 3}
+        tags: N4
+    - UpSample:
+        args: {scale_factor: 2, mode: nearest}
+    - Concat:
+        source: [-1, B3]
+    - RepNCSPELAN:
+        args:
+            out_channels: 128
+            part_channels: 128
+            csp_args: {repeat_num: 3}
+        tags: P3
+    - AConv:
+        args: {out_channels: 96}
+    - Concat:
+        source: [-1, N4]
+    - RepNCSPELAN:
+        args:
+            out_channels: 192
+            part_channels: 192
+            csp_args: {repeat_num: 3}
+        tags: P4
+    - AConv:
+        args: {out_channels: 128}
+    - Concat:
+        source: [-1, N3]
+    - RepNCSPELAN:
+        args:
+            out_channels: 256
+            part_channels: 256
+            csp_args: {repeat_num: 3}
+        tags: P5
+  detection:
+    - MultiheadDetection:
+        source: [P3, P4, P5]
+        tags: Main
+        args:
+            reg_max: ${model.anchor.reg_max}
+        output: True
+  head:
+    - SPPELAN:
+        source: B5
+        args: {out_channels: 256}
+        tags: A5
+    - UpSample:
+        args: {scale_factor: 2, mode: nearest}
+    - Concat:
+        source: [-1, B4]
+    - RepNCSPELAN:
+        args:
+            out_channels: 192
+            part_channels: 192
+            csp_args: {repeat_num: 3}
+        tags: A4
+    - UpSample:
+        args: {scale_factor: 2, mode: nearest}
+    - Concat:
+        source: [-1, B3]
+    - RepNCSPELAN:
+        args:
+            out_channels: 128
+            part_channels: 128
+            csp_args: {repeat_num: 3}
+        tags: A3
+    - MultiheadDetection:
+        source: [A3, A4, A5]
+        tags: AUX
+        args:
+            reg_max: ${model.anchor.reg_max}
+        output: True

yolo/model/module.py CHANGED Viewed

@@ -192,6 +192,36 @@ class RepNCSP(nn.Module):
         return self.conv3(torch.cat((x1, x2), dim=1))
 class RepNCSPELAN(nn.Module):
     """RepNCSPELAN block combining RepNCSP blocks with ELAN structure."""
@@ -230,6 +260,21 @@ class RepNCSPELAN(nn.Module):
         return x5
 class ADown(nn.Module):
     """Downsampling module combining average and max pooling with convolution for feature reduction."""
@@ -498,26 +543,6 @@ class CSPDark(nn.Module):
         return self.cv2(torch.cat((self.cb(y[0]), y[1]), 1))
-# ELAN
-class ELAN(nn.Module):
-    # ELAN
-    def __init__(self, in_channels, out_channels, med_channels, elan_repeat=2, cb_repeat=2, ratio=1.0):
-        super().__init__()
-        h_channels = med_channels // 2
-        self.cv1 = Conv(in_channels, med_channels, 1, 1)
-        self.cb = nn.ModuleList(ConvBlock(h_channels, repeat=cb_repeat, ratio=ratio) for _ in range(elan_repeat))
-        self.cv2 = Conv((2 + elan_repeat) * h_channels, out_channels, 1, 1)
-    def forward(self, x):
-        y = list(self.cv1(x).chunk(2, 1))
-        y.extend((m(y[-1])) for m in self.cb)
-        return self.cv2(torch.cat(y, 1))
 class CSPELAN(nn.Module):
     # ELAN
     def __init__(self, in_channels, out_channels, med_channels, elan_repeat=2, cb_repeat=2, ratio=1.0):

         return self.conv3(torch.cat((x1, x2), dim=1))
+class ELAN(nn.Module):
+    """ELAN block: project, split in two, refine one half with two stacked convs, then fuse every branch.
+
+    Args:
+        in_channels: First argument of the projecting ``Conv`` (presumably the input channel count).
+        out_channels: Second argument of the final fusing ``Conv`` (presumably the output channel count).
+        part_channels: Width of the projection whose output is chunked into two halves.
+        process_channels: Width of the two refinement convolutions; defaults to ``part_channels // 2``.
+        **kwargs: Forwarded unchanged to every ``Conv`` constructed here.
+
+    NOTE(review): ``Conv`` is defined elsewhere in this module; the positional ``1`` / ``3`` are
+    presumably kernel sizes -- confirm against its signature.
+    """
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        part_channels: int,
+        *,
+        process_channels: Optional[int] = None,
+        **kwargs,
+    ):
+        super().__init__()
+        # Fall back to half of the projected width when no explicit refinement width is given.
+        if process_channels is None:
+            process_channels = part_channels // 2
+        self.conv1 = Conv(in_channels, part_channels, 1, **kwargs)
+        # conv2 consumes one chunk of conv1's output, hence ``part_channels // 2``.
+        self.conv2 = Conv(part_channels // 2, process_channels, 3, padding=1, **kwargs)
+        self.conv3 = Conv(process_channels, process_channels, 3, padding=1, **kwargs)
+        # Fusion input = both chunks (part_channels in total) + the outputs of conv2 and conv3.
+        self.conv4 = Conv(part_channels + 2 * process_channels, out_channels, 1, **kwargs)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # Split the projected features into two chunks along dim 1.
+        x1, x2 = self.conv1(x).chunk(2, 1)
+        # Only the second chunk is refined; each stage's output is kept for the final concat.
+        x3 = self.conv2(x2)
+        x4 = self.conv3(x3)
+        # Concatenate all four tensors along dim 1 and fuse them with conv4.
+        x5 = self.conv4(torch.cat([x1, x2, x3, x4], dim=1))
+        return x5
 class RepNCSPELAN(nn.Module):
     """RepNCSPELAN block combining RepNCSP blocks with ELAN structure."""
         return x5
+class AConv(nn.Module):
+    """Downsampling module that applies an average pool followed by a stride-2 convolution.
+
+    Args:
+        in_channels: First argument of the ``Conv`` (presumably the input channel count).
+        out_channels: Second argument of the ``Conv`` (presumably the output channel count).
+    """
+    def __init__(self, in_channels: int, out_channels: int):
+        super().__init__()
+        # Settings for the convolution that follows the pooling step.
+        mid_layer = {"kernel_size": 3, "stride": 2}
+        # NOTE(review): ``Pool`` is defined elsewhere; "avg" presumably selects average pooling.
+        self.avg_pool = Pool("avg", kernel_size=2, stride=1)
+        self.conv = Conv(in_channels, out_channels, **mid_layer)
+    def forward(self, x: Tensor) -> Tensor:
+        # Pool first (stride 1), then let the stride-2 convolution do the downsampling.
+        x = self.avg_pool(x)
+        x = self.conv(x)
+        return x
 class ADown(nn.Module):
     """Downsampling module combining average and max pooling with convolution for feature reduction."""
         return self.cv2(torch.cat((self.cb(y[0]), y[1]), 1))
 class CSPELAN(nn.Module):
     # ELAN
     def __init__(self, in_channels, out_channels, med_channels, elan_repeat=2, cb_repeat=2, ratio=1.0):