Spaces:

henry000
/

YOLO

Running

App Files Files Community

henry000 committed on May 23, 2024

Commit

2ae492a

1 Parent(s): 849d290

✨ [Add] yolov9 loss function, align to origin v9

Browse files

Files changed (5) hide show

config/config.py +17 -1
config/hyper/default.yaml +16 -0
config/model/v7-base.yaml +4 -0
tools/bbox_helper.py +251 -0
utils/loss.py +182 -0

config/config.py CHANGED Viewed

@@ -2,9 +2,15 @@ from dataclasses import dataclass
 from typing import Dict, List, Union
 @dataclass
 class Model:
-    anchor: List[List[int]]
     model: Dict[str, List[Dict[str, Union[Dict, List, int]]]]
@@ -20,6 +26,8 @@ class DataLoaderConfig:
     shuffle: bool
     num_workers: int
     pin_memory: bool
 @dataclass
@@ -52,11 +60,19 @@ class EMAConfig:
     decay: float
 @dataclass
 class TrainConfig:
     optimizer: OptimizerConfig
     scheduler: SchedulerConfig
     ema: EMAConfig
 @dataclass

 from typing import Dict, List, Union
@dataclass
class AnchorConfig:
    """Anchor-related settings of a model (the ``anchor:`` section of a model YAML)."""

    # Number of bins per box side in the distribution regression head
    # (the loss splits ``reg_max * 4`` channels off each prediction).
    reg_max: int
    # Down-sampling stride of every detection level; one anchor grid is built per stride.
    strides: List[int]
 @dataclass
 class Model:
+    anchor: AnchorConfig
     model: Dict[str, List[Dict[str, Union[Dict, List, int]]]]
     shuffle: bool
     num_workers: int
     pin_memory: bool
+    image_size: List[int]
+    class_num: int
 @dataclass
     decay: float
@dataclass
class MatcherConfig:
    """Settings of the target/anchor matcher (the ``train.matcher`` section of the hyper YAML)."""

    # Name of the IoU variant passed to ``calculate_iou`` (e.g. "CIoU"; lower-cased there).
    iou: str
    # How many best-scoring anchors are kept for each ground-truth box.
    topk: int
    # Exponents applied to the IoU and class-score terms of the matching metric.
    # The shipped defaults are ``iou: 6.0`` and ``cls: 0.5``, so the values are floats, not ints.
    factor: Dict[str, float]
 @dataclass
 class TrainConfig:
     optimizer: OptimizerConfig
     scheduler: SchedulerConfig
     ema: EMAConfig
+    matcher: MatcherConfig
 @dataclass

config/hyper/default.yaml CHANGED Viewed

@@ -3,12 +3,28 @@ data:
   shuffle: True
   num_workers: 4
   pin_memory: True
 train:
   optimizer:
     type: Adam
     args:
       lr: 0.001
       weight_decay: 0.0001
   scheduler:
     type: StepLR
     args:

   shuffle: True
   num_workers: 4
   pin_memory: True
+  class_num: 80
+  image_size: [640, 640]
 train:
   optimizer:
     type: Adam
     args:
       lr: 0.001
       weight_decay: 0.0001
+  loss:
+    BCELoss:
+      args:
+    BoxLoss:
+      args:
+      alpha: 0.1
+    DFLoss:
+      args:
+  matcher:
+    iou: CIoU
+    topk: 10
+    factor:
+      iou: 6.0
+      cls: 0.5
   scheduler:
     type: StepLR
     args:

config/model/v7-base.yaml CHANGED Viewed

@@ -1,5 +1,9 @@
 nc: 80
 model:
   backbone:
   - Conv:

 nc: 80
+anchor:
+  reg_max: 16
+  strides: [8, 16, 32]
 model:
   backbone:
   - Conv:

tools/bbox_helper.py ADDED Viewed

	@@ -0,0 +1,251 @@

+import math
+from typing import List, Tuple
+import torch
+import torch.nn.functional as F
+from torch import Tensor
+from config.config import MatcherConfig
def calculate_iou(bbox1, bbox2, metrics="iou") -> Tensor:
    """Compute the pairwise IoU, DIoU or CIoU between two sets of boxes.

    Boxes are given as ``[x_min, y_min, x_max, y_max]`` in the last dimension.

    Args:
        bbox1: Boxes of shape ``(A, 4)`` or ``(BZ, A, 4)``.
        bbox2: Boxes of shape ``(B, 4)`` or ``(BZ, B, 4)``.
        metrics: ``"iou"``, ``"diou"`` or ``"ciou"`` (case-insensitive). Any other
            value falls through to CIoU.

    Returns:
        Tensor of shape ``(A, B)`` or ``(BZ, A, B)`` holding the requested metric,
        in the dtype of ``bbox1``. If the inputs are neither both 2-D nor both
        3-D, no axes are inserted and the inputs are broadcast as they are.
    """
    metrics = metrics.lower()
    EPS = 1e-9
    dtype = bbox1.dtype

    # Do the arithmetic in float32 so that half-precision inputs do not lose accuracy.
    bbox1 = bbox1.to(torch.float32)
    bbox2 = bbox2.to(torch.float32)

    # Insert broadcast axes so every box of bbox1 meets every box of bbox2.
    if bbox1.ndim == 2 and bbox2.ndim == 2:
        bbox1 = bbox1.unsqueeze(1)  # (Ax4) -> (Ax1x4)
        bbox2 = bbox2.unsqueeze(0)  # (Bx4) -> (1xBx4)
    elif bbox1.ndim == 3 and bbox2.ndim == 3:
        bbox1 = bbox1.unsqueeze(2)  # (BZxAx4) -> (BZxAx1x4)
        bbox2 = bbox2.unsqueeze(1)  # (BZxBx4) -> (BZx1xBx4)

    # Intersection rectangle
    xmin_inter = torch.max(bbox1[..., 0], bbox2[..., 0])
    ymin_inter = torch.max(bbox1[..., 1], bbox2[..., 1])
    xmax_inter = torch.min(bbox1[..., 2], bbox2[..., 2])
    ymax_inter = torch.min(bbox1[..., 3], bbox2[..., 3])

    # Clamping at zero makes disjoint boxes contribute no intersection area.
    intersection_area = torch.clamp(xmax_inter - xmin_inter, min=0) * torch.clamp(ymax_inter - ymin_inter, min=0)

    # Individual areas and union
    area_bbox1 = (bbox1[..., 2] - bbox1[..., 0]) * (bbox1[..., 3] - bbox1[..., 1])
    area_bbox2 = (bbox2[..., 2] - bbox2[..., 0]) * (bbox2[..., 3] - bbox2[..., 1])
    union_area = area_bbox1 + area_bbox2 - intersection_area

    iou = intersection_area / (union_area + EPS)
    if metrics == "iou":
        # Cast back so every metric returns the caller's dtype, not only CIoU.
        return iou.to(dtype)

    # Squared distance between the two box centres
    cx1 = (bbox1[..., 2] + bbox1[..., 0]) / 2
    cy1 = (bbox1[..., 3] + bbox1[..., 1]) / 2
    cx2 = (bbox2[..., 2] + bbox2[..., 0]) / 2
    cy2 = (bbox2[..., 3] + bbox2[..., 1]) / 2
    cent_dis = (cx1 - cx2) ** 2 + (cy1 - cy2) ** 2

    # Squared diagonal of the smallest box enclosing both boxes
    c_x = torch.max(bbox1[..., 2], bbox2[..., 2]) - torch.min(bbox1[..., 0], bbox2[..., 0])
    c_y = torch.max(bbox1[..., 3], bbox2[..., 3]) - torch.min(bbox1[..., 1], bbox2[..., 1])
    diag_dis = c_x**2 + c_y**2 + EPS

    diou = iou - (cent_dis / diag_dis)
    if metrics == "diou":
        return diou.to(dtype)

    # Aspect-ratio penalty term of CIoU
    arctan = torch.atan((bbox1[..., 2] - bbox1[..., 0]) / (bbox1[..., 3] - bbox1[..., 1] + EPS)) - torch.atan(
        (bbox2[..., 2] - bbox2[..., 0]) / (bbox2[..., 3] - bbox2[..., 1] + EPS)
    )
    v = (4 / (math.pi**2)) * (arctan**2)
    alpha = v / (v - iou + 1 + EPS)

    ciou = diou - alpha * v
    return ciou.to(dtype)
def transform_bbox(bbox: Tensor, indicator="xywh -> xyxy"):
    """Convert boxes between coordinate formats.

    Supported formats (read from the last dimension of ``bbox``):
        * ``xyxy``  - ``[x_min, y_min, x_max, y_max]``
        * ``xywh``  - ``[x_min, y_min, width, height]``
        * ``xycwh`` - ``[x_center, y_center, width, height]``

    Args:
        bbox: Tensor whose last dimension holds the four box values.
        indicator: ``"<input format> -> <output format>"``; spaces are ignored.

    Returns:
        A tensor with the converted boxes, cast back to the dtype of ``bbox``.

    Raises:
        ValueError: If ``indicator`` is not exactly two supported formats
            separated by ``->``.
    """
    data_type = bbox.dtype
    supported = ("xyxy", "xywh", "xycwh")

    # Validate the number of parts first so a malformed indicator reports the
    # intended message instead of an opaque tuple-unpacking error.
    formats = indicator.replace(" ", "").split("->")
    if len(formats) != 2 or formats[0] not in supported or formats[1] not in supported:
        raise ValueError("Invalid input or output format")
    in_type, out_type = formats

    # Normalise every input format to corner coordinates.
    if in_type == "xywh":
        x_min = bbox[..., 0]
        y_min = bbox[..., 1]
        x_max = bbox[..., 0] + bbox[..., 2]
        y_max = bbox[..., 1] + bbox[..., 3]
    elif in_type == "xyxy":
        x_min = bbox[..., 0]
        y_min = bbox[..., 1]
        x_max = bbox[..., 2]
        y_max = bbox[..., 3]
    else:  # "xycwh"
        x_min = bbox[..., 0] - bbox[..., 2] / 2
        y_min = bbox[..., 1] - bbox[..., 3] / 2
        x_max = bbox[..., 0] + bbox[..., 2] / 2
        y_max = bbox[..., 1] + bbox[..., 3] / 2

    # Re-encode the corners in the requested output format.
    if out_type == "xywh":
        bbox = torch.stack([x_min, y_min, x_max - x_min, y_max - y_min], dim=-1)
    elif out_type == "xyxy":
        bbox = torch.stack([x_min, y_min, x_max, y_max], dim=-1)
    else:  # "xycwh"
        bbox = torch.stack([(x_min + x_max) / 2, (y_min + y_max) / 2, x_max - x_min, y_max - y_min], dim=-1)

    return bbox.to(dtype=data_type)
def make_anchor(image_size: List[int], strides: List[int], device):
    """Build the anchor points and their strides for every detection level.

    For each stride a grid of cell centres is generated over the image and
    flattened row by row (all columns of the first row, then the second row,
    ...), which is the order produced by flattening a ``(h, w)`` feature map.

    Args:
        image_size: ``[width, height]`` of the input image in pixels.
        strides: Down-sampling stride of every detection level.
        device: Device on which the tensors are created.

    Returns:
        all_anchors: ``(num_anchors, 2)`` tensor of ``(x, y)`` anchor centres.
        all_scalers: ``(num_anchors,)`` tensor with the stride of each anchor.
    """
    W, H = image_size
    anchors = []
    scaler = []
    for stride in strides:
        shift = stride // 2
        w = torch.arange(0, W, stride, device=device) + shift
        h = torch.arange(0, H, stride, device=device) + shift
        # Rows (h) must be the slow axis and columns (w) the fast axis so the
        # flattened order also matches the feature map for non-square images.
        anchor_h, anchor_w = torch.meshgrid(h, w, indexing="ij")
        anchor = torch.stack([anchor_w.flatten(), anchor_h.flatten()], dim=-1)
        anchors.append(anchor)
        # Size the scaler from the anchors actually generated so both tensors
        # always have the same length, whatever the image size and stride.
        scaler.append(torch.full((anchor.shape[0],), stride, device=device))
    all_anchors = torch.cat(anchors, dim=0)
    all_scalers = torch.cat(scaler, dim=0)
    return all_anchors, all_scalers
class BoxMatcher:
    """Assign ground-truth boxes to anchor predictions.

    Every (target, anchor) pair gets a suitability score built from the IoU
    between the target box and the predicted box, and from the predicted
    probability of the target's class. Each target keeps its top-k anchors and
    each anchor is finally assigned to at most one target.
    """

    def __init__(self, cfg: "MatcherConfig", class_num: int, anchors: Tensor) -> None:
        """
        Args:
            cfg: Matcher settings. It is iterated by key and indexed, so it must
                behave like a mapping (presumably an OmegaConf node - confirm).
                The methods below read ``iou``, ``topk`` and ``factor`` from it.
            class_num: Number of object classes.
            anchors [anchors x 2]: The (x, y) position of every anchor point.
        """
        self.class_num = class_num
        self.anchors = anchors
        # Copy every matcher setting onto the instance as an attribute.
        for attr_name in cfg:
            setattr(self, attr_name, cfg[attr_name])

    def get_valid_matrix(self, target_bbox: Tensor):
        """
        Get a boolean mask that indicates whether each anchor point lies strictly inside each target box.
        Args:
            target_bbox [batch x targets x 4]: The bounding box of each target as [x1, y1, x2, y2].
        Returns:
            [batch x targets x anchors]: True where the anchor point is inside the target box.
        """
        Xmin, Ymin, Xmax, Ymax = target_bbox[:, :, None].unbind(3)
        anchors = self.anchors[None, None]  # add leading batch and target axes
        anchors_x, anchors_y = anchors.unbind(dim=3)
        target_in_x = (Xmin < anchors_x) & (anchors_x < Xmax)
        target_in_y = (Ymin < anchors_y) & (anchors_y < Ymax)
        target_on_anchor = target_in_x & target_in_y
        return target_on_anchor

    def get_cls_matrix(self, predict_cls: Tensor, target_cls: Tensor) -> Tensor:
        """
        Get the predicted probability of each target's class at every anchor.
        Args:
            predict_cls [batch x anchors x class]: The predicted probabilities for each class at each anchor.
            target_cls [batch x targets x 1]: The class index of each target.
        Returns:
            [batch x targets x anchors]: The probabilities from `predict_cls` selected by the class indices in `target_cls`.
        """
        predict_cls = predict_cls.transpose(1, 2)  # -> batch x class x anchors
        # Expand to the real number of anchors instead of a hard-coded 8400.
        target_cls = target_cls.expand(-1, -1, predict_cls.size(2))
        cls_probabilities = torch.gather(predict_cls, 1, target_cls)
        return cls_probabilities

    def get_iou_matrix(self, predict_bbox, target_bbox) -> Tensor:
        """
        Get the IoU between each target bounding box and each predicted bounding box.
        Args:
            predict_bbox [batch x predicts x 4]: Bounding box with [x1, y1, x2, y2].
            target_bbox [batch x targets x 4]: Bounding box with [x1, y1, x2, y2].
        Returns:
            [batch x targets x predicts]: The IoU scores (metric chosen by `self.iou`), clamped to [0, 1].
        """
        return calculate_iou(target_bbox, predict_bbox, self.iou).clamp(0, 1)

    def filter_topk(self, target_matrix: Tensor, topk: int = 10) -> Tuple[Tensor, Tensor]:
        """
        Keep, for each target, only its top-k most suitable anchors.
        Args:
            target_matrix [batch x targets x anchors]: The suitability of each target-anchor pair.
            topk (int, optional): Number of anchors to retain per target. It is capped at the
                number of anchors so small anchor sets do not raise.
        Returns:
            topk_targets [batch x targets x anchors]: `target_matrix` with everything but the top-k scores zeroed.
            topk_masks [batch x targets x anchors]: A boolean mask of the retained, strictly positive scores.
        """
        k = min(topk, target_matrix.size(-1))
        values, indices = target_matrix.topk(k, dim=-1)
        topk_targets = torch.zeros_like(target_matrix)
        topk_targets.scatter_(dim=-1, index=indices, src=values)
        topk_masks = topk_targets > 0
        return topk_targets, topk_masks

    def filter_duplicates(self, target_matrix: Tensor):
        """
        Pick, for each anchor, the index of its most suitable target.
        Args:
            target_matrix [batch x targets x anchors]: The suitability of each target-anchor pair.
        Returns:
            unique_indices [batch x anchors x 1]: The index of the best target for each anchor.
        """
        unique_indices = target_matrix.argmax(dim=1)
        return unique_indices[..., None]

    def __call__(self, target: Tensor, predict: Tensor) -> Tuple[Tensor, Tensor]:
        """
        Match targets to anchors.

        1. For each anchor prediction, find the most suitable target.
        2. Select the class and box of that target.
        3. Normalize the class scores of the selected targets.

        Args:
            target [batch x targets x 5]: Class index followed by the box as [x1, y1, x2, y2].
            predict [batch x anchors x (class_num + 4)]: Class logits followed by the predicted box.
        Returns:
            align_targets [batch x anchors x (class_num + 4)]: Soft class scores and box of the target assigned to each anchor.
            valid_mask [batch x anchors]: True for anchors that received a target.
        """
        # Use explicit sizes: an integer chunk size only yields exactly two
        # pieces when class_num >= 4.
        predict_cls, predict_bbox = predict.split([self.class_num, 4], dim=-1)  # B x HW x C, B x HW x 4
        target_cls, target_bbox = target.split([1, 4], dim=-1)  # B x N x 1, B x N x 4
        target_cls = target_cls.long()

        # which anchor points fall inside which ground-truth box
        grid_mask = self.get_valid_matrix(target_bbox)

        # IoU between every ground-truth box and every predicted box
        iou_mat = self.get_iou_matrix(predict_bbox, target_bbox)

        # predicted probability of every ground-truth class at every anchor
        cls_mat = self.get_cls_matrix(predict_cls.sigmoid(), target_cls)

        # suitability = in-box mask * IoU^factor.iou * class-probability^factor.cls
        target_matrix = grid_mask * (iou_mat ** self.factor["iou"]) * (cls_mat ** self.factor["cls"])

        # keep the top-k anchors of every target
        topk_targets, topk_mask = self.filter_topk(target_matrix, topk=self.topk)

        # resolve anchors claimed by multiple targets: keep the best target
        unique_indices = self.filter_duplicates(topk_targets)

        # TODO: do we need grid_mask? Filter the valid ground truth
        valid_mask = (grid_mask.sum(dim=-2) * topk_mask.sum(dim=-2)).bool()

        align_bbox = torch.gather(target_bbox, 1, unique_indices.repeat(1, 1, 4))
        align_cls = torch.gather(target_cls, 1, unique_indices).squeeze(-1)
        align_cls = F.one_hot(align_cls, self.class_num)

        # normalize the class distribution: scale each target's scores so its
        # best anchor gets that target's maximum IoU
        max_target = target_matrix.amax(dim=-1, keepdim=True)
        max_iou = iou_mat.amax(dim=-1, keepdim=True)
        normalize_term = (target_matrix / (max_target + 1e-9)) * max_iou
        normalize_term = normalize_term.permute(0, 2, 1).gather(2, unique_indices)
        align_cls = align_cls * normalize_term * valid_mask[:, :, None]

        return torch.cat([align_cls, align_bbox], dim=-1), valid_mask

utils/loss.py CHANGED Viewed

@@ -1,2 +1,184 @@
 def get_loss_function(*args, **kwargs):
     raise NotImplementedError

+import sys
+import time
+from typing import Any, List
+import numpy as np
+import torch
+import torch.nn.functional as F
+from einops import rearrange
+from hydra import main
+from loguru import logger
+from torch import Tensor, nn
+from torch.nn import BCEWithLogitsLoss
+sys.path.append("./")
+from config.config import Config
+from tools.bbox_helper import BoxMatcher, calculate_iou, make_anchor, transform_bbox
 def get_loss_function(*args, **kwargs):
     raise NotImplementedError
class BCELoss(nn.Module):
    """Binary cross-entropy on class logits, summed and divided by a normalizer."""

    def __init__(self) -> None:
        super().__init__()
        # Create the weight on the default device instead of hard-coding CUDA,
        # so the module can also be built on CPU-only hosts; ``forward`` moves
        # it next to the inputs on demand.
        self.bce = BCEWithLogitsLoss(pos_weight=torch.tensor([1.0]), reduction="none")

    def forward(self, predicts_cls: Tensor, targets_cls: Tensor, cls_norm: Tensor) -> Any:
        """
        Args:
            predicts_cls: Predicted class logits.
            targets_cls: Target class scores, same shape as ``predicts_cls``.
            cls_norm: Value the summed loss is divided by.
        Returns:
            The element-wise BCE-with-logits loss summed over all elements and divided by ``cls_norm``.
        """
        # ``pos_weight`` is a buffer of the wrapped loss; keep it on the input's device.
        if self.bce.pos_weight.device != predicts_cls.device:
            self.bce.to(predicts_cls.device)
        return self.bce(predicts_cls, targets_cls).sum() / cls_norm
class BoxLoss(nn.Module):
    """CIoU loss between predicted and target boxes of the matched anchors."""

    def __init__(self) -> None:
        super().__init__()

    def forward(
        self, predicts_bbox: Tensor, targets_bbox: Tensor, valid_masks: Tensor, box_norm: Tensor, cls_norm: Tensor
    ) -> Any:
        """
        Args:
            predicts_bbox [batch x anchors x 4]: Predicted boxes as [x1, y1, x2, y2].
            targets_bbox [batch x anchors x 4]: Target box assigned to each anchor.
            valid_masks [batch x anchors]: Boolean mask of the anchors that have a target.
            box_norm [valid anchors]: Weight of every selected anchor.
            cls_norm: Value the weighted sum is divided by.
        Returns:
            The weighted sum of ``1 - CIoU`` over the selected anchors divided by ``cls_norm``.
        """
        # A (batch x anchors) boolean mask selects whole boxes: (N x 4).
        picked_predict = predicts_bbox[valid_masks]
        picked_targets = targets_bbox[valid_masks]

        # Only the IoU of each prediction with its own target is needed. Giving
        # every pair its own leading "batch" entry makes ``calculate_iou``
        # compare box i only with box i, instead of building the full N x N
        # pairwise matrix and discarding everything but its diagonal.
        iou = calculate_iou(picked_predict[:, None], picked_targets[:, None], "ciou").reshape(-1)

        loss_iou = 1.0 - iou
        loss_iou = (loss_iou * box_norm).sum() / cls_norm
        return loss_iou
class DFLoss(nn.Module):
    """Distribution focal loss on the per-side distance distributions of matched anchors."""

    def __init__(self, anchors: Tensor, scaler: Tensor, reg_max: int) -> None:
        """
        Args:
            anchors [anchors x 2]: Position of every anchor point.
            scaler [anchors]: Value each anchor position is divided by.
            reg_max: Number of bins of every predicted distance distribution.
        """
        super().__init__()
        self.reg_max = reg_max
        self.scaler = scaler
        self.anchors = anchors

    def forward(
        self, predicts_anc: Tensor, targets_bbox: Tensor, valid_masks: Tensor, box_norm: Tensor, cls_norm: Tensor
    ) -> Any:
        """
        Args:
            predicts_anc: Distance logits; after masking they are viewed as rows of ``reg_max`` bins.
            targets_bbox [batch x anchors x 4]: Target box per anchor, in the same units as the scaled anchors.
            valid_masks [batch x anchors]: Boolean mask of the anchors that have a target.
            box_norm [valid anchors]: Weight of every selected anchor.
            cls_norm: Value the weighted sum is divided by.
        Returns:
            The weighted sum of the per-anchor loss divided by ``cls_norm``.
        """
        side_mask = valid_masks.unsqueeze(-1).expand(-1, -1, 4)

        # Distances from each scaled anchor point to the four sides of its target box.
        top_left, bottom_right = targets_bbox.chunk(2, -1)
        scaled_anchors = (self.anchors / self.scaler[:, None]).unsqueeze(0)
        side_dist = torch.cat((scaled_anchors - top_left, bottom_right - scaled_anchors), -1)
        # Upper bound keeps ``floor + 1`` a valid bin index (at most reg_max - 1).
        side_dist = side_dist.clamp(0, self.reg_max - 1.01)

        flat_dist = side_dist[side_mask].view(-1)
        logits = predicts_anc[side_mask].view(-1, self.reg_max)

        # Each continuous distance is shared between its two neighbouring integer bins.
        lower_bin = flat_dist.floor()
        upper_bin = flat_dist.floor() + 1
        lower_weight = upper_bin - flat_dist
        upper_weight = flat_dist - lower_bin

        ce_lower = F.cross_entropy(logits, lower_bin.to(torch.long), reduction="none")
        ce_upper = F.cross_entropy(logits, upper_bin.to(torch.long), reduction="none")

        per_side = ce_lower * lower_weight + ce_upper * upper_weight
        per_anchor = per_side.view(-1, 4).mean(-1)  # average the four sides of a box
        return (per_anchor * box_norm).sum() / cls_norm
class YOLOLoss:
    """Classification, box (CIoU) and distribution-focal losses of the detection head.

    All tensors are created on CUDA and ``__call__`` runs under CUDA autocast.
    """

    def __init__(self, cfg: Config) -> None:
        """
        Args:
            cfg: Project configuration. Reads ``model.anchor.reg_max``,
                ``model.anchor.strides``, ``hyper.data.class_num``,
                ``hyper.data.image_size`` and ``hyper.train.matcher``.
        """
        self.reg_max = cfg.model.anchor.reg_max
        self.class_num = cfg.hyper.data.class_num
        self.image_size = list(cfg.hyper.data.image_size)
        self.strides = cfg.model.anchor.strides
        device = torch.device("cuda")

        # Bin indices 0..reg_max-1, used to turn a distribution into its expected distance.
        self.reverse_reg = torch.arange(self.reg_max, dtype=torch.float16, device=device)
        # image_size repeated twice, one factor per box coordinate.
        self.scale_up = torch.tensor(self.image_size * 2, device=device)

        self.anchors, self.scaler = make_anchor(self.image_size, self.strides, device)

        self.cls = BCELoss()
        self.dfl = DFLoss(self.anchors, self.scaler, self.reg_max)
        self.iou = BoxLoss()

        self.matcher = BoxMatcher(cfg.hyper.train.matcher, self.class_num, self.anchors)

    def parse_predicts(self, predicts: List[Tensor]) -> "tuple[Tensor, Tensor]":
        """
        args:
            [B x AnchorClass x h1 x w1, B x AnchorClass x h2 x w2, B x AnchorClass x h3 x w3] // AnchorClass = 4 * reg_max + class_num
        return:
            predicts [B x HW x ClassBbox] // HW = h1*w1 + h2*w2 + h3*w3, ClassBbox = class_num + 4 (xyXY)
            preds_anc [B x HW x 4 x reg_max] // raw distance logits of every box side
        """
        preds = []
        for pred in predicts:
            preds.append(rearrange(pred, "B AC h w -> B (h w) AC"))  # B x AC x h x w-> B x hw x AC
        preds = torch.concat(preds, dim=1)  # -> B x (H W) x AC

        preds_anc, preds_cls = torch.split(preds, (self.reg_max * 4, self.class_num), dim=-1)
        preds_anc = rearrange(preds_anc, "B  hw (P R)-> B hw P R", P=4)

        # Expected distance of every side, scaled by the stride of its anchor.
        pred_LTRB = preds_anc.softmax(dim=-1) @ self.reverse_reg * self.scaler.view(1, -1, 1)
        lt, rb = pred_LTRB.chunk(2, dim=-1)
        pred_minXY = self.anchors - lt
        pred_maxXY = self.anchors + rb
        predicts = torch.cat([preds_cls, pred_minXY, pred_maxXY], dim=-1)

        return predicts, preds_anc

    def parse_targets(self, targets: Tensor, batch_size: int = 16) -> Tensor:
        """
        Group the flat target rows by image and pad them to a dense tensor.

        args:
            targets [boxes x 6]: image index, class, then the box as xycwh
                (multiplied by ``scale_up`` after conversion to xyxy).
            batch_size: Number of images in the batch.
        return:
            [batch_size x max boxes per image x 5]: class and xyxy box per target;
            unused rows are zero.
        """
        # Work on a copy so the caller's tensor is not rewritten in place
        # (a second call on the same tensor would convert the boxes twice).
        targets = targets.clone()
        targets[:, 2:] = transform_bbox(targets[:, 2:], "xycwh -> xyxy") * self.scale_up

        bbox_counts = targets[:, 0].int().bincount()
        # ``max()`` of an empty tensor raises; a batch without boxes pads to zero rows.
        max_boxes = int(bbox_counts.max()) if bbox_counts.numel() > 0 else 0
        batch_targets = torch.zeros(batch_size, max_boxes, 5, device=targets.device)
        for instance_idx, count in enumerate(bbox_counts):
            instance_targets = targets[targets[:, 0] == instance_idx]
            batch_targets[instance_idx, :count] = instance_targets[:, 1:].detach()
        return batch_targets

    def separate_anchor(self, anchors):
        """
        Separate the class part and the bounding-box part, and divide the box by each anchor's stride.
        """
        anchors_cls, anchors_box = torch.split(anchors, (self.class_num, 4), dim=-1)
        anchors_box = anchors_box / self.scaler[None, :, None]
        return anchors_cls, anchors_box

    @torch.autocast("cuda")
    def __call__(self, predicts: List[Tensor], targets: Tensor) -> Tensor:
        """
        Compute the loss of one batch.

        args:
            predicts: Model output; ``predicts[0]`` is the list of per-level maps
                of shape Batch_Size x (Anchor + Class) x H x W.
            targets [boxes x 6]: Flat target rows, see ``parse_targets``.
        return:
            The sum of the IoU, DFL and classification losses.
        """
        tlist = [time.time()]
        # TODO: check datatype, why targets has a little bit error with origin version
        predicts, predicts_anc = self.parse_predicts(predicts[0])
        # Pad the targets to the real batch size rather than the default of 16.
        targets = self.parse_targets(targets, batch_size=predicts.size(0))

        align_targets, valid_masks = self.matcher(targets, predicts)
        # calculate the loss between the matched targets and the predictions

        targets_cls, targets_bbox = self.separate_anchor(align_targets)
        predicts_cls, predicts_bbox = self.separate_anchor(predicts)

        # Lower-bound the normalizer at 1 so a batch without matched targets
        # does not divide by zero.
        # NOTE(review): the bound of 1 is assumed to match the YOLOv9 reference loss - confirm.
        cls_norm = targets_cls.sum().clamp(min=1)
        box_norm = targets_cls.sum(-1)[valid_masks]

        ## -- CLS -- ##
        loss_cls = self.cls(predicts_cls, targets_cls, cls_norm)

        ## -- IOU -- ##
        loss_iou = self.iou(predicts_bbox, targets_bbox, valid_masks, box_norm, cls_norm)

        ## -- DFL -- ##
        loss_dfl = self.dfl(predicts_anc, targets_bbox, valid_masks, box_norm, cls_norm)

        logger.info("Loss IoU: {:.5f}, DFL: {:.5f}, CLS: {:.5f}", loss_iou, loss_dfl, loss_cls)
        tlist.append(time.time())
        logger.info(f"Calculate Loss Run Time {np.diff(np.array(tlist)) * 1e3} ms")

        # NOTE(review): the three terms are summed unweighted; per-term weights
        # are not configured anywhere visible here - confirm the intended weighting.
        return loss_iou + loss_dfl + loss_cls
@main(config_path="../config", config_name="config", version_base=None)
def main(cfg):
    """Build the loss from the Hydra config and evaluate it once on tensors saved on disk."""
    criterion = YOLOLoss(cfg)
    # NOTE(review): torch.load unpickles its input - only point it at trusted files.
    saved_targets = torch.load("targets.pt")
    saved_predicts = torch.load("predicts.pt")
    criterion(saved_predicts, saved_targets)


if __name__ == "__main__":
    import sys

    # Make the project root importable when the file is run as a script.
    sys.path.append("./")
    from tools.log_helper import custom_logger

    custom_logger()
    main()