✨ [Add] dynamic image size loader
- yolo/config/config.py +1 -0
- yolo/config/task/validation.yaml +2 -1
- yolo/tools/data_augmentation.py +6 -2
- yolo/tools/data_loader.py +25 -3
- yolo/tools/solver.py +2 -2
- yolo/utils/bounding_box_utils.py +8 -1
- yolo/utils/dataset_utils.py +3 -2
- yolo/utils/model_utils.py +7 -3
yolo/config/config.py
@@ -59,6 +59,7 @@ class DataConfig:
     image_size: List[int]
     data_augment: Dict[str, int]
     source: Optional[Union[str, int]]
+    dynamic_shape: Optional[bool]
 
 
 @dataclass
yolo/config/task/validation.yaml
@@ -1,12 +1,13 @@
 task: validation
 
 data:
-  batch_size:
+  batch_size: 32
   image_size: ${image_size}
   cpu_num: ${cpu_num}
   shuffle: False
   pin_memory: True
   data_augment: {}
+  dynamic_shape: True
 nms:
   min_confidence: 0.0001
   min_iou: 0.7
yolo/tools/data_augmentation.py
@@ -1,3 +1,5 @@
+from typing import List
+
 import numpy as np
 import torch
 from PIL import Image
@@ -10,8 +12,7 @@ class AugmentationComposer:
     def __init__(self, transforms, image_size: int = [640, 640]):
         self.transforms = transforms
         # TODO: handle List of image_size [640, 640]
-        self.
-        self.pad_resize = PadAndResize(self.image_size)
+        self.pad_resize = PadAndResize(image_size)
 
         for transform in self.transforms:
             if hasattr(transform, "set_parent"):
@@ -57,6 +58,9 @@ class PadAndResize:
         self.target_width, self.target_height = image_size
         self.background_color = background_color
 
+    def set_size(self, image_size: List[int]):
+        self.target_width, self.target_height = image_size
+
     def __call__(self, image: Image, boxes):
         img_width, img_height = image.size
         scale = min(self.target_width / img_width, self.target_height / img_height)
yolo/tools/data_loader.py
@@ -1,5 +1,6 @@
 from pathlib import Path
 from queue import Empty, Queue
+from statistics import mean
 from threading import Event, Thread
 from typing import Generator, List, Tuple, Union
 
@@ -28,12 +29,14 @@ class YoloDataset(Dataset):
         augment_cfg = data_cfg.data_augment
         self.image_size = data_cfg.image_size
         phase_name = dataset_cfg.get(phase, phase)
+        self.batch_size = data_cfg.batch_size
+        self.dynamic_shape = getattr(data_cfg, "dynamic_shape", True)
+        self.base_size = mean(self.image_size)
 
         transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
         self.transform = AugmentationComposer(transforms, self.image_size)
         self.transform.get_more_data = self.get_more_data
-        img_paths, bboxes = tensorlize(self.load_data(Path(dataset_cfg.path), phase_name))
-        self.img_paths, self.bboxes = img_paths, bboxes
+        self.img_paths, self.bboxes, self.ratios = tensorlize(self.load_data(Path(dataset_cfg.path), phase_name))
 
     def load_data(self, dataset_path: Path, phase_name: str):
         """
@@ -102,8 +105,13 @@ class YoloDataset(Dataset):
             labels = self.load_valid_labels(image_id, image_seg_annotations)
 
             img_path = images_path / image_name
-            data.append((img_path, labels))
+            with Image.open(img_path) as img:
+                width, height = img.size
+            data.append((img_path, labels, width / height))
             valid_inputs += 1
+
+        data = sorted(data, key=lambda x: x[2], reverse=True)
+
         logger.info(f"Recorded {valid_inputs}/{len(images_list)} valid inputs")
         return data
 
@@ -143,8 +151,22 @@ class YoloDataset(Dataset):
         indices = torch.randint(0, len(self), (num,))
         return [self.get_data(idx)[:2] for idx in indices]
 
+    def _update_image_size(self, idx: int) -> None:
+        """Update image size based on dynamic shape and batch settings."""
+        batch_start_idx = (idx // self.batch_size) * self.batch_size
+        image_ratio = self.ratios[batch_start_idx]
+
+        shift = ((self.base_size / 32 * (image_ratio - 1)) // (image_ratio + 1)) * 32
+
+        self.image_size = [int(self.base_size + shift), int(self.base_size - shift)]
+        self.transform.pad_resize.set_size(self.image_size)
+
     def __getitem__(self, idx) -> Tuple[Image.Image, Tensor, Tensor, List[str]]:
         img, bboxes, img_path = self.get_data(idx)
+
+        if self.dynamic_shape:
+            self._update_image_size(idx)
+
         img, bboxes, rev_tensor = self.transform(img, bboxes)
         bboxes[:, [1, 3]] *= self.image_size[0]
         bboxes[:, [2, 4]] *= self.image_size[1]
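To see what _update_image_size() computes: images are sorted by aspect ratio at load time, so every item in a batch shares (roughly) the ratio of the batch's first item, and the loader then stretches one side and shrinks the other in steps of 32 around the configured base size. A standalone sketch of that arithmetic, assuming base_size is the mean of the configured image_size as in __init__ (validation keeps shuffle: False, so batch order follows the sorted ratios):

def batch_image_size(base_size: float, image_ratio: float):
    # Snap the shift to a multiple of 32 so every stride still divides the result.
    shift = ((base_size / 32 * (image_ratio - 1)) // (image_ratio + 1)) * 32
    # For ratio > 1 the first entry grows and the second shrinks by the same amount.
    return [int(base_size + shift), int(base_size - shift)]

print(batch_image_size(640, 16 / 9))  # [800, 480]
print(batch_image_size(640, 4 / 3))   # [704, 576]
print(batch_image_size(640, 1.0))     # [640, 640]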
yolo/tools/solver.py
@@ -45,7 +45,7 @@ class ValidateModel(BaseModel):
 
     def validation_step(self, batch, batch_idx):
         batch_size, images, targets, rev_tensor, img_paths = batch
-        predicts = self.post_process(self(images))
+        predicts = self.post_process(self(images), image_size=images.shape[2:])
         batch_metrics = self.metric(
             [to_metrics_format(predict) for predict in predicts], [to_metrics_format(target) for target in targets]
         )
@@ -127,7 +127,7 @@ class InferenceModel(BaseModel):
 
     def predict_step(self, batch, batch_idx):
         images, rev_tensor, origin_frame = batch
-        predicts = self.post_process(self(images), rev_tensor)
+        predicts = self.post_process(self(images), rev_tensor=rev_tensor)
         img = draw_bboxes(origin_frame, predicts, idx2label=self.cfg.dataset.class_list)
         if getattr(self.predict_loader, "is_stream", None):
            fps = self._display_stream(img)
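In validation_step the spatial size now travels with the predictions: for an NCHW batch, images.shape[2:] is the (H, W) pair the converter needs. A minimal illustration:

import torch

images = torch.zeros(32, 3, 480, 800)  # N, C, H, W
print(tuple(images.shape[2:]))         # (480, 800), passed on as image_size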
yolo/utils/bounding_box_utils.py
@@ -122,7 +122,7 @@ def generate_anchors(image_size: List[int], strides: List[int]):
         all_anchors [HW x 2]:
         all_scalers [HW]: The index of the best targets for each anchors
     """
-    W, H = image_size
+    H, W = image_size
    anchors = []
    scaler = []
    for stride in strides:
@@ -308,6 +308,7 @@ class Vec2Box:
         self.strides = self.create_auto_anchor(model, image_size)
 
         anchor_grid, scaler = generate_anchors(image_size, self.strides)
+        self.image_size = image_size
         self.anchor_grid, self.scaler = anchor_grid.to(device), scaler.to(device)
 
     def create_auto_anchor(self, model: YOLO, image_size):
@@ -320,7 +321,13 @@ class Vec2Box:
         return strides
 
     def update(self, image_size):
+        """
+        image_size: H, W
+        """
+        if self.image_size == image_size:
+            return
         anchor_grid, scaler = generate_anchors(image_size, self.strides)
+        self.image_size = image_size
         self.anchor_grid, self.scaler = anchor_grid.to(self.device), scaler.to(self.device)
 
     def __call__(self, predicts):
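The early return in update() turns anchor regeneration into a cheap no-op whenever consecutive batches share a shape, which with ratio-sorted data is the common case. A toy sketch of that caching pattern (the anchor generator here is a stand-in, not the repo's generate_anchors):

import torch

def toy_anchor_centers(image_size, strides=(8, 16, 32)):
    # One (x, y) cell center per stride level, just to have something to cache.
    H, W = image_size
    centers = []
    for s in strides:
        ys, xs = torch.meshgrid(
            torch.arange(H // s) * s + s / 2,
            torch.arange(W // s) * s + s / 2,
            indexing="ij",
        )
        centers.append(torch.stack([xs, ys], dim=-1).reshape(-1, 2))
    return torch.cat(centers)

class AnchorCache:
    def __init__(self, image_size):
        self.image_size = image_size
        self.anchors = toy_anchor_centers(image_size)

    def update(self, image_size):
        if self.image_size == image_size:
            return  # same shape as the previous batch: keep the cached grid
        self.image_size = image_size
        self.anchors = toy_anchor_centers(image_size)

cache = AnchorCache([640, 640])
cache.update([640, 640])  # no-op
cache.update([800, 480])  # rebuilds for the new dynamic shape
print(cache.anchors.shape)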
yolo/utils/dataset_utils.py
@@ -115,7 +115,7 @@ def scale_segmentation(
 
 
 def tensorlize(data):
-    img_paths, bboxes = zip(*data)
+    img_paths, bboxes, img_ratios = zip(*data)
     max_box = max(bbox.size(0) for bbox in bboxes)
     padded_bbox_list = []
     for bbox in bboxes:
@@ -124,4 +124,5 @@ def tensorlize(data)
         padded_bbox_list.append(padding)
     bboxes = np.stack(padded_bbox_list)
     img_paths = np.array(img_paths)
-    return img_paths, bboxes
+    img_ratios = np.array(img_ratios)
+    return img_paths, bboxes, img_ratios
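tensorlize still pads every label tensor up to the longest one so the whole split can be stacked into one array; the only change is that the per-image width/height ratios ride along as a third array. A rough sketch under assumed shapes (each label tensor is [num_boxes, 5]; the -1 pad value is illustrative, not necessarily what the repo uses):

import numpy as np
import torch

data = [
    ("img_0.jpg", torch.rand(3, 5), 16 / 9),  # (path, labels, width / height)
    ("img_1.jpg", torch.rand(1, 5), 4 / 3),
]
img_paths, bboxes, img_ratios = zip(*data)

max_box = max(bbox.size(0) for bbox in bboxes)
padded_bbox_list = []
for bbox in bboxes:
    padding = torch.full((max_box, 5), -1.0)  # filler rows mean "no box here"
    padding[: bbox.size(0)] = bbox
    padded_bbox_list.append(padding)

bboxes = np.stack(padded_bbox_list)  # (num_images, max_box, 5)
img_paths = np.array(img_paths)
img_ratios = np.array(img_ratios)
print(bboxes.shape, img_ratios)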
yolo/utils/model_utils.py
@@ -11,7 +11,7 @@ from torch.optim.lr_scheduler import LambdaLR, SequentialLR, _LRScheduler
 
 from yolo.config.config import IDX_TO_ID, NMSConfig, OptimizerConfig, SchedulerConfig
 from yolo.model.yolo import YOLO
-from yolo.utils.bounding_box_utils import bbox_nms, transform_bbox
+from yolo.utils.bounding_box_utils import Anc2Box, Vec2Box, bbox_nms, transform_bbox
 from yolo.utils.logger import logger
 
 
@@ -130,11 +130,15 @@ class PostProcess:
     scale back the prediction and do nms for pred_bbox
     """
 
-    def __init__(self, converter, nms_cfg: NMSConfig) -> None:
+    def __init__(self, converter: Union[Vec2Box, Anc2Box], nms_cfg: NMSConfig) -> None:
         self.converter = converter
         self.nms = nms_cfg
 
-    def __call__(
+    def __call__(
+        self, predict, rev_tensor: Optional[Tensor] = None, image_size: Optional[List[int]] = None
+    ) -> List[Tensor]:
+        if image_size is not None:
+            self.converter.update(image_size)
         prediction = self.converter(predict["Main"])
         pred_class, _, pred_bbox = prediction[:3]
         pred_conf = prediction[3] if len(prediction) == 4 else None