from typing import List, Literal, Optional, Type

import torch
from pydantic import BaseModel

# Default compute device: prefer CUDA, then Apple MPS, then fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
|
|
class ModelConfig(BaseModel):
    """
    The base configuration shared by every RF-DETR model variant. Fields without
    defaults (encoder, decoder, and resolution settings) are filled in by the
    size-specific subclasses below.
    """
    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"]
    out_feature_indexes: List[int]
    dec_layers: int
    two_stage: bool = True
    projector_scale: List[Literal["P3", "P4", "P5"]]
    hidden_dim: int
    patch_size: int
    num_windows: int
    sa_nheads: int
    ca_nheads: int
    dec_n_points: int
    bbox_reparam: bool = True
    lite_refpoint_refine: bool = True
    layer_norm: bool = True
    amp: bool = True
    num_classes: int = 90
    pretrain_weights: Optional[str] = None
    device: Literal["cpu", "cuda", "mps"] = DEVICE
    resolution: int
    group_detr: int = 13
    gradient_checkpointing: bool = False
    positional_encoding_size: int
|
|
class RFDETRBaseConfig(ModelConfig):
    """
    The configuration for an RF-DETR Base model.
    """
    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"] = "dinov2_windowed_small"
    hidden_dim: int = 256
    patch_size: int = 14
    num_windows: int = 4
    dec_layers: int = 3
    sa_nheads: int = 8
    ca_nheads: int = 16
    dec_n_points: int = 2
    num_queries: int = 300
    num_select: int = 300
    projector_scale: List[Literal["P3", "P4", "P5"]] = ["P4"]
    out_feature_indexes: List[int] = [2, 5, 8, 11]
    pretrain_weights: Optional[str] = "rf-detr-base.pth"
    resolution: int = 560
    positional_encoding_size: int = 37
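
# Example (illustrative, not from the original file): because these configs are
# pydantic models, every field is validated at construction time, so unsupported
# values fail fast:
#
#   RFDETRBaseConfig(device="tpu")    # raises pydantic.ValidationError
#   RFDETRBaseConfig(resolution=504)  # valid: overrides the default of 560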
|
|
class RFDETRLargeConfig(RFDETRBaseConfig):
    """
    The configuration for an RF-DETR Large model.
    """
    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"] = "dinov2_windowed_base"
    hidden_dim: int = 384
    sa_nheads: int = 12
    ca_nheads: int = 24
    dec_n_points: int = 4
    projector_scale: List[Literal["P3", "P4", "P5"]] = ["P3", "P5"]
    pretrain_weights: Optional[str] = "rf-detr-large.pth"
|
|
class RFDETRNanoConfig(RFDETRBaseConfig):
    """
    The configuration for an RF-DETR Nano model.
    """
    out_feature_indexes: List[int] = [3, 6, 9, 12]
    num_windows: int = 2
    dec_layers: int = 2
    patch_size: int = 16
    resolution: int = 384
    positional_encoding_size: int = 24
    pretrain_weights: Optional[str] = "rf-detr-nano.pth"
|
|
class RFDETRSmallConfig(RFDETRBaseConfig):
    """
    The configuration for an RF-DETR Small model.
    """
    out_feature_indexes: List[int] = [3, 6, 9, 12]
    num_windows: int = 2
    dec_layers: int = 3
    patch_size: int = 16
    resolution: int = 512
    positional_encoding_size: int = 32
    pretrain_weights: Optional[str] = "rf-detr-small.pth"
|
|
class RFDETRMediumConfig(RFDETRBaseConfig):
    """
    The configuration for an RF-DETR Medium model.
    """
    out_feature_indexes: List[int] = [3, 6, 9, 12]
    num_windows: int = 2
    dec_layers: int = 4
    patch_size: int = 16
    resolution: int = 576
    positional_encoding_size: int = 36
    pretrain_weights: Optional[str] = "rf-detr-medium.pth"
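
# Example (illustrative, not from the original file): the size variants above only
# override defaults inherited from RFDETRBaseConfig, so switching model size is a
# one-line change:
#
#   config = RFDETRSmallConfig(num_classes=3, device="cpu")
#   config = RFDETRLargeConfig(num_classes=3, device="cpu")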
|
|
class TrainConfig(BaseModel):
    """
    The configuration for an RF-DETR training run.
    """
    lr: float = 1e-4
    lr_encoder: float = 1.5e-4
    batch_size: int = 4
    grad_accum_steps: int = 4
    epochs: int = 100
    ema_decay: float = 0.993
    ema_tau: int = 100
    lr_drop: int = 100
    checkpoint_interval: int = 10
    warmup_epochs: int = 0
    lr_vit_layer_decay: float = 0.8
    lr_component_decay: float = 0.7
    drop_path: float = 0.0
    group_detr: int = 13
    ia_bce_loss: bool = True
    cls_loss_coef: float = 1.0
    num_select: int = 300
    dataset_file: Literal["coco", "o365", "roboflow"] = "roboflow"
    square_resize_div_64: bool = True
    dataset_dir: str
    output_dir: str = "output"
    multi_scale: bool = True
    expanded_scales: bool = True
    do_random_resize_via_padding: bool = False
    use_ema: bool = True
    num_workers: int = 2
    weight_decay: float = 1e-4
    early_stopping: bool = False
    early_stopping_patience: int = 10
    early_stopping_min_delta: float = 0.001
    early_stopping_use_ema: bool = False
    tensorboard: bool = True
    wandb: bool = False
    project: Optional[str] = None
    run: Optional[str] = None
    class_names: Optional[List[str]] = None
    run_test: bool = True
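

if __name__ == "__main__":
    # Minimal smoke test (illustrative, not part of the original file):
    # `dataset_dir` is the only TrainConfig field without a default, and the
    # path below is a placeholder.
    model_config = RFDETRBaseConfig(num_classes=3)
    train_config = TrainConfig(dataset_dir="path/to/dataset", epochs=50)
    print(model_config)
    print(train_config)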
|
|
|