MODEL:
  META_ARCHITECTURE: "OVSegDEMO"
  BACKBONE:
    FREEZE_AT: 0
    NAME: "D2SwinTransformer"
  SWIN:
    EMBED_DIM: 128
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [4, 8, 16, 32]
    WINDOW_SIZE: 12
    APE: False
    DROP_PATH_RATE: 0.3
    PATCH_NORM: True
    PRETRAIN_IMG_SIZE: 384
  WEIGHTS: "./ovseg_swinbase_vitL14_ft_mpt.pth"
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  SEM_SEG_HEAD:
    NAME: "OpenVocabMaskFormerHead"
    IN_FEATURES: ["res2", "res3", "res4", "res5"]
    IGNORE_VALUE: 255
    NUM_CLASSES: 171  # number of categories in the training set
    EMBEDDING_DIM: 768
    EMBED_LAYERS: 2
    COMMON_STRIDE: 4  # not used, hard-coded
    LOSS_WEIGHT: 1.0
    CONVS_DIM: 256
    MASK_DIM: 256
    NORM: "GN"
  MASK_FORMER:
    TRANSFORMER_IN_FEATURE: "res5"
    DEEP_SUPERVISION: True
    NO_OBJECT_WEIGHT: 0.1
    DICE_WEIGHT: 1.0
    MASK_WEIGHT: 20.0
    HIDDEN_DIM: 256
    NUM_OBJECT_QUERIES: 100
    NHEADS: 8
    DROPOUT: 0.1
    DIM_FEEDFORWARD: 2048
    ENC_LAYERS: 0
    DEC_LAYERS: 6
    PRE_NORM: False
  CLIP_ADAPTER:
    TEXT_TEMPLATES: "vild"
    CLIP_MODEL_NAME: "ViT-L/14"
    MASK_FILL: "mean"
    MASK_EXPAND_RATIO: 1.0
    MASK_THR: 0.35  # threshold for selecting foreground masks
    MASK_MATTING: False  # use a soft background; not used by default
    MASK_PROMPT_DEPTH: 3
    MASK_PROMPT_FWD: True  # use the mask prompt during the forward pass
    REGION_RESIZED: True  # resize regions to the CLIP input size, e.g. 224
    CLIP_ENSEMBLE: True  # ensemble the two classification branches
    CLIP_ENSEMBLE_WEIGHT: 0.0
DATASETS:
  TRAIN: ("coco_2017_train_stuff_sem_seg",)
  TEST: ("ade20k_sem_seg_val",)
SOLVER:
  IMS_PER_BATCH: 32
  BASE_LR: 0.00006
  MAX_ITER: 120000
  WARMUP_FACTOR: 1e-6
  WARMUP_ITERS: 1500
  WEIGHT_DECAY: 0.01
  WEIGHT_DECAY_NORM: 0.0
  WEIGHT_DECAY_EMBED: 0.0
  BACKBONE_MULTIPLIER: 1.0
  TEST_IMS_PER_BATCH: 1
  CLIP_GRADIENTS:
    ENABLED: True
    CLIP_TYPE: "full_model"
    CLIP_VALUE: 0.01
    NORM_TYPE: 2.0
INPUT:
  MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 21)]"]
  MIN_SIZE_TRAIN_SAMPLING: "choice"
  MIN_SIZE_TEST: 640
  MAX_SIZE_TRAIN: 2560
  MAX_SIZE_TEST: 2560
  CROP:
    ENABLED: True
    TYPE: "absolute"
    SIZE: (640, 640)
    SINGLE_CATEGORY_MAX_AREA: 1.0
  COLOR_AUG_SSD: True
  SIZE_DIVISIBILITY: 640  # used in the dataset mapper
  FORMAT: "RGB"
TEST:
  EVAL_PERIOD: 5000
  AUG:
    ENABLED: False
    MIN_SIZES: [256, 384, 512, 640, 768, 896]
    MAX_SIZE: 3584
    FLIP: True
DATALOADER:
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 4
VERSION: 2
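
The file above is a Detectron2-style config for the OVSeg demo (Swin-B backbone, CLIP ViT-L/14 adapter). Below is a minimal loading sketch, assuming detectron2 is installed; it uses `set_new_allowed(True)` in place of the OVSeg repository's own config-extension helper, and the file name is a placeholder. `allow_unsafe=True` is required because `MIN_SIZE_TRAIN` uses a `!!python/object/apply:eval` tag that `yaml.safe_load` rejects.

```python
# Minimal loading sketch, assuming detectron2 is installed. The OVSeg repo
# normally registers its extra keys (SWIN, MASK_FORMER, CLIP_ADAPTER, ...)
# through its own config helper; set_new_allowed(True) is used here instead.
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.set_new_allowed(True)            # accept keys base detectron2 does not define
cfg.merge_from_file(
    "ovseg_swinB_vitL14_demo.yaml",  # placeholder path for the file above
    allow_unsafe=True,               # needed for the !!python/object/apply:eval tag
)

print(cfg.MODEL.CLIP_ADAPTER.CLIP_MODEL_NAME)  # ViT-L/14
# MIN_SIZE_TRAIN expands to 16 training scales, 0.5x-2.0x of 640:
print(cfg.INPUT.MIN_SIZE_TRAIN)                # 320, 384, ..., 1216, 1280
```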
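
The CLIP_ADAPTER block describes how each predicted mask region is handed to CLIP for open-vocabulary classification: the background is filled with a mean pixel value (MASK_FILL: "mean"), the region is cropped (MASK_EXPAND_RATIO: 1.0 keeps the box tight), resized to CLIP's input resolution (REGION_RESIZED: True), and scored against text prompts. The sketch below is an illustrative approximation of that step using the openai `clip` package, not the repository's code; it uses a single prompt per class, whereas TEXT_TEMPLATES: "vild" refers to ViLD-style prompt ensembling.

```python
# Illustrative sketch (not the repository's exact code) of classifying one
# predicted mask region with CLIP, mirroring the CLIP_ADAPTER settings.
# Requires torch, numpy, Pillow, and the openai "clip" package.
import numpy as np
import torch
import clip
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-L/14", device=device)  # CLIP_MODEL_NAME

def classify_region(image: np.ndarray, mask: np.ndarray, class_names):
    """image: HxWx3 uint8 RGB; mask: HxW bool for one (non-empty) segment."""
    # Fill everything outside the mask with the mean pixel (MASK_FILL: "mean").
    filled = image.copy()
    filled[~mask] = image.reshape(-1, 3).mean(0).astype(np.uint8)

    # Tight bounding box around the mask (MASK_EXPAND_RATIO: 1.0, no expansion).
    ys, xs = np.where(mask)
    crop = filled[ys.min():ys.max() + 1, xs.min():xs.max() + 1]

    # Resize the crop to CLIP's input size (REGION_RESIZED) and score it
    # against one hypothetical prompt per class name.
    region = preprocess(Image.fromarray(crop)).unsqueeze(0).to(device)
    prompts = clip.tokenize([f"a photo of a {c}" for c in class_names]).to(device)
    with torch.no_grad():
        img_feat = model.encode_image(region)
        txt_feat = model.encode_text(prompts)
        img_feat = img_feat / img_feat.norm(dim=-1, keepdim=True)
        txt_feat = txt_feat / txt_feat.norm(dim=-1, keepdim=True)
        probs = (100.0 * img_feat @ txt_feat.T).softmax(dim=-1)
    return probs.squeeze(0).cpu()  # one score per class name
```

CLIP_ENSEMBLE / CLIP_ENSEMBLE_WEIGHT then govern how these CLIP scores are combined with the classification scores from the segmentation head itself, and MASK_PROMPT_DEPTH / MASK_PROMPT_FWD relate to the mask-prompt-tuned CLIP weights (the ft_mpt checkpoint above), neither of which is covered by this sketch.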