MCG-NJU
/

SAM2-Plus

Transformers

Model card Files Files and versions

xet

Community

jiamingZ committed on 5 days ago

Commit

c108889

verified ·

1 Parent(s): 31140fc

Upload config.json

Browse files

Files changed (1) hide show

config.json +137 -0

config.json ADDED Viewed

	@@ -0,0 +1,137 @@

+{
+    "model": {
+        "_target_": "sam2.modeling.sam2_base.SAM2Plus",
+        "image_encoder": {
+            "_target_": "sam2.modeling.backbones.image_encoder.ImageEncoder",
+            "scalp": 1,
+            "trunk": {
+                "_target_": "sam2.modeling.backbones.hieradet.Hiera",
+                "embed_dim": 112,
+                "num_heads": 2
+            },
+            "neck": {
+                "_target_": "sam2.modeling.backbones.image_encoder.FpnNeck",
+                "position_encoding": {
+                    "_target_": "sam2.modeling.position_encoding.PositionEmbeddingSine",
+                    "num_pos_feats": 256,
+                    "normalize": true,
+                    "scale": null,
+                    "temperature": 10000
+                },
+                "d_model": 256,
+                "backbone_channel_list": [
+                    896,
+                    448,
+                    224,
+                    112
+                ],
+                "fpn_top_down_levels": [
+                    2,
+                    3
+                ],
+                "fpn_interp_model": "nearest"
+            }
+        },
+        "memory_attention": {
+            "_target_": "sam2.modeling.memory_attention.MemoryAttention",
+            "d_model": 256,
+            "pos_enc_at_input": true,
+            "layer": {
+                "_target_": "sam2.modeling.memory_attention.MemoryAttentionLayer",
+                "activation": "relu",
+                "dim_feedforward": 2048,
+                "dropout": 0.1,
+                "pos_enc_at_attn": false,
+                "self_attention": {
+                    "_target_": "sam2.modeling.sam.transformer.RoPEAttention",
+                    "rope_theta": 10000.0,
+                    "feat_sizes": [
+                        64,
+                        64
+                    ],
+                    "embedding_dim": 256,
+                    "num_heads": 1,
+                    "downsample_rate": 1,
+                    "dropout": 0.1
+                },
+                "d_model": 256,
+                "pos_enc_at_cross_attn_keys": true,
+                "pos_enc_at_cross_attn_queries": false,
+                "cross_attention": {
+                    "_target_": "sam2.modeling.sam.transformer.RoPEAttention",
+                    "rope_theta": 10000.0,
+                    "feat_sizes": [
+                        64,
+                        64
+                    ],
+                    "rope_k_repeat": true,
+                    "embedding_dim": 256,
+                    "num_heads": 1,
+                    "downsample_rate": 1,
+                    "dropout": 0.1,
+                    "kv_in_dim": 64
+                }
+            },
+            "num_layers": 4
+        },
+        "memory_encoder": {
+            "_target_": "sam2.modeling.memory_encoder.MemoryEncoder",
+            "out_dim": 64,
+            "position_encoding": {
+                "_target_": "sam2.modeling.position_encoding.PositionEmbeddingSine",
+                "num_pos_feats": 64,
+                "normalize": true,
+                "scale": null,
+                "temperature": 10000
+            },
+            "mask_downsampler": {
+                "_target_": "sam2.modeling.memory_encoder.MaskDownSampler",
+                "kernel_size": 3,
+                "stride": 2,
+                "padding": 1
+            },
+            "fuser": {
+                "_target_": "sam2.modeling.memory_encoder.Fuser",
+                "layer": {
+                    "_target_": "sam2.modeling.memory_encoder.CXBlock",
+                    "dim": 256,
+                    "kernel_size": 7,
+                    "padding": 3,
+                    "layer_scale_init_value": 1e-6,
+                    "use_dwconv": true
+                },
+                "num_layers": 2
+            }
+        },
+        "num_maskmem": 7,
+        "image_size": 1024,
+        "sigmoid_scale_for_mem_enc": 20.0,
+        "sigmoid_bias_for_mem_enc": -10.0,
+        "use_mask_input_as_output_without_sam": true,
+        "directly_add_no_mem_embed": true,
+        "no_obj_embed_spatial": true,
+        "use_high_res_features_in_sam": true,
+        "separate_image_encoder": false,
+        "separate_memory_attention": true,
+        "separate_memory_encoder": true,
+        "unified_decoder_box_head_freeze_bn": true,
+        "unified_decoder_box_head_inner_dim": 256,
+        "unified_decoder_box_head_pred_masks": true,
+        "multimask_output_in_sam": true,
+        "iou_prediction_use_sigmoid": true,
+        "use_obj_ptrs_in_encoder": true,
+        "add_tpos_enc_to_obj_ptrs": true,
+        "proj_tpos_enc_in_obj_ptrs": true,
+        "use_signed_tpos_enc_to_obj_ptrs": true,
+        "only_obj_ptrs_in_the_past_for_eval": true,
+        "pred_obj_scores": true,
+        "pred_obj_scores_mlp": true,
+        "fixed_no_obj_ptr": true,
+        "multimask_output_for_tracking": true,
+        "use_multimask_token_for_obj_ptr": true,
+        "multimask_min_pt_num": 0,
+        "multimask_max_pt_num": 1,
+        "use_mlp_for_obj_ptr_proj": true,
+        "compile_image_encoder": false
+    }
+}