Transformers
jiamingZ committed on
Commit
c108889
·
verified ·
1 Parent(s): 31140fc

Upload config.json

Browse files
Files changed (1) hide show
  1. config.json +137 -0
config.json ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "_target_": "sam2.modeling.sam2_base.SAM2Plus",
4
+ "image_encoder": {
5
+ "_target_": "sam2.modeling.backbones.image_encoder.ImageEncoder",
6
+ "scalp": 1,
7
+ "trunk": {
8
+ "_target_": "sam2.modeling.backbones.hieradet.Hiera",
9
+ "embed_dim": 112,
10
+ "num_heads": 2
11
+ },
12
+ "neck": {
13
+ "_target_": "sam2.modeling.backbones.image_encoder.FpnNeck",
14
+ "position_encoding": {
15
+ "_target_": "sam2.modeling.position_encoding.PositionEmbeddingSine",
16
+ "num_pos_feats": 256,
17
+ "normalize": true,
18
+ "scale": null,
19
+ "temperature": 10000
20
+ },
21
+ "d_model": 256,
22
+ "backbone_channel_list": [
23
+ 896,
24
+ 448,
25
+ 224,
26
+ 112
27
+ ],
28
+ "fpn_top_down_levels": [
29
+ 2,
30
+ 3
31
+ ],
32
+ "fpn_interp_model": "nearest"
33
+ }
34
+ },
35
+ "memory_attention": {
36
+ "_target_": "sam2.modeling.memory_attention.MemoryAttention",
37
+ "d_model": 256,
38
+ "pos_enc_at_input": true,
39
+ "layer": {
40
+ "_target_": "sam2.modeling.memory_attention.MemoryAttentionLayer",
41
+ "activation": "relu",
42
+ "dim_feedforward": 2048,
43
+ "dropout": 0.1,
44
+ "pos_enc_at_attn": false,
45
+ "self_attention": {
46
+ "_target_": "sam2.modeling.sam.transformer.RoPEAttention",
47
+ "rope_theta": 10000.0,
48
+ "feat_sizes": [
49
+ 64,
50
+ 64
51
+ ],
52
+ "embedding_dim": 256,
53
+ "num_heads": 1,
54
+ "downsample_rate": 1,
55
+ "dropout": 0.1
56
+ },
57
+ "d_model": 256,
58
+ "pos_enc_at_cross_attn_keys": true,
59
+ "pos_enc_at_cross_attn_queries": false,
60
+ "cross_attention": {
61
+ "_target_": "sam2.modeling.sam.transformer.RoPEAttention",
62
+ "rope_theta": 10000.0,
63
+ "feat_sizes": [
64
+ 64,
65
+ 64
66
+ ],
67
+ "rope_k_repeat": true,
68
+ "embedding_dim": 256,
69
+ "num_heads": 1,
70
+ "downsample_rate": 1,
71
+ "dropout": 0.1,
72
+ "kv_in_dim": 64
73
+ }
74
+ },
75
+ "num_layers": 4
76
+ },
77
+ "memory_encoder": {
78
+ "_target_": "sam2.modeling.memory_encoder.MemoryEncoder",
79
+ "out_dim": 64,
80
+ "position_encoding": {
81
+ "_target_": "sam2.modeling.position_encoding.PositionEmbeddingSine",
82
+ "num_pos_feats": 64,
83
+ "normalize": true,
84
+ "scale": null,
85
+ "temperature": 10000
86
+ },
87
+ "mask_downsampler": {
88
+ "_target_": "sam2.modeling.memory_encoder.MaskDownSampler",
89
+ "kernel_size": 3,
90
+ "stride": 2,
91
+ "padding": 1
92
+ },
93
+ "fuser": {
94
+ "_target_": "sam2.modeling.memory_encoder.Fuser",
95
+ "layer": {
96
+ "_target_": "sam2.modeling.memory_encoder.CXBlock",
97
+ "dim": 256,
98
+ "kernel_size": 7,
99
+ "padding": 3,
100
+ "layer_scale_init_value": "1e-6",
101
+ "use_dwconv": true
102
+ },
103
+ "num_layers": 2
104
+ }
105
+ },
106
+ "num_maskmem": 7,
107
+ "image_size": 1024,
108
+ "sigmoid_scale_for_mem_enc": 20.0,
109
+ "sigmoid_bias_for_mem_enc": -10.0,
110
+ "use_mask_input_as_output_without_sam": true,
111
+ "directly_add_no_mem_embed": true,
112
+ "no_obj_embed_spatial": true,
113
+ "use_high_res_features_in_sam": true,
114
+ "separate_image_encoder": false,
115
+ "separate_memory_attention": true,
116
+ "separate_memory_encoder": true,
117
+ "unified_decoder_box_head_freeze_bn": true,
118
+ "unified_decoder_box_head_inner_dim": 256,
119
+ "unified_decoder_box_head_pred_masks": true,
120
+ "multimask_output_in_sam": true,
121
+ "iou_prediction_use_sigmoid": true,
122
+ "use_obj_ptrs_in_encoder": true,
123
+ "add_tpos_enc_to_obj_ptrs": true,
124
+ "proj_tpos_enc_in_obj_ptrs": true,
125
+ "use_signed_tpos_enc_to_obj_ptrs": true,
126
+ "only_obj_ptrs_in_the_past_for_eval": true,
127
+ "pred_obj_scores": true,
128
+ "pred_obj_scores_mlp": true,
129
+ "fixed_no_obj_ptr": true,
130
+ "multimask_output_for_tracking": true,
131
+ "use_multimask_token_for_obj_ptr": true,
132
+ "multimask_min_pt_num": 0,
133
+ "multimask_max_pt_num": 1,
134
+ "use_mlp_for_obj_ptr_proj": true,
135
+ "compile_image_encoder": false
136
+ }
137
+ }