MogensR committed on
Commit
f1218c1
·
1 Parent(s): 22b5156

Create Configs/sam2_hiera_l.yaml

Browse files
Files changed (1) hide show
  1. Configs/sam2_hiera_l.yaml +104 -0
Configs/sam2_hiera_l.yaml ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+ # Model
3
+ model:
4
+ _target_: sam2.modeling.sam2_base.SAM2Base
5
+ image_encoder:
6
+ _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
7
+ scalp: 1
8
+ trunk:
9
+ _target_: sam2.modeling.backbones.hieradet.Hiera
10
+ embed_dim: 144
11
+ num_heads: 2
12
+ stages: [2, 6, 36, 4]
13
+ global_att_blocks: [23, 33, 43]
14
+ window_pos_embed_bkg_spatial_size: [7, 7]
15
+ window_spec: [8, 4, 16, 8]
16
+ neck:
17
+ _target_: sam2.modeling.backbones.image_encoder.FpnNeck
18
+ position_encoding:
19
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
20
+ num_pos_feats: 256
21
+ normalize: true
22
+ scale: null
23
+ temperature: 10000
24
+ d_model: 256
25
+ backbone_channel_list: [1152, 576, 288, 144]
26
+ fpn_top_down_levels: [2, 3]
27
+ fpn_interp_model: nearest
28
+ memory_attention:
29
+ _target_: sam2.modeling.memory_attention.MemoryAttention
30
+ d_model: 256
31
+ pos_enc_at_input: true
32
+ layer:
33
+ _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
34
+ activation: relu
35
+ dim_feedforward: 2048
36
+ dropout: 0.1
37
+ pos_enc_at_attn: false
38
+ self_attention:
39
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
40
+ rope_theta: 10000.0
41
+ feat_sizes: [64, 64]
42
+ embedding_dim: 256
43
+ num_heads: 1
44
+ downsample_rate: 1
45
+ dropout: 0.1
46
+ d_model: 256
47
+ pos_enc_at_cross_attn_keys: true
48
+ pos_enc_at_cross_attn_queries: false
49
+ cross_attention:
50
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
51
+ rope_theta: 10000.0
52
+ feat_sizes: [64, 64]
53
+ rope_k_repeat: true
54
+ embedding_dim: 256
55
+ num_heads: 1
56
+ downsample_rate: 1
57
+ dropout: 0.1
58
+ kv_in_dim: 64
59
+ num_layers: 4
60
+ memory_encoder:
61
+ _target_: sam2.modeling.memory_encoder.MemoryEncoder
62
+ out_dim: 64
63
+ position_encoding:
64
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
65
+ num_pos_feats: 64
66
+ normalize: true
67
+ scale: null
68
+ temperature: 10000
69
+ mask_downsampler:
70
+ _target_: sam2.modeling.memory_encoder.MaskDownSampler
71
+ kernel_size: 3
72
+ stride: 2
73
+ padding: 1
74
+ fuser:
75
+ _target_: sam2.modeling.memory_encoder.Fuser
76
+ layer:
77
+ _target_: sam2.modeling.memory_encoder.CXBlock
78
+ dim: 256
79
+ kernel_size: 7
80
+ padding: 3
81
+ layer_scale_init_value: 1e-6
82
+ use_dwconv: true
83
+ num_layers: 2
84
+ num_maskmem: 7
85
+ image_size: 1024
86
+ sigmoid_scale_for_mem_enc: 20.0
87
+ sigmoid_bias_for_mem_enc: -10.0
88
+ use_mask_input_as_output_without_sam: true
89
+ directly_add_no_mem_embed: true
90
+ use_high_res_features_in_sam: true
91
+ multimask_output_in_sam: true
92
+ iou_prediction_use_sigmoid: true
93
+ use_obj_ptrs_in_encoder: true
94
+ add_tpos_enc_to_obj_ptrs: false
95
+ only_obj_ptrs_in_the_past_for_eval: true
96
+ pred_obj_scores: true
97
+ pred_obj_scores_mlp: true
98
+ fixed_no_obj_ptr: true
99
+ multimask_output_for_tracking: true
100
+ use_multimask_token_for_obj_ptr: true
101
+ multimask_min_pt_num: 0
102
+ multimask_max_pt_num: 1
103
+ use_mlp_for_obj_ptr_proj: true
104
+ compile_image_encoder: false