James Zhou
committed on
Commit
·
1062761
1
Parent(s):
764de60
[add] model setting
Browse files- config.yaml +49 -0
config.yaml
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_config:
|
| 2 |
+
model_name: HunyuanVideo-Foley-XXL
|
| 3 |
+
model_type: 1d
|
| 4 |
+
model_precision: bf16
|
| 5 |
+
model_kwargs:
|
| 6 |
+
depth_triple_blocks: 18
|
| 7 |
+
depth_single_blocks: 36
|
| 8 |
+
hidden_size: 1536
|
| 9 |
+
num_heads: 12
|
| 10 |
+
mlp_ratio: 4
|
| 11 |
+
mlp_act_type: "gelu_tanh"
|
| 12 |
+
qkv_bias: True
|
| 13 |
+
qk_norm: True
|
| 14 |
+
qk_norm_type: "rms"
|
| 15 |
+
attn_mode: "torch"
|
| 16 |
+
embedder_type: "default"
|
| 17 |
+
interleaved_audio_visual_rope: True
|
| 18 |
+
enable_learnable_empty_visual_feat: True
|
| 19 |
+
sync_modulation: False
|
| 20 |
+
add_sync_feat_to_audio: True
|
| 21 |
+
cross_attention: True
|
| 22 |
+
use_attention_mask: False
|
| 23 |
+
condition_projection: "linear"
|
| 24 |
+
sync_feat_dim: 768 # Synchformer feature dim (768)
|
| 25 |
+
condition_dim: 768 # clap 768 text condition dim (clip-text)
|
| 26 |
+
clip_dim: 768 # siglip2 visual dim
|
| 27 |
+
audio_vae_latent_dim: 128
|
| 28 |
+
audio_frame_rate: 50
|
| 29 |
+
patch_size: 1
|
| 30 |
+
rope_dim_list: null
|
| 31 |
+
rope_theta: 10000
|
| 32 |
+
text_length: 77
|
| 33 |
+
clip_length: 64
|
| 34 |
+
sync_length: 192
|
| 35 |
+
use_mmaudio_singleblock: True
|
| 36 |
+
depth_triple_ssl_encoder: null
|
| 37 |
+
depth_single_ssl_encoder: 8
|
| 38 |
+
use_repa_with_audiossl: True
|
| 39 |
+
|
| 40 |
+
diffusion_config:
|
| 41 |
+
denoise_type: "flow"
|
| 42 |
+
flow_path_type: "linear"
|
| 43 |
+
flow_predict_type: "velocity"
|
| 44 |
+
flow_reverse: True
|
| 45 |
+
flow_solver: "euler"
|
| 46 |
+
sample_flow_shift: 1.0
|
| 47 |
+
sample_use_flux_shift: False
|
| 48 |
+
flux_base_shift: 0.5
|
| 49 |
+
flux_max_shift: 1.15
|