# Source: VideoCoF / config/wan2.2/wan_civitai_t2v.yaml
# Last commit: "first commit" (42a2bfa), XiangpengYang
# Top-level selectors: configuration format and pipeline name.
format: civitai
pipeline: Wan

# Transformer options. Two model sub-paths are listed (low-noise and
# high-noise) together with a "moe" combination type.
transformer_additional_kwargs:
  transformer_low_noise_model_subpath: ./low_noise_model
  transformer_high_noise_model_subpath: ./high_noise_model
  transformer_combination_type: "moe"
  # NOTE(review): presumably the threshold at which sampling switches between
  # the high-noise and low-noise models -- confirm against the consumer.
  boundary: 0.875
  # Key-to-key mapping: in_dim -> in_channels, dim -> hidden_size.
  # NOTE(review): placed under the transformer section because it appears
  # before the next *_kwargs header -- verify against the loader.
  dict_mapping:
    in_dim: in_channels
    dim: hidden_size

# VAE type, weights file, and temporal/spatial compression ratios.
vae_kwargs:
  vae_type: "AutoencoderKLWan"
  vae_subpath: Wan2.1_VAE.pth
  temporal_compression_ratio: 4
  spatial_compression_ratio: 8

# Text encoder weights file, tokenizer identifier, and size settings.
# `dim` here is scoped to this section and is distinct from
# transformer_additional_kwargs.dict_mapping.dim.
text_encoder_kwargs:
  text_encoder_subpath: models_t5_umt5-xxl-enc-bf16.pth
  tokenizer_subpath: google/umt5-xxl
  text_length: 512
  vocab: 256384
  dim: 4096
  dim_attn: 4096
  dim_ffn: 10240
  num_heads: 64
  num_layers: 24
  num_buckets: 32
  shared_pos: false
  dropout: 0.0

# Scheduler settings. No scheduler sub-path is configured (explicit null),
# and dynamic shifting is disabled.
scheduler_kwargs:
  scheduler_subpath: null
  num_train_timesteps: 1000
  shift: 12.0
  use_dynamic_shifting: false
  base_shift: 0.5
  max_shift: 1.15
  base_image_seq_len: 256
  max_image_seq_len: 4096

# Image encoder weights file.
image_encoder_kwargs:
  image_encoder_subpath: models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth