---
# Training configuration: model paths, dataset, validation sampling,
# optimizer settings and runtime switches.
#
# NOTE(review): this file was recovered from a pipe-delimited table dump that
# had lost all indentation. The nesting below (children of train_data and
# validation_data, and the list items) is reconstructed from key order;
# confirm it against the schema the training script expects.

# Model and output locations.
pretrained_model_path: /content/AnimateDiff-SceneFusion/models/StableDiffusion/
output_dir: /content/AnimateDiff-SceneFusion/models/Motion_Module/temp

# Training dataset.
train_data:
  video_folder: data/output
  csv_path: data/output.csv
  sample_n_frames: 5
  sample_stride: 100
  sample_size: 256

# Validation sampling.
validation_data:
  prompts:
    # Folded scalar: the two lines are joined with a single space, exactly as
    # the original multi-line plain scalar was.
    - >-
      camera panning right to left, a bird's eye view of a row of buildings in a city
      with trees in the foreground, masterpiece, best quality
  video_length: 5
  width: 256
  height: 256
  num_inference_steps: 20
  guidance_scale: 12.5
  temporal_context: 24
  use_inv_latent: true
  num_inv_steps: 50

# NOTE(review): placed at top level; presumably not a child of
# validation_data. Verify against the trainer.
validation_steps: 1

# Which parameters are trained.
train_whole_module: false
trainable_modules:
  - to_q

# Optimization.
train_batch_size: 1
max_train_steps: 36
learning_rate: 0.0003
scale_lr: false
lr_scheduler: constant
lr_warmup_steps: 0
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
gradient_accumulation_steps: 1
gradient_checkpointing: true

# Checkpointing and resume.
# NOTE(review): checkpointing_steps (100) is larger than max_train_steps (36);
# if checkpoints are written only on multiples of checkpointing_steps, none
# would be produced by step count. Confirm this is intended.
checkpointing_steps: 100
start_global_step: 0
resume_from_checkpoint: null

# Runtime switches.
# NOTE(review): both mixed_precision and mixed_precision_training (further
# down) are set, as are seed (33) and global_seed (42). Confirm which of each
# pair the trainer actually reads.
mixed_precision: fp16
use_8bit_adam: false
enable_xformers_memory_efficient_attention: true
seed: 33

# Motion module and inference settings.
motion_module: /content/AnimateDiff-SceneFusion/models/Motion_Module/mm_sd_v15.ckpt
inference_config_path: configs/inference/inference-v3.yaml
motion_module_pe_multiplier: 1
dataset_class: MultiTuneAVideoDataset

# Remaining trainer options.
image_finetune: false
name: scenefusion
use_wandb: true
launcher: launcher
cfg_random_null_text: true
cfg_random_null_text_ratio: 0.1
unet_checkpoint_path: ''
unet_additional_kwargs: {}
ema_decay: 0.9999
noise_scheduler_kwargs: null
max_train_epoch: -1
validation_steps_tuple:
  - -1
num_workers: 32
checkpointing_epochs: 5
mixed_precision_training: true
global_seed: 42
is_debug: false