---
# Training configuration: a `ScalarModel` generator, three discriminator
# definitions, optimizers, LR schedulers, loss settings and run parameters.
#
# NOTE(review): this file was recovered from a copy in which every line was
# wrapped in `| ... |` and all indentation had been stripped. The nesting
# below is rebuilt from key order and the repeated `name:` / `config:`
# pattern; keys and values are unchanged. Confirm the hierarchy against the
# code that loads this file before relying on it.

generator:
  name: ScalarModel
  config:
    num_bands: 1
    sample_rate: 24000
    causal: true
    num_samples: 2
    # Product of the factors is 2 * 3 * 4 * 4 * 5 = 480.
    downsample_factors:
      - 2
      - 3
      - 4
      - 4
      - 5
    # Each kernel size is twice the factor at the same position.
    downsample_kernel_sizes:
      - 4
      - 6
      - 8
      - 8
      - 10
    # Same factors as downsample_factors, in reverse order.
    upsample_factors:
      - 5
      - 4
      - 4
      - 3
      - 2
    upsample_kernel_sizes:
      - 10
      - 8
      - 8
      - 6
      - 4
    latent_hidden_dim: 136
    default_kernel_size: 7
    delay_kernel_size: 5
    init_channel: 48
    res_kernel_size: 7

# Only `mfd` is listed here although `mpd` and `msd` are also defined below.
# NOTE(review): presumably this selects the active discriminators - confirm.
d_list:
  - mfd

mfd:
  name: MultiFrequencyDiscriminator
  config:
    # hop_lengths and hidden_channels both have six entries.
    hop_lengths:
      - 32
      - 64
      - 128
      - 256
      - 512
      - 1024
    hidden_channels:
      - 64
      - 128
      - 256
      - 512
      - 512
      - 512
    domain: double
    mel_scale: true
    sample_rate: 24000

mpd:
  name: MultiPeriodDiscriminator
  config:
    period_sizes:
      - 2
      - 3
      - 5
      - 7
      - 11
    period_kernel_size: 5

msd:
  name: MultiScaleDiscriminator
  config:
    num_scales: 3
    pool_kernel_size: 4
    pool_stride: 2

# `g` and `d` carry identical settings in this section and the next.
# NOTE(review): presumably g = generator, d = discriminator - confirm.
optimizer:
  g:
    name: AdamW
    config:
      lr: 0.0002
      betas:
        - 0.8
        - 0.99
      eps: 1.0e-06
  d:
    name: AdamW
    config:
      lr: 0.0002
      betas:
        - 0.8
        - 0.99
      eps: 1.0e-06

lr_scheduler:
  g:
    name: ExponentialLR
    config:
      gamma: 0.999
  d:
    name: ExponentialLR
    config:
      gamma: 0.999

criterion:
  g_criterion:
    name: losses.generator_loss.GeneratorSTFTLoss
    config:
      # use_mel_loss is false while mel_loss_weight and mel_scale_loss are
      # still set. NOTE(review): presumably ignored when disabled - confirm.
      use_mel_loss: false
      adv_criterion: MSEGLoss
      mel_loss_weight: 45
      use_feature_match: true
      feat_match_loss_weight: 20
      use_full_stft_loss: true
      use_sub_stft_loss: true
      full_stft_loss_weight: 1
      sub_stft_loss_weight: 1
      mel_scale_loss:
        sampling_rate: 24000
        n_fft: 1024
        num_mels: 80
        hop_size: 160
        win_size: 800
        fmin: 0
      full_multi_scale_stft_loss:
        fft_sizes:
          - 512
          - 1024
          - 2048
        win_sizes:
          - 480
          - 960
          - 1200
        hop_sizes:
          - 120
          - 240
          - 300
      sub_multi_scale_stft_loss:
        num_bands: 6
        fft_sizes:
          - 128
          - 256
          - 256
        win_sizes:
          - 80
          - 120
          - 200
        hop_sizes:
          - 20
          - 40
          - 50
  d_criterion:
    name: losses.discriminator_loss.MSEDiscriminatorLoss
    config: null

# NOTE(review): placed at top level because it directly precedes the other
# top-level run settings; it could instead belong under `criterion`. The
# flattened source does not say which - verify against the training script.
commit_loss_weight: 1.0

# Machine-specific absolute paths.
training_file: /home/ydc/code2/ScalartTokenizer16k_m36/data/train_v2.scp
validation_file: /home/ydc/code2/ScalartTokenizer16k_m36/data/val.scp

seed: 2333
cudnn_deterministic: false
tensorboard: true
checkpoint_interval: 5000
summary_interval: 100
validation_interval: 5000
num_epoches: 50
print_freq: 10
discriminator_iter_start: 0
num_ckpt_keep: 10
# 48000 is twice the sample_rate of 24000 set in this file.
segment_size: 48000
audio_norm_scale: 0.95
batch_size: 16
num_workers: 4
num_plots: 8
local_rank: -1
# NOTE(review): the base config file name says "64dim" while
# latent_hidden_dim above is 136 and log_dir says "136dim" - confirm this is
# the intended base config.
basic_model_config: config/scalar24k_64dim.yaml
exp_model_config: null
log_dir: /data9/ydc/exp/s_codec_24k_136dim_scale9_25hz
# NOTE(review): 2000 differs from the product of downsample_factors (480);
# confirm what this value is used for.
hop_length: 2000
ngpus_per_node: 4
sample_rate: 24000
model_ckpt_dir: /data9/ydc/exp/s_codec_24k_136dim_scale9_25hz/model_ckpts
|