---
# Codec training configuration: generator, discriminators, optimizers,
# learning-rate schedulers, loss criteria and training-loop settings.
# NOTE(review): each `name`/`config` pair presumably names a class and the
# keyword arguments it is built with -- confirm against the training script.

# ---------------------------------------------------------------------------
# Network architecture
# ---------------------------------------------------------------------------

# Generator
generator:
  name: SoundStream
  config:
    n_filters: 32
    D: 256
    target_bandwidths: [0.5, 1, 1.5, 2, 4]
    # The ratios multiply out to 320 (8 * 5 * 4 * 2); presumably the total
    # downsampling factor of the encoder -- TODO confirm.
    ratios: [8, 5, 4, 2]
    sample_rate: 16000
    bins: 1024

# Discriminators to use. Only 'mfd' is listed, so the `mpd` and `msd`
# sections below are presumably inactive unless added here -- confirm.
d_list: ['mfd']

# Multi-frequency discriminator
mfd:
  name: MultiFrequencyDiscriminator
  config:
    # Both lists have six entries; presumably paired index by index.
    hop_lengths: [32, 64, 128, 256, 512, 1024]
    hidden_channels: [64, 128, 256, 512, 512, 512]
    domain: double
    mel_scale: true
    sample_rate: 16000

# Multi-period discriminator
mpd:
  name: MultiPeriodDiscriminator
  config:
    period_sizes: [2, 3, 5, 7, 11]
    period_kernel_size: 5

# Multi-scale discriminator
msd:
  name: MultiScaleDiscriminator
  config:
    num_scales: 3
    pool_kernel_size: 4
    pool_stride: 2

# ---------------------------------------------------------------------------
# Optimizers and schedulers
# ---------------------------------------------------------------------------

# `g` and `d` presumably configure the generator and the discriminator(s)
# respectively; both currently use identical settings.
optimizer:
  g:
    name: AdamW
    config:
      # Written with a decimal point (like `eps`) so that YAML 1.1 loaders
      # such as PyYAML read a float; a bare `2e-4` loads as a string there.
      lr: 2.0e-4
      betas: [0.8, 0.99]
      eps: 1.0e-6
  d:
    name: AdamW
    config:
      # Same float-safe spelling as the generator learning rate.
      lr: 2.0e-4
      betas: [0.8, 0.99]
      eps: 1.0e-6

lr_scheduler:
  g:
    name: ExponentialLR
    config:
      gamma: 0.999
  d:
    name: ExponentialLR
    config:
      gamma: 0.999

# ---------------------------------------------------------------------------
# Loss criteria
# ---------------------------------------------------------------------------

criterion:
  g_criterion:
    name: losses.generator_loss.GeneratorSTFTLoss
    config:
      # NOTE(review): with use_mel_loss false, `mel_loss_weight` and
      # `mel_scale_loss` below are presumably unused -- confirm.
      use_mel_loss: false
      adv_criterion: MSEGLoss
      mel_loss_weight: 45
      use_feature_match: true
      feat_match_loss_weight: 20
      use_full_stft_loss: true
      use_sub_stft_loss: true
      full_stft_loss_weight: 1
      sub_stft_loss_weight: 1
      mel_scale_loss:
        sampling_rate: 16000
        n_fft: 1024
        num_mels: 80
        hop_size: 160
        win_size: 800
        fmin: 0
      # Three STFT resolutions; the three lists are presumably paired
      # index by index (fft size, window size, hop size).
      full_multi_scale_stft_loss:
        fft_sizes: [512, 1024, 2048]
        win_sizes: [480, 960, 1200]
        hop_sizes: [120, 240, 300]
      sub_multi_scale_stft_loss:
        num_bands: 6
        fft_sizes: [128, 256, 256]
        win_sizes: [80, 120, 200]
        hop_sizes: [20, 40, 50]
  d_criterion:
    name: losses.discriminator_loss.MSEDiscriminatorLoss
    # Explicit null: no configuration mapping is supplied for this loss.
    config: null

# Weight of the commitment loss term (float 1.0).
commit_loss_weight: 1.0

# ---------------------------------------------------------------------------
# Training settings
# ---------------------------------------------------------------------------

seed: 2333
cudnn_deterministic: false
tensorboard: true

# Intervals; presumably counted in training steps -- TODO confirm.
checkpoint_interval: 5000
summary_interval: 100
validation_interval: 5000

# The key spelling `num_epoches` is kept exactly as the consumer reads it.
num_epoches: 5000
print_freq: 10
discriminator_iter_start: 0
num_ckpt_keep: 10

# Data loading. 24000 samples is 1.5 s at the 16000 Hz sample rate above,
# assuming segment_size is counted in samples -- confirm.
segment_size: 24000
audio_norm_scale: 1.0
batch_size: 6
num_workers: 8
num_plots: 8