{
  "name": "default_test_config",
  "n_gpu": 1,
  "text_encoder": {
    "type": "CTCCharTextEncoder",
    "args": {
      "kenlm_model_path": "hw_asr/text_encoder/lower_3-gram.arpa",
      "unigrams_path": "hw_asr/text_encoder/librispeech-fixed-vocab.txt"
    }
  },
  "preprocessing": {
    "sr": 16000,
    "spectrogram": {
      "type": "MelSpectrogram",
      "args": {
        "n_mels": 256
      }
    },
    "log_spec": true
  },
  "augmentations": {
    "random_apply_p": 0.6,
    "wave": [
      {
        "type": "AddColoredNoise",
        "args": {
          "p": 1,
          "sample_rate": 16000
        }
      },
      {
        "type": "Gain",
        "args": {
          "p": 0.8,
          "sample_rate": 16000
        }
      },
      {
        "type": "HighPassFilter",
        "args": {
          "p": 0,
          "sample_rate": 16000
        }
      },
      {
        "type": "LowPassFilter",
        "args": {
          "p": 0,
          "sample_rate": 16000
        }
      },
      {
        "type": "PitchShift",
        "args": {
          "p": 0.8,
          "min_transpose_semitones": -2,
          "max_transpose_semitones": 2,
          "sample_rate": 16000
        }
      },
      {
        "type": "PolarityInversion",
        "args": {
          "p": 0.8,
          "sample_rate": 16000
        }
      },
      {
        "type": "Shift",
        "args": {
          "p": 0.8,
          "sample_rate": 16000
        }
      }
    ],
    "spectrogram": [
      {
        "type": "TimeMasking",
        "args": {
          "time_mask_param": 80,
          "p": 0.05
        }
      },
      {
        "type": "FrequencyMasking",
        "args": {
          "freq_mask_param": 80
        }
      }
    ]
  },
  "arch": {
    "type": "DeepSpeech2Model",
    "args": {
      "n_feats": 256,
      "n_rnn_layers": 6,
      "rnn_hidden_size": 512,
      "rnn_dropout": 0.2
    }
  },
  "data": {
    "test": {
      "batch_size": 64,
      "num_workers": 4,
      "datasets": [
        {
          "type": "LibrispeechDataset",
          "args": {
            "part": "test-other"
          }
        }
      ]
    }
  },
  "optimizer": {
    "type": "AdamW",
    "args": {
      "lr": 0.0003,
      "weight_decay": 1e-05
    }
  },
  "loss": {
    "type": "CTCLoss",
    "args": {}
  },
  "metrics": [
    {
      "type": "ArgmaxWERMetric",
      "args": {
        "name": "WER (argmax)"
      }
    },
    {
      "type": "ArgmaxCERMetric",
      "args": {
        "name": "CER (argmax)"
      }
    },
    {
      "type": "BeamSearchWERMetric",
      "args": {
        "beam_size": 4,
        "name": "WER (beam search)"
      }
    },
    {
      "type": "BeamSearchCERMetric",
      "args": {
        "beam_size": 4,
        "name": "CER (beam search)"
      }
    },
    {
      "type": "LanguageModelWERMetric",
      "args": {
        "name": "WER (LM)"
      }
    },
    {
      "type": "LanguageModelCERMetric",
      "args": {
        "name": "CER (LM)"
      }
    }
  ],
  "lr_scheduler": {
    "type": "OneCycleLR",
    "args": {
      "steps_per_epoch": 1000,
      "epochs": 50,
      "anneal_strategy": "cos",
      "max_lr": 0.0003,
      "pct_start": 0.1
    }
  },
  "trainer": {
    "epochs": 50,
    "save_dir": "saved/",
    "save_period": 5,
    "verbosity": 2,
    "monitor": "min val_loss",
    "early_stop": 100,
    "visualize": "wandb",
    "wandb_project": "asr_project",
    "len_epoch": 1000,
    "grad_norm_clip": 10
  }
}