{ "n_sample": 0, "tgt_vocab_size": 32000, "tgt_vocab": "en.eole.vocab", "tensorboard_log_dir_dated": "tensorboard/Nov-24_22-33-35", "valid_metrics": [ "BLEU" ], "src_vocab_size": 32000, "save_data": "data", "share_vocab": false, "overwrite": true, "report_every": 100, "tensorboard": true, "seed": 1234, "src_vocab": "is.eole.vocab", "vocab_size_multiple": 8, "tensorboard_log_dir": "tensorboard", "transforms": [ "sentencepiece", "filtertoolong" ], "training": { "warmup_steps": 5000, "label_smoothing": 0.1, "attention_dropout": [ 0.1 ], "decay_method": "noam", "model_path": "quickmt-is-en-eole-model", "compute_dtype": "torch.float16", "dropout": [ 0.1 ], "normalization": "tokens", "dropout_steps": [ 0 ], "param_init_method": "xavier_uniform", "train_steps": 100000, "adam_beta2": 0.998, "max_grad_norm": 0.0, "batch_type": "tokens", "accum_count": [ 20 ], "learning_rate": 3.0, "num_workers": 0, "accum_steps": [ 0 ], "bucket_size": 128000, "average_decay": 0.0001, "batch_size": 6000, "gpu_ranks": [ 0 ], "prefetch_factor": 32, "save_checkpoint_steps": 5000, "world_size": 1, "optim": "adamw", "keep_checkpoint": 4, "batch_size_multiple": 8, "valid_batch_size": 2048, "valid_steps": 5000 }, "transforms_configs": { "sentencepiece": { "src_subword_model": "${MODEL_PATH}/is.spm.model", "tgt_subword_model": "${MODEL_PATH}/en.spm.model" }, "filtertoolong": { "src_seq_length": 256, "tgt_seq_length": 256 } }, "data": { "corpus_1": { "weight": 9, "path_src": "train.is", "path_tgt": "train.en", "path_align": null, "transforms": [ "sentencepiece", "filtertoolong" ] }, "corpus_2": { "weight": 5, "path_src": "/home/mark/mt/data/newscrawl.backtrans.is", "path_tgt": "/home/mark/mt/data/newscrawl.2024.en", "path_align": null, "transforms": [ "sentencepiece", "filtertoolong" ] }, "valid": { "path_src": "valid.is", "path_tgt": "valid.en", "path_align": null, "transforms": [ "sentencepiece", "filtertoolong" ] } }, "model": { "position_encoding_type": "SinusoidalInterleaved", "hidden_size": 1024, "architecture": "transformer", "share_decoder_embeddings": true, "heads": 8, "share_embeddings": false, "transformer_ff": 4096, "encoder": { "position_encoding_type": "SinusoidalInterleaved", "hidden_size": 1024, "n_positions": null, "layers": 8, "src_word_vec_size": 1024, "encoder_type": "transformer", "heads": 8, "transformer_ff": 4096 }, "embeddings": { "position_encoding_type": "SinusoidalInterleaved", "tgt_word_vec_size": 1024, "src_word_vec_size": 1024, "word_vec_size": 1024 }, "decoder": { "position_encoding_type": "SinusoidalInterleaved", "hidden_size": 1024, "n_positions": null, "layers": 2, "tgt_word_vec_size": 1024, "decoder_type": "transformer", "heads": 8, "transformer_ff": 4096 } } }