#!/bin/bash

# Fine-tune an IndicTrans2 En-Indic model with LoRA adapters on a single GPU.
export CUDA_VISIBLE_DEVICES=0

# Positional arguments (all optional): data directory, base model checkpoint,
# output directory, and comma-separated source/target language lists
# (FLORES-200 codes).
data_dir=${1:-"en-indic-exp"}
model_name=${2:-"ai4bharat/indictrans2-en-indic-dist-200M"}
output_dir=${3:-"output"}
src_lang_list=${4:-"eng_Latn"}
tgt_lang_list=${5:-"asm_Beng,ben_Beng,guj_Gujr,hin_Deva,kan_Knda,mal_Mlym,mar_Deva,npi_Deva,ory_Orya,pan_Guru,tam_Taml,tel_Telu,urd_Arab"}
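
# Example invocations (illustrative sketches; the train_lora.sh filename and
# the output_hin directory are assumptions, not taken from this script):
#   bash train_lora.sh
#   bash train_lora.sh en-indic-exp ai4bharat/indictrans2-en-indic-dist-200M output_hin eng_Latn hin_Deva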

# Launch LoRA fine-tuning; adapters are attached to the attention q/k
# projections (see --lora_target_modules below).
python3 train_lora.py \
    --data_dir "$data_dir" \
    --model_name "$model_name" \
    --output_dir "$output_dir" \
    --src_lang_list "$src_lang_list" \
    --tgt_lang_list "$tgt_lang_list" \
    --save_steps 1000 \
    --max_steps 1000000 \
    --batch_size 32 \
    --grad_accum_steps 4 \
    --warmup_steps 4000 \
    --max_grad_norm 1.0 \
    --learning_rate 2e-4 \
    --adam_beta1 0.9 \
    --adam_beta2 0.98 \
    --optimizer adamw_torch \
    --lr_scheduler inverse_sqrt \
    --num_workers 16 \
    --metric_for_best_model eval_BLEU \
    --greater_is_better \
    --patience 10 \
    --weight_decay 0.01 \
    --lora_target_modules "q_proj,k_proj" \
    --lora_dropout 0.1 \
    --lora_r 16 \
    --lora_alpha 32 \
    --print_samples
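
# Each optimizer update consumes batch_size * grad_accum_steps = 32 * 4 = 128
# examples. With --patience 10, --metric_for_best_model eval_BLEU, and
# --greater_is_better, training should stop early once eval_BLEU fails to
# improve for 10 consecutive evaluations.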