#!/usr/bin/env bash
# Launch MORLHF (multi-objective RLHF) LoRA training runs for Llama-3, one run
# per reward-preference vector.  Each vector weights the rewards in the order
# 'harmless,helpful,humor' (matching --reward_names below).  Per-run stdout is
# captured to ${ckpt_dir}/<run-name>/log.txt.
set -euo pipefail

# Preference weight vectors "harmless,helpful,humor".
# Uncomment additional vectors to sweep more runs.
preferences=(
  "0.0,0.0,1.0"
  # "0.1,0.8,0.1"
  # "0.1,0.1,0.8"
  # "0.8,0.1,0.1"
  # "1.0,0.0,0.0"
  # "0.0,1.0,0.0"
)

gpu=1,2,3,4                              # GPUs exposed to each run (CUDA_VISIBLE_DEVICES)
ckpt_dir=./ckpt_morlhf                   # root directory for checkpoints and logs
general_wandb_name='morlhf_llamma3_lora' # W&B run-name prefix

for preference in "${preferences[@]}"; do
  # "0.1,0.8,0.1" -> "0.1_0.8_0.1": commas are awkward in dir / run names.
  preference_str=${preference//,/_}
  wandb_name="${general_wandb_name}-${preference_str}-load"
  mkdir -p -- "${ckpt_dir}/${wandb_name}"

  # NOTE(review): only stdout is logged; append `2>&1` here if the trainer's
  # stderr (tracebacks, progress bars) should be captured as well.
  CUDA_VISIBLE_DEVICES="${gpu}" accelerate launch morlhf-llama3.py \
    --mini_batch_size 4 \
    --epochs 3 \
    --init_kl_coef 0.8 \
    --save_directory "${ckpt_dir}" \
    --base_model_name /home/hector5/models/Llama-3-Base-8B-SFT/ \
    --reward_names 'harmless,helpful,humor' \
    --exp_type 'assistant' \
    --preference "${preference}" \
    --wandb_name "${wandb_name}" > "${ckpt_dir}/${wandb_name}/log.txt"
done