#!/usr/bin/env bash
#
# Runs morlhf-llama3.py through `accelerate launch` once for every entry in
# the `preferences` array. For each entry a per-run directory is created under
# ${ckpt_dir} and the run's stdout is written to log.txt inside it.
#
# No `set -e` on purpose: a failed run does not abort the loop, so any
# remaining preference vectors are still attempted.

# Comma-separated weight vectors to sweep over. Uncomment entries to enable
# them. Presumably one weight per name in --reward_names, in the same order
# (harmless, helpful, humor) -- TODO confirm against morlhf-llama3.py.
preferences=(
  "0.0,0.0,1.0"
  # "0.1,0.8,0.1"
  # "0.1,0.1,0.8"
  # "0.8,0.1,0.1"
  # "1.0,0.0,0.0"
  # "0.0,0.0,1.0"
  # "0.0,1.0,0.0"
)

# Device list exported to the launcher as CUDA_VISIBLE_DEVICES.
gpu=1,2,3,4
# Root directory: passed as --save_directory and parent of the log folders.
ckpt_dir=./ckpt_morlhf
# Common prefix of every run name.
general_wandb_name='morlhf_llamma3_lora'

for preference in "${preferences[@]}"; do
  # Commas become underscores so the vector can be embedded in a run name
  # and a directory name, e.g. "0.0,0.0,1.0" -> "0.0_0.0_1.0".
  preference_str=${preference//,/_}
  wandb_name="${general_wandb_name}-${preference_str}-load"

  # The log directory must exist before the shell opens log.txt for writing.
  mkdir -p -- "${ckpt_dir}/${wandb_name}"

  # Only stdout is redirected to the log; stderr stays on the terminal.
  CUDA_VISIBLE_DEVICES="${gpu}" accelerate launch morlhf-llama3.py \
    --mini_batch_size 4 \
    --epochs 3 \
    --init_kl_coef 0.8 \
    --save_directory "${ckpt_dir}" \
    --base_model_name /home/hector5/models/Llama-3-Base-8B-SFT/ \
    --reward_names 'harmless,helpful,humor' \
    --exp_type 'assistant' \
    --preference "${preference}" \
    --wandb_name "${wandb_name}" > "${ckpt_dir}/${wandb_name}/log.txt"
done