# File: RiC/ppo/scripts/load.sh
# Hosting-page residue kept for provenance (not part of the script):
#   hour1's picture
#   Upload folder using huggingface_hub
#   f5d1134 verified
# Launches one `accelerate launch morlhf-llama3.py` run per entry in
# `preferences`, writing each run's stdout to
# <ckpt_dir>/<general_wandb_name>-<preference with ',' -> '_'>-load/log.txt.
#
# Each entry is a comma-separated triple that is passed verbatim to
# --preference.
# NOTE(review): presumably one weight per reward, in the order given to
# --reward_names (harmless,helpful,humor) — confirm against morlhf-llama3.py.
preferences=(
  "0.0,0.0,1.0"
)
# Other preference vectors; move a line into the array above to enable it.
# "0.1,0.8,0.1"
# "0.1,0.1,0.8"
# "0.8,0.1,0.1"
# "1.0,0.0,0.0"
# "0.0,0.0,1.0"
# "0.0,1.0,0.0"

# Value exported as CUDA_VISIBLE_DEVICES for every launch.
gpu=1,2,3,4
# Root directory for per-run log directories; also passed as --save_directory.
ckpt_dir=./ckpt_morlhf
# Common prefix of every run's wandb name.
general_wandb_name='morlhf_llamma3_lora'

for preference in "${preferences[@]}"; do
  # Commas become underscores so the preference can be embedded in a
  # directory / run name (e.g. "0.0,0.0,1.0" -> "0.0_0.0_1.0").
  preference_str=${preference//,/_}
  wandb_name="${general_wandb_name}-${preference_str}-load"
  run_dir="${ckpt_dir}/${wandb_name}"

  # Without the directory the log redirection below cannot succeed, so skip
  # this preference with an explicit message instead of a bare redirect error.
  if ! mkdir -p -- "${run_dir}"; then
    printf 'error: cannot create %s; skipping preference %s\n' \
      "${run_dir}" "${preference}" >&2
    continue
  fi

  # Only stdout is captured in log.txt; stderr stays on the terminal.
  # A failed launch does not abort the loop: the next preference still runs.
  CUDA_VISIBLE_DEVICES="${gpu}" accelerate launch morlhf-llama3.py \
    --mini_batch_size 4 \
    --epochs 3 \
    --init_kl_coef 0.8 \
    --save_directory "${ckpt_dir}" \
    --base_model_name /home/hector5/models/Llama-3-Base-8B-SFT/ \
    --reward_names 'harmless,helpful,humor' \
    --exp_type 'assistant' \
    --preference "${preference}" \
    --wandb_name "${wandb_name}" > "${run_dir}/log.txt"
done