#!/usr/bin/env bash
# Launch MORLHF (multi-objective RLHF) LoRA training runs for Llama-3, one run
# per reward-preference vector.  Each vector weights the rewards in the order
# 'harmless,helpful,humor' (matching --reward_names below).  Per-run stdout is
# captured to ${ckpt_dir}/<run-name>/log.txt.
set -euo pipefail

# Preference weight vectors "harmless,helpful,humor".
# Uncomment additional vectors to sweep more runs.
preferences=(
  "0.0,0.0,1.0"
  # "0.1,0.8,0.1"
  # "0.1,0.1,0.8"
  # "0.8,0.1,0.1"
  # "1.0,0.0,0.0"
  # "0.0,1.0,0.0"
)

gpu=1,2,3,4                              # GPUs exposed to each run (CUDA_VISIBLE_DEVICES)
ckpt_dir=./ckpt_morlhf                   # root directory for checkpoints and logs
general_wandb_name='morlhf_llamma3_lora' # W&B run-name prefix

for preference in "${preferences[@]}"; do
  # "0.1,0.8,0.1" -> "0.1_0.8_0.1": commas are awkward in dir / run names.
  preference_str=${preference//,/_}
  wandb_name="${general_wandb_name}-${preference_str}-load"
  mkdir -p -- "${ckpt_dir}/${wandb_name}"

  # NOTE(review): only stdout is logged; append `2>&1` here if the trainer's
  # stderr (tracebacks, progress bars) should be captured as well.
  CUDA_VISIBLE_DEVICES="${gpu}" accelerate launch morlhf-llama3.py \
    --mini_batch_size 4 \
    --epochs 3 \
    --init_kl_coef 0.8 \
    --save_directory "${ckpt_dir}" \
    --base_model_name /home/hector5/models/Llama-3-Base-8B-SFT/ \
    --reward_names 'harmless,helpful,humor' \
    --exp_type 'assistant' \
    --preference "${preference}" \
    --wandb_name "${wandb_name}" > "${ckpt_dir}/${wandb_name}/log.txt"
done