hour1
/

RiC

Model card Files Files and versions

RiC / ppo /scripts /ppo_single_vector.sh

hour1's picture

Upload folder using huggingface_hub

f5d1134 verified 3 months ago

history blame contribute delete

498 Bytes

	# Launch PPO training (ppo.py) through `accelerate launch` on the GPUs listed
	# in `gpu`, writing the run's stdout to <ckpt_dir>/<wandb_name>/log.txt.
	# ppo.py and ckpt_dir are relative paths, so run this from the directory that
	# contains ppo.py.

	# Comma-separated GPU indices exported to the launcher as CUDA_VISIBLE_DEVICES.
	gpu=4,5

	# Value forwarded to ppo.py as --base_model_name.
	model_path=/data/public/model/Meta-Llama-3-8B

	# Comma-separated list forwarded verbatim to ppo.py as --layers
	# (presumably model layer indices -- confirm against ppo.py).
	layers=12,13,14,15,16,17,18,19,20
	ckpt_dir=./ckpt
	# Used both as the --wandb_name argument and as the log sub-directory name.
	wandb_name='helpful_llama_3-new'

	# Options go before the operand: only GNU mkdir accepts a trailing `-p`.
	# Stop early if the log directory cannot be created, since the redirect
	# below would fail anyway.
	mkdir -p -- "${ckpt_dir}/${wandb_name}" || exit 1

	# Cap virtual memory for this shell and its children; bash takes the -v value
	# in 1024-byte units, so 104857600 is 100 GiB.
	ulimit -v 104857600

	# Only stdout goes to log.txt; stderr stays on the terminal.
	CUDA_VISIBLE_DEVICES="${gpu}" accelerate launch ppo.py \
	--mini_batch_size 8 \
	--save_directory "${ckpt_dir}" \
	--exp_type 'assistant' \
	--reward_name 'helpful' \
	--layers "${layers}" \
	--base_model_name "${model_path}" \
	--wandb_name "${wandb_name}" > "${ckpt_dir}/${wandb_name}/log.txt"