# RiC/ppo/scripts/ppo_single_vector.sh
# hour1's picture
# Upload folder using huggingface_hub
# f5d1134 verified
# Launch PPO training (ppo.py) through `accelerate launch` with
# --exp_type 'assistant' and --reward_name 'helpful'.
# Stdout of the run is written to ${ckpt_dir}/${wandb_name}/log.txt;
# stderr is not redirected and stays on the caller's terminal.
#
# Edit the variables below to change GPUs, base model, layers or run name.

# Stop at the first failing command or on use of an unset variable, so a
# failed mkdir never leads to a launch with nowhere to log.
set -eu

gpu=4,5                                        # exported as CUDA_VISIBLE_DEVICES for the launch
model_path=/data/public/model/Meta-Llama-3-8B  # passed as --base_model_name
layers=12,13,14,15,16,17,18,19,20              # passed verbatim as --layers
ckpt_dir=./ckpt                                # passed as --save_directory
wandb_name='helpful_llama_3-new'               # passed as --wandb_name; also names the log subdirectory

log_dir="${ckpt_dir}/${wandb_name}"

# Options go before operands: `mkdir DIR -p` only works where getopt permutes
# arguments (GNU) and fails on BSD/macOS or with POSIXLY_CORRECT set.
mkdir -p -- "${log_dir}"

# Cap virtual memory at 104857600 KiB (100 GiB). Kept best-effort: if the
# limit cannot be set (e.g. the hard limit is lower) the run still proceeds,
# but the failure is reported instead of passing silently.
ulimit -v 104857600 ||
  printf 'warning: could not set virtual memory limit (ulimit -v)\n' >&2

CUDA_VISIBLE_DEVICES="${gpu}" accelerate launch ppo.py \
  --mini_batch_size 8 \
  --save_directory "${ckpt_dir}" \
  --exp_type 'assistant' \
  --reward_name 'helpful' \
  --layers "${layers}" \
  --base_model_name "${model_path}" \
  --wandb_name "${wandb_name}" > "${log_dir}/log.txt"