# Launches ppo_reft.py through `accelerate launch` on the selected GPUs and
# writes the run's stdout to ${ckpt_dir}/${wandb_name}/log.txt.
#
# Edit the settings below to configure a run.

# Comma-separated GPU indices exported to the launcher as CUDA_VISIBLE_DEVICES.
gpu=6,7
# Passed to ppo_reft.py as --base_model_name.
model_path=/data/public/model/Meta-Llama-3-8B
# Comma-separated list passed verbatim as --layers (interpreted by ppo_reft.py).
layers=12,13,14,15,16,17,18,19,20
# Passed as --save_directory; also the parent of the per-run log directory.
ckpt_dir=./ckpt
# Passed as --wandb_name; also names the per-run log directory.
wandb_name='test'

# Single source of truth for where this run's log file lives.
log_dir="${ckpt_dir}/${wandb_name}"

# Options go before the operand so this also works with non-GNU mkdir; `--`
# protects against a path that starts with '-'. Stop early if the directory
# cannot be created, since the log redirection below would fail anyway.
mkdir -p -- "${log_dir}" || exit 1

# Cap the virtual memory of this shell and its children. bash and dash count
# `ulimit -v` in 1024-byte units, so 104857600 corresponds to 100 GiB.
# Left non-fatal on purpose: if the limit cannot be set, the run still starts.
ulimit -v 104857600

# Only stdout is captured in log.txt; stderr stays on the terminal.
CUDA_VISIBLE_DEVICES="${gpu}" accelerate launch ppo_reft.py \
  --mini_batch_size 8 \
  --save_directory "${ckpt_dir}" \
  --exp_type 'assistant' \
  --reward_name 'helpful' \
  --layers "${layers}" \
  --base_model_name "${model_path}" \
  --wandb_name "${wandb_name}" > "${log_dir}/log.txt"