model:
  model_name: microsoft/deberta-v3-base
  # Alternative base models:
  # model_name: facebook/opt-125m
  # model_name: meta-llama/Llama-2-7b-hf
  # model_name: "openai-community/gpt2"
  # model_name: EleutherAI/pythia-160m
  # model_name: Qwen/Qwen2.5-0.5B
  # adapter_path: "./run_all/exnr15/ft2"
  # adapter_path: './run_all/run_exps9/ft2'
  # adapter_path: "./exp395/run_ex07/ft2"
  data_collator_mode: 'dynamic'

rotation_adapter_config:
  r: 5
  num_rotations: 1
  # LLaMA-style module names (the original listed "v_proj" twice; "k_proj" assumed intended):
  # target_modules: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
  # DeBERTa-v3 module names:
  target_modules: ["query_proj", "value_proj", "key_proj", 'attention.output.dense', 'intermediate.dense', 'output.dense']
  task_type: "SEQ_CLS"

data:
  dataset_name: 'math'
  split_ratio: 0.00258
  # path: "./data/gsm8k_test.jsonl"
  # path: ./data/MetaMathQA-40K/MetaMathQA-40K.json
  path: ./data/MetaMathQA/MetaMathQA-395K.json
  dataset_split: train[:100000]
  # dataset_field: [question, answer]
  dataset_field: [query, response]

trainer_args:
  learning_rate: 2e-4
  warmup_ratio: 0.01
  # eval_strategy: steps
  per_device_train_batch_size: 32
  per_device_eval_batch_size: 64
  # accumulate_grad_batches: 1
  # save_steps: 1000
  gradient_checkpointing: False  # turn off for faster training
  output_dir: "./exps/run_exps"
  # save_path: "runs"
  # report_to: wandb
  logging_steps: 200
  # eval_steps: 1000
  # dataloader_num_workers: 4
  num_train_epochs: 2.0
  # max_steps: 21
  # torch_compile: False
  # device: 'cuda'
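
# ---------------------------------------------------------------------------
# Loading sketch (illustrative only): shows how a training script might
# consume this file. The filename "config.yaml", the use of PyYAML, and the
# mapping into transformers.TrainingArguments are assumptions, not this
# repo's confirmed entry point. One real caveat worth knowing: PyYAML's
# safe_load follows YAML 1.1, whose floats require a decimal point, so
# `learning_rate: 2e-4` is read as the string "2e-4" (write `2.0e-4` or cast
# explicitly, as below).
#
#   import yaml
#   from transformers import TrainingArguments
#
#   with open("config.yaml") as f:
#       cfg = yaml.safe_load(f)
#
#   ta = cfg["trainer_args"]
#   args = TrainingArguments(
#       output_dir=ta["output_dir"],
#       learning_rate=float(ta["learning_rate"]),  # cast: see caveat above
#       per_device_train_batch_size=ta["per_device_train_batch_size"],
#       per_device_eval_batch_size=ta["per_device_eval_batch_size"],
#       num_train_epochs=ta["num_train_epochs"],
#       warmup_ratio=ta["warmup_ratio"],
#       logging_steps=ta["logging_steps"],
#       gradient_checkpointing=ta["gradient_checkpointing"],
#   )
# ---------------------------------------------------------------------------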