| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 183, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0547945205479452, |
| "grad_norm": 513.996826171875, |
| "learning_rate": 4.090909090909091e-07, |
| "logits/chosen": 2.990995407104492, |
| "logits/rejected": 3.0081257820129395, |
| "logps/chosen": -298.52886962890625, |
| "logps/rejected": -202.96295166015625, |
| "loss": 0.8843, |
| "rewards/accuracies": 0.526562511920929, |
| "rewards/chosen": 0.010423189960420132, |
| "rewards/margins": 0.05534166842699051, |
| "rewards/rejected": -0.0449184887111187, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1095890410958904, |
| "grad_norm": 461.9533996582031, |
| "learning_rate": 8.636363636363637e-07, |
| "logits/chosen": 2.934217929840088, |
| "logits/rejected": 2.919574737548828, |
| "logps/chosen": -276.70391845703125, |
| "logps/rejected": -200.52728271484375, |
| "loss": 0.6858, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": 0.7733574509620667, |
| "rewards/margins": 0.9247980117797852, |
| "rewards/rejected": -0.15144045650959015, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1643835616438356, |
| "grad_norm": 150.57086181640625, |
| "learning_rate": 1.3181818181818182e-06, |
| "logits/chosen": 3.0546913146972656, |
| "logits/rejected": 3.0510308742523193, |
| "logps/chosen": -289.57977294921875, |
| "logps/rejected": -218.24765014648438, |
| "loss": 0.2839, |
| "rewards/accuracies": 0.8890625238418579, |
| "rewards/chosen": 2.8807666301727295, |
| "rewards/margins": 4.351069450378418, |
| "rewards/rejected": -1.4703023433685303, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2191780821917808, |
| "grad_norm": 151.46690368652344, |
| "learning_rate": 1.7727272727272729e-06, |
| "logits/chosen": 3.088348150253296, |
| "logits/rejected": 3.1125292778015137, |
| "logps/chosen": -275.5274963378906, |
| "logps/rejected": -214.15737915039062, |
| "loss": 0.1502, |
| "rewards/accuracies": 0.9390624761581421, |
| "rewards/chosen": 5.216189384460449, |
| "rewards/margins": 8.827147483825684, |
| "rewards/rejected": -3.610957384109497, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.273972602739726, |
| "grad_norm": 130.53453063964844, |
| "learning_rate": 2.2272727272727274e-06, |
| "logits/chosen": 2.9992308616638184, |
| "logits/rejected": 3.075270175933838, |
| "logps/chosen": -259.50836181640625, |
| "logps/rejected": -203.08811950683594, |
| "loss": 0.1281, |
| "rewards/accuracies": 0.957812488079071, |
| "rewards/chosen": 6.97296667098999, |
| "rewards/margins": 12.8120698928833, |
| "rewards/rejected": -5.839103698730469, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3287671232876712, |
| "grad_norm": 110.53746032714844, |
| "learning_rate": 2.6818181818181822e-06, |
| "logits/chosen": 3.088064670562744, |
| "logits/rejected": 2.986386775970459, |
| "logps/chosen": -325.0431213378906, |
| "logps/rejected": -224.979736328125, |
| "loss": 0.1444, |
| "rewards/accuracies": 0.964062511920929, |
| "rewards/chosen": 10.117586135864258, |
| "rewards/margins": 18.425701141357422, |
| "rewards/rejected": -8.30811595916748, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3835616438356164, |
| "grad_norm": 95.7762222290039, |
| "learning_rate": 3.1363636363636367e-06, |
| "logits/chosen": 3.040494918823242, |
| "logits/rejected": 3.022307872772217, |
| "logps/chosen": -273.9972839355469, |
| "logps/rejected": -211.2712860107422, |
| "loss": 0.1443, |
| "rewards/accuracies": 0.9671875238418579, |
| "rewards/chosen": 9.627126693725586, |
| "rewards/margins": 20.590023040771484, |
| "rewards/rejected": -10.962896347045898, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4383561643835616, |
| "grad_norm": 179.24440002441406, |
| "learning_rate": 3.590909090909091e-06, |
| "logits/chosen": 3.0298266410827637, |
| "logits/rejected": 3.0730605125427246, |
| "logps/chosen": -280.2432861328125, |
| "logps/rejected": -214.884033203125, |
| "loss": 0.142, |
| "rewards/accuracies": 0.979687511920929, |
| "rewards/chosen": 10.719534873962402, |
| "rewards/margins": 23.82217788696289, |
| "rewards/rejected": -13.102640151977539, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4931506849315068, |
| "grad_norm": 2.0804860591888428, |
| "learning_rate": 4.045454545454546e-06, |
| "logits/chosen": 3.1072518825531006, |
| "logits/rejected": 3.067288875579834, |
| "logps/chosen": -294.1097106933594, |
| "logps/rejected": -219.38949584960938, |
| "loss": 0.1262, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": 12.069157600402832, |
| "rewards/margins": 29.454524993896484, |
| "rewards/rejected": -17.385366439819336, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.547945205479452, |
| "grad_norm": 134.90240478515625, |
| "learning_rate": 4.5e-06, |
| "logits/chosen": 2.9695353507995605, |
| "logits/rejected": 2.9900407791137695, |
| "logps/chosen": -270.2259826660156, |
| "logps/rejected": -210.32302856445312, |
| "loss": 0.1125, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": 8.642024040222168, |
| "rewards/margins": 26.856210708618164, |
| "rewards/rejected": -18.21418571472168, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6027397260273972, |
| "grad_norm": 237.83163452148438, |
| "learning_rate": 4.954545454545455e-06, |
| "logits/chosen": 3.001239538192749, |
| "logits/rejected": 2.9165444374084473, |
| "logps/chosen": -261.63848876953125, |
| "logps/rejected": -217.56314086914062, |
| "loss": 0.1337, |
| "rewards/accuracies": 0.9765625, |
| "rewards/chosen": 4.362582206726074, |
| "rewards/margins": 21.842912673950195, |
| "rewards/rejected": -17.480329513549805, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6575342465753424, |
| "grad_norm": 124.83686065673828, |
| "learning_rate": 4.998976350571773e-06, |
| "logits/chosen": 3.0631394386291504, |
| "logits/rejected": 3.0034124851226807, |
| "logps/chosen": -296.0355224609375, |
| "logps/rejected": -219.4881134033203, |
| "loss": 0.176, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": 3.6324734687805176, |
| "rewards/margins": 20.854042053222656, |
| "rewards/rejected": -17.221569061279297, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7123287671232876, |
| "grad_norm": 110.8822250366211, |
| "learning_rate": 4.995438885558294e-06, |
| "logits/chosen": 3.0476179122924805, |
| "logits/rejected": 2.9690792560577393, |
| "logps/chosen": -292.52276611328125, |
| "logps/rejected": -210.3925018310547, |
| "loss": 0.2762, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": 5.373471736907959, |
| "rewards/margins": 26.076580047607422, |
| "rewards/rejected": -20.703105926513672, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7671232876712328, |
| "grad_norm": 72.18496704101562, |
| "learning_rate": 4.989378542821969e-06, |
| "logits/chosen": 3.0710926055908203, |
| "logits/rejected": 3.0577285289764404, |
| "logps/chosen": -284.55230712890625, |
| "logps/rejected": -230.9425506591797, |
| "loss": 0.2372, |
| "rewards/accuracies": 0.973437488079071, |
| "rewards/chosen": 5.454714775085449, |
| "rewards/margins": 30.868602752685547, |
| "rewards/rejected": -25.413890838623047, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.821917808219178, |
| "grad_norm": 104.61406707763672, |
| "learning_rate": 4.9808014493426124e-06, |
| "logits/chosen": 3.053307294845581, |
| "logits/rejected": 3.0027899742126465, |
| "logps/chosen": -282.54864501953125, |
| "logps/rejected": -236.41592407226562, |
| "loss": 0.6492, |
| "rewards/accuracies": 0.9546874761581421, |
| "rewards/chosen": 0.29179587960243225, |
| "rewards/margins": 30.52816390991211, |
| "rewards/rejected": -30.23636817932129, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8767123287671232, |
| "grad_norm": 114.0179443359375, |
| "learning_rate": 4.9697162765239595e-06, |
| "logits/chosen": 3.0813591480255127, |
| "logits/rejected": 3.093292713165283, |
| "logps/chosen": -265.5400390625, |
| "logps/rejected": -233.4171142578125, |
| "loss": 0.1703, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": 2.905339002609253, |
| "rewards/margins": 36.841583251953125, |
| "rewards/rejected": -33.936241149902344, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9315068493150684, |
| "grad_norm": 132.4547576904297, |
| "learning_rate": 4.9561342314269055e-06, |
| "logits/chosen": 3.124277114868164, |
| "logits/rejected": 3.048166036605835, |
| "logps/chosen": -262.30194091796875, |
| "logps/rejected": -232.39297485351562, |
| "loss": 0.4256, |
| "rewards/accuracies": 0.9515625238418579, |
| "rewards/chosen": -1.3073980808258057, |
| "rewards/margins": 30.139415740966797, |
| "rewards/rejected": -31.446813583374023, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9863013698630136, |
| "grad_norm": 1.0581492185592651, |
| "learning_rate": 4.940069045439226e-06, |
| "logits/chosen": 3.164407968521118, |
| "logits/rejected": 3.164742946624756, |
| "logps/chosen": -274.58673095703125, |
| "logps/rejected": -232.9495849609375, |
| "loss": 0.3806, |
| "rewards/accuracies": 0.9703124761581421, |
| "rewards/chosen": 0.4807693064212799, |
| "rewards/margins": 33.424041748046875, |
| "rewards/rejected": -32.943275451660156, |
| "step": 180 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1098, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|