| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9984, |
| "eval_steps": 500, |
| "global_step": 156, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.064, |
| "grad_norm": 46.9622688293457, |
| "learning_rate": 6.249999999999999e-07, |
| "logits/chosen": -1.849869966506958, |
| "logits/rejected": -0.29363900423049927, |
| "logps/chosen": -214.13339233398438, |
| "logps/rejected": -737.3911743164062, |
| "loss": 0.7092, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": 0.004342720843851566, |
| "rewards/margins": 0.025443650782108307, |
| "rewards/rejected": -0.021100929006934166, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 13.521223068237305, |
| "learning_rate": 9.979871469976195e-07, |
| "logits/chosen": -1.8478429317474365, |
| "logits/rejected": -0.2751621603965759, |
| "logps/chosen": -240.21755981445312, |
| "logps/rejected": -844.638427734375, |
| "loss": 0.486, |
| "rewards/accuracies": 0.9468750357627869, |
| "rewards/chosen": 0.0402272529900074, |
| "rewards/margins": 0.6919995546340942, |
| "rewards/rejected": -0.6517722606658936, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.762617290019989, |
| "learning_rate": 9.755282581475767e-07, |
| "logits/chosen": -2.1506738662719727, |
| "logits/rejected": -0.8184519410133362, |
| "logps/chosen": -241.9251251220703, |
| "logps/rejected": -829.9989624023438, |
| "loss": 0.1228, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -0.33827781677246094, |
| "rewards/margins": 4.653146266937256, |
| "rewards/rejected": -4.991424083709717, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 1.0136756896972656, |
| "learning_rate": 9.29224396800933e-07, |
| "logits/chosen": -2.6077799797058105, |
| "logits/rejected": -1.6289113759994507, |
| "logps/chosen": -261.08819580078125, |
| "logps/rejected": -949.7811889648438, |
| "loss": 0.0522, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -2.5243332386016846, |
| "rewards/margins": 14.23307991027832, |
| "rewards/rejected": -16.75741195678711, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.15248946845531464, |
| "learning_rate": 8.613974319136957e-07, |
| "logits/chosen": -2.809011459350586, |
| "logits/rejected": -1.9814908504486084, |
| "logps/chosen": -262.7459411621094, |
| "logps/rejected": -1068.808349609375, |
| "loss": 0.0401, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -4.401086807250977, |
| "rewards/margins": 23.641544342041016, |
| "rewards/rejected": -28.04262924194336, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.12552589178085327, |
| "learning_rate": 7.754484907260512e-07, |
| "logits/chosen": -2.7779171466827393, |
| "logits/rejected": -2.0284547805786133, |
| "logps/chosen": -282.9354248046875, |
| "logps/rejected": -1090.5594482421875, |
| "loss": 0.036, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -5.416067123413086, |
| "rewards/margins": 25.74221420288086, |
| "rewards/rejected": -31.158283233642578, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.17482662200927734, |
| "learning_rate": 6.756874120406714e-07, |
| "logits/chosen": -2.7689507007598877, |
| "logits/rejected": -1.9573109149932861, |
| "logps/chosen": -263.86279296875, |
| "logps/rejected": -1074.0966796875, |
| "loss": 0.0455, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -4.39678955078125, |
| "rewards/margins": 24.96966552734375, |
| "rewards/rejected": -29.366456985473633, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.11299557238817215, |
| "learning_rate": 5.671166329088277e-07, |
| "logits/chosen": -2.643343448638916, |
| "logits/rejected": -1.8395075798034668, |
| "logps/chosen": -271.737060546875, |
| "logps/rejected": -1051.107421875, |
| "loss": 0.0341, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.8055081367492676, |
| "rewards/margins": 22.575603485107422, |
| "rewards/rejected": -26.381113052368164, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.12716512382030487, |
| "learning_rate": 4.5518034554828327e-07, |
| "logits/chosen": -2.627561092376709, |
| "logits/rejected": -1.8044594526290894, |
| "logps/chosen": -267.9153747558594, |
| "logps/rejected": -1012.906494140625, |
| "loss": 0.046, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -3.2899169921875, |
| "rewards/margins": 20.98080825805664, |
| "rewards/rejected": -24.270723342895508, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.14720195531845093, |
| "learning_rate": 3.454915028125263e-07, |
| "logits/chosen": -2.617551803588867, |
| "logits/rejected": -1.7380447387695312, |
| "logps/chosen": -254.84658813476562, |
| "logps/rejected": -996.9198608398438, |
| "loss": 0.0315, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.6352035999298096, |
| "rewards/margins": 19.858867645263672, |
| "rewards/rejected": -22.49407196044922, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.1784060150384903, |
| "learning_rate": 2.4355036129704696e-07, |
| "logits/chosen": -2.598905324935913, |
| "logits/rejected": -1.7216695547103882, |
| "logps/chosen": -248.74453735351562, |
| "logps/rejected": -993.6848754882812, |
| "loss": 0.036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.5882961750030518, |
| "rewards/margins": 18.992816925048828, |
| "rewards/rejected": -21.58111572265625, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.11419311910867691, |
| "learning_rate": 1.5446867550656767e-07, |
| "logits/chosen": -2.5767769813537598, |
| "logits/rejected": -1.681131362915039, |
| "logps/chosen": -256.5839538574219, |
| "logps/rejected": -994.3280029296875, |
| "loss": 0.0745, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -2.4366414546966553, |
| "rewards/margins": 18.98566436767578, |
| "rewards/rejected": -21.422306060791016, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.10517225414514542, |
| "learning_rate": 8.271337313934867e-08, |
| "logits/chosen": -2.5168538093566895, |
| "logits/rejected": -1.6293193101882935, |
| "logps/chosen": -269.16278076171875, |
| "logps/rejected": -979.6190795898438, |
| "loss": 0.0499, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -2.368694543838501, |
| "rewards/margins": 18.073989868164062, |
| "rewards/rejected": -20.442684173583984, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.1312648206949234, |
| "learning_rate": 3.188256468013139e-08, |
| "logits/chosen": -2.5698208808898926, |
| "logits/rejected": -1.6306824684143066, |
| "logps/chosen": -250.3894805908203, |
| "logps/rejected": -1000.3046875, |
| "loss": 0.0339, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -2.0379717350006104, |
| "rewards/margins": 18.880815505981445, |
| "rewards/rejected": -20.918787002563477, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.11260352283716202, |
| "learning_rate": 4.5251191160326495e-09, |
| "logits/chosen": -2.5329906940460205, |
| "logits/rejected": -1.5752902030944824, |
| "logps/chosen": -271.48443603515625, |
| "logps/rejected": -1053.84765625, |
| "loss": 0.0735, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -2.3061203956604004, |
| "rewards/margins": 19.025531768798828, |
| "rewards/rejected": -21.331653594970703, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9984, |
| "step": 156, |
| "total_flos": 1.1115841451898962e+18, |
| "train_loss": 0.1217762088546386, |
| "train_runtime": 6099.7709, |
| "train_samples_per_second": 0.82, |
| "train_steps_per_second": 0.026 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 156, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1115841451898962e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|