| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9989708404802744, |
| "eval_steps": 500, |
| "global_step": 728, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.090909090909091e-07, |
| "loss": 1.4022, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1.8181818181818183e-06, |
| "loss": 1.4239, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.7272727272727272e-06, |
| "loss": 1.3843, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 3.6363636363636366e-06, |
| "loss": 1.3722, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 1.3411, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5.4545454545454545e-06, |
| "loss": 1.3187, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 6.363636363636364e-06, |
| "loss": 1.284, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.272727272727273e-06, |
| "loss": 1.2492, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 8.181818181818183e-06, |
| "loss": 1.2658, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 1.2173, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1e-05, |
| "loss": 1.2302, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.0909090909090909e-05, |
| "loss": 1.2301, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.181818181818182e-05, |
| "loss": 1.1855, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.2727272727272728e-05, |
| "loss": 1.2094, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.3636363636363637e-05, |
| "loss": 1.1788, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.4545454545454546e-05, |
| "loss": 1.1804, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.5454545454545454e-05, |
| "loss": 1.166, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.6363636363636366e-05, |
| "loss": 1.1256, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.7272727272727274e-05, |
| "loss": 1.1289, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 1.1392, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9090909090909094e-05, |
| "loss": 1.131, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2e-05, |
| "loss": 1.1288, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9999900994429424e-05, |
| "loss": 1.1198, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.999960397967811e-05, |
| "loss": 1.1281, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9999108961627284e-05, |
| "loss": 1.134, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9998415950078858e-05, |
| "loss": 1.1148, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9997524958755226e-05, |
| "loss": 1.1162, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9996436005299013e-05, |
| "loss": 1.12, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.999514911127271e-05, |
| "loss": 1.12, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9993664302158255e-05, |
| "loss": 1.0938, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9991981607356517e-05, |
| "loss": 1.0838, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9990101060186732e-05, |
| "loss": 1.1078, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.998802269788583e-05, |
| "loss": 1.1037, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9985746561607696e-05, |
| "loss": 1.0804, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.998327269642237e-05, |
| "loss": 1.0977, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.998060115131513e-05, |
| "loss": 1.1036, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9977731979185556e-05, |
| "loss": 1.1109, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9974665236846443e-05, |
| "loss": 1.0937, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9971400985022712e-05, |
| "loss": 1.0834, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9967939288350184e-05, |
| "loss": 1.1002, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9964280215374312e-05, |
| "loss": 1.0847, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9960423838548814e-05, |
| "loss": 1.0845, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.995637023423425e-05, |
| "loss": 1.0984, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.9952119482696504e-05, |
| "loss": 1.0836, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.9947671668105185e-05, |
| "loss": 1.082, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.9943026878531985e-05, |
| "loss": 1.0707, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.9938185205948906e-05, |
| "loss": 1.0545, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.993314674622646e-05, |
| "loss": 1.0618, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.992791159913177e-05, |
| "loss": 1.0514, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.992247986832658e-05, |
| "loss": 1.0733, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.99168516613652e-05, |
| "loss": 1.0712, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.991102708969241e-05, |
| "loss": 1.0788, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.9905006268641212e-05, |
| "loss": 1.0744, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.9898789317430577e-05, |
| "loss": 1.0621, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.9892376359163058e-05, |
| "loss": 1.0598, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.9885767520822377e-05, |
| "loss": 1.095, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.9878962933270896e-05, |
| "loss": 1.0666, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.987196273124703e-05, |
| "loss": 1.0657, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.986476705336258e-05, |
| "loss": 1.0691, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.9857376042099982e-05, |
| "loss": 1.0663, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.9849789843809496e-05, |
| "loss": 1.0476, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9842008608706295e-05, |
| "loss": 1.0509, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.983403249086751e-05, |
| "loss": 1.0622, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9825861648229154e-05, |
| "loss": 1.0708, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.981749624258302e-05, |
| "loss": 1.0672, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.9808936439573455e-05, |
| "loss": 1.0627, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.9800182408694096e-05, |
| "loss": 1.0726, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.9791234323284515e-05, |
| "loss": 1.0558, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.9782092360526763e-05, |
| "loss": 1.0677, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.977275670144189e-05, |
| "loss": 1.0422, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.9763227530886348e-05, |
| "loss": 1.0364, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.9753505037548334e-05, |
| "loss": 1.0475, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.974358941394404e-05, |
| "loss": 1.0508, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.973348085641387e-05, |
| "loss": 1.0595, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.972317956511852e-05, |
| "loss": 1.0528, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.971268574403503e-05, |
| "loss": 1.0562, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.970199960095276e-05, |
| "loss": 1.0329, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.9691121347469235e-05, |
| "loss": 1.045, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.9680051198986004e-05, |
| "loss": 1.0561, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.9668789374704337e-05, |
| "loss": 1.0449, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.9657336097620904e-05, |
| "loss": 1.0359, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.964569159452335e-05, |
| "loss": 1.0359, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.963385609598581e-05, |
| "loss": 1.0271, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.9621829836364335e-05, |
| "loss": 1.0563, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.9609613053792276e-05, |
| "loss": 1.0416, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.9597205990175528e-05, |
| "loss": 1.0578, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.958460889118778e-05, |
| "loss": 1.0461, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.9571822006265623e-05, |
| "loss": 1.0262, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.9558845588603625e-05, |
| "loss": 1.0254, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.9545679895149315e-05, |
| "loss": 1.0642, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.9532325186598093e-05, |
| "loss": 1.0456, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.951878172738806e-05, |
| "loss": 1.0358, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.9505049785694803e-05, |
| "loss": 1.0409, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.9491129633426068e-05, |
| "loss": 1.0382, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.9477021546216376e-05, |
| "loss": 1.0415, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.9462725803421566e-05, |
| "loss": 1.0308, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.9448242688113286e-05, |
| "loss": 1.0376, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.9433572487073343e-05, |
| "loss": 1.0259, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.9418715490788066e-05, |
| "loss": 1.0496, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.9403671993442534e-05, |
| "loss": 1.0519, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.9388442292914754e-05, |
| "loss": 1.0418, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.937302669076976e-05, |
| "loss": 1.0372, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.9357425492253662e-05, |
| "loss": 1.0347, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.934163900628756e-05, |
| "loss": 1.0253, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.9325667545461466e-05, |
| "loss": 1.0401, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.9309511426028105e-05, |
| "loss": 1.0282, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.9293170967896632e-05, |
| "loss": 1.0306, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.9276646494626333e-05, |
| "loss": 1.0313, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.9259938333420183e-05, |
| "loss": 1.0433, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.9243046815118387e-05, |
| "loss": 1.0232, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.922597227419183e-05, |
| "loss": 1.0222, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.9208715048735446e-05, |
| "loss": 1.0186, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.9191275480461525e-05, |
| "loss": 1.033, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.9173653914692947e-05, |
| "loss": 1.0342, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.9155850700356345e-05, |
| "loss": 1.035, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.91378661899752e-05, |
| "loss": 1.0206, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.9119700739662857e-05, |
| "loss": 1.0435, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.910135470911547e-05, |
| "loss": 1.0181, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.908282846160488e-05, |
| "loss": 1.0267, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.9064122363971426e-05, |
| "loss": 1.0365, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.904523678661669e-05, |
| "loss": 1.0381, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.9026172103496138e-05, |
| "loss": 1.0048, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.900692869211174e-05, |
| "loss": 1.0392, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.898750693350447e-05, |
| "loss": 1.0278, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.8967907212246803e-05, |
| "loss": 1.013, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 1.8948129916435048e-05, |
| "loss": 1.0385, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 1.8928175437681698e-05, |
| "loss": 1.0168, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.8908044171107658e-05, |
| "loss": 1.0123, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.8887736515334443e-05, |
| "loss": 1.015, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 1.8867252872476255e-05, |
| "loss": 1.0265, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 1.884659364813205e-05, |
| "loss": 0.9997, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 1.8825759251377484e-05, |
| "loss": 1.0109, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 1.8804750094756827e-05, |
| "loss": 1.0199, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 1.8783566594274783e-05, |
| "loss": 0.9998, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.8762209169388262e-05, |
| "loss": 1.0088, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.8740678242998077e-05, |
| "loss": 1.0022, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.8718974241440552e-05, |
| "loss": 1.0216, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 1.8697097594479103e-05, |
| "loss": 1.0248, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 1.867504873529571e-05, |
| "loss": 0.9974, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 1.865282810048235e-05, |
| "loss": 1.0138, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 1.8630436130032353e-05, |
| "loss": 1.0004, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 1.860787326733168e-05, |
| "loss": 1.0081, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.8585139959150144e-05, |
| "loss": 1.0238, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.856223665563258e-05, |
| "loss": 1.0328, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.8539163810289914e-05, |
| "loss": 1.0071, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.8515921879990187e-05, |
| "loss": 1.0134, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 1.8492511324949516e-05, |
| "loss": 1.0181, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 1.8468932608722975e-05, |
| "loss": 1.0363, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 1.8445186198195406e-05, |
| "loss": 1.0011, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 1.8421272563572202e-05, |
| "loss": 0.9993, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 1.8397192178369965e-05, |
| "loss": 1.0201, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.837294551940716e-05, |
| "loss": 0.987, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.834853306679464e-05, |
| "loss": 1.0106, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.8323955303926165e-05, |
| "loss": 1.0034, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 1.8299212717468825e-05, |
| "loss": 1.0095, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 1.8274305797353397e-05, |
| "loss": 0.9921, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.824923503676465e-05, |
| "loss": 0.9859, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.822400093213157e-05, |
| "loss": 1.017, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.8198603983117546e-05, |
| "loss": 1.0118, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 1.8173044692610466e-05, |
| "loss": 0.9912, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 1.8147323566712755e-05, |
| "loss": 1.0162, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 1.8121441114731366e-05, |
| "loss": 1.0089, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 1.809539784916768e-05, |
| "loss": 0.9752, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.806919428570737e-05, |
| "loss": 1.007, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.804283094321019e-05, |
| "loss": 1.0145, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.8016308343699686e-05, |
| "loss": 1.0008, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.798962701235289e-05, |
| "loss": 1.0067, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.796278747748988e-05, |
| "loss": 1.0017, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.7935790270563345e-05, |
| "loss": 1.0086, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.790863592614807e-05, |
| "loss": 0.9884, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.788132498193032e-05, |
| "loss": 1.0028, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 1.7853857978697223e-05, |
| "loss": 1.0055, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 1.7826235460326043e-05, |
| "loss": 1.005, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.7798457973773418e-05, |
| "loss": 1.002, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.7770526069064525e-05, |
| "loss": 0.9838, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.7742440299282203e-05, |
| "loss": 1.001, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.7714201220555982e-05, |
| "loss": 0.9984, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.7685809392051084e-05, |
| "loss": 1.0035, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.765726537595734e-05, |
| "loss": 1.0076, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.7628569737478076e-05, |
| "loss": 0.9936, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.7599723044818898e-05, |
| "loss": 1.0053, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.7570725869176468e-05, |
| "loss": 0.9968, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.7541578784727163e-05, |
| "loss": 1.0059, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.751228236861573e-05, |
| "loss": 1.0059, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.7482837200943845e-05, |
| "loss": 1.0081, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.7453243864758638e-05, |
| "loss": 1.0215, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.7423502946041133e-05, |
| "loss": 0.9935, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.739361503369466e-05, |
| "loss": 0.9945, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.7363580719533173e-05, |
| "loss": 0.9926, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.733340059826956e-05, |
| "loss": 0.9946, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7303075267503845e-05, |
| "loss": 1.0079, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7272605327711364e-05, |
| "loss": 1.0212, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7241991382230872e-05, |
| "loss": 0.993, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.72112340372526e-05, |
| "loss": 0.9843, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.718033390180624e-05, |
| "loss": 0.9837, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.71492915877489e-05, |
| "loss": 0.959, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.7118107709752986e-05, |
| "loss": 0.9895, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7086782885294026e-05, |
| "loss": 0.99, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7055317734638444e-05, |
| "loss": 1.006, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.702371288083127e-05, |
| "loss": 1.0009, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.6991968949683835e-05, |
| "loss": 0.9758, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.6960086569761332e-05, |
| "loss": 0.9801, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.6928066372370407e-05, |
| "loss": 0.9833, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.689590899154664e-05, |
| "loss": 0.9846, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.6863615064042003e-05, |
| "loss": 0.9752, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.6831185229312237e-05, |
| "loss": 0.9869, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.67986201295042e-05, |
| "loss": 0.9869, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.676592040944315e-05, |
| "loss": 0.9878, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.6733086716619976e-05, |
| "loss": 0.9938, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.6700119701178378e-05, |
| "loss": 1.0045, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.666702001590199e-05, |
| "loss": 1.0088, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.6633788316201455e-05, |
| "loss": 0.998, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.6600425260101453e-05, |
| "loss": 1.0017, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.6566931508227663e-05, |
| "loss": 0.9995, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 1.6533307723793688e-05, |
| "loss": 1.0012, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 1.649955457258792e-05, |
| "loss": 0.9807, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 1.6465672722960365e-05, |
| "loss": 0.9664, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.6431662845809388e-05, |
| "loss": 0.9707, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.6397525614568446e-05, |
| "loss": 0.983, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.6363261705192757e-05, |
| "loss": 1.0061, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.6328871796145894e-05, |
| "loss": 0.9899, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.629435656838637e-05, |
| "loss": 0.9795, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.6259716705354154e-05, |
| "loss": 1.0002, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.6224952892957122e-05, |
| "loss": 0.9837, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.6190065819557496e-05, |
| "loss": 0.9872, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.615505617595819e-05, |
| "loss": 0.9797, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.6119924655389158e-05, |
| "loss": 0.9926, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6084671953493645e-05, |
| "loss": 0.9884, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6049298768314425e-05, |
| "loss": 0.9918, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.6013805800279977e-05, |
| "loss": 0.9829, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.5978193752190607e-05, |
| "loss": 0.9854, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.5942463329204546e-05, |
| "loss": 0.9751, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.5906615238823974e-05, |
| "loss": 0.9945, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.5870650190881023e-05, |
| "loss": 0.9957, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.583456889752371e-05, |
| "loss": 1.0047, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.579837207320184e-05, |
| "loss": 0.9921, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.5762060434652863e-05, |
| "loss": 0.9839, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.572563470088768e-05, |
| "loss": 0.9922, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.56890955931764e-05, |
| "loss": 0.9752, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.565244383503407e-05, |
| "loss": 0.9778, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.5615680152206324e-05, |
| "loss": 0.9795, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.557880527265505e-05, |
| "loss": 0.9774, |
| "step": 726 |
| } |
| ], |
| "logging_steps": 3, |
| "max_steps": 2184, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500.0, |
| "total_flos": 4.694048596218085e+18, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|