| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1985.4716981132076, |
| "eval_steps": 500, |
| "global_step": 52000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.7547169811320755, |
| "grad_norm": 30.673773492701116, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 9.2984, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.509433962264151, |
| "grad_norm": 14.373295694588162, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 7.8844, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.2641509433962264, |
| "grad_norm": 19.800138134391982, |
| "learning_rate": 7.99999818716091e-05, |
| "loss": 6.0394, |
| "step": 60 |
| }, |
| { |
| "epoch": 3.018867924528302, |
| "grad_norm": 2.662186456025038, |
| "learning_rate": 7.9999836844587e-05, |
| "loss": 4.373, |
| "step": 80 |
| }, |
| { |
| "epoch": 3.7735849056603774, |
| "grad_norm": 2.9944135324779984, |
| "learning_rate": 7.999954679110365e-05, |
| "loss": 3.93, |
| "step": 100 |
| }, |
| { |
| "epoch": 4.528301886792453, |
| "grad_norm": 6.144462097933044, |
| "learning_rate": 7.999911171228081e-05, |
| "loss": 3.8796, |
| "step": 120 |
| }, |
| { |
| "epoch": 5.283018867924528, |
| "grad_norm": 1.885170861907592, |
| "learning_rate": 7.999853160980113e-05, |
| "loss": 3.7437, |
| "step": 140 |
| }, |
| { |
| "epoch": 6.037735849056604, |
| "grad_norm": 1.794246585785847, |
| "learning_rate": 7.999780648590806e-05, |
| "loss": 3.6892, |
| "step": 160 |
| }, |
| { |
| "epoch": 6.7924528301886795, |
| "grad_norm": 12.766182271547313, |
| "learning_rate": 7.999693634340594e-05, |
| "loss": 3.6124, |
| "step": 180 |
| }, |
| { |
| "epoch": 7.547169811320755, |
| "grad_norm": 3.014805105995273, |
| "learning_rate": 7.999592118565996e-05, |
| "loss": 3.4224, |
| "step": 200 |
| }, |
| { |
| "epoch": 8.30188679245283, |
| "grad_norm": 1.6809726247876995, |
| "learning_rate": 7.999476101659613e-05, |
| "loss": 3.2811, |
| "step": 220 |
| }, |
| { |
| "epoch": 9.056603773584905, |
| "grad_norm": 1.7074366581291722, |
| "learning_rate": 7.999345584070125e-05, |
| "loss": 3.2012, |
| "step": 240 |
| }, |
| { |
| "epoch": 9.81132075471698, |
| "grad_norm": 1.6971425854714368, |
| "learning_rate": 7.999200566302298e-05, |
| "loss": 3.1281, |
| "step": 260 |
| }, |
| { |
| "epoch": 10.566037735849056, |
| "grad_norm": 2.2456748845663155, |
| "learning_rate": 7.999041048916969e-05, |
| "loss": 3.1133, |
| "step": 280 |
| }, |
| { |
| "epoch": 11.320754716981131, |
| "grad_norm": 2.301731796932263, |
| "learning_rate": 7.998867032531055e-05, |
| "loss": 3.0835, |
| "step": 300 |
| }, |
| { |
| "epoch": 12.075471698113208, |
| "grad_norm": 1.9076791695070954, |
| "learning_rate": 7.998678517817546e-05, |
| "loss": 3.0189, |
| "step": 320 |
| }, |
| { |
| "epoch": 12.830188679245284, |
| "grad_norm": 1.8142406149549974, |
| "learning_rate": 7.9984755055055e-05, |
| "loss": 3.0019, |
| "step": 340 |
| }, |
| { |
| "epoch": 13.584905660377359, |
| "grad_norm": 1.747017241682793, |
| "learning_rate": 7.998257996380048e-05, |
| "loss": 2.9866, |
| "step": 360 |
| }, |
| { |
| "epoch": 14.339622641509434, |
| "grad_norm": 1.430663205424301, |
| "learning_rate": 7.998025991282377e-05, |
| "loss": 3.0026, |
| "step": 380 |
| }, |
| { |
| "epoch": 15.09433962264151, |
| "grad_norm": 2.0885624099422566, |
| "learning_rate": 7.997779491109745e-05, |
| "loss": 2.946, |
| "step": 400 |
| }, |
| { |
| "epoch": 15.849056603773585, |
| "grad_norm": 1.858513295415329, |
| "learning_rate": 7.997518496815465e-05, |
| "loss": 2.9293, |
| "step": 420 |
| }, |
| { |
| "epoch": 16.60377358490566, |
| "grad_norm": 4.071990410305379, |
| "learning_rate": 7.9972430094089e-05, |
| "loss": 2.8812, |
| "step": 440 |
| }, |
| { |
| "epoch": 17.358490566037737, |
| "grad_norm": 1.7862953717408494, |
| "learning_rate": 7.996953029955468e-05, |
| "loss": 2.9285, |
| "step": 460 |
| }, |
| { |
| "epoch": 18.11320754716981, |
| "grad_norm": 1.6791350922351642, |
| "learning_rate": 7.996648559576633e-05, |
| "loss": 2.9062, |
| "step": 480 |
| }, |
| { |
| "epoch": 18.867924528301888, |
| "grad_norm": 2.14320095701687, |
| "learning_rate": 7.996329599449902e-05, |
| "loss": 2.8974, |
| "step": 500 |
| }, |
| { |
| "epoch": 19.62264150943396, |
| "grad_norm": 1.645724273362961, |
| "learning_rate": 7.995996150808815e-05, |
| "loss": 2.842, |
| "step": 520 |
| }, |
| { |
| "epoch": 20.37735849056604, |
| "grad_norm": 1.6833734047290758, |
| "learning_rate": 7.99564821494295e-05, |
| "loss": 2.8603, |
| "step": 540 |
| }, |
| { |
| "epoch": 21.132075471698112, |
| "grad_norm": 1.680341673033382, |
| "learning_rate": 7.995285793197909e-05, |
| "loss": 2.8275, |
| "step": 560 |
| }, |
| { |
| "epoch": 21.88679245283019, |
| "grad_norm": 1.5669198141799856, |
| "learning_rate": 7.994908886975317e-05, |
| "loss": 2.8495, |
| "step": 580 |
| }, |
| { |
| "epoch": 22.641509433962263, |
| "grad_norm": 1.572130529899407, |
| "learning_rate": 7.99451749773282e-05, |
| "loss": 2.7967, |
| "step": 600 |
| }, |
| { |
| "epoch": 23.39622641509434, |
| "grad_norm": 2.0314221097665413, |
| "learning_rate": 7.994111626984069e-05, |
| "loss": 2.8023, |
| "step": 620 |
| }, |
| { |
| "epoch": 24.150943396226417, |
| "grad_norm": 2.524880591484311, |
| "learning_rate": 7.993691276298728e-05, |
| "loss": 2.7928, |
| "step": 640 |
| }, |
| { |
| "epoch": 24.90566037735849, |
| "grad_norm": 3.1215250835282884, |
| "learning_rate": 7.993256447302454e-05, |
| "loss": 2.7998, |
| "step": 660 |
| }, |
| { |
| "epoch": 25.660377358490567, |
| "grad_norm": 1.2555800634223058, |
| "learning_rate": 7.9928071416769e-05, |
| "loss": 2.7862, |
| "step": 680 |
| }, |
| { |
| "epoch": 26.41509433962264, |
| "grad_norm": 1.5918196349327507, |
| "learning_rate": 7.992343361159705e-05, |
| "loss": 2.7825, |
| "step": 700 |
| }, |
| { |
| "epoch": 27.169811320754718, |
| "grad_norm": 1.4446288187541776, |
| "learning_rate": 7.991865107544492e-05, |
| "loss": 2.7566, |
| "step": 720 |
| }, |
| { |
| "epoch": 27.92452830188679, |
| "grad_norm": 1.5498155400201465, |
| "learning_rate": 7.991372382680851e-05, |
| "loss": 2.7341, |
| "step": 740 |
| }, |
| { |
| "epoch": 28.67924528301887, |
| "grad_norm": 1.9707678993278974, |
| "learning_rate": 7.99086518847434e-05, |
| "loss": 2.7315, |
| "step": 760 |
| }, |
| { |
| "epoch": 29.433962264150942, |
| "grad_norm": 1.663827192167774, |
| "learning_rate": 7.99034352688648e-05, |
| "loss": 2.733, |
| "step": 780 |
| }, |
| { |
| "epoch": 30.18867924528302, |
| "grad_norm": 1.9883280378959602, |
| "learning_rate": 7.989807399934738e-05, |
| "loss": 2.7323, |
| "step": 800 |
| }, |
| { |
| "epoch": 30.943396226415093, |
| "grad_norm": 1.535721464256961, |
| "learning_rate": 7.989256809692524e-05, |
| "loss": 2.7081, |
| "step": 820 |
| }, |
| { |
| "epoch": 31.69811320754717, |
| "grad_norm": 1.4966431407465013, |
| "learning_rate": 7.988691758289184e-05, |
| "loss": 2.694, |
| "step": 840 |
| }, |
| { |
| "epoch": 32.45283018867924, |
| "grad_norm": 1.9684497666022975, |
| "learning_rate": 7.988112247909996e-05, |
| "loss": 2.6947, |
| "step": 860 |
| }, |
| { |
| "epoch": 33.20754716981132, |
| "grad_norm": 1.2512316865338262, |
| "learning_rate": 7.987518280796148e-05, |
| "loss": 2.7216, |
| "step": 880 |
| }, |
| { |
| "epoch": 33.9622641509434, |
| "grad_norm": 1.2566119692549285, |
| "learning_rate": 7.986909859244743e-05, |
| "loss": 2.6675, |
| "step": 900 |
| }, |
| { |
| "epoch": 34.716981132075475, |
| "grad_norm": 1.8282134080395354, |
| "learning_rate": 7.986286985608782e-05, |
| "loss": 2.6712, |
| "step": 920 |
| }, |
| { |
| "epoch": 35.471698113207545, |
| "grad_norm": 1.3075929724419728, |
| "learning_rate": 7.985649662297164e-05, |
| "loss": 2.668, |
| "step": 940 |
| }, |
| { |
| "epoch": 36.22641509433962, |
| "grad_norm": 1.4411328906456615, |
| "learning_rate": 7.984997891774664e-05, |
| "loss": 2.6937, |
| "step": 960 |
| }, |
| { |
| "epoch": 36.9811320754717, |
| "grad_norm": 1.8430700250981429, |
| "learning_rate": 7.984331676561932e-05, |
| "loss": 2.6798, |
| "step": 980 |
| }, |
| { |
| "epoch": 37.735849056603776, |
| "grad_norm": 1.4511454692578831, |
| "learning_rate": 7.983651019235483e-05, |
| "loss": 2.6637, |
| "step": 1000 |
| }, |
| { |
| "epoch": 38.490566037735846, |
| "grad_norm": 1.502704294446435, |
| "learning_rate": 7.982955922427681e-05, |
| "loss": 2.6688, |
| "step": 1020 |
| }, |
| { |
| "epoch": 39.24528301886792, |
| "grad_norm": 1.4028762887194124, |
| "learning_rate": 7.982246388826741e-05, |
| "loss": 2.6086, |
| "step": 1040 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 1.539721758237447, |
| "learning_rate": 7.981522421176697e-05, |
| "loss": 2.6084, |
| "step": 1060 |
| }, |
| { |
| "epoch": 40.75471698113208, |
| "grad_norm": 1.77576635302677, |
| "learning_rate": 7.980784022277421e-05, |
| "loss": 2.6216, |
| "step": 1080 |
| }, |
| { |
| "epoch": 41.509433962264154, |
| "grad_norm": 1.4874495954369062, |
| "learning_rate": 7.980031194984588e-05, |
| "loss": 2.6328, |
| "step": 1100 |
| }, |
| { |
| "epoch": 42.264150943396224, |
| "grad_norm": 1.8846242927156294, |
| "learning_rate": 7.979263942209669e-05, |
| "loss": 2.6427, |
| "step": 1120 |
| }, |
| { |
| "epoch": 43.0188679245283, |
| "grad_norm": 1.6981877996408483, |
| "learning_rate": 7.978482266919936e-05, |
| "loss": 2.6224, |
| "step": 1140 |
| }, |
| { |
| "epoch": 43.77358490566038, |
| "grad_norm": 1.3746555989630926, |
| "learning_rate": 7.977686172138426e-05, |
| "loss": 2.6011, |
| "step": 1160 |
| }, |
| { |
| "epoch": 44.528301886792455, |
| "grad_norm": 1.377657678804025, |
| "learning_rate": 7.97687566094395e-05, |
| "loss": 2.6086, |
| "step": 1180 |
| }, |
| { |
| "epoch": 45.283018867924525, |
| "grad_norm": 1.2094636718352942, |
| "learning_rate": 7.976050736471069e-05, |
| "loss": 2.582, |
| "step": 1200 |
| }, |
| { |
| "epoch": 46.0377358490566, |
| "grad_norm": 1.4433837187551148, |
| "learning_rate": 7.975211401910087e-05, |
| "loss": 2.6294, |
| "step": 1220 |
| }, |
| { |
| "epoch": 46.79245283018868, |
| "grad_norm": 1.5026382784404573, |
| "learning_rate": 7.97435766050704e-05, |
| "loss": 2.5993, |
| "step": 1240 |
| }, |
| { |
| "epoch": 47.54716981132076, |
| "grad_norm": 1.2094136471599368, |
| "learning_rate": 7.973489515563676e-05, |
| "loss": 2.6164, |
| "step": 1260 |
| }, |
| { |
| "epoch": 48.301886792452834, |
| "grad_norm": 1.394688364908413, |
| "learning_rate": 7.972606970437446e-05, |
| "loss": 2.6056, |
| "step": 1280 |
| }, |
| { |
| "epoch": 49.056603773584904, |
| "grad_norm": 1.271568801692499, |
| "learning_rate": 7.971710028541502e-05, |
| "loss": 2.5755, |
| "step": 1300 |
| }, |
| { |
| "epoch": 49.81132075471698, |
| "grad_norm": 1.4259670316825253, |
| "learning_rate": 7.970798693344663e-05, |
| "loss": 2.5759, |
| "step": 1320 |
| }, |
| { |
| "epoch": 50.56603773584906, |
| "grad_norm": 1.3071538390073274, |
| "learning_rate": 7.969872968371418e-05, |
| "loss": 2.6031, |
| "step": 1340 |
| }, |
| { |
| "epoch": 51.320754716981135, |
| "grad_norm": 1.2595773412735998, |
| "learning_rate": 7.968932857201907e-05, |
| "loss": 2.5711, |
| "step": 1360 |
| }, |
| { |
| "epoch": 52.075471698113205, |
| "grad_norm": 2.0429570729259714, |
| "learning_rate": 7.967978363471901e-05, |
| "loss": 2.5662, |
| "step": 1380 |
| }, |
| { |
| "epoch": 52.83018867924528, |
| "grad_norm": 1.6700659590709221, |
| "learning_rate": 7.967009490872805e-05, |
| "loss": 2.5618, |
| "step": 1400 |
| }, |
| { |
| "epoch": 53.58490566037736, |
| "grad_norm": 1.33856858087749, |
| "learning_rate": 7.966026243151624e-05, |
| "loss": 2.5351, |
| "step": 1420 |
| }, |
| { |
| "epoch": 54.339622641509436, |
| "grad_norm": 1.514257636366029, |
| "learning_rate": 7.965028624110956e-05, |
| "loss": 2.5686, |
| "step": 1440 |
| }, |
| { |
| "epoch": 55.094339622641506, |
| "grad_norm": 1.5655072381428023, |
| "learning_rate": 7.964016637608987e-05, |
| "loss": 2.5329, |
| "step": 1460 |
| }, |
| { |
| "epoch": 55.84905660377358, |
| "grad_norm": 1.3215439672221574, |
| "learning_rate": 7.96299028755946e-05, |
| "loss": 2.5701, |
| "step": 1480 |
| }, |
| { |
| "epoch": 56.60377358490566, |
| "grad_norm": 1.4294366995579832, |
| "learning_rate": 7.961949577931671e-05, |
| "loss": 2.5143, |
| "step": 1500 |
| }, |
| { |
| "epoch": 57.35849056603774, |
| "grad_norm": 1.1676370114885968, |
| "learning_rate": 7.960894512750449e-05, |
| "loss": 2.5653, |
| "step": 1520 |
| }, |
| { |
| "epoch": 58.113207547169814, |
| "grad_norm": 1.2026735621707902, |
| "learning_rate": 7.95982509609614e-05, |
| "loss": 2.5161, |
| "step": 1540 |
| }, |
| { |
| "epoch": 58.867924528301884, |
| "grad_norm": 1.2479764772455937, |
| "learning_rate": 7.958741332104596e-05, |
| "loss": 2.508, |
| "step": 1560 |
| }, |
| { |
| "epoch": 59.62264150943396, |
| "grad_norm": 1.1961953679380617, |
| "learning_rate": 7.957643224967155e-05, |
| "loss": 2.5009, |
| "step": 1580 |
| }, |
| { |
| "epoch": 60.37735849056604, |
| "grad_norm": 1.2497242021619674, |
| "learning_rate": 7.956530778930622e-05, |
| "loss": 2.5059, |
| "step": 1600 |
| }, |
| { |
| "epoch": 61.132075471698116, |
| "grad_norm": 1.3171737588939698, |
| "learning_rate": 7.955403998297261e-05, |
| "loss": 2.4988, |
| "step": 1620 |
| }, |
| { |
| "epoch": 61.886792452830186, |
| "grad_norm": 1.4834839050766762, |
| "learning_rate": 7.95426288742477e-05, |
| "loss": 2.4981, |
| "step": 1640 |
| }, |
| { |
| "epoch": 62.64150943396226, |
| "grad_norm": 1.5715059944491987, |
| "learning_rate": 7.953107450726267e-05, |
| "loss": 2.5151, |
| "step": 1660 |
| }, |
| { |
| "epoch": 63.39622641509434, |
| "grad_norm": 1.3272413313721245, |
| "learning_rate": 7.95193769267028e-05, |
| "loss": 2.4963, |
| "step": 1680 |
| }, |
| { |
| "epoch": 64.15094339622641, |
| "grad_norm": 1.0349543461606097, |
| "learning_rate": 7.950753617780715e-05, |
| "loss": 2.4829, |
| "step": 1700 |
| }, |
| { |
| "epoch": 64.90566037735849, |
| "grad_norm": 1.9240255477140202, |
| "learning_rate": 7.949555230636851e-05, |
| "loss": 2.4943, |
| "step": 1720 |
| }, |
| { |
| "epoch": 65.66037735849056, |
| "grad_norm": 1.3252737004710828, |
| "learning_rate": 7.948342535873318e-05, |
| "loss": 2.4642, |
| "step": 1740 |
| }, |
| { |
| "epoch": 66.41509433962264, |
| "grad_norm": 1.4539890356994254, |
| "learning_rate": 7.947115538180077e-05, |
| "loss": 2.4609, |
| "step": 1760 |
| }, |
| { |
| "epoch": 67.16981132075472, |
| "grad_norm": 1.4560503030341407, |
| "learning_rate": 7.945874242302408e-05, |
| "loss": 2.5209, |
| "step": 1780 |
| }, |
| { |
| "epoch": 67.9245283018868, |
| "grad_norm": 1.344513424004851, |
| "learning_rate": 7.944618653040883e-05, |
| "loss": 2.4993, |
| "step": 1800 |
| }, |
| { |
| "epoch": 68.67924528301887, |
| "grad_norm": 1.8832922124286062, |
| "learning_rate": 7.943348775251356e-05, |
| "loss": 2.4646, |
| "step": 1820 |
| }, |
| { |
| "epoch": 69.43396226415095, |
| "grad_norm": 1.2399208576659413, |
| "learning_rate": 7.942064613844938e-05, |
| "loss": 2.4849, |
| "step": 1840 |
| }, |
| { |
| "epoch": 70.18867924528301, |
| "grad_norm": 1.3243651575141913, |
| "learning_rate": 7.940766173787979e-05, |
| "loss": 2.4599, |
| "step": 1860 |
| }, |
| { |
| "epoch": 70.94339622641509, |
| "grad_norm": 1.556289335581103, |
| "learning_rate": 7.939453460102055e-05, |
| "loss": 2.4888, |
| "step": 1880 |
| }, |
| { |
| "epoch": 71.69811320754717, |
| "grad_norm": 1.2063108835158236, |
| "learning_rate": 7.93812647786394e-05, |
| "loss": 2.4403, |
| "step": 1900 |
| }, |
| { |
| "epoch": 72.45283018867924, |
| "grad_norm": 1.381601728211994, |
| "learning_rate": 7.936785232205587e-05, |
| "loss": 2.4616, |
| "step": 1920 |
| }, |
| { |
| "epoch": 73.20754716981132, |
| "grad_norm": 1.1962254019464496, |
| "learning_rate": 7.935429728314119e-05, |
| "loss": 2.4594, |
| "step": 1940 |
| }, |
| { |
| "epoch": 73.9622641509434, |
| "grad_norm": 1.5037943758052086, |
| "learning_rate": 7.934059971431796e-05, |
| "loss": 2.4767, |
| "step": 1960 |
| }, |
| { |
| "epoch": 74.71698113207547, |
| "grad_norm": 1.3520028552568202, |
| "learning_rate": 7.932675966856001e-05, |
| "loss": 2.4627, |
| "step": 1980 |
| }, |
| { |
| "epoch": 75.47169811320755, |
| "grad_norm": 1.1447719169505226, |
| "learning_rate": 7.931277719939217e-05, |
| "loss": 2.4434, |
| "step": 2000 |
| }, |
| { |
| "epoch": 76.22641509433963, |
| "grad_norm": 1.264362441844072, |
| "learning_rate": 7.92986523608901e-05, |
| "loss": 2.4532, |
| "step": 2020 |
| }, |
| { |
| "epoch": 76.98113207547169, |
| "grad_norm": 1.3287557477036405, |
| "learning_rate": 7.928438520768005e-05, |
| "loss": 2.4227, |
| "step": 2040 |
| }, |
| { |
| "epoch": 77.73584905660377, |
| "grad_norm": 1.3638216226843092, |
| "learning_rate": 7.926997579493864e-05, |
| "loss": 2.4124, |
| "step": 2060 |
| }, |
| { |
| "epoch": 78.49056603773585, |
| "grad_norm": 1.8271009676844974, |
| "learning_rate": 7.925542417839267e-05, |
| "loss": 2.4564, |
| "step": 2080 |
| }, |
| { |
| "epoch": 79.24528301886792, |
| "grad_norm": 3.54259081233157, |
| "learning_rate": 7.924073041431895e-05, |
| "loss": 2.4369, |
| "step": 2100 |
| }, |
| { |
| "epoch": 80.0, |
| "grad_norm": 1.4017324354742142, |
| "learning_rate": 7.922589455954394e-05, |
| "loss": 2.4464, |
| "step": 2120 |
| }, |
| { |
| "epoch": 80.75471698113208, |
| "grad_norm": 1.2911982587049995, |
| "learning_rate": 7.921091667144366e-05, |
| "loss": 2.4513, |
| "step": 2140 |
| }, |
| { |
| "epoch": 81.50943396226415, |
| "grad_norm": 1.6022339076405718, |
| "learning_rate": 7.919579680794347e-05, |
| "loss": 2.4203, |
| "step": 2160 |
| }, |
| { |
| "epoch": 82.26415094339623, |
| "grad_norm": 1.192345448314673, |
| "learning_rate": 7.918053502751772e-05, |
| "loss": 2.4254, |
| "step": 2180 |
| }, |
| { |
| "epoch": 83.01886792452831, |
| "grad_norm": 1.6383611170040047, |
| "learning_rate": 7.916513138918968e-05, |
| "loss": 2.4271, |
| "step": 2200 |
| }, |
| { |
| "epoch": 83.77358490566037, |
| "grad_norm": 1.2342470221196802, |
| "learning_rate": 7.91495859525312e-05, |
| "loss": 2.4079, |
| "step": 2220 |
| }, |
| { |
| "epoch": 84.52830188679245, |
| "grad_norm": 1.0846993450602334, |
| "learning_rate": 7.913389877766257e-05, |
| "loss": 2.4383, |
| "step": 2240 |
| }, |
| { |
| "epoch": 85.28301886792453, |
| "grad_norm": 1.6823249556637492, |
| "learning_rate": 7.911806992525215e-05, |
| "loss": 2.4146, |
| "step": 2260 |
| }, |
| { |
| "epoch": 86.0377358490566, |
| "grad_norm": 1.1641636008270617, |
| "learning_rate": 7.91020994565163e-05, |
| "loss": 2.4208, |
| "step": 2280 |
| }, |
| { |
| "epoch": 86.79245283018868, |
| "grad_norm": 1.2267450186018727, |
| "learning_rate": 7.9085987433219e-05, |
| "loss": 2.4123, |
| "step": 2300 |
| }, |
| { |
| "epoch": 87.54716981132076, |
| "grad_norm": 1.3570826999644423, |
| "learning_rate": 7.906973391767178e-05, |
| "loss": 2.3968, |
| "step": 2320 |
| }, |
| { |
| "epoch": 88.30188679245283, |
| "grad_norm": 1.4751948166402733, |
| "learning_rate": 7.905333897273327e-05, |
| "loss": 2.4266, |
| "step": 2340 |
| }, |
| { |
| "epoch": 89.05660377358491, |
| "grad_norm": 1.6442713319159463, |
| "learning_rate": 7.903680266180908e-05, |
| "loss": 2.4226, |
| "step": 2360 |
| }, |
| { |
| "epoch": 89.81132075471699, |
| "grad_norm": 1.3132724406404779, |
| "learning_rate": 7.90201250488516e-05, |
| "loss": 2.419, |
| "step": 2380 |
| }, |
| { |
| "epoch": 90.56603773584905, |
| "grad_norm": 1.4073019579145547, |
| "learning_rate": 7.900330619835963e-05, |
| "loss": 2.3689, |
| "step": 2400 |
| }, |
| { |
| "epoch": 91.32075471698113, |
| "grad_norm": 1.2366514839120522, |
| "learning_rate": 7.89863461753782e-05, |
| "loss": 2.4054, |
| "step": 2420 |
| }, |
| { |
| "epoch": 92.0754716981132, |
| "grad_norm": 1.2825349652701765, |
| "learning_rate": 7.896924504549836e-05, |
| "loss": 2.4019, |
| "step": 2440 |
| }, |
| { |
| "epoch": 92.83018867924528, |
| "grad_norm": 1.836162542809911, |
| "learning_rate": 7.895200287485676e-05, |
| "loss": 2.4177, |
| "step": 2460 |
| }, |
| { |
| "epoch": 93.58490566037736, |
| "grad_norm": 1.1862449779023223, |
| "learning_rate": 7.893461973013567e-05, |
| "loss": 2.417, |
| "step": 2480 |
| }, |
| { |
| "epoch": 94.33962264150944, |
| "grad_norm": 1.4267902121087415, |
| "learning_rate": 7.891709567856242e-05, |
| "loss": 2.3877, |
| "step": 2500 |
| }, |
| { |
| "epoch": 95.09433962264151, |
| "grad_norm": 1.2628527153576017, |
| "learning_rate": 7.889943078790934e-05, |
| "loss": 2.3893, |
| "step": 2520 |
| }, |
| { |
| "epoch": 95.84905660377359, |
| "grad_norm": 1.2789710243072507, |
| "learning_rate": 7.888162512649344e-05, |
| "loss": 2.3747, |
| "step": 2540 |
| }, |
| { |
| "epoch": 96.60377358490567, |
| "grad_norm": 1.2286761119774143, |
| "learning_rate": 7.886367876317615e-05, |
| "loss": 2.3835, |
| "step": 2560 |
| }, |
| { |
| "epoch": 97.35849056603773, |
| "grad_norm": 1.1142509789518844, |
| "learning_rate": 7.884559176736305e-05, |
| "loss": 2.3751, |
| "step": 2580 |
| }, |
| { |
| "epoch": 98.11320754716981, |
| "grad_norm": 1.4479112681435136, |
| "learning_rate": 7.882736420900357e-05, |
| "loss": 2.3885, |
| "step": 2600 |
| }, |
| { |
| "epoch": 98.86792452830188, |
| "grad_norm": 1.363147415477506, |
| "learning_rate": 7.880899615859078e-05, |
| "loss": 2.3738, |
| "step": 2620 |
| }, |
| { |
| "epoch": 99.62264150943396, |
| "grad_norm": 1.1387365076919822, |
| "learning_rate": 7.879048768716105e-05, |
| "loss": 2.3476, |
| "step": 2640 |
| }, |
| { |
| "epoch": 100.37735849056604, |
| "grad_norm": 1.1944352338174065, |
| "learning_rate": 7.87718388662939e-05, |
| "loss": 2.3729, |
| "step": 2660 |
| }, |
| { |
| "epoch": 101.13207547169812, |
| "grad_norm": 1.1017143695500988, |
| "learning_rate": 7.875304976811153e-05, |
| "loss": 2.3846, |
| "step": 2680 |
| }, |
| { |
| "epoch": 101.88679245283019, |
| "grad_norm": 1.250014546065029, |
| "learning_rate": 7.873412046527873e-05, |
| "loss": 2.3928, |
| "step": 2700 |
| }, |
| { |
| "epoch": 102.64150943396227, |
| "grad_norm": 1.4448571670529484, |
| "learning_rate": 7.871505103100243e-05, |
| "loss": 2.3464, |
| "step": 2720 |
| }, |
| { |
| "epoch": 103.39622641509433, |
| "grad_norm": 1.1242909760207218, |
| "learning_rate": 7.869584153903159e-05, |
| "loss": 2.3739, |
| "step": 2740 |
| }, |
| { |
| "epoch": 104.15094339622641, |
| "grad_norm": 2.2842982833142176, |
| "learning_rate": 7.86764920636568e-05, |
| "loss": 2.348, |
| "step": 2760 |
| }, |
| { |
| "epoch": 104.90566037735849, |
| "grad_norm": 1.377894286349549, |
| "learning_rate": 7.865700267970997e-05, |
| "loss": 2.3888, |
| "step": 2780 |
| }, |
| { |
| "epoch": 105.66037735849056, |
| "grad_norm": 1.889252338819464, |
| "learning_rate": 7.863737346256416e-05, |
| "loss": 2.339, |
| "step": 2800 |
| }, |
| { |
| "epoch": 106.41509433962264, |
| "grad_norm": 1.2007024366101338, |
| "learning_rate": 7.861760448813318e-05, |
| "loss": 2.3518, |
| "step": 2820 |
| }, |
| { |
| "epoch": 107.16981132075472, |
| "grad_norm": 1.3150471864332571, |
| "learning_rate": 7.859769583287136e-05, |
| "loss": 2.3755, |
| "step": 2840 |
| }, |
| { |
| "epoch": 107.9245283018868, |
| "grad_norm": 1.3488307619297817, |
| "learning_rate": 7.857764757377321e-05, |
| "loss": 2.3613, |
| "step": 2860 |
| }, |
| { |
| "epoch": 108.67924528301887, |
| "grad_norm": 1.1271224750447038, |
| "learning_rate": 7.855745978837316e-05, |
| "loss": 2.3434, |
| "step": 2880 |
| }, |
| { |
| "epoch": 109.43396226415095, |
| "grad_norm": 1.2792788627087681, |
| "learning_rate": 7.85371325547452e-05, |
| "loss": 2.3475, |
| "step": 2900 |
| }, |
| { |
| "epoch": 110.18867924528301, |
| "grad_norm": 1.1278269502097389, |
| "learning_rate": 7.851666595150267e-05, |
| "loss": 2.3561, |
| "step": 2920 |
| }, |
| { |
| "epoch": 110.94339622641509, |
| "grad_norm": 1.2221588824212564, |
| "learning_rate": 7.849606005779789e-05, |
| "loss": 2.345, |
| "step": 2940 |
| }, |
| { |
| "epoch": 111.69811320754717, |
| "grad_norm": 1.2272636691471697, |
| "learning_rate": 7.84753149533219e-05, |
| "loss": 2.3491, |
| "step": 2960 |
| }, |
| { |
| "epoch": 112.45283018867924, |
| "grad_norm": 1.4379769660358386, |
| "learning_rate": 7.845443071830403e-05, |
| "loss": 2.3703, |
| "step": 2980 |
| }, |
| { |
| "epoch": 113.20754716981132, |
| "grad_norm": 1.1938598523408401, |
| "learning_rate": 7.843340743351179e-05, |
| "loss": 2.3514, |
| "step": 3000 |
| }, |
| { |
| "epoch": 113.9622641509434, |
| "grad_norm": 1.1633264713108291, |
| "learning_rate": 7.841224518025038e-05, |
| "loss": 2.3396, |
| "step": 3020 |
| }, |
| { |
| "epoch": 114.71698113207547, |
| "grad_norm": 1.1889386134705129, |
| "learning_rate": 7.839094404036246e-05, |
| "loss": 2.3654, |
| "step": 3040 |
| }, |
| { |
| "epoch": 115.47169811320755, |
| "grad_norm": 1.2210304404269434, |
| "learning_rate": 7.836950409622788e-05, |
| "loss": 2.3827, |
| "step": 3060 |
| }, |
| { |
| "epoch": 116.22641509433963, |
| "grad_norm": 1.2063342612399106, |
| "learning_rate": 7.834792543076318e-05, |
| "loss": 2.3316, |
| "step": 3080 |
| }, |
| { |
| "epoch": 116.98113207547169, |
| "grad_norm": 1.1263568091149723, |
| "learning_rate": 7.832620812742149e-05, |
| "loss": 2.3483, |
| "step": 3100 |
| }, |
| { |
| "epoch": 117.73584905660377, |
| "grad_norm": 1.1259514670897872, |
| "learning_rate": 7.830435227019208e-05, |
| "loss": 2.3125, |
| "step": 3120 |
| }, |
| { |
| "epoch": 118.49056603773585, |
| "grad_norm": 1.4031978763279247, |
| "learning_rate": 7.828235794360003e-05, |
| "loss": 2.3509, |
| "step": 3140 |
| }, |
| { |
| "epoch": 119.24528301886792, |
| "grad_norm": 1.1004874238643756, |
| "learning_rate": 7.826022523270598e-05, |
| "loss": 2.2975, |
| "step": 3160 |
| }, |
| { |
| "epoch": 120.0, |
| "grad_norm": 1.1440875702771847, |
| "learning_rate": 7.823795422310573e-05, |
| "loss": 2.3048, |
| "step": 3180 |
| }, |
| { |
| "epoch": 120.75471698113208, |
| "grad_norm": 1.254578833443374, |
| "learning_rate": 7.821554500092995e-05, |
| "loss": 2.3253, |
| "step": 3200 |
| }, |
| { |
| "epoch": 121.50943396226415, |
| "grad_norm": 1.3020705320626609, |
| "learning_rate": 7.819299765284377e-05, |
| "loss": 2.32, |
| "step": 3220 |
| }, |
| { |
| "epoch": 122.26415094339623, |
| "grad_norm": 1.144219025307704, |
| "learning_rate": 7.817031226604663e-05, |
| "loss": 2.3338, |
| "step": 3240 |
| }, |
| { |
| "epoch": 123.01886792452831, |
| "grad_norm": 1.4392091454771268, |
| "learning_rate": 7.814748892827171e-05, |
| "loss": 2.3081, |
| "step": 3260 |
| }, |
| { |
| "epoch": 123.77358490566037, |
| "grad_norm": 1.57104334995189, |
| "learning_rate": 7.812452772778576e-05, |
| "loss": 2.3044, |
| "step": 3280 |
| }, |
| { |
| "epoch": 124.52830188679245, |
| "grad_norm": 1.2140782445458616, |
| "learning_rate": 7.810142875338864e-05, |
| "loss": 2.3162, |
| "step": 3300 |
| }, |
| { |
| "epoch": 125.28301886792453, |
| "grad_norm": 1.1430293000699974, |
| "learning_rate": 7.807819209441311e-05, |
| "loss": 2.3349, |
| "step": 3320 |
| }, |
| { |
| "epoch": 126.0377358490566, |
| "grad_norm": 1.3717461598648188, |
| "learning_rate": 7.805481784072435e-05, |
| "loss": 2.3048, |
| "step": 3340 |
| }, |
| { |
| "epoch": 126.79245283018868, |
| "grad_norm": 1.413324243222593, |
| "learning_rate": 7.803130608271972e-05, |
| "loss": 2.2987, |
| "step": 3360 |
| }, |
| { |
| "epoch": 127.54716981132076, |
| "grad_norm": 1.4726228615781376, |
| "learning_rate": 7.80076569113283e-05, |
| "loss": 2.3164, |
| "step": 3380 |
| }, |
| { |
| "epoch": 128.30188679245282, |
| "grad_norm": 1.4760960932985028, |
| "learning_rate": 7.798387041801066e-05, |
| "loss": 2.3314, |
| "step": 3400 |
| }, |
| { |
| "epoch": 129.0566037735849, |
| "grad_norm": 1.429175780411594, |
| "learning_rate": 7.795994669475842e-05, |
| "loss": 2.2752, |
| "step": 3420 |
| }, |
| { |
| "epoch": 129.81132075471697, |
| "grad_norm": 1.1413240245586067, |
| "learning_rate": 7.793588583409394e-05, |
| "loss": 2.333, |
| "step": 3440 |
| }, |
| { |
| "epoch": 130.56603773584905, |
| "grad_norm": 1.298767089765165, |
| "learning_rate": 7.791168792906992e-05, |
| "loss": 2.3227, |
| "step": 3460 |
| }, |
| { |
| "epoch": 131.32075471698113, |
| "grad_norm": 1.2359046339523858, |
| "learning_rate": 7.788735307326908e-05, |
| "loss": 2.3108, |
| "step": 3480 |
| }, |
| { |
| "epoch": 132.0754716981132, |
| "grad_norm": 1.1866095738297588, |
| "learning_rate": 7.786288136080376e-05, |
| "loss": 2.274, |
| "step": 3500 |
| }, |
| { |
| "epoch": 132.83018867924528, |
| "grad_norm": 1.1104279322428132, |
| "learning_rate": 7.78382728863156e-05, |
| "loss": 2.2888, |
| "step": 3520 |
| }, |
| { |
| "epoch": 133.58490566037736, |
| "grad_norm": 2.2680957450657537, |
| "learning_rate": 7.781352774497518e-05, |
| "loss": 2.2938, |
| "step": 3540 |
| }, |
| { |
| "epoch": 134.33962264150944, |
| "grad_norm": 1.6397138146409036, |
| "learning_rate": 7.778864603248155e-05, |
| "loss": 2.3068, |
| "step": 3560 |
| }, |
| { |
| "epoch": 135.0943396226415, |
| "grad_norm": 1.3552905083817133, |
| "learning_rate": 7.7763627845062e-05, |
| "loss": 2.3155, |
| "step": 3580 |
| }, |
| { |
| "epoch": 135.8490566037736, |
| "grad_norm": 1.1820209128101842, |
| "learning_rate": 7.773847327947157e-05, |
| "loss": 2.2937, |
| "step": 3600 |
| }, |
| { |
| "epoch": 136.60377358490567, |
| "grad_norm": 1.2309654111909685, |
| "learning_rate": 7.771318243299278e-05, |
| "loss": 2.2887, |
| "step": 3620 |
| }, |
| { |
| "epoch": 137.35849056603774, |
| "grad_norm": 1.3795089340342572, |
| "learning_rate": 7.768775540343515e-05, |
| "loss": 2.2961, |
| "step": 3640 |
| }, |
| { |
| "epoch": 138.11320754716982, |
| "grad_norm": 1.3662606951792517, |
| "learning_rate": 7.766219228913492e-05, |
| "loss": 2.288, |
| "step": 3660 |
| }, |
| { |
| "epoch": 138.8679245283019, |
| "grad_norm": 1.1081358756463113, |
| "learning_rate": 7.763649318895459e-05, |
| "loss": 2.3193, |
| "step": 3680 |
| }, |
| { |
| "epoch": 139.62264150943398, |
| "grad_norm": 1.3054965758516237, |
| "learning_rate": 7.761065820228258e-05, |
| "loss": 2.2904, |
| "step": 3700 |
| }, |
| { |
| "epoch": 140.37735849056602, |
| "grad_norm": 1.4052953203319152, |
| "learning_rate": 7.758468742903284e-05, |
| "loss": 2.2803, |
| "step": 3720 |
| }, |
| { |
| "epoch": 141.1320754716981, |
| "grad_norm": 1.310015833541638, |
| "learning_rate": 7.755858096964445e-05, |
| "loss": 2.2891, |
| "step": 3740 |
| }, |
| { |
| "epoch": 141.88679245283018, |
| "grad_norm": 1.0645192580358254, |
| "learning_rate": 7.753233892508125e-05, |
| "loss": 2.2982, |
| "step": 3760 |
| }, |
| { |
| "epoch": 142.64150943396226, |
| "grad_norm": 1.0898474528650213, |
| "learning_rate": 7.750596139683145e-05, |
| "loss": 2.2711, |
| "step": 3780 |
| }, |
| { |
| "epoch": 143.39622641509433, |
| "grad_norm": 1.2074165473918712, |
| "learning_rate": 7.747944848690719e-05, |
| "loss": 2.2592, |
| "step": 3800 |
| }, |
| { |
| "epoch": 144.1509433962264, |
| "grad_norm": 1.0959283058664937, |
| "learning_rate": 7.745280029784423e-05, |
| "loss": 2.2813, |
| "step": 3820 |
| }, |
| { |
| "epoch": 144.9056603773585, |
| "grad_norm": 1.2139556681199035, |
| "learning_rate": 7.742601693270148e-05, |
| "loss": 2.2564, |
| "step": 3840 |
| }, |
| { |
| "epoch": 145.66037735849056, |
| "grad_norm": 1.2374163658098694, |
| "learning_rate": 7.739909849506064e-05, |
| "loss": 2.2972, |
| "step": 3860 |
| }, |
| { |
| "epoch": 146.41509433962264, |
| "grad_norm": 1.212582172960113, |
| "learning_rate": 7.737204508902578e-05, |
| "loss": 2.2683, |
| "step": 3880 |
| }, |
| { |
| "epoch": 147.16981132075472, |
| "grad_norm": 1.064638273683967, |
| "learning_rate": 7.734485681922295e-05, |
| "loss": 2.2643, |
| "step": 3900 |
| }, |
| { |
| "epoch": 147.9245283018868, |
| "grad_norm": 1.5185500205423423, |
| "learning_rate": 7.731753379079976e-05, |
| "loss": 2.2825, |
| "step": 3920 |
| }, |
| { |
| "epoch": 148.67924528301887, |
| "grad_norm": 1.2571175031602655, |
| "learning_rate": 7.7290076109425e-05, |
| "loss": 2.2838, |
| "step": 3940 |
| }, |
| { |
| "epoch": 149.43396226415095, |
| "grad_norm": 1.1185096216789012, |
| "learning_rate": 7.726248388128821e-05, |
| "loss": 2.2713, |
| "step": 3960 |
| }, |
| { |
| "epoch": 150.18867924528303, |
| "grad_norm": 1.283741452573828, |
| "learning_rate": 7.723475721309926e-05, |
| "loss": 2.2578, |
| "step": 3980 |
| }, |
| { |
| "epoch": 150.9433962264151, |
| "grad_norm": 1.1735101055664479, |
| "learning_rate": 7.720689621208799e-05, |
| "loss": 2.2584, |
| "step": 4000 |
| }, |
| { |
| "epoch": 151.69811320754718, |
| "grad_norm": 1.1931741706657397, |
| "learning_rate": 7.717890098600371e-05, |
| "loss": 2.2439, |
| "step": 4020 |
| }, |
| { |
| "epoch": 152.45283018867926, |
| "grad_norm": 1.0510206287412838, |
| "learning_rate": 7.715077164311486e-05, |
| "loss": 2.2646, |
| "step": 4040 |
| }, |
| { |
| "epoch": 153.20754716981133, |
| "grad_norm": 1.2236996476850626, |
| "learning_rate": 7.712250829220856e-05, |
| "loss": 2.2518, |
| "step": 4060 |
| }, |
| { |
| "epoch": 153.96226415094338, |
| "grad_norm": 1.4295022161938338, |
| "learning_rate": 7.70941110425902e-05, |
| "loss": 2.2445, |
| "step": 4080 |
| }, |
| { |
| "epoch": 154.71698113207546, |
| "grad_norm": 1.2608108045607223, |
| "learning_rate": 7.706558000408294e-05, |
| "loss": 2.2504, |
| "step": 4100 |
| }, |
| { |
| "epoch": 155.47169811320754, |
| "grad_norm": 1.4378816608236173, |
| "learning_rate": 7.703691528702747e-05, |
| "loss": 2.2433, |
| "step": 4120 |
| }, |
| { |
| "epoch": 156.22641509433961, |
| "grad_norm": 1.3122607821127985, |
| "learning_rate": 7.700811700228138e-05, |
| "loss": 2.2593, |
| "step": 4140 |
| }, |
| { |
| "epoch": 156.9811320754717, |
| "grad_norm": 1.1677763203213758, |
| "learning_rate": 7.697918526121882e-05, |
| "loss": 2.2521, |
| "step": 4160 |
| }, |
| { |
| "epoch": 157.73584905660377, |
| "grad_norm": 1.1304212534843256, |
| "learning_rate": 7.695012017573013e-05, |
| "loss": 2.2743, |
| "step": 4180 |
| }, |
| { |
| "epoch": 158.49056603773585, |
| "grad_norm": 1.2157344056650818, |
| "learning_rate": 7.692092185822129e-05, |
| "loss": 2.2405, |
| "step": 4200 |
| }, |
| { |
| "epoch": 159.24528301886792, |
| "grad_norm": 1.2521062422528308, |
| "learning_rate": 7.689159042161356e-05, |
| "loss": 2.258, |
| "step": 4220 |
| }, |
| { |
| "epoch": 160.0, |
| "grad_norm": 1.417021221810849, |
| "learning_rate": 7.686212597934299e-05, |
| "loss": 2.2187, |
| "step": 4240 |
| }, |
| { |
| "epoch": 160.75471698113208, |
| "grad_norm": 1.0987738687082824, |
| "learning_rate": 7.68325286453601e-05, |
| "loss": 2.2155, |
| "step": 4260 |
| }, |
| { |
| "epoch": 161.50943396226415, |
| "grad_norm": 1.4771801969035276, |
| "learning_rate": 7.680279853412924e-05, |
| "loss": 2.27, |
| "step": 4280 |
| }, |
| { |
| "epoch": 162.26415094339623, |
| "grad_norm": 1.1956274528883593, |
| "learning_rate": 7.677293576062836e-05, |
| "loss": 2.2717, |
| "step": 4300 |
| }, |
| { |
| "epoch": 163.0188679245283, |
| "grad_norm": 1.1219859338242828, |
| "learning_rate": 7.674294044034839e-05, |
| "loss": 2.2487, |
| "step": 4320 |
| }, |
| { |
| "epoch": 163.77358490566039, |
| "grad_norm": 1.255744824066408, |
| "learning_rate": 7.671281268929293e-05, |
| "loss": 2.2366, |
| "step": 4340 |
| }, |
| { |
| "epoch": 164.52830188679246, |
| "grad_norm": 1.112451658029252, |
| "learning_rate": 7.668255262397772e-05, |
| "loss": 2.2377, |
| "step": 4360 |
| }, |
| { |
| "epoch": 165.28301886792454, |
| "grad_norm": 1.1131032086265853, |
| "learning_rate": 7.66521603614302e-05, |
| "loss": 2.2483, |
| "step": 4380 |
| }, |
| { |
| "epoch": 166.03773584905662, |
| "grad_norm": 1.2568117014241036, |
| "learning_rate": 7.662163601918907e-05, |
| "loss": 2.2637, |
| "step": 4400 |
| }, |
| { |
| "epoch": 166.79245283018867, |
| "grad_norm": 1.0763275712599132, |
| "learning_rate": 7.659097971530385e-05, |
| "loss": 2.2275, |
| "step": 4420 |
| }, |
| { |
| "epoch": 167.54716981132074, |
| "grad_norm": 1.0880356132513982, |
| "learning_rate": 7.656019156833438e-05, |
| "loss": 2.227, |
| "step": 4440 |
| }, |
| { |
| "epoch": 168.30188679245282, |
| "grad_norm": 1.0805504953865772, |
| "learning_rate": 7.652927169735042e-05, |
| "loss": 2.2205, |
| "step": 4460 |
| }, |
| { |
| "epoch": 169.0566037735849, |
| "grad_norm": 1.0979536600508317, |
| "learning_rate": 7.649822022193114e-05, |
| "loss": 2.2008, |
| "step": 4480 |
| }, |
| { |
| "epoch": 169.81132075471697, |
| "grad_norm": 1.0424485855679975, |
| "learning_rate": 7.646703726216467e-05, |
| "loss": 2.235, |
| "step": 4500 |
| }, |
| { |
| "epoch": 170.56603773584905, |
| "grad_norm": 1.1541609361962377, |
| "learning_rate": 7.643572293864766e-05, |
| "loss": 2.2297, |
| "step": 4520 |
| }, |
| { |
| "epoch": 171.32075471698113, |
| "grad_norm": 1.1630212513509717, |
| "learning_rate": 7.640427737248479e-05, |
| "loss": 2.2295, |
| "step": 4540 |
| }, |
| { |
| "epoch": 172.0754716981132, |
| "grad_norm": 1.5088805287099432, |
| "learning_rate": 7.637270068528828e-05, |
| "loss": 2.2445, |
| "step": 4560 |
| }, |
| { |
| "epoch": 172.83018867924528, |
| "grad_norm": 1.66773080303759, |
| "learning_rate": 7.634099299917748e-05, |
| "loss": 2.2336, |
| "step": 4580 |
| }, |
| { |
| "epoch": 173.58490566037736, |
| "grad_norm": 1.4239223646642891, |
| "learning_rate": 7.630915443677834e-05, |
| "loss": 2.2128, |
| "step": 4600 |
| }, |
| { |
| "epoch": 174.33962264150944, |
| "grad_norm": 1.2623270496447048, |
| "learning_rate": 7.627718512122297e-05, |
| "loss": 2.2253, |
| "step": 4620 |
| }, |
| { |
| "epoch": 175.0943396226415, |
| "grad_norm": 1.2406324767245749, |
| "learning_rate": 7.624508517614919e-05, |
| "loss": 2.2131, |
| "step": 4640 |
| }, |
| { |
| "epoch": 175.8490566037736, |
| "grad_norm": 1.3130455463591448, |
| "learning_rate": 7.621285472569993e-05, |
| "loss": 2.1944, |
| "step": 4660 |
| }, |
| { |
| "epoch": 176.60377358490567, |
| "grad_norm": 1.1413419622441512, |
| "learning_rate": 7.61804938945229e-05, |
| "loss": 2.2243, |
| "step": 4680 |
| }, |
| { |
| "epoch": 177.35849056603774, |
| "grad_norm": 1.2146654711035267, |
| "learning_rate": 7.614800280777005e-05, |
| "loss": 2.2172, |
| "step": 4700 |
| }, |
| { |
| "epoch": 178.11320754716982, |
| "grad_norm": 1.3634898063511693, |
| "learning_rate": 7.611538159109703e-05, |
| "loss": 2.205, |
| "step": 4720 |
| }, |
| { |
| "epoch": 178.8679245283019, |
| "grad_norm": 1.2311721419826, |
| "learning_rate": 7.608263037066277e-05, |
| "loss": 2.2252, |
| "step": 4740 |
| }, |
| { |
| "epoch": 179.62264150943398, |
| "grad_norm": 1.0908914570592438, |
| "learning_rate": 7.6049749273129e-05, |
| "loss": 2.2138, |
| "step": 4760 |
| }, |
| { |
| "epoch": 180.37735849056602, |
| "grad_norm": 1.1038829505990149, |
| "learning_rate": 7.601673842565972e-05, |
| "loss": 2.1939, |
| "step": 4780 |
| }, |
| { |
| "epoch": 181.1320754716981, |
| "grad_norm": 1.1236372724431538, |
| "learning_rate": 7.598359795592073e-05, |
| "loss": 2.2382, |
| "step": 4800 |
| }, |
| { |
| "epoch": 181.88679245283018, |
| "grad_norm": 1.3232451908070362, |
| "learning_rate": 7.59503279920791e-05, |
| "loss": 2.201, |
| "step": 4820 |
| }, |
| { |
| "epoch": 182.64150943396226, |
| "grad_norm": 1.3292125597941664, |
| "learning_rate": 7.591692866280274e-05, |
| "loss": 2.2058, |
| "step": 4840 |
| }, |
| { |
| "epoch": 183.39622641509433, |
| "grad_norm": 1.1970310296785942, |
| "learning_rate": 7.588340009725985e-05, |
| "loss": 2.206, |
| "step": 4860 |
| }, |
| { |
| "epoch": 184.1509433962264, |
| "grad_norm": 1.055682897860096, |
| "learning_rate": 7.584974242511845e-05, |
| "loss": 2.2148, |
| "step": 4880 |
| }, |
| { |
| "epoch": 184.9056603773585, |
| "grad_norm": 1.1655929048666676, |
| "learning_rate": 7.581595577654584e-05, |
| "loss": 2.2146, |
| "step": 4900 |
| }, |
| { |
| "epoch": 185.66037735849056, |
| "grad_norm": 1.2197862783964168, |
| "learning_rate": 7.578204028220814e-05, |
| "loss": 2.2023, |
| "step": 4920 |
| }, |
| { |
| "epoch": 186.41509433962264, |
| "grad_norm": 1.1536947546834515, |
| "learning_rate": 7.574799607326977e-05, |
| "loss": 2.2074, |
| "step": 4940 |
| }, |
| { |
| "epoch": 187.16981132075472, |
| "grad_norm": 1.1570044860516948, |
| "learning_rate": 7.571382328139293e-05, |
| "loss": 2.2057, |
| "step": 4960 |
| }, |
| { |
| "epoch": 187.9245283018868, |
| "grad_norm": 2.2251854969672165, |
| "learning_rate": 7.56795220387371e-05, |
| "loss": 2.1975, |
| "step": 4980 |
| }, |
| { |
| "epoch": 188.67924528301887, |
| "grad_norm": 1.16489093753128, |
| "learning_rate": 7.564509247795854e-05, |
| "loss": 2.1947, |
| "step": 5000 |
| }, |
| { |
| "epoch": 189.43396226415095, |
| "grad_norm": 1.1610456984999162, |
| "learning_rate": 7.561053473220977e-05, |
| "loss": 2.1861, |
| "step": 5020 |
| }, |
| { |
| "epoch": 190.18867924528303, |
| "grad_norm": 1.173342232590181, |
| "learning_rate": 7.557584893513902e-05, |
| "loss": 2.1997, |
| "step": 5040 |
| }, |
| { |
| "epoch": 190.9433962264151, |
| "grad_norm": 1.1873135989990635, |
| "learning_rate": 7.554103522088976e-05, |
| "loss": 2.1841, |
| "step": 5060 |
| }, |
| { |
| "epoch": 191.69811320754718, |
| "grad_norm": 1.4263143797188473, |
| "learning_rate": 7.550609372410018e-05, |
| "loss": 2.1823, |
| "step": 5080 |
| }, |
| { |
| "epoch": 192.45283018867926, |
| "grad_norm": 1.0849530063111787, |
| "learning_rate": 7.547102457990266e-05, |
| "loss": 2.1842, |
| "step": 5100 |
| }, |
| { |
| "epoch": 193.20754716981133, |
| "grad_norm": 1.1497288768060088, |
| "learning_rate": 7.54358279239232e-05, |
| "loss": 2.2258, |
| "step": 5120 |
| }, |
| { |
| "epoch": 193.96226415094338, |
| "grad_norm": 1.2795496420829302, |
| "learning_rate": 7.540050389228099e-05, |
| "loss": 2.192, |
| "step": 5140 |
| }, |
| { |
| "epoch": 194.71698113207546, |
| "grad_norm": 1.0700549445449614, |
| "learning_rate": 7.536505262158779e-05, |
| "loss": 2.1913, |
| "step": 5160 |
| }, |
| { |
| "epoch": 195.47169811320754, |
| "grad_norm": 1.3697359389801924, |
| "learning_rate": 7.532947424894744e-05, |
| "loss": 2.2044, |
| "step": 5180 |
| }, |
| { |
| "epoch": 196.22641509433961, |
| "grad_norm": 1.0721264053082575, |
| "learning_rate": 7.52937689119554e-05, |
| "loss": 2.1916, |
| "step": 5200 |
| }, |
| { |
| "epoch": 196.9811320754717, |
| "grad_norm": 1.2325173290768243, |
| "learning_rate": 7.525793674869805e-05, |
| "loss": 2.1738, |
| "step": 5220 |
| }, |
| { |
| "epoch": 197.73584905660377, |
| "grad_norm": 1.078471360885739, |
| "learning_rate": 7.522197789775235e-05, |
| "loss": 2.2043, |
| "step": 5240 |
| }, |
| { |
| "epoch": 198.49056603773585, |
| "grad_norm": 1.508079711738152, |
| "learning_rate": 7.518589249818516e-05, |
| "loss": 2.2159, |
| "step": 5260 |
| }, |
| { |
| "epoch": 199.24528301886792, |
| "grad_norm": 1.0511550659614401, |
| "learning_rate": 7.514968068955273e-05, |
| "loss": 2.168, |
| "step": 5280 |
| }, |
| { |
| "epoch": 200.0, |
| "grad_norm": 1.0585993296644824, |
| "learning_rate": 7.511334261190026e-05, |
| "loss": 2.1847, |
| "step": 5300 |
| }, |
| { |
| "epoch": 200.75471698113208, |
| "grad_norm": 1.19584254290663, |
| "learning_rate": 7.507687840576123e-05, |
| "loss": 2.1953, |
| "step": 5320 |
| }, |
| { |
| "epoch": 201.50943396226415, |
| "grad_norm": 1.360707266271236, |
| "learning_rate": 7.504028821215686e-05, |
| "loss": 2.1866, |
| "step": 5340 |
| }, |
| { |
| "epoch": 202.26415094339623, |
| "grad_norm": 1.0326833677791634, |
| "learning_rate": 7.500357217259573e-05, |
| "loss": 2.1889, |
| "step": 5360 |
| }, |
| { |
| "epoch": 203.0188679245283, |
| "grad_norm": 1.1966698046584427, |
| "learning_rate": 7.496673042907302e-05, |
| "loss": 2.204, |
| "step": 5380 |
| }, |
| { |
| "epoch": 203.77358490566039, |
| "grad_norm": 1.1792590946885393, |
| "learning_rate": 7.492976312407011e-05, |
| "loss": 2.1679, |
| "step": 5400 |
| }, |
| { |
| "epoch": 204.52830188679246, |
| "grad_norm": 1.0821551276306904, |
| "learning_rate": 7.489267040055393e-05, |
| "loss": 2.172, |
| "step": 5420 |
| }, |
| { |
| "epoch": 205.28301886792454, |
| "grad_norm": 1.1206896992927644, |
| "learning_rate": 7.48554524019765e-05, |
| "loss": 2.1558, |
| "step": 5440 |
| }, |
| { |
| "epoch": 206.03773584905662, |
| "grad_norm": 1.164481519584628, |
| "learning_rate": 7.481810927227427e-05, |
| "loss": 2.1707, |
| "step": 5460 |
| }, |
| { |
| "epoch": 206.79245283018867, |
| "grad_norm": 1.045173494578065, |
| "learning_rate": 7.47806411558677e-05, |
| "loss": 2.1454, |
| "step": 5480 |
| }, |
| { |
| "epoch": 207.54716981132074, |
| "grad_norm": 1.3037299893846073, |
| "learning_rate": 7.474304819766053e-05, |
| "loss": 2.1735, |
| "step": 5500 |
| }, |
| { |
| "epoch": 208.30188679245282, |
| "grad_norm": 1.1799164756908072, |
| "learning_rate": 7.470533054303937e-05, |
| "loss": 2.1678, |
| "step": 5520 |
| }, |
| { |
| "epoch": 209.0566037735849, |
| "grad_norm": 1.23204534029245, |
| "learning_rate": 7.46674883378731e-05, |
| "loss": 2.18, |
| "step": 5540 |
| }, |
| { |
| "epoch": 209.81132075471697, |
| "grad_norm": 1.1705040244332197, |
| "learning_rate": 7.462952172851219e-05, |
| "loss": 2.1638, |
| "step": 5560 |
| }, |
| { |
| "epoch": 210.56603773584905, |
| "grad_norm": 1.115647376955501, |
| "learning_rate": 7.459143086178838e-05, |
| "loss": 2.1517, |
| "step": 5580 |
| }, |
| { |
| "epoch": 211.32075471698113, |
| "grad_norm": 1.095644914375309, |
| "learning_rate": 7.455321588501378e-05, |
| "loss": 2.1624, |
| "step": 5600 |
| }, |
| { |
| "epoch": 212.0754716981132, |
| "grad_norm": 1.2461377018123299, |
| "learning_rate": 7.451487694598063e-05, |
| "loss": 2.1795, |
| "step": 5620 |
| }, |
| { |
| "epoch": 212.83018867924528, |
| "grad_norm": 1.0808714278402736, |
| "learning_rate": 7.447641419296051e-05, |
| "loss": 2.1857, |
| "step": 5640 |
| }, |
| { |
| "epoch": 213.58490566037736, |
| "grad_norm": 1.175783749152713, |
| "learning_rate": 7.443782777470388e-05, |
| "loss": 2.1489, |
| "step": 5660 |
| }, |
| { |
| "epoch": 214.33962264150944, |
| "grad_norm": 1.0323602107911023, |
| "learning_rate": 7.43991178404394e-05, |
| "loss": 2.1814, |
| "step": 5680 |
| }, |
| { |
| "epoch": 215.0943396226415, |
| "grad_norm": 1.4371901693782694, |
| "learning_rate": 7.436028453987343e-05, |
| "loss": 2.1607, |
| "step": 5700 |
| }, |
| { |
| "epoch": 215.8490566037736, |
| "grad_norm": 1.2749189929859621, |
| "learning_rate": 7.432132802318953e-05, |
| "loss": 2.1344, |
| "step": 5720 |
| }, |
| { |
| "epoch": 216.60377358490567, |
| "grad_norm": 1.7991005001893379, |
| "learning_rate": 7.428224844104763e-05, |
| "loss": 2.1705, |
| "step": 5740 |
| }, |
| { |
| "epoch": 217.35849056603774, |
| "grad_norm": 1.0763947355182082, |
| "learning_rate": 7.424304594458374e-05, |
| "loss": 2.1681, |
| "step": 5760 |
| }, |
| { |
| "epoch": 218.11320754716982, |
| "grad_norm": 1.147647175883896, |
| "learning_rate": 7.420372068540913e-05, |
| "loss": 2.1792, |
| "step": 5780 |
| }, |
| { |
| "epoch": 218.8679245283019, |
| "grad_norm": 1.232169418468151, |
| "learning_rate": 7.41642728156099e-05, |
| "loss": 2.1143, |
| "step": 5800 |
| }, |
| { |
| "epoch": 219.62264150943398, |
| "grad_norm": 1.3992234444810514, |
| "learning_rate": 7.41247024877463e-05, |
| "loss": 2.1612, |
| "step": 5820 |
| }, |
| { |
| "epoch": 220.37735849056602, |
| "grad_norm": 1.2478978185410232, |
| "learning_rate": 7.40850098548522e-05, |
| "loss": 2.1749, |
| "step": 5840 |
| }, |
| { |
| "epoch": 221.1320754716981, |
| "grad_norm": 1.0796153228438745, |
| "learning_rate": 7.404519507043443e-05, |
| "loss": 2.1345, |
| "step": 5860 |
| }, |
| { |
| "epoch": 221.88679245283018, |
| "grad_norm": 1.164330548160425, |
| "learning_rate": 7.40052582884723e-05, |
| "loss": 2.1573, |
| "step": 5880 |
| }, |
| { |
| "epoch": 222.64150943396226, |
| "grad_norm": 1.2041551436276394, |
| "learning_rate": 7.396519966341684e-05, |
| "loss": 2.162, |
| "step": 5900 |
| }, |
| { |
| "epoch": 223.39622641509433, |
| "grad_norm": 1.2780053810145304, |
| "learning_rate": 7.392501935019036e-05, |
| "loss": 2.1524, |
| "step": 5920 |
| }, |
| { |
| "epoch": 224.1509433962264, |
| "grad_norm": 1.069497717017709, |
| "learning_rate": 7.388471750418576e-05, |
| "loss": 2.1427, |
| "step": 5940 |
| }, |
| { |
| "epoch": 224.9056603773585, |
| "grad_norm": 1.1790523262171884, |
| "learning_rate": 7.384429428126599e-05, |
| "loss": 2.1693, |
| "step": 5960 |
| }, |
| { |
| "epoch": 225.66037735849056, |
| "grad_norm": 1.0727940077044007, |
| "learning_rate": 7.380374983776333e-05, |
| "loss": 2.1146, |
| "step": 5980 |
| }, |
| { |
| "epoch": 226.41509433962264, |
| "grad_norm": 1.0481198314836597, |
| "learning_rate": 7.376308433047898e-05, |
| "loss": 2.1563, |
| "step": 6000 |
| }, |
| { |
| "epoch": 227.16981132075472, |
| "grad_norm": 1.3874056107583248, |
| "learning_rate": 7.372229791668223e-05, |
| "loss": 2.1456, |
| "step": 6020 |
| }, |
| { |
| "epoch": 227.9245283018868, |
| "grad_norm": 1.3153838535909976, |
| "learning_rate": 7.368139075411003e-05, |
| "loss": 2.1575, |
| "step": 6040 |
| }, |
| { |
| "epoch": 228.67924528301887, |
| "grad_norm": 1.1788160013410025, |
| "learning_rate": 7.364036300096631e-05, |
| "loss": 2.1437, |
| "step": 6060 |
| }, |
| { |
| "epoch": 229.43396226415095, |
| "grad_norm": 1.1169312984810649, |
| "learning_rate": 7.359921481592136e-05, |
| "loss": 2.1568, |
| "step": 6080 |
| }, |
| { |
| "epoch": 230.18867924528303, |
| "grad_norm": 1.1435068374715258, |
| "learning_rate": 7.355794635811118e-05, |
| "loss": 2.1503, |
| "step": 6100 |
| }, |
| { |
| "epoch": 230.9433962264151, |
| "grad_norm": 1.7552469517638039, |
| "learning_rate": 7.3516557787137e-05, |
| "loss": 2.128, |
| "step": 6120 |
| }, |
| { |
| "epoch": 231.69811320754718, |
| "grad_norm": 1.0779669989000775, |
| "learning_rate": 7.347504926306452e-05, |
| "loss": 2.1485, |
| "step": 6140 |
| }, |
| { |
| "epoch": 232.45283018867926, |
| "grad_norm": 1.186788194688993, |
| "learning_rate": 7.343342094642333e-05, |
| "loss": 2.1576, |
| "step": 6160 |
| }, |
| { |
| "epoch": 233.20754716981133, |
| "grad_norm": 1.0594086679490557, |
| "learning_rate": 7.339167299820636e-05, |
| "loss": 2.1492, |
| "step": 6180 |
| }, |
| { |
| "epoch": 233.96226415094338, |
| "grad_norm": 1.1917321731840318, |
| "learning_rate": 7.334980557986916e-05, |
| "loss": 2.1482, |
| "step": 6200 |
| }, |
| { |
| "epoch": 234.71698113207546, |
| "grad_norm": 1.083198692826801, |
| "learning_rate": 7.330781885332932e-05, |
| "loss": 2.1461, |
| "step": 6220 |
| }, |
| { |
| "epoch": 235.47169811320754, |
| "grad_norm": 1.3139182121317998, |
| "learning_rate": 7.326571298096586e-05, |
| "loss": 2.156, |
| "step": 6240 |
| }, |
| { |
| "epoch": 236.22641509433961, |
| "grad_norm": 1.0854207170845476, |
| "learning_rate": 7.322348812561857e-05, |
| "loss": 2.1258, |
| "step": 6260 |
| }, |
| { |
| "epoch": 236.9811320754717, |
| "grad_norm": 1.5015227061373095, |
| "learning_rate": 7.318114445058739e-05, |
| "loss": 2.1439, |
| "step": 6280 |
| }, |
| { |
| "epoch": 237.73584905660377, |
| "grad_norm": 1.2347794021289429, |
| "learning_rate": 7.313868211963179e-05, |
| "loss": 2.1317, |
| "step": 6300 |
| }, |
| { |
| "epoch": 238.49056603773585, |
| "grad_norm": 1.442835177639965, |
| "learning_rate": 7.309610129697015e-05, |
| "loss": 2.113, |
| "step": 6320 |
| }, |
| { |
| "epoch": 239.24528301886792, |
| "grad_norm": 1.236255276661992, |
| "learning_rate": 7.305340214727905e-05, |
| "loss": 2.1378, |
| "step": 6340 |
| }, |
| { |
| "epoch": 240.0, |
| "grad_norm": 1.205183440308278, |
| "learning_rate": 7.301058483569271e-05, |
| "loss": 2.1336, |
| "step": 6360 |
| }, |
| { |
| "epoch": 240.75471698113208, |
| "grad_norm": 1.3246584618487252, |
| "learning_rate": 7.296764952780239e-05, |
| "loss": 2.1221, |
| "step": 6380 |
| }, |
| { |
| "epoch": 241.50943396226415, |
| "grad_norm": 1.0168670687272512, |
| "learning_rate": 7.292459638965558e-05, |
| "loss": 2.1188, |
| "step": 6400 |
| }, |
| { |
| "epoch": 242.26415094339623, |
| "grad_norm": 1.3467491151924502, |
| "learning_rate": 7.288142558775552e-05, |
| "loss": 2.101, |
| "step": 6420 |
| }, |
| { |
| "epoch": 243.0188679245283, |
| "grad_norm": 1.261074686560294, |
| "learning_rate": 7.283813728906054e-05, |
| "loss": 2.1411, |
| "step": 6440 |
| }, |
| { |
| "epoch": 243.77358490566039, |
| "grad_norm": 1.2485690805022434, |
| "learning_rate": 7.27947316609833e-05, |
| "loss": 2.1277, |
| "step": 6460 |
| }, |
| { |
| "epoch": 244.52830188679246, |
| "grad_norm": 1.3241322758759912, |
| "learning_rate": 7.275120887139026e-05, |
| "loss": 2.1363, |
| "step": 6480 |
| }, |
| { |
| "epoch": 245.28301886792454, |
| "grad_norm": 1.5599386219671891, |
| "learning_rate": 7.270756908860098e-05, |
| "loss": 2.1089, |
| "step": 6500 |
| }, |
| { |
| "epoch": 246.03773584905662, |
| "grad_norm": 1.2114819177389966, |
| "learning_rate": 7.266381248138751e-05, |
| "loss": 2.1089, |
| "step": 6520 |
| }, |
| { |
| "epoch": 246.79245283018867, |
| "grad_norm": 1.1306582742382014, |
| "learning_rate": 7.261993921897364e-05, |
| "loss": 2.1079, |
| "step": 6540 |
| }, |
| { |
| "epoch": 247.54716981132074, |
| "grad_norm": 1.2673326383282852, |
| "learning_rate": 7.257594947103438e-05, |
| "loss": 2.1266, |
| "step": 6560 |
| }, |
| { |
| "epoch": 248.30188679245282, |
| "grad_norm": 1.4019469970816203, |
| "learning_rate": 7.253184340769518e-05, |
| "loss": 2.1481, |
| "step": 6580 |
| }, |
| { |
| "epoch": 249.0566037735849, |
| "grad_norm": 1.0449709444069573, |
| "learning_rate": 7.248762119953135e-05, |
| "loss": 2.1158, |
| "step": 6600 |
| }, |
| { |
| "epoch": 249.81132075471697, |
| "grad_norm": 1.1593445705123036, |
| "learning_rate": 7.244328301756737e-05, |
| "loss": 2.13, |
| "step": 6620 |
| }, |
| { |
| "epoch": 250.56603773584905, |
| "grad_norm": 1.2635129121192081, |
| "learning_rate": 7.23988290332763e-05, |
| "loss": 2.1167, |
| "step": 6640 |
| }, |
| { |
| "epoch": 251.32075471698113, |
| "grad_norm": 1.0527620092255492, |
| "learning_rate": 7.235425941857891e-05, |
| "loss": 2.114, |
| "step": 6660 |
| }, |
| { |
| "epoch": 252.0754716981132, |
| "grad_norm": 1.3035661165388843, |
| "learning_rate": 7.230957434584331e-05, |
| "loss": 2.0928, |
| "step": 6680 |
| }, |
| { |
| "epoch": 252.83018867924528, |
| "grad_norm": 1.0136550616355096, |
| "learning_rate": 7.226477398788402e-05, |
| "loss": 2.0987, |
| "step": 6700 |
| }, |
| { |
| "epoch": 253.58490566037736, |
| "grad_norm": 1.258957796854538, |
| "learning_rate": 7.22198585179615e-05, |
| "loss": 2.1032, |
| "step": 6720 |
| }, |
| { |
| "epoch": 254.33962264150944, |
| "grad_norm": 1.2937771749668925, |
| "learning_rate": 7.21748281097813e-05, |
| "loss": 2.1003, |
| "step": 6740 |
| }, |
| { |
| "epoch": 255.0943396226415, |
| "grad_norm": 1.0533802729958242, |
| "learning_rate": 7.212968293749357e-05, |
| "loss": 2.1201, |
| "step": 6760 |
| }, |
| { |
| "epoch": 255.8490566037736, |
| "grad_norm": 1.0065996122655994, |
| "learning_rate": 7.208442317569225e-05, |
| "loss": 2.1119, |
| "step": 6780 |
| }, |
| { |
| "epoch": 256.60377358490564, |
| "grad_norm": 1.1726423865130644, |
| "learning_rate": 7.203904899941444e-05, |
| "loss": 2.0967, |
| "step": 6800 |
| }, |
| { |
| "epoch": 257.35849056603774, |
| "grad_norm": 1.4137580376820904, |
| "learning_rate": 7.199356058413975e-05, |
| "loss": 2.1297, |
| "step": 6820 |
| }, |
| { |
| "epoch": 258.1132075471698, |
| "grad_norm": 1.2534212871623691, |
| "learning_rate": 7.194795810578956e-05, |
| "loss": 2.1142, |
| "step": 6840 |
| }, |
| { |
| "epoch": 258.8679245283019, |
| "grad_norm": 1.1760816154209972, |
| "learning_rate": 7.190224174072643e-05, |
| "loss": 2.1524, |
| "step": 6860 |
| }, |
| { |
| "epoch": 259.62264150943395, |
| "grad_norm": 1.1576937144547554, |
| "learning_rate": 7.185641166575331e-05, |
| "loss": 2.0873, |
| "step": 6880 |
| }, |
| { |
| "epoch": 260.37735849056605, |
| "grad_norm": 1.18595129264392, |
| "learning_rate": 7.181046805811294e-05, |
| "loss": 2.1118, |
| "step": 6900 |
| }, |
| { |
| "epoch": 261.1320754716981, |
| "grad_norm": 1.1205604370668647, |
| "learning_rate": 7.176441109548715e-05, |
| "loss": 2.0986, |
| "step": 6920 |
| }, |
| { |
| "epoch": 261.8867924528302, |
| "grad_norm": 1.1884901674285933, |
| "learning_rate": 7.171824095599609e-05, |
| "loss": 2.1109, |
| "step": 6940 |
| }, |
| { |
| "epoch": 262.64150943396226, |
| "grad_norm": 1.123509221114028, |
| "learning_rate": 7.167195781819768e-05, |
| "loss": 2.1047, |
| "step": 6960 |
| }, |
| { |
| "epoch": 263.39622641509436, |
| "grad_norm": 1.1260075560640628, |
| "learning_rate": 7.162556186108684e-05, |
| "loss": 2.0972, |
| "step": 6980 |
| }, |
| { |
| "epoch": 264.1509433962264, |
| "grad_norm": 1.191570785218505, |
| "learning_rate": 7.157905326409477e-05, |
| "loss": 2.0938, |
| "step": 7000 |
| }, |
| { |
| "epoch": 264.9056603773585, |
| "grad_norm": 1.1009948219165815, |
| "learning_rate": 7.153243220708831e-05, |
| "loss": 2.1084, |
| "step": 7020 |
| }, |
| { |
| "epoch": 265.66037735849056, |
| "grad_norm": 1.0984593800759155, |
| "learning_rate": 7.148569887036923e-05, |
| "loss": 2.0989, |
| "step": 7040 |
| }, |
| { |
| "epoch": 266.41509433962267, |
| "grad_norm": 1.2379578619669414, |
| "learning_rate": 7.143885343467355e-05, |
| "loss": 2.1166, |
| "step": 7060 |
| }, |
| { |
| "epoch": 267.1698113207547, |
| "grad_norm": 1.1064036960932773, |
| "learning_rate": 7.139189608117077e-05, |
| "loss": 2.1104, |
| "step": 7080 |
| }, |
| { |
| "epoch": 267.92452830188677, |
| "grad_norm": 1.0772108392111555, |
| "learning_rate": 7.134482699146328e-05, |
| "loss": 2.0897, |
| "step": 7100 |
| }, |
| { |
| "epoch": 268.6792452830189, |
| "grad_norm": 1.1292302199915438, |
| "learning_rate": 7.129764634758554e-05, |
| "loss": 2.1157, |
| "step": 7120 |
| }, |
| { |
| "epoch": 269.4339622641509, |
| "grad_norm": 1.1278390668879588, |
| "learning_rate": 7.125035433200346e-05, |
| "loss": 2.0932, |
| "step": 7140 |
| }, |
| { |
| "epoch": 270.188679245283, |
| "grad_norm": 1.0414531139729244, |
| "learning_rate": 7.120295112761368e-05, |
| "loss": 2.1151, |
| "step": 7160 |
| }, |
| { |
| "epoch": 270.9433962264151, |
| "grad_norm": 1.1545750967690267, |
| "learning_rate": 7.115543691774282e-05, |
| "loss": 2.1131, |
| "step": 7180 |
| }, |
| { |
| "epoch": 271.6981132075472, |
| "grad_norm": 1.204421852849513, |
| "learning_rate": 7.110781188614684e-05, |
| "loss": 2.0802, |
| "step": 7200 |
| }, |
| { |
| "epoch": 272.45283018867923, |
| "grad_norm": 1.7831981359411682, |
| "learning_rate": 7.106007621701024e-05, |
| "loss": 2.0798, |
| "step": 7220 |
| }, |
| { |
| "epoch": 273.20754716981133, |
| "grad_norm": 1.3197751132016162, |
| "learning_rate": 7.101223009494545e-05, |
| "loss": 2.0992, |
| "step": 7240 |
| }, |
| { |
| "epoch": 273.9622641509434, |
| "grad_norm": 1.2400242729400996, |
| "learning_rate": 7.096427370499204e-05, |
| "loss": 2.0864, |
| "step": 7260 |
| }, |
| { |
| "epoch": 274.7169811320755, |
| "grad_norm": 1.0912978575620245, |
| "learning_rate": 7.091620723261605e-05, |
| "loss": 2.0923, |
| "step": 7280 |
| }, |
| { |
| "epoch": 275.47169811320754, |
| "grad_norm": 1.231133204650358, |
| "learning_rate": 7.086803086370918e-05, |
| "loss": 2.0795, |
| "step": 7300 |
| }, |
| { |
| "epoch": 276.22641509433964, |
| "grad_norm": 1.2282758399906704, |
| "learning_rate": 7.081974478458825e-05, |
| "loss": 2.0761, |
| "step": 7320 |
| }, |
| { |
| "epoch": 276.9811320754717, |
| "grad_norm": 1.3460196947110317, |
| "learning_rate": 7.077134918199428e-05, |
| "loss": 2.0752, |
| "step": 7340 |
| }, |
| { |
| "epoch": 277.7358490566038, |
| "grad_norm": 1.080960097565614, |
| "learning_rate": 7.072284424309193e-05, |
| "loss": 2.0889, |
| "step": 7360 |
| }, |
| { |
| "epoch": 278.49056603773585, |
| "grad_norm": 1.3111047591517453, |
| "learning_rate": 7.067423015546863e-05, |
| "loss": 2.0839, |
| "step": 7380 |
| }, |
| { |
| "epoch": 279.24528301886795, |
| "grad_norm": 1.5401314919739673, |
| "learning_rate": 7.0625507107134e-05, |
| "loss": 2.0927, |
| "step": 7400 |
| }, |
| { |
| "epoch": 280.0, |
| "grad_norm": 1.1041636245431063, |
| "learning_rate": 7.057667528651904e-05, |
| "loss": 2.0803, |
| "step": 7420 |
| }, |
| { |
| "epoch": 280.75471698113205, |
| "grad_norm": 1.4056508604045173, |
| "learning_rate": 7.052773488247539e-05, |
| "loss": 2.0668, |
| "step": 7440 |
| }, |
| { |
| "epoch": 281.50943396226415, |
| "grad_norm": 1.046923519873644, |
| "learning_rate": 7.047868608427462e-05, |
| "loss": 2.082, |
| "step": 7460 |
| }, |
| { |
| "epoch": 282.2641509433962, |
| "grad_norm": 1.3029328700653047, |
| "learning_rate": 7.042952908160754e-05, |
| "loss": 2.0556, |
| "step": 7480 |
| }, |
| { |
| "epoch": 283.0188679245283, |
| "grad_norm": 1.227982067650406, |
| "learning_rate": 7.03802640645834e-05, |
| "loss": 2.0478, |
| "step": 7500 |
| }, |
| { |
| "epoch": 283.77358490566036, |
| "grad_norm": 1.1015625311453152, |
| "learning_rate": 7.033089122372919e-05, |
| "loss": 2.0773, |
| "step": 7520 |
| }, |
| { |
| "epoch": 284.52830188679246, |
| "grad_norm": 1.316103623119528, |
| "learning_rate": 7.028141074998891e-05, |
| "loss": 2.0756, |
| "step": 7540 |
| }, |
| { |
| "epoch": 285.2830188679245, |
| "grad_norm": 1.255257016262856, |
| "learning_rate": 7.023182283472277e-05, |
| "loss": 2.0866, |
| "step": 7560 |
| }, |
| { |
| "epoch": 286.0377358490566, |
| "grad_norm": 1.030736862070767, |
| "learning_rate": 7.018212766970658e-05, |
| "loss": 2.0723, |
| "step": 7580 |
| }, |
| { |
| "epoch": 286.79245283018867, |
| "grad_norm": 1.293105711429154, |
| "learning_rate": 7.013232544713086e-05, |
| "loss": 2.0759, |
| "step": 7600 |
| }, |
| { |
| "epoch": 287.54716981132077, |
| "grad_norm": 1.538845550854816, |
| "learning_rate": 7.008241635960018e-05, |
| "loss": 2.0238, |
| "step": 7620 |
| }, |
| { |
| "epoch": 288.3018867924528, |
| "grad_norm": 1.1376778839977162, |
| "learning_rate": 7.003240060013241e-05, |
| "loss": 2.0895, |
| "step": 7640 |
| }, |
| { |
| "epoch": 289.0566037735849, |
| "grad_norm": 1.1524198390846205, |
| "learning_rate": 6.998227836215794e-05, |
| "loss": 2.0712, |
| "step": 7660 |
| }, |
| { |
| "epoch": 289.811320754717, |
| "grad_norm": 1.1269455096899952, |
| "learning_rate": 6.9932049839519e-05, |
| "loss": 2.0785, |
| "step": 7680 |
| }, |
| { |
| "epoch": 290.5660377358491, |
| "grad_norm": 1.1947929898571277, |
| "learning_rate": 6.98817152264688e-05, |
| "loss": 2.0789, |
| "step": 7700 |
| }, |
| { |
| "epoch": 291.3207547169811, |
| "grad_norm": 1.05216691350341, |
| "learning_rate": 6.983127471767088e-05, |
| "loss": 2.0721, |
| "step": 7720 |
| }, |
| { |
| "epoch": 292.07547169811323, |
| "grad_norm": 1.0659553585546824, |
| "learning_rate": 6.978072850819832e-05, |
| "loss": 2.0897, |
| "step": 7740 |
| }, |
| { |
| "epoch": 292.8301886792453, |
| "grad_norm": 1.4999207869643305, |
| "learning_rate": 6.9730076793533e-05, |
| "loss": 2.0875, |
| "step": 7760 |
| }, |
| { |
| "epoch": 293.58490566037733, |
| "grad_norm": 1.051015475094539, |
| "learning_rate": 6.967931976956479e-05, |
| "loss": 2.0572, |
| "step": 7780 |
| }, |
| { |
| "epoch": 294.33962264150944, |
| "grad_norm": 1.4062884543226315, |
| "learning_rate": 6.962845763259084e-05, |
| "loss": 2.0783, |
| "step": 7800 |
| }, |
| { |
| "epoch": 295.0943396226415, |
| "grad_norm": 1.3344933104485628, |
| "learning_rate": 6.957749057931486e-05, |
| "loss": 2.0491, |
| "step": 7820 |
| }, |
| { |
| "epoch": 295.8490566037736, |
| "grad_norm": 1.2853456909301206, |
| "learning_rate": 6.952641880684623e-05, |
| "loss": 2.0589, |
| "step": 7840 |
| }, |
| { |
| "epoch": 296.60377358490564, |
| "grad_norm": 0.9567533052896401, |
| "learning_rate": 6.947524251269942e-05, |
| "loss": 2.0638, |
| "step": 7860 |
| }, |
| { |
| "epoch": 297.35849056603774, |
| "grad_norm": 1.0295871659614384, |
| "learning_rate": 6.942396189479305e-05, |
| "loss": 2.0452, |
| "step": 7880 |
| }, |
| { |
| "epoch": 298.1132075471698, |
| "grad_norm": 1.0385361931014787, |
| "learning_rate": 6.937257715144922e-05, |
| "loss": 2.0693, |
| "step": 7900 |
| }, |
| { |
| "epoch": 298.8679245283019, |
| "grad_norm": 1.0809377854877955, |
| "learning_rate": 6.932108848139274e-05, |
| "loss": 2.0657, |
| "step": 7920 |
| }, |
| { |
| "epoch": 299.62264150943395, |
| "grad_norm": 1.2262962690135735, |
| "learning_rate": 6.926949608375031e-05, |
| "loss": 2.0333, |
| "step": 7940 |
| }, |
| { |
| "epoch": 300.37735849056605, |
| "grad_norm": 1.2494078282001366, |
| "learning_rate": 6.921780015804983e-05, |
| "loss": 2.0611, |
| "step": 7960 |
| }, |
| { |
| "epoch": 301.1320754716981, |
| "grad_norm": 1.2191785732688871, |
| "learning_rate": 6.916600090421955e-05, |
| "loss": 2.0414, |
| "step": 7980 |
| }, |
| { |
| "epoch": 301.8867924528302, |
| "grad_norm": 1.3922552836876412, |
| "learning_rate": 6.911409852258734e-05, |
| "loss": 2.0344, |
| "step": 8000 |
| }, |
| { |
| "epoch": 302.64150943396226, |
| "grad_norm": 1.001774801425353, |
| "learning_rate": 6.906209321387992e-05, |
| "loss": 2.0745, |
| "step": 8020 |
| }, |
| { |
| "epoch": 303.39622641509436, |
| "grad_norm": 1.1104337640037032, |
| "learning_rate": 6.900998517922203e-05, |
| "loss": 2.0593, |
| "step": 8040 |
| }, |
| { |
| "epoch": 304.1509433962264, |
| "grad_norm": 1.825335695980726, |
| "learning_rate": 6.895777462013575e-05, |
| "loss": 2.0459, |
| "step": 8060 |
| }, |
| { |
| "epoch": 304.9056603773585, |
| "grad_norm": 1.157162721000543, |
| "learning_rate": 6.89054617385396e-05, |
| "loss": 2.0565, |
| "step": 8080 |
| }, |
| { |
| "epoch": 305.66037735849056, |
| "grad_norm": 1.0813317731646406, |
| "learning_rate": 6.885304673674785e-05, |
| "loss": 2.0647, |
| "step": 8100 |
| }, |
| { |
| "epoch": 306.41509433962267, |
| "grad_norm": 1.1711257270339308, |
| "learning_rate": 6.880052981746973e-05, |
| "loss": 2.0779, |
| "step": 8120 |
| }, |
| { |
| "epoch": 307.1698113207547, |
| "grad_norm": 1.063366248076951, |
| "learning_rate": 6.874791118380859e-05, |
| "loss": 2.0299, |
| "step": 8140 |
| }, |
| { |
| "epoch": 307.92452830188677, |
| "grad_norm": 1.4839153895893722, |
| "learning_rate": 6.869519103926117e-05, |
| "loss": 2.0689, |
| "step": 8160 |
| }, |
| { |
| "epoch": 308.6792452830189, |
| "grad_norm": 1.0599730190677705, |
| "learning_rate": 6.864236958771677e-05, |
| "loss": 2.0559, |
| "step": 8180 |
| }, |
| { |
| "epoch": 309.4339622641509, |
| "grad_norm": 1.1000743640073944, |
| "learning_rate": 6.85894470334565e-05, |
| "loss": 2.0814, |
| "step": 8200 |
| }, |
| { |
| "epoch": 310.188679245283, |
| "grad_norm": 1.1134888630426287, |
| "learning_rate": 6.853642358115248e-05, |
| "loss": 2.0619, |
| "step": 8220 |
| }, |
| { |
| "epoch": 310.9433962264151, |
| "grad_norm": 1.4109893718513755, |
| "learning_rate": 6.848329943586703e-05, |
| "loss": 2.0478, |
| "step": 8240 |
| }, |
| { |
| "epoch": 311.6981132075472, |
| "grad_norm": 1.4005508549478216, |
| "learning_rate": 6.843007480305188e-05, |
| "loss": 2.0451, |
| "step": 8260 |
| }, |
| { |
| "epoch": 312.45283018867923, |
| "grad_norm": 1.1506459796822934, |
| "learning_rate": 6.83767498885474e-05, |
| "loss": 2.0496, |
| "step": 8280 |
| }, |
| { |
| "epoch": 313.20754716981133, |
| "grad_norm": 1.1846013546521996, |
| "learning_rate": 6.832332489858181e-05, |
| "loss": 2.0503, |
| "step": 8300 |
| }, |
| { |
| "epoch": 313.9622641509434, |
| "grad_norm": 1.083146150872066, |
| "learning_rate": 6.826980003977029e-05, |
| "loss": 2.0411, |
| "step": 8320 |
| }, |
| { |
| "epoch": 314.7169811320755, |
| "grad_norm": 1.1083923007981826, |
| "learning_rate": 6.821617551911432e-05, |
| "loss": 2.059, |
| "step": 8340 |
| }, |
| { |
| "epoch": 315.47169811320754, |
| "grad_norm": 1.7089827022606041, |
| "learning_rate": 6.816245154400081e-05, |
| "loss": 2.0316, |
| "step": 8360 |
| }, |
| { |
| "epoch": 316.22641509433964, |
| "grad_norm": 1.2036464145657677, |
| "learning_rate": 6.810862832220125e-05, |
| "loss": 2.0383, |
| "step": 8380 |
| }, |
| { |
| "epoch": 316.9811320754717, |
| "grad_norm": 1.0678492328292477, |
| "learning_rate": 6.8054706061871e-05, |
| "loss": 2.0357, |
| "step": 8400 |
| }, |
| { |
| "epoch": 317.7358490566038, |
| "grad_norm": 1.1510123829327024, |
| "learning_rate": 6.800068497154838e-05, |
| "loss": 2.0509, |
| "step": 8420 |
| }, |
| { |
| "epoch": 318.49056603773585, |
| "grad_norm": 1.1744519756591179, |
| "learning_rate": 6.794656526015402e-05, |
| "loss": 2.0362, |
| "step": 8440 |
| }, |
| { |
| "epoch": 319.24528301886795, |
| "grad_norm": 1.0951767070535987, |
| "learning_rate": 6.78923471369899e-05, |
| "loss": 2.0261, |
| "step": 8460 |
| }, |
| { |
| "epoch": 320.0, |
| "grad_norm": 1.40278574496307, |
| "learning_rate": 6.783803081173856e-05, |
| "loss": 2.0041, |
| "step": 8480 |
| }, |
| { |
| "epoch": 320.75471698113205, |
| "grad_norm": 1.2731462205629138, |
| "learning_rate": 6.778361649446238e-05, |
| "loss": 2.0455, |
| "step": 8500 |
| }, |
| { |
| "epoch": 321.50943396226415, |
| "grad_norm": 1.1686588861352702, |
| "learning_rate": 6.772910439560273e-05, |
| "loss": 2.0328, |
| "step": 8520 |
| }, |
| { |
| "epoch": 322.2641509433962, |
| "grad_norm": 1.0989551544372271, |
| "learning_rate": 6.767449472597907e-05, |
| "loss": 2.0495, |
| "step": 8540 |
| }, |
| { |
| "epoch": 323.0188679245283, |
| "grad_norm": 1.3624805761549945, |
| "learning_rate": 6.761978769678828e-05, |
| "loss": 2.0447, |
| "step": 8560 |
| }, |
| { |
| "epoch": 323.77358490566036, |
| "grad_norm": 1.1444357160826135, |
| "learning_rate": 6.75649835196037e-05, |
| "loss": 2.0663, |
| "step": 8580 |
| }, |
| { |
| "epoch": 324.52830188679246, |
| "grad_norm": 1.1946574491976927, |
| "learning_rate": 6.75100824063744e-05, |
| "loss": 2.0483, |
| "step": 8600 |
| }, |
| { |
| "epoch": 325.2830188679245, |
| "grad_norm": 1.0504702569050626, |
| "learning_rate": 6.745508456942438e-05, |
| "loss": 1.9978, |
| "step": 8620 |
| }, |
| { |
| "epoch": 326.0377358490566, |
| "grad_norm": 0.9767612100068984, |
| "learning_rate": 6.739999022145167e-05, |
| "loss": 2.0382, |
| "step": 8640 |
| }, |
| { |
| "epoch": 326.79245283018867, |
| "grad_norm": 1.0291078738332238, |
| "learning_rate": 6.734479957552753e-05, |
| "loss": 2.0298, |
| "step": 8660 |
| }, |
| { |
| "epoch": 327.54716981132077, |
| "grad_norm": 1.244368475618607, |
| "learning_rate": 6.72895128450957e-05, |
| "loss": 2.005, |
| "step": 8680 |
| }, |
| { |
| "epoch": 328.3018867924528, |
| "grad_norm": 1.1290954094741668, |
| "learning_rate": 6.723413024397144e-05, |
| "loss": 2.0569, |
| "step": 8700 |
| }, |
| { |
| "epoch": 329.0566037735849, |
| "grad_norm": 1.0915375487825718, |
| "learning_rate": 6.717865198634082e-05, |
| "loss": 2.0447, |
| "step": 8720 |
| }, |
| { |
| "epoch": 329.811320754717, |
| "grad_norm": 1.1768398401350053, |
| "learning_rate": 6.71230782867599e-05, |
| "loss": 2.0217, |
| "step": 8740 |
| }, |
| { |
| "epoch": 330.5660377358491, |
| "grad_norm": 1.104835402612007, |
| "learning_rate": 6.706740936015375e-05, |
| "loss": 2.0386, |
| "step": 8760 |
| }, |
| { |
| "epoch": 331.3207547169811, |
| "grad_norm": 1.1248015036534322, |
| "learning_rate": 6.70116454218158e-05, |
| "loss": 2.0103, |
| "step": 8780 |
| }, |
| { |
| "epoch": 332.07547169811323, |
| "grad_norm": 1.2169922349555569, |
| "learning_rate": 6.69557866874069e-05, |
| "loss": 2.0241, |
| "step": 8800 |
| }, |
| { |
| "epoch": 332.8301886792453, |
| "grad_norm": 1.214613807170357, |
| "learning_rate": 6.689983337295448e-05, |
| "loss": 2.0188, |
| "step": 8820 |
| }, |
| { |
| "epoch": 333.58490566037733, |
| "grad_norm": 1.790201434963867, |
| "learning_rate": 6.684378569485181e-05, |
| "loss": 1.9779, |
| "step": 8840 |
| }, |
| { |
| "epoch": 334.33962264150944, |
| "grad_norm": 1.1294537346241684, |
| "learning_rate": 6.678764386985706e-05, |
| "loss": 2.0288, |
| "step": 8860 |
| }, |
| { |
| "epoch": 335.0943396226415, |
| "grad_norm": 1.023220014865738, |
| "learning_rate": 6.673140811509254e-05, |
| "loss": 2.0388, |
| "step": 8880 |
| }, |
| { |
| "epoch": 335.8490566037736, |
| "grad_norm": 1.184810167823652, |
| "learning_rate": 6.667507864804373e-05, |
| "loss": 2.0527, |
| "step": 8900 |
| }, |
| { |
| "epoch": 336.60377358490564, |
| "grad_norm": 1.4108637345041166, |
| "learning_rate": 6.661865568655867e-05, |
| "loss": 2.0521, |
| "step": 8920 |
| }, |
| { |
| "epoch": 337.35849056603774, |
| "grad_norm": 1.1099661578909887, |
| "learning_rate": 6.656213944884687e-05, |
| "loss": 2.0142, |
| "step": 8940 |
| }, |
| { |
| "epoch": 338.1132075471698, |
| "grad_norm": 1.2550646623744501, |
| "learning_rate": 6.650553015347861e-05, |
| "loss": 2.0234, |
| "step": 8960 |
| }, |
| { |
| "epoch": 338.8679245283019, |
| "grad_norm": 1.3226209109800835, |
| "learning_rate": 6.64488280193841e-05, |
| "loss": 2.0026, |
| "step": 8980 |
| }, |
| { |
| "epoch": 339.62264150943395, |
| "grad_norm": 1.2466974723773605, |
| "learning_rate": 6.639203326585253e-05, |
| "loss": 2.0505, |
| "step": 9000 |
| }, |
| { |
| "epoch": 340.37735849056605, |
| "grad_norm": 1.2259925868881607, |
| "learning_rate": 6.633514611253129e-05, |
| "loss": 1.989, |
| "step": 9020 |
| }, |
| { |
| "epoch": 341.1320754716981, |
| "grad_norm": 1.2616555953485367, |
| "learning_rate": 6.627816677942518e-05, |
| "loss": 2.0172, |
| "step": 9040 |
| }, |
| { |
| "epoch": 341.8867924528302, |
| "grad_norm": 1.0660372107925478, |
| "learning_rate": 6.622109548689542e-05, |
| "loss": 2.0235, |
| "step": 9060 |
| }, |
| { |
| "epoch": 342.64150943396226, |
| "grad_norm": 1.2995047263783295, |
| "learning_rate": 6.616393245565893e-05, |
| "loss": 2.0116, |
| "step": 9080 |
| }, |
| { |
| "epoch": 343.39622641509436, |
| "grad_norm": 1.2947984731633606, |
| "learning_rate": 6.610667790678738e-05, |
| "loss": 2.0241, |
| "step": 9100 |
| }, |
| { |
| "epoch": 344.1509433962264, |
| "grad_norm": 1.008247863003288, |
| "learning_rate": 6.60493320617064e-05, |
| "loss": 1.984, |
| "step": 9120 |
| }, |
| { |
| "epoch": 344.9056603773585, |
| "grad_norm": 1.5922178618355085, |
| "learning_rate": 6.599189514219469e-05, |
| "loss": 1.989, |
| "step": 9140 |
| }, |
| { |
| "epoch": 345.66037735849056, |
| "grad_norm": 1.0785537649724395, |
| "learning_rate": 6.593436737038316e-05, |
| "loss": 2.0135, |
| "step": 9160 |
| }, |
| { |
| "epoch": 346.41509433962267, |
| "grad_norm": 1.1766322003509095, |
| "learning_rate": 6.58767489687541e-05, |
| "loss": 2.0021, |
| "step": 9180 |
| }, |
| { |
| "epoch": 347.1698113207547, |
| "grad_norm": 1.3777596137615202, |
| "learning_rate": 6.581904016014026e-05, |
| "loss": 1.9988, |
| "step": 9200 |
| }, |
| { |
| "epoch": 347.92452830188677, |
| "grad_norm": 1.5517987219865874, |
| "learning_rate": 6.57612411677241e-05, |
| "loss": 2.0309, |
| "step": 9220 |
| }, |
| { |
| "epoch": 348.6792452830189, |
| "grad_norm": 1.0560275221648643, |
| "learning_rate": 6.570335221503679e-05, |
| "loss": 1.9923, |
| "step": 9240 |
| }, |
| { |
| "epoch": 349.4339622641509, |
| "grad_norm": 1.3504991405267055, |
| "learning_rate": 6.564537352595744e-05, |
| "loss": 1.9739, |
| "step": 9260 |
| }, |
| { |
| "epoch": 350.188679245283, |
| "grad_norm": 1.153039781830911, |
| "learning_rate": 6.558730532471219e-05, |
| "loss": 1.9803, |
| "step": 9280 |
| }, |
| { |
| "epoch": 350.9433962264151, |
| "grad_norm": 0.9434571532030971, |
| "learning_rate": 6.55291478358734e-05, |
| "loss": 1.9677, |
| "step": 9300 |
| }, |
| { |
| "epoch": 351.6981132075472, |
| "grad_norm": 1.8784899020425583, |
| "learning_rate": 6.547090128435869e-05, |
| "loss": 1.9988, |
| "step": 9320 |
| }, |
| { |
| "epoch": 352.45283018867923, |
| "grad_norm": 1.449139419473746, |
| "learning_rate": 6.541256589543013e-05, |
| "loss": 1.9974, |
| "step": 9340 |
| }, |
| { |
| "epoch": 353.20754716981133, |
| "grad_norm": 1.3936924715065266, |
| "learning_rate": 6.53541418946934e-05, |
| "loss": 2.016, |
| "step": 9360 |
| }, |
| { |
| "epoch": 353.9622641509434, |
| "grad_norm": 1.6114753123601104, |
| "learning_rate": 6.529562950809679e-05, |
| "loss": 2.0021, |
| "step": 9380 |
| }, |
| { |
| "epoch": 354.7169811320755, |
| "grad_norm": 1.109612991369577, |
| "learning_rate": 6.523702896193052e-05, |
| "loss": 1.9928, |
| "step": 9400 |
| }, |
| { |
| "epoch": 355.47169811320754, |
| "grad_norm": 1.2128714849575388, |
| "learning_rate": 6.517834048282572e-05, |
| "loss": 1.9908, |
| "step": 9420 |
| }, |
| { |
| "epoch": 356.22641509433964, |
| "grad_norm": 1.6910853942561526, |
| "learning_rate": 6.511956429775353e-05, |
| "loss": 1.998, |
| "step": 9440 |
| }, |
| { |
| "epoch": 356.9811320754717, |
| "grad_norm": 1.1430018991497974, |
| "learning_rate": 6.506070063402434e-05, |
| "loss": 1.9726, |
| "step": 9460 |
| }, |
| { |
| "epoch": 357.7358490566038, |
| "grad_norm": 1.1413458296675405, |
| "learning_rate": 6.500174971928684e-05, |
| "loss": 1.9972, |
| "step": 9480 |
| }, |
| { |
| "epoch": 358.49056603773585, |
| "grad_norm": 1.1439825840759497, |
| "learning_rate": 6.494271178152717e-05, |
| "loss": 1.9965, |
| "step": 9500 |
| }, |
| { |
| "epoch": 359.24528301886795, |
| "grad_norm": 1.1294761259383999, |
| "learning_rate": 6.488358704906799e-05, |
| "loss": 1.9651, |
| "step": 9520 |
| }, |
| { |
| "epoch": 360.0, |
| "grad_norm": 1.0691230669285636, |
| "learning_rate": 6.482437575056767e-05, |
| "loss": 1.9559, |
| "step": 9540 |
| }, |
| { |
| "epoch": 360.75471698113205, |
| "grad_norm": 1.1830173398938235, |
| "learning_rate": 6.476507811501933e-05, |
| "loss": 2.0035, |
| "step": 9560 |
| }, |
| { |
| "epoch": 361.50943396226415, |
| "grad_norm": 1.0463609194131098, |
| "learning_rate": 6.470569437175001e-05, |
| "loss": 2.0062, |
| "step": 9580 |
| }, |
| { |
| "epoch": 362.2641509433962, |
| "grad_norm": 1.0991234719266971, |
| "learning_rate": 6.464622475041972e-05, |
| "loss": 1.9775, |
| "step": 9600 |
| }, |
| { |
| "epoch": 363.0188679245283, |
| "grad_norm": 1.198408129328553, |
| "learning_rate": 6.458666948102068e-05, |
| "loss": 1.9684, |
| "step": 9620 |
| }, |
| { |
| "epoch": 363.77358490566036, |
| "grad_norm": 1.2518491777745682, |
| "learning_rate": 6.452702879387625e-05, |
| "loss": 2.0052, |
| "step": 9640 |
| }, |
| { |
| "epoch": 364.52830188679246, |
| "grad_norm": 1.1372899117638453, |
| "learning_rate": 6.44673029196402e-05, |
| "loss": 2.005, |
| "step": 9660 |
| }, |
| { |
| "epoch": 365.2830188679245, |
| "grad_norm": 1.1412736472546972, |
| "learning_rate": 6.44074920892957e-05, |
| "loss": 1.9545, |
| "step": 9680 |
| }, |
| { |
| "epoch": 366.0377358490566, |
| "grad_norm": 1.295474241313066, |
| "learning_rate": 6.434759653415454e-05, |
| "loss": 1.9943, |
| "step": 9700 |
| }, |
| { |
| "epoch": 366.79245283018867, |
| "grad_norm": 1.6454682745260736, |
| "learning_rate": 6.42876164858561e-05, |
| "loss": 1.9831, |
| "step": 9720 |
| }, |
| { |
| "epoch": 367.54716981132077, |
| "grad_norm": 1.3152256044573012, |
| "learning_rate": 6.42275521763666e-05, |
| "loss": 1.9898, |
| "step": 9740 |
| }, |
| { |
| "epoch": 368.3018867924528, |
| "grad_norm": 1.1398547695205932, |
| "learning_rate": 6.416740383797806e-05, |
| "loss": 2.0018, |
| "step": 9760 |
| }, |
| { |
| "epoch": 369.0566037735849, |
| "grad_norm": 1.560025618616493, |
| "learning_rate": 6.410717170330754e-05, |
| "loss": 1.9774, |
| "step": 9780 |
| }, |
| { |
| "epoch": 369.811320754717, |
| "grad_norm": 1.1084036628854508, |
| "learning_rate": 6.404685600529614e-05, |
| "loss": 1.9898, |
| "step": 9800 |
| }, |
| { |
| "epoch": 370.5660377358491, |
| "grad_norm": 1.064043317797065, |
| "learning_rate": 6.398645697720813e-05, |
| "loss": 1.9683, |
| "step": 9820 |
| }, |
| { |
| "epoch": 371.3207547169811, |
| "grad_norm": 1.0561133254372814, |
| "learning_rate": 6.392597485263005e-05, |
| "loss": 1.9892, |
| "step": 9840 |
| }, |
| { |
| "epoch": 372.07547169811323, |
| "grad_norm": 1.8498750890966262, |
| "learning_rate": 6.386540986546981e-05, |
| "loss": 2.0028, |
| "step": 9860 |
| }, |
| { |
| "epoch": 372.8301886792453, |
| "grad_norm": 1.1839290834136853, |
| "learning_rate": 6.38047622499558e-05, |
| "loss": 2.0067, |
| "step": 9880 |
| }, |
| { |
| "epoch": 373.58490566037733, |
| "grad_norm": 1.0860526304824587, |
| "learning_rate": 6.374403224063593e-05, |
| "loss": 1.9771, |
| "step": 9900 |
| }, |
| { |
| "epoch": 374.33962264150944, |
| "grad_norm": 1.1746289875773082, |
| "learning_rate": 6.368322007237679e-05, |
| "loss": 1.9693, |
| "step": 9920 |
| }, |
| { |
| "epoch": 375.0943396226415, |
| "grad_norm": 1.3082714258157306, |
| "learning_rate": 6.36223259803627e-05, |
| "loss": 1.9737, |
| "step": 9940 |
| }, |
| { |
| "epoch": 375.8490566037736, |
| "grad_norm": 1.1647865395498773, |
| "learning_rate": 6.356135020009478e-05, |
| "loss": 1.9619, |
| "step": 9960 |
| }, |
| { |
| "epoch": 376.60377358490564, |
| "grad_norm": 0.947059541183795, |
| "learning_rate": 6.350029296739012e-05, |
| "loss": 1.975, |
| "step": 9980 |
| }, |
| { |
| "epoch": 377.35849056603774, |
| "grad_norm": 1.2248902808010191, |
| "learning_rate": 6.343915451838081e-05, |
| "loss": 1.9628, |
| "step": 10000 |
| }, |
| { |
| "epoch": 378.1132075471698, |
| "grad_norm": 1.097611241891744, |
| "learning_rate": 6.337793508951301e-05, |
| "loss": 1.9775, |
| "step": 10020 |
| }, |
| { |
| "epoch": 378.8679245283019, |
| "grad_norm": 1.2529669087878597, |
| "learning_rate": 6.331663491754607e-05, |
| "loss": 1.9468, |
| "step": 10040 |
| }, |
| { |
| "epoch": 379.62264150943395, |
| "grad_norm": 1.1767271144174725, |
| "learning_rate": 6.325525423955162e-05, |
| "loss": 1.9413, |
| "step": 10060 |
| }, |
| { |
| "epoch": 380.37735849056605, |
| "grad_norm": 1.282222785156654, |
| "learning_rate": 6.319379329291262e-05, |
| "loss": 1.9655, |
| "step": 10080 |
| }, |
| { |
| "epoch": 381.1320754716981, |
| "grad_norm": 0.9819686841799513, |
| "learning_rate": 6.313225231532246e-05, |
| "loss": 1.9537, |
| "step": 10100 |
| }, |
| { |
| "epoch": 381.8867924528302, |
| "grad_norm": 1.206003307363446, |
| "learning_rate": 6.307063154478407e-05, |
| "loss": 1.9387, |
| "step": 10120 |
| }, |
| { |
| "epoch": 382.64150943396226, |
| "grad_norm": 1.236739142400694, |
| "learning_rate": 6.300893121960891e-05, |
| "loss": 1.9478, |
| "step": 10140 |
| }, |
| { |
| "epoch": 383.39622641509436, |
| "grad_norm": 1.0017771325975895, |
| "learning_rate": 6.294715157841618e-05, |
| "loss": 1.9714, |
| "step": 10160 |
| }, |
| { |
| "epoch": 384.1509433962264, |
| "grad_norm": 1.0637999951499557, |
| "learning_rate": 6.28852928601318e-05, |
| "loss": 1.9905, |
| "step": 10180 |
| }, |
| { |
| "epoch": 384.9056603773585, |
| "grad_norm": 1.0944082795368726, |
| "learning_rate": 6.282335530398746e-05, |
| "loss": 1.9586, |
| "step": 10200 |
| }, |
| { |
| "epoch": 385.66037735849056, |
| "grad_norm": 1.0420947581782276, |
| "learning_rate": 6.276133914951982e-05, |
| "loss": 2.0008, |
| "step": 10220 |
| }, |
| { |
| "epoch": 386.41509433962267, |
| "grad_norm": 1.2531335945397626, |
| "learning_rate": 6.26992446365695e-05, |
| "loss": 1.9718, |
| "step": 10240 |
| }, |
| { |
| "epoch": 387.1698113207547, |
| "grad_norm": 1.0272789455614961, |
| "learning_rate": 6.26370720052801e-05, |
| "loss": 1.9741, |
| "step": 10260 |
| }, |
| { |
| "epoch": 387.92452830188677, |
| "grad_norm": 1.1543574176007045, |
| "learning_rate": 6.25748214960974e-05, |
| "loss": 1.9508, |
| "step": 10280 |
| }, |
| { |
| "epoch": 388.6792452830189, |
| "grad_norm": 1.123008926585049, |
| "learning_rate": 6.251249334976835e-05, |
| "loss": 1.9238, |
| "step": 10300 |
| }, |
| { |
| "epoch": 389.4339622641509, |
| "grad_norm": 1.1351605673087415, |
| "learning_rate": 6.245008780734015e-05, |
| "loss": 1.9379, |
| "step": 10320 |
| }, |
| { |
| "epoch": 390.188679245283, |
| "grad_norm": 1.139914151072252, |
| "learning_rate": 6.238760511015928e-05, |
| "loss": 1.9863, |
| "step": 10340 |
| }, |
| { |
| "epoch": 390.9433962264151, |
| "grad_norm": 1.3069434137417522, |
| "learning_rate": 6.232504549987069e-05, |
| "loss": 1.9569, |
| "step": 10360 |
| }, |
| { |
| "epoch": 391.6981132075472, |
| "grad_norm": 1.7598014364780348, |
| "learning_rate": 6.22624092184167e-05, |
| "loss": 1.9389, |
| "step": 10380 |
| }, |
| { |
| "epoch": 392.45283018867923, |
| "grad_norm": 1.0862334208555093, |
| "learning_rate": 6.21996965080362e-05, |
| "loss": 1.9744, |
| "step": 10400 |
| }, |
| { |
| "epoch": 393.20754716981133, |
| "grad_norm": 1.1400427784758083, |
| "learning_rate": 6.213690761126365e-05, |
| "loss": 1.9563, |
| "step": 10420 |
| }, |
| { |
| "epoch": 393.9622641509434, |
| "grad_norm": 1.171092319320692, |
| "learning_rate": 6.207404277092816e-05, |
| "loss": 1.9268, |
| "step": 10440 |
| }, |
| { |
| "epoch": 394.7169811320755, |
| "grad_norm": 1.2187674621534166, |
| "learning_rate": 6.201110223015247e-05, |
| "loss": 1.9141, |
| "step": 10460 |
| }, |
| { |
| "epoch": 395.47169811320754, |
| "grad_norm": 1.1182747577783947, |
| "learning_rate": 6.19480862323522e-05, |
| "loss": 1.9498, |
| "step": 10480 |
| }, |
| { |
| "epoch": 396.22641509433964, |
| "grad_norm": 1.2189637302318261, |
| "learning_rate": 6.188499502123471e-05, |
| "loss": 1.9563, |
| "step": 10500 |
| }, |
| { |
| "epoch": 396.9811320754717, |
| "grad_norm": 1.0928304287739772, |
| "learning_rate": 6.18218288407983e-05, |
| "loss": 1.976, |
| "step": 10520 |
| }, |
| { |
| "epoch": 397.7358490566038, |
| "grad_norm": 1.0366879822409767, |
| "learning_rate": 6.17585879353311e-05, |
| "loss": 1.9804, |
| "step": 10540 |
| }, |
| { |
| "epoch": 398.49056603773585, |
| "grad_norm": 1.153371078643115, |
| "learning_rate": 6.169527254941035e-05, |
| "loss": 1.987, |
| "step": 10560 |
| }, |
| { |
| "epoch": 399.24528301886795, |
| "grad_norm": 1.2467206603942558, |
| "learning_rate": 6.163188292790129e-05, |
| "loss": 1.958, |
| "step": 10580 |
| }, |
| { |
| "epoch": 400.0, |
| "grad_norm": 1.222097823579558, |
| "learning_rate": 6.156841931595623e-05, |
| "loss": 1.9838, |
| "step": 10600 |
| }, |
| { |
| "epoch": 400.75471698113205, |
| "grad_norm": 1.1722193895632427, |
| "learning_rate": 6.150488195901367e-05, |
| "loss": 1.9496, |
| "step": 10620 |
| }, |
| { |
| "epoch": 401.50943396226415, |
| "grad_norm": 1.7976530024431303, |
| "learning_rate": 6.144127110279726e-05, |
| "loss": 1.9869, |
| "step": 10640 |
| }, |
| { |
| "epoch": 402.2641509433962, |
| "grad_norm": 1.1341428853515279, |
| "learning_rate": 6.137758699331498e-05, |
| "loss": 1.9235, |
| "step": 10660 |
| }, |
| { |
| "epoch": 403.0188679245283, |
| "grad_norm": 1.107959509965475, |
| "learning_rate": 6.131382987685803e-05, |
| "loss": 1.927, |
| "step": 10680 |
| }, |
| { |
| "epoch": 403.77358490566036, |
| "grad_norm": 1.8525780404729881, |
| "learning_rate": 6.125000000000001e-05, |
| "loss": 1.9487, |
| "step": 10700 |
| }, |
| { |
| "epoch": 404.52830188679246, |
| "grad_norm": 1.1448557605131082, |
| "learning_rate": 6.118609760959587e-05, |
| "loss": 1.9547, |
| "step": 10720 |
| }, |
| { |
| "epoch": 405.2830188679245, |
| "grad_norm": 1.1150883211805585, |
| "learning_rate": 6.112212295278103e-05, |
| "loss": 1.9487, |
| "step": 10740 |
| }, |
| { |
| "epoch": 406.0377358490566, |
| "grad_norm": 1.149956522288425, |
| "learning_rate": 6.105807627697039e-05, |
| "loss": 1.9614, |
| "step": 10760 |
| }, |
| { |
| "epoch": 406.79245283018867, |
| "grad_norm": 1.1217970508214505, |
| "learning_rate": 6.099395782985736e-05, |
| "loss": 1.9555, |
| "step": 10780 |
| }, |
| { |
| "epoch": 407.54716981132077, |
| "grad_norm": 1.261444831314206, |
| "learning_rate": 6.0929767859412914e-05, |
| "loss": 1.9527, |
| "step": 10800 |
| }, |
| { |
| "epoch": 408.3018867924528, |
| "grad_norm": 1.2610523391632782, |
| "learning_rate": 6.086550661388466e-05, |
| "loss": 1.9321, |
| "step": 10820 |
| }, |
| { |
| "epoch": 409.0566037735849, |
| "grad_norm": 1.1090357115444625, |
| "learning_rate": 6.080117434179586e-05, |
| "loss": 1.9211, |
| "step": 10840 |
| }, |
| { |
| "epoch": 409.811320754717, |
| "grad_norm": 0.9790706595618122, |
| "learning_rate": 6.0736771291944384e-05, |
| "loss": 1.9203, |
| "step": 10860 |
| }, |
| { |
| "epoch": 410.5660377358491, |
| "grad_norm": 1.2166651155014474, |
| "learning_rate": 6.067229771340195e-05, |
| "loss": 1.9323, |
| "step": 10880 |
| }, |
| { |
| "epoch": 411.3207547169811, |
| "grad_norm": 1.433284831152631, |
| "learning_rate": 6.0607753855512944e-05, |
| "loss": 1.9623, |
| "step": 10900 |
| }, |
| { |
| "epoch": 412.07547169811323, |
| "grad_norm": 1.5219003618009672, |
| "learning_rate": 6.054313996789358e-05, |
| "loss": 1.9198, |
| "step": 10920 |
| }, |
| { |
| "epoch": 412.8301886792453, |
| "grad_norm": 1.1498060228780786, |
| "learning_rate": 6.047845630043091e-05, |
| "loss": 1.9487, |
| "step": 10940 |
| }, |
| { |
| "epoch": 413.58490566037733, |
| "grad_norm": 1.1202482322074203, |
| "learning_rate": 6.041370310328184e-05, |
| "loss": 1.9067, |
| "step": 10960 |
| }, |
| { |
| "epoch": 414.33962264150944, |
| "grad_norm": 1.2430430079147141, |
| "learning_rate": 6.0348880626872184e-05, |
| "loss": 1.9382, |
| "step": 10980 |
| }, |
| { |
| "epoch": 415.0943396226415, |
| "grad_norm": 1.0869359699622836, |
| "learning_rate": 6.028398912189569e-05, |
| "loss": 1.9611, |
| "step": 11000 |
| }, |
| { |
| "epoch": 415.8490566037736, |
| "grad_norm": 1.2526066034095944, |
| "learning_rate": 6.0219028839313045e-05, |
| "loss": 1.9644, |
| "step": 11020 |
| }, |
| { |
| "epoch": 416.60377358490564, |
| "grad_norm": 1.3228258582837578, |
| "learning_rate": 6.015400003035096e-05, |
| "loss": 1.9401, |
| "step": 11040 |
| }, |
| { |
| "epoch": 417.35849056603774, |
| "grad_norm": 1.9762624742364299, |
| "learning_rate": 6.008890294650111e-05, |
| "loss": 1.9465, |
| "step": 11060 |
| }, |
| { |
| "epoch": 418.1132075471698, |
| "grad_norm": 1.1424506198639062, |
| "learning_rate": 6.0023737839519284e-05, |
| "loss": 1.9439, |
| "step": 11080 |
| }, |
| { |
| "epoch": 418.8679245283019, |
| "grad_norm": 1.2107658482065982, |
| "learning_rate": 5.995850496142429e-05, |
| "loss": 1.9342, |
| "step": 11100 |
| }, |
| { |
| "epoch": 419.62264150943395, |
| "grad_norm": 1.1945042871517195, |
| "learning_rate": 5.989320456449705e-05, |
| "loss": 1.9463, |
| "step": 11120 |
| }, |
| { |
| "epoch": 420.37735849056605, |
| "grad_norm": 1.4521278706471037, |
| "learning_rate": 5.9827836901279616e-05, |
| "loss": 1.9097, |
| "step": 11140 |
| }, |
| { |
| "epoch": 421.1320754716981, |
| "grad_norm": 1.3941560968372226, |
| "learning_rate": 5.97624022245742e-05, |
| "loss": 1.9332, |
| "step": 11160 |
| }, |
| { |
| "epoch": 421.8867924528302, |
| "grad_norm": 1.39870585850457, |
| "learning_rate": 5.969690078744211e-05, |
| "loss": 1.9458, |
| "step": 11180 |
| }, |
| { |
| "epoch": 422.64150943396226, |
| "grad_norm": 1.0274165784628992, |
| "learning_rate": 5.963133284320292e-05, |
| "loss": 1.9365, |
| "step": 11200 |
| }, |
| { |
| "epoch": 423.39622641509436, |
| "grad_norm": 1.1694905737535597, |
| "learning_rate": 5.956569864543338e-05, |
| "loss": 1.8966, |
| "step": 11220 |
| }, |
| { |
| "epoch": 424.1509433962264, |
| "grad_norm": 2.28522960541773, |
| "learning_rate": 5.9499998447966484e-05, |
| "loss": 1.9332, |
| "step": 11240 |
| }, |
| { |
| "epoch": 424.9056603773585, |
| "grad_norm": 1.1252246816652476, |
| "learning_rate": 5.943423250489044e-05, |
| "loss": 1.9308, |
| "step": 11260 |
| }, |
| { |
| "epoch": 425.66037735849056, |
| "grad_norm": 1.132211561056973, |
| "learning_rate": 5.9368401070547756e-05, |
| "loss": 1.9221, |
| "step": 11280 |
| }, |
| { |
| "epoch": 426.41509433962267, |
| "grad_norm": 1.0848974124812198, |
| "learning_rate": 5.93025043995342e-05, |
| "loss": 1.9374, |
| "step": 11300 |
| }, |
| { |
| "epoch": 427.1698113207547, |
| "grad_norm": 1.0363649309093041, |
| "learning_rate": 5.9236542746697845e-05, |
| "loss": 1.9461, |
| "step": 11320 |
| }, |
| { |
| "epoch": 427.92452830188677, |
| "grad_norm": 1.1928905874896651, |
| "learning_rate": 5.9170516367138065e-05, |
| "loss": 1.9378, |
| "step": 11340 |
| }, |
| { |
| "epoch": 428.6792452830189, |
| "grad_norm": 1.1106230737067035, |
| "learning_rate": 5.910442551620457e-05, |
| "loss": 1.942, |
| "step": 11360 |
| }, |
| { |
| "epoch": 429.4339622641509, |
| "grad_norm": 1.1576265708604865, |
| "learning_rate": 5.903827044949638e-05, |
| "loss": 1.9471, |
| "step": 11380 |
| }, |
| { |
| "epoch": 430.188679245283, |
| "grad_norm": 1.1174079944741127, |
| "learning_rate": 5.897205142286091e-05, |
| "loss": 1.8922, |
| "step": 11400 |
| }, |
| { |
| "epoch": 430.9433962264151, |
| "grad_norm": 1.3085632343404145, |
| "learning_rate": 5.890576869239289e-05, |
| "loss": 1.9458, |
| "step": 11420 |
| }, |
| { |
| "epoch": 431.6981132075472, |
| "grad_norm": 1.2863747903344196, |
| "learning_rate": 5.883942251443342e-05, |
| "loss": 1.9099, |
| "step": 11440 |
| }, |
| { |
| "epoch": 432.45283018867923, |
| "grad_norm": 1.0551943593908828, |
| "learning_rate": 5.877301314556899e-05, |
| "loss": 1.9141, |
| "step": 11460 |
| }, |
| { |
| "epoch": 433.20754716981133, |
| "grad_norm": 1.086077987508793, |
| "learning_rate": 5.870654084263047e-05, |
| "loss": 1.96, |
| "step": 11480 |
| }, |
| { |
| "epoch": 433.9622641509434, |
| "grad_norm": 1.3656558008500363, |
| "learning_rate": 5.864000586269215e-05, |
| "loss": 1.904, |
| "step": 11500 |
| }, |
| { |
| "epoch": 434.7169811320755, |
| "grad_norm": 1.4439265227942644, |
| "learning_rate": 5.8573408463070655e-05, |
| "loss": 1.9273, |
| "step": 11520 |
| }, |
| { |
| "epoch": 435.47169811320754, |
| "grad_norm": 1.3611886653187657, |
| "learning_rate": 5.850674890132405e-05, |
| "loss": 1.9034, |
| "step": 11540 |
| }, |
| { |
| "epoch": 436.22641509433964, |
| "grad_norm": 1.3616986059829845, |
| "learning_rate": 5.844002743525081e-05, |
| "loss": 1.9143, |
| "step": 11560 |
| }, |
| { |
| "epoch": 436.9811320754717, |
| "grad_norm": 1.1127209994732485, |
| "learning_rate": 5.8373244322888796e-05, |
| "loss": 1.9467, |
| "step": 11580 |
| }, |
| { |
| "epoch": 437.7358490566038, |
| "grad_norm": 1.2452581872873123, |
| "learning_rate": 5.83063998225143e-05, |
| "loss": 1.946, |
| "step": 11600 |
| }, |
| { |
| "epoch": 438.49056603773585, |
| "grad_norm": 1.169219637417814, |
| "learning_rate": 5.823949419264102e-05, |
| "loss": 1.9057, |
| "step": 11620 |
| }, |
| { |
| "epoch": 439.24528301886795, |
| "grad_norm": 1.3149994286787028, |
| "learning_rate": 5.817252769201905e-05, |
| "loss": 1.8922, |
| "step": 11640 |
| }, |
| { |
| "epoch": 440.0, |
| "grad_norm": 1.3111574851574335, |
| "learning_rate": 5.81055005796339e-05, |
| "loss": 1.9222, |
| "step": 11660 |
| }, |
| { |
| "epoch": 440.75471698113205, |
| "grad_norm": 1.187457856172297, |
| "learning_rate": 5.803841311470551e-05, |
| "loss": 1.9188, |
| "step": 11680 |
| }, |
| { |
| "epoch": 441.50943396226415, |
| "grad_norm": 1.057264779435906, |
| "learning_rate": 5.7971265556687206e-05, |
| "loss": 1.9185, |
| "step": 11700 |
| }, |
| { |
| "epoch": 442.2641509433962, |
| "grad_norm": 1.1403855029477634, |
| "learning_rate": 5.790405816526473e-05, |
| "loss": 1.9328, |
| "step": 11720 |
| }, |
| { |
| "epoch": 443.0188679245283, |
| "grad_norm": 1.2270148994812622, |
| "learning_rate": 5.78367912003552e-05, |
| "loss": 1.8952, |
| "step": 11740 |
| }, |
| { |
| "epoch": 443.77358490566036, |
| "grad_norm": 1.0385291337101263, |
| "learning_rate": 5.776946492210618e-05, |
| "loss": 1.9042, |
| "step": 11760 |
| }, |
| { |
| "epoch": 444.52830188679246, |
| "grad_norm": 1.2996045459665522, |
| "learning_rate": 5.770207959089455e-05, |
| "loss": 1.9373, |
| "step": 11780 |
| }, |
| { |
| "epoch": 445.2830188679245, |
| "grad_norm": 1.1405006769622614, |
| "learning_rate": 5.763463546732563e-05, |
| "loss": 1.9035, |
| "step": 11800 |
| }, |
| { |
| "epoch": 446.0377358490566, |
| "grad_norm": 1.2182586988416257, |
| "learning_rate": 5.756713281223206e-05, |
| "loss": 1.936, |
| "step": 11820 |
| }, |
| { |
| "epoch": 446.79245283018867, |
| "grad_norm": 1.097561953783009, |
| "learning_rate": 5.74995718866729e-05, |
| "loss": 1.9057, |
| "step": 11840 |
| }, |
| { |
| "epoch": 447.54716981132077, |
| "grad_norm": 1.0690078287157365, |
| "learning_rate": 5.743195295193255e-05, |
| "loss": 1.9074, |
| "step": 11860 |
| }, |
| { |
| "epoch": 448.3018867924528, |
| "grad_norm": 1.1127790113128593, |
| "learning_rate": 5.736427626951971e-05, |
| "loss": 1.9269, |
| "step": 11880 |
| }, |
| { |
| "epoch": 449.0566037735849, |
| "grad_norm": 1.0521548682001445, |
| "learning_rate": 5.729654210116646e-05, |
| "loss": 1.897, |
| "step": 11900 |
| }, |
| { |
| "epoch": 449.811320754717, |
| "grad_norm": 1.0831322365716964, |
| "learning_rate": 5.7228750708827196e-05, |
| "loss": 1.9019, |
| "step": 11920 |
| }, |
| { |
| "epoch": 450.5660377358491, |
| "grad_norm": 1.113425539515294, |
| "learning_rate": 5.71609023546776e-05, |
| "loss": 1.8995, |
| "step": 11940 |
| }, |
| { |
| "epoch": 451.3207547169811, |
| "grad_norm": 1.1378527380008467, |
| "learning_rate": 5.709299730111367e-05, |
| "loss": 1.9112, |
| "step": 11960 |
| }, |
| { |
| "epoch": 452.07547169811323, |
| "grad_norm": 1.2308344759482057, |
| "learning_rate": 5.702503581075065e-05, |
| "loss": 1.8869, |
| "step": 11980 |
| }, |
| { |
| "epoch": 452.8301886792453, |
| "grad_norm": 1.3869181367868268, |
| "learning_rate": 5.6957018146422106e-05, |
| "loss": 1.9092, |
| "step": 12000 |
| }, |
| { |
| "epoch": 453.58490566037733, |
| "grad_norm": 1.1702979518774306, |
| "learning_rate": 5.688894457117877e-05, |
| "loss": 1.8944, |
| "step": 12020 |
| }, |
| { |
| "epoch": 454.33962264150944, |
| "grad_norm": 1.2974690219427283, |
| "learning_rate": 5.6820815348287674e-05, |
| "loss": 1.8794, |
| "step": 12040 |
| }, |
| { |
| "epoch": 455.0943396226415, |
| "grad_norm": 1.3757370848375583, |
| "learning_rate": 5.675263074123103e-05, |
| "loss": 1.9208, |
| "step": 12060 |
| }, |
| { |
| "epoch": 455.8490566037736, |
| "grad_norm": 1.3314963474728592, |
| "learning_rate": 5.668439101370524e-05, |
| "loss": 1.8823, |
| "step": 12080 |
| }, |
| { |
| "epoch": 456.60377358490564, |
| "grad_norm": 1.1525239716029143, |
| "learning_rate": 5.6616096429619885e-05, |
| "loss": 1.8778, |
| "step": 12100 |
| }, |
| { |
| "epoch": 457.35849056603774, |
| "grad_norm": 1.1391429331630094, |
| "learning_rate": 5.6547747253096713e-05, |
| "loss": 1.8973, |
| "step": 12120 |
| }, |
| { |
| "epoch": 458.1132075471698, |
| "grad_norm": 1.2813875070982645, |
| "learning_rate": 5.647934374846856e-05, |
| "loss": 1.9037, |
| "step": 12140 |
| }, |
| { |
| "epoch": 458.8679245283019, |
| "grad_norm": 1.130130379386682, |
| "learning_rate": 5.641088618027841e-05, |
| "loss": 1.8946, |
| "step": 12160 |
| }, |
| { |
| "epoch": 459.62264150943395, |
| "grad_norm": 1.189098976296786, |
| "learning_rate": 5.6342374813278305e-05, |
| "loss": 1.9122, |
| "step": 12180 |
| }, |
| { |
| "epoch": 460.37735849056605, |
| "grad_norm": 1.18982288351709, |
| "learning_rate": 5.627380991242839e-05, |
| "loss": 1.8893, |
| "step": 12200 |
| }, |
| { |
| "epoch": 461.1320754716981, |
| "grad_norm": 1.3440462024222728, |
| "learning_rate": 5.6205191742895787e-05, |
| "loss": 1.8879, |
| "step": 12220 |
| }, |
| { |
| "epoch": 461.8867924528302, |
| "grad_norm": 1.0998628162432096, |
| "learning_rate": 5.613652057005367e-05, |
| "loss": 1.8911, |
| "step": 12240 |
| }, |
| { |
| "epoch": 462.64150943396226, |
| "grad_norm": 1.0660994627393063, |
| "learning_rate": 5.6067796659480196e-05, |
| "loss": 1.9055, |
| "step": 12260 |
| }, |
| { |
| "epoch": 463.39622641509436, |
| "grad_norm": 1.7426680752228556, |
| "learning_rate": 5.599902027695745e-05, |
| "loss": 1.897, |
| "step": 12280 |
| }, |
| { |
| "epoch": 464.1509433962264, |
| "grad_norm": 1.388841332022157, |
| "learning_rate": 5.593019168847049e-05, |
| "loss": 1.8812, |
| "step": 12300 |
| }, |
| { |
| "epoch": 464.9056603773585, |
| "grad_norm": 1.2274558384609464, |
| "learning_rate": 5.586131116020621e-05, |
| "loss": 1.8496, |
| "step": 12320 |
| }, |
| { |
| "epoch": 465.66037735849056, |
| "grad_norm": 1.1945002690405846, |
| "learning_rate": 5.5792378958552456e-05, |
| "loss": 1.9146, |
| "step": 12340 |
| }, |
| { |
| "epoch": 466.41509433962267, |
| "grad_norm": 1.1629769495886029, |
| "learning_rate": 5.5723395350096866e-05, |
| "loss": 1.8734, |
| "step": 12360 |
| }, |
| { |
| "epoch": 467.1698113207547, |
| "grad_norm": 1.1703423211235366, |
| "learning_rate": 5.565436060162589e-05, |
| "loss": 1.8882, |
| "step": 12380 |
| }, |
| { |
| "epoch": 467.92452830188677, |
| "grad_norm": 1.3904930914694782, |
| "learning_rate": 5.5585274980123765e-05, |
| "loss": 1.8794, |
| "step": 12400 |
| }, |
| { |
| "epoch": 468.6792452830189, |
| "grad_norm": 1.1043102032574945, |
| "learning_rate": 5.551613875277148e-05, |
| "loss": 1.888, |
| "step": 12420 |
| }, |
| { |
| "epoch": 469.4339622641509, |
| "grad_norm": 1.019172984960956, |
| "learning_rate": 5.5446952186945716e-05, |
| "loss": 1.8887, |
| "step": 12440 |
| }, |
| { |
| "epoch": 470.188679245283, |
| "grad_norm": 1.2815784609995193, |
| "learning_rate": 5.537771555021785e-05, |
| "loss": 1.9026, |
| "step": 12460 |
| }, |
| { |
| "epoch": 470.9433962264151, |
| "grad_norm": 1.0452909524777938, |
| "learning_rate": 5.53084291103529e-05, |
| "loss": 1.8688, |
| "step": 12480 |
| }, |
| { |
| "epoch": 471.6981132075472, |
| "grad_norm": 1.2824929707840547, |
| "learning_rate": 5.5239093135308484e-05, |
| "loss": 1.8568, |
| "step": 12500 |
| }, |
| { |
| "epoch": 472.45283018867923, |
| "grad_norm": 1.0473918662270072, |
| "learning_rate": 5.516970789323382e-05, |
| "loss": 1.8962, |
| "step": 12520 |
| }, |
| { |
| "epoch": 473.20754716981133, |
| "grad_norm": 1.1551860073406197, |
| "learning_rate": 5.5100273652468596e-05, |
| "loss": 1.9053, |
| "step": 12540 |
| }, |
| { |
| "epoch": 473.9622641509434, |
| "grad_norm": 1.2315884678620779, |
| "learning_rate": 5.50307906815421e-05, |
| "loss": 1.8802, |
| "step": 12560 |
| }, |
| { |
| "epoch": 474.7169811320755, |
| "grad_norm": 1.2036057101238689, |
| "learning_rate": 5.496125924917195e-05, |
| "loss": 1.8848, |
| "step": 12580 |
| }, |
| { |
| "epoch": 475.47169811320754, |
| "grad_norm": 1.1443042979106692, |
| "learning_rate": 5.4891679624263313e-05, |
| "loss": 1.8993, |
| "step": 12600 |
| }, |
| { |
| "epoch": 476.22641509433964, |
| "grad_norm": 1.1112985130684456, |
| "learning_rate": 5.482205207590763e-05, |
| "loss": 1.8997, |
| "step": 12620 |
| }, |
| { |
| "epoch": 476.9811320754717, |
| "grad_norm": 1.1198907315048803, |
| "learning_rate": 5.475237687338175e-05, |
| "loss": 1.9204, |
| "step": 12640 |
| }, |
| { |
| "epoch": 477.7358490566038, |
| "grad_norm": 1.0505243476691362, |
| "learning_rate": 5.468265428614679e-05, |
| "loss": 1.8824, |
| "step": 12660 |
| }, |
| { |
| "epoch": 478.49056603773585, |
| "grad_norm": 1.1618158349057395, |
| "learning_rate": 5.461288458384711e-05, |
| "loss": 1.8675, |
| "step": 12680 |
| }, |
| { |
| "epoch": 479.24528301886795, |
| "grad_norm": 1.310696647632245, |
| "learning_rate": 5.454306803630931e-05, |
| "loss": 1.8617, |
| "step": 12700 |
| }, |
| { |
| "epoch": 480.0, |
| "grad_norm": 1.2853008412145361, |
| "learning_rate": 5.447320491354114e-05, |
| "loss": 1.8798, |
| "step": 12720 |
| }, |
| { |
| "epoch": 480.75471698113205, |
| "grad_norm": 1.2035604713641803, |
| "learning_rate": 5.440329548573049e-05, |
| "loss": 1.8505, |
| "step": 12740 |
| }, |
| { |
| "epoch": 481.50943396226415, |
| "grad_norm": 1.301768301418178, |
| "learning_rate": 5.433334002324431e-05, |
| "loss": 1.8849, |
| "step": 12760 |
| }, |
| { |
| "epoch": 482.2641509433962, |
| "grad_norm": 1.0741158531319273, |
| "learning_rate": 5.426333879662761e-05, |
| "loss": 1.8362, |
| "step": 12780 |
| }, |
| { |
| "epoch": 483.0188679245283, |
| "grad_norm": 1.2118720683926874, |
| "learning_rate": 5.419329207660237e-05, |
| "loss": 1.8811, |
| "step": 12800 |
| }, |
| { |
| "epoch": 483.77358490566036, |
| "grad_norm": 1.295829194970654, |
| "learning_rate": 5.412320013406651e-05, |
| "loss": 1.8473, |
| "step": 12820 |
| }, |
| { |
| "epoch": 484.52830188679246, |
| "grad_norm": 1.2658203604478202, |
| "learning_rate": 5.405306324009282e-05, |
| "loss": 1.8728, |
| "step": 12840 |
| }, |
| { |
| "epoch": 485.2830188679245, |
| "grad_norm": 1.2195390339098875, |
| "learning_rate": 5.3982881665928015e-05, |
| "loss": 1.8704, |
| "step": 12860 |
| }, |
| { |
| "epoch": 486.0377358490566, |
| "grad_norm": 1.067227068131729, |
| "learning_rate": 5.391265568299149e-05, |
| "loss": 1.8619, |
| "step": 12880 |
| }, |
| { |
| "epoch": 486.79245283018867, |
| "grad_norm": 1.3306442274846357, |
| "learning_rate": 5.384238556287451e-05, |
| "loss": 1.8638, |
| "step": 12900 |
| }, |
| { |
| "epoch": 487.54716981132077, |
| "grad_norm": 1.2531810114251472, |
| "learning_rate": 5.377207157733893e-05, |
| "loss": 1.8839, |
| "step": 12920 |
| }, |
| { |
| "epoch": 488.3018867924528, |
| "grad_norm": 1.0879029191456078, |
| "learning_rate": 5.370171399831631e-05, |
| "loss": 1.866, |
| "step": 12940 |
| }, |
| { |
| "epoch": 489.0566037735849, |
| "grad_norm": 1.1769881515511749, |
| "learning_rate": 5.363131309790678e-05, |
| "loss": 1.8253, |
| "step": 12960 |
| }, |
| { |
| "epoch": 489.811320754717, |
| "grad_norm": 1.3614975573612427, |
| "learning_rate": 5.356086914837802e-05, |
| "loss": 1.8487, |
| "step": 12980 |
| }, |
| { |
| "epoch": 490.5660377358491, |
| "grad_norm": 1.5342718531352588, |
| "learning_rate": 5.349038242216419e-05, |
| "loss": 1.847, |
| "step": 13000 |
| }, |
| { |
| "epoch": 491.3207547169811, |
| "grad_norm": 1.1571547119310825, |
| "learning_rate": 5.341985319186489e-05, |
| "loss": 1.8822, |
| "step": 13020 |
| }, |
| { |
| "epoch": 492.07547169811323, |
| "grad_norm": 1.1739881074209173, |
| "learning_rate": 5.33492817302441e-05, |
| "loss": 1.8531, |
| "step": 13040 |
| }, |
| { |
| "epoch": 492.8301886792453, |
| "grad_norm": 1.1934573145337144, |
| "learning_rate": 5.3278668310229125e-05, |
| "loss": 1.8986, |
| "step": 13060 |
| }, |
| { |
| "epoch": 493.58490566037733, |
| "grad_norm": 2.343948986647593, |
| "learning_rate": 5.320801320490955e-05, |
| "loss": 1.8935, |
| "step": 13080 |
| }, |
| { |
| "epoch": 494.33962264150944, |
| "grad_norm": 1.0764970314512263, |
| "learning_rate": 5.3137316687536136e-05, |
| "loss": 1.854, |
| "step": 13100 |
| }, |
| { |
| "epoch": 495.0943396226415, |
| "grad_norm": 1.0129335749841757, |
| "learning_rate": 5.3066579031519824e-05, |
| "loss": 1.874, |
| "step": 13120 |
| }, |
| { |
| "epoch": 495.8490566037736, |
| "grad_norm": 1.3577114498479963, |
| "learning_rate": 5.299580051043069e-05, |
| "loss": 1.8534, |
| "step": 13140 |
| }, |
| { |
| "epoch": 496.60377358490564, |
| "grad_norm": 1.1849380554040083, |
| "learning_rate": 5.292498139799678e-05, |
| "loss": 1.8705, |
| "step": 13160 |
| }, |
| { |
| "epoch": 497.35849056603774, |
| "grad_norm": 1.1290155132472126, |
| "learning_rate": 5.2854121968103185e-05, |
| "loss": 1.8659, |
| "step": 13180 |
| }, |
| { |
| "epoch": 498.1132075471698, |
| "grad_norm": 1.3627686926052522, |
| "learning_rate": 5.278322249479088e-05, |
| "loss": 1.8686, |
| "step": 13200 |
| }, |
| { |
| "epoch": 498.8679245283019, |
| "grad_norm": 1.147585604555274, |
| "learning_rate": 5.271228325225573e-05, |
| "loss": 1.8301, |
| "step": 13220 |
| }, |
| { |
| "epoch": 499.62264150943395, |
| "grad_norm": 1.124456551859716, |
| "learning_rate": 5.264130451484736e-05, |
| "loss": 1.846, |
| "step": 13240 |
| }, |
| { |
| "epoch": 500.37735849056605, |
| "grad_norm": 1.1328557419125425, |
| "learning_rate": 5.257028655706819e-05, |
| "loss": 1.8489, |
| "step": 13260 |
| }, |
| { |
| "epoch": 501.1320754716981, |
| "grad_norm": 1.3248366741093285, |
| "learning_rate": 5.249922965357231e-05, |
| "loss": 1.847, |
| "step": 13280 |
| }, |
| { |
| "epoch": 501.8867924528302, |
| "grad_norm": 1.2987470821979115, |
| "learning_rate": 5.24281340791644e-05, |
| "loss": 1.8598, |
| "step": 13300 |
| }, |
| { |
| "epoch": 502.64150943396226, |
| "grad_norm": 1.2954015666799925, |
| "learning_rate": 5.235700010879869e-05, |
| "loss": 1.8144, |
| "step": 13320 |
| }, |
| { |
| "epoch": 503.39622641509436, |
| "grad_norm": 1.14457084913068, |
| "learning_rate": 5.228582801757796e-05, |
| "loss": 1.8666, |
| "step": 13340 |
| }, |
| { |
| "epoch": 504.1509433962264, |
| "grad_norm": 1.1877213110343792, |
| "learning_rate": 5.221461808075237e-05, |
| "loss": 1.8637, |
| "step": 13360 |
| }, |
| { |
| "epoch": 504.9056603773585, |
| "grad_norm": 1.0687096720310838, |
| "learning_rate": 5.214337057371846e-05, |
| "loss": 1.8639, |
| "step": 13380 |
| }, |
| { |
| "epoch": 505.66037735849056, |
| "grad_norm": 1.1296264305151373, |
| "learning_rate": 5.207208577201805e-05, |
| "loss": 1.8508, |
| "step": 13400 |
| }, |
| { |
| "epoch": 506.41509433962267, |
| "grad_norm": 1.4245030753661052, |
| "learning_rate": 5.200076395133721e-05, |
| "loss": 1.8328, |
| "step": 13420 |
| }, |
| { |
| "epoch": 507.1698113207547, |
| "grad_norm": 1.3736962730435212, |
| "learning_rate": 5.1929405387505185e-05, |
| "loss": 1.8402, |
| "step": 13440 |
| }, |
| { |
| "epoch": 507.92452830188677, |
| "grad_norm": 1.3874806329791736, |
| "learning_rate": 5.185801035649329e-05, |
| "loss": 1.8392, |
| "step": 13460 |
| }, |
| { |
| "epoch": 508.6792452830189, |
| "grad_norm": 1.2993168124302985, |
| "learning_rate": 5.1786579134413916e-05, |
| "loss": 1.8357, |
| "step": 13480 |
| }, |
| { |
| "epoch": 509.4339622641509, |
| "grad_norm": 1.1615849238599296, |
| "learning_rate": 5.171511199751936e-05, |
| "loss": 1.8602, |
| "step": 13500 |
| }, |
| { |
| "epoch": 510.188679245283, |
| "grad_norm": 1.313961870036688, |
| "learning_rate": 5.164360922220089e-05, |
| "loss": 1.8276, |
| "step": 13520 |
| }, |
| { |
| "epoch": 510.9433962264151, |
| "grad_norm": 1.240911570140835, |
| "learning_rate": 5.157207108498754e-05, |
| "loss": 1.83, |
| "step": 13540 |
| }, |
| { |
| "epoch": 511.6981132075472, |
| "grad_norm": 1.2739381058558579, |
| "learning_rate": 5.1500497862545134e-05, |
| "loss": 1.864, |
| "step": 13560 |
| }, |
| { |
| "epoch": 512.4528301886793, |
| "grad_norm": 1.3641387795362538, |
| "learning_rate": 5.142888983167516e-05, |
| "loss": 1.9016, |
| "step": 13580 |
| }, |
| { |
| "epoch": 513.2075471698113, |
| "grad_norm": 1.233949050539118, |
| "learning_rate": 5.135724726931374e-05, |
| "loss": 1.8224, |
| "step": 13600 |
| }, |
| { |
| "epoch": 513.9622641509434, |
| "grad_norm": 1.2764553522178392, |
| "learning_rate": 5.128557045253056e-05, |
| "loss": 1.8489, |
| "step": 13620 |
| }, |
| { |
| "epoch": 514.7169811320755, |
| "grad_norm": 1.0847794881407822, |
| "learning_rate": 5.121385965852773e-05, |
| "loss": 1.8433, |
| "step": 13640 |
| }, |
| { |
| "epoch": 515.4716981132076, |
| "grad_norm": 1.211639546404476, |
| "learning_rate": 5.114211516463883e-05, |
| "loss": 1.8592, |
| "step": 13660 |
| }, |
| { |
| "epoch": 516.2264150943396, |
| "grad_norm": 1.6499382505803508, |
| "learning_rate": 5.1070337248327704e-05, |
| "loss": 1.8491, |
| "step": 13680 |
| }, |
| { |
| "epoch": 516.9811320754717, |
| "grad_norm": 1.1415154218905448, |
| "learning_rate": 5.0998526187187506e-05, |
| "loss": 1.8263, |
| "step": 13700 |
| }, |
| { |
| "epoch": 517.7358490566038, |
| "grad_norm": 1.2931592721596668, |
| "learning_rate": 5.092668225893955e-05, |
| "loss": 1.8341, |
| "step": 13720 |
| }, |
| { |
| "epoch": 518.4905660377359, |
| "grad_norm": 1.1289936456910783, |
| "learning_rate": 5.0854805741432266e-05, |
| "loss": 1.8256, |
| "step": 13740 |
| }, |
| { |
| "epoch": 519.2452830188679, |
| "grad_norm": 1.1568681178648177, |
| "learning_rate": 5.078289691264009e-05, |
| "loss": 1.8268, |
| "step": 13760 |
| }, |
| { |
| "epoch": 520.0, |
| "grad_norm": 1.2075151796344337, |
| "learning_rate": 5.071095605066247e-05, |
| "loss": 1.8342, |
| "step": 13780 |
| }, |
| { |
| "epoch": 520.7547169811321, |
| "grad_norm": 1.41061431054736, |
| "learning_rate": 5.063898343372271e-05, |
| "loss": 1.8569, |
| "step": 13800 |
| }, |
| { |
| "epoch": 521.5094339622641, |
| "grad_norm": 1.7141184097601845, |
| "learning_rate": 5.0566979340166915e-05, |
| "loss": 1.8447, |
| "step": 13820 |
| }, |
| { |
| "epoch": 522.2641509433962, |
| "grad_norm": 1.1912730571129804, |
| "learning_rate": 5.0494944048462946e-05, |
| "loss": 1.8632, |
| "step": 13840 |
| }, |
| { |
| "epoch": 523.0188679245283, |
| "grad_norm": 1.2784159482259496, |
| "learning_rate": 5.042287783719931e-05, |
| "loss": 1.8293, |
| "step": 13860 |
| }, |
| { |
| "epoch": 523.7735849056604, |
| "grad_norm": 1.1444265949319492, |
| "learning_rate": 5.0350780985084076e-05, |
| "loss": 1.8423, |
| "step": 13880 |
| }, |
| { |
| "epoch": 524.5283018867924, |
| "grad_norm": 1.1366776283872817, |
| "learning_rate": 5.027865377094383e-05, |
| "loss": 1.8284, |
| "step": 13900 |
| }, |
| { |
| "epoch": 525.2830188679245, |
| "grad_norm": 1.2870871198292675, |
| "learning_rate": 5.020649647372258e-05, |
| "loss": 1.8313, |
| "step": 13920 |
| }, |
| { |
| "epoch": 526.0377358490566, |
| "grad_norm": 1.8138208437079086, |
| "learning_rate": 5.013430937248066e-05, |
| "loss": 1.8382, |
| "step": 13940 |
| }, |
| { |
| "epoch": 526.7924528301887, |
| "grad_norm": 1.319578877513452, |
| "learning_rate": 5.00620927463937e-05, |
| "loss": 1.8343, |
| "step": 13960 |
| }, |
| { |
| "epoch": 527.5471698113207, |
| "grad_norm": 1.2254503656584737, |
| "learning_rate": 4.998984687475148e-05, |
| "loss": 1.8439, |
| "step": 13980 |
| }, |
| { |
| "epoch": 528.3018867924528, |
| "grad_norm": 1.0900572753736815, |
| "learning_rate": 4.9917572036956896e-05, |
| "loss": 1.8339, |
| "step": 14000 |
| }, |
| { |
| "epoch": 529.0566037735849, |
| "grad_norm": 1.3672093328811397, |
| "learning_rate": 4.984526851252489e-05, |
| "loss": 1.8269, |
| "step": 14020 |
| }, |
| { |
| "epoch": 529.811320754717, |
| "grad_norm": 1.0474703180578433, |
| "learning_rate": 4.97729365810813e-05, |
| "loss": 1.8278, |
| "step": 14040 |
| }, |
| { |
| "epoch": 530.566037735849, |
| "grad_norm": 1.302303693187968, |
| "learning_rate": 4.9700576522361875e-05, |
| "loss": 1.8406, |
| "step": 14060 |
| }, |
| { |
| "epoch": 531.3207547169811, |
| "grad_norm": 1.3351319231004828, |
| "learning_rate": 4.96281886162111e-05, |
| "loss": 1.805, |
| "step": 14080 |
| }, |
| { |
| "epoch": 532.0754716981132, |
| "grad_norm": 1.7423062973900807, |
| "learning_rate": 4.955577314258118e-05, |
| "loss": 1.8021, |
| "step": 14100 |
| }, |
| { |
| "epoch": 532.8301886792453, |
| "grad_norm": 1.0851524592672839, |
| "learning_rate": 4.9483330381530944e-05, |
| "loss": 1.8376, |
| "step": 14120 |
| }, |
| { |
| "epoch": 533.5849056603773, |
| "grad_norm": 1.110982412101906, |
| "learning_rate": 4.941086061322473e-05, |
| "loss": 1.8468, |
| "step": 14140 |
| }, |
| { |
| "epoch": 534.3396226415094, |
| "grad_norm": 1.1481042439437046, |
| "learning_rate": 4.933836411793133e-05, |
| "loss": 1.8131, |
| "step": 14160 |
| }, |
| { |
| "epoch": 535.0943396226415, |
| "grad_norm": 1.1504371756112235, |
| "learning_rate": 4.926584117602288e-05, |
| "loss": 1.8081, |
| "step": 14180 |
| }, |
| { |
| "epoch": 535.8490566037735, |
| "grad_norm": 1.1403864383961178, |
| "learning_rate": 4.919329206797387e-05, |
| "loss": 1.823, |
| "step": 14200 |
| }, |
| { |
| "epoch": 536.6037735849056, |
| "grad_norm": 1.2962348904995422, |
| "learning_rate": 4.912071707435988e-05, |
| "loss": 1.8187, |
| "step": 14220 |
| }, |
| { |
| "epoch": 537.3584905660377, |
| "grad_norm": 1.1885752096952027, |
| "learning_rate": 4.904811647585668e-05, |
| "loss": 1.8256, |
| "step": 14240 |
| }, |
| { |
| "epoch": 538.1132075471698, |
| "grad_norm": 1.064497747677543, |
| "learning_rate": 4.897549055323902e-05, |
| "loss": 1.8, |
| "step": 14260 |
| }, |
| { |
| "epoch": 538.8679245283018, |
| "grad_norm": 1.4469124816185257, |
| "learning_rate": 4.8902839587379614e-05, |
| "loss": 1.8365, |
| "step": 14280 |
| }, |
| { |
| "epoch": 539.622641509434, |
| "grad_norm": 1.0326597719869466, |
| "learning_rate": 4.8830163859248014e-05, |
| "loss": 1.812, |
| "step": 14300 |
| }, |
| { |
| "epoch": 540.377358490566, |
| "grad_norm": 1.261127094091647, |
| "learning_rate": 4.875746364990955e-05, |
| "loss": 1.7936, |
| "step": 14320 |
| }, |
| { |
| "epoch": 541.1320754716982, |
| "grad_norm": 1.6850662159573848, |
| "learning_rate": 4.8684739240524185e-05, |
| "loss": 1.8039, |
| "step": 14340 |
| }, |
| { |
| "epoch": 541.8867924528302, |
| "grad_norm": 1.1719859164333604, |
| "learning_rate": 4.861199091234556e-05, |
| "loss": 1.7995, |
| "step": 14360 |
| }, |
| { |
| "epoch": 542.6415094339623, |
| "grad_norm": 1.1168812884827573, |
| "learning_rate": 4.853921894671973e-05, |
| "loss": 1.804, |
| "step": 14380 |
| }, |
| { |
| "epoch": 543.3962264150944, |
| "grad_norm": 1.5041434010962127, |
| "learning_rate": 4.846642362508422e-05, |
| "loss": 1.8042, |
| "step": 14400 |
| }, |
| { |
| "epoch": 544.1509433962265, |
| "grad_norm": 1.2922119772360392, |
| "learning_rate": 4.8393605228966854e-05, |
| "loss": 1.8176, |
| "step": 14420 |
| }, |
| { |
| "epoch": 544.9056603773585, |
| "grad_norm": 1.316092813395267, |
| "learning_rate": 4.832076403998472e-05, |
| "loss": 1.8324, |
| "step": 14440 |
| }, |
| { |
| "epoch": 545.6603773584906, |
| "grad_norm": 1.148925533679318, |
| "learning_rate": 4.8247900339843045e-05, |
| "loss": 1.8249, |
| "step": 14460 |
| }, |
| { |
| "epoch": 546.4150943396227, |
| "grad_norm": 1.3351586320323485, |
| "learning_rate": 4.817501441033409e-05, |
| "loss": 1.8023, |
| "step": 14480 |
| }, |
| { |
| "epoch": 547.1698113207547, |
| "grad_norm": 1.4554583529380825, |
| "learning_rate": 4.810210653333613e-05, |
| "loss": 1.782, |
| "step": 14500 |
| }, |
| { |
| "epoch": 547.9245283018868, |
| "grad_norm": 1.2418737812043639, |
| "learning_rate": 4.802917699081225e-05, |
| "loss": 1.7981, |
| "step": 14520 |
| }, |
| { |
| "epoch": 548.6792452830189, |
| "grad_norm": 1.1837142285238051, |
| "learning_rate": 4.795622606480942e-05, |
| "loss": 1.7982, |
| "step": 14540 |
| }, |
| { |
| "epoch": 549.433962264151, |
| "grad_norm": 1.2674115880751322, |
| "learning_rate": 4.788325403745724e-05, |
| "loss": 1.8055, |
| "step": 14560 |
| }, |
| { |
| "epoch": 550.188679245283, |
| "grad_norm": 2.02523705877845, |
| "learning_rate": 4.7810261190966944e-05, |
| "loss": 1.7905, |
| "step": 14580 |
| }, |
| { |
| "epoch": 550.9433962264151, |
| "grad_norm": 1.3660297273644537, |
| "learning_rate": 4.773724780763023e-05, |
| "loss": 1.8267, |
| "step": 14600 |
| }, |
| { |
| "epoch": 551.6981132075472, |
| "grad_norm": 1.1728070148137189, |
| "learning_rate": 4.766421416981833e-05, |
| "loss": 1.7862, |
| "step": 14620 |
| }, |
| { |
| "epoch": 552.4528301886793, |
| "grad_norm": 1.148521109395332, |
| "learning_rate": 4.759116055998069e-05, |
| "loss": 1.7842, |
| "step": 14640 |
| }, |
| { |
| "epoch": 553.2075471698113, |
| "grad_norm": 1.2578627421373816, |
| "learning_rate": 4.7518087260644065e-05, |
| "loss": 1.8105, |
| "step": 14660 |
| }, |
| { |
| "epoch": 553.9622641509434, |
| "grad_norm": 1.2736902452272465, |
| "learning_rate": 4.744499455441133e-05, |
| "loss": 1.7931, |
| "step": 14680 |
| }, |
| { |
| "epoch": 554.7169811320755, |
| "grad_norm": 1.0794014181765008, |
| "learning_rate": 4.737188272396044e-05, |
| "loss": 1.8043, |
| "step": 14700 |
| }, |
| { |
| "epoch": 555.4716981132076, |
| "grad_norm": 1.3894129104855453, |
| "learning_rate": 4.729875205204327e-05, |
| "loss": 1.8301, |
| "step": 14720 |
| }, |
| { |
| "epoch": 556.2264150943396, |
| "grad_norm": 1.147340224849857, |
| "learning_rate": 4.722560282148459e-05, |
| "loss": 1.8178, |
| "step": 14740 |
| }, |
| { |
| "epoch": 556.9811320754717, |
| "grad_norm": 1.3948879461559769, |
| "learning_rate": 4.7152435315180975e-05, |
| "loss": 1.7648, |
| "step": 14760 |
| }, |
| { |
| "epoch": 557.7358490566038, |
| "grad_norm": 1.3694680696221502, |
| "learning_rate": 4.7079249816099584e-05, |
| "loss": 1.8104, |
| "step": 14780 |
| }, |
| { |
| "epoch": 558.4905660377359, |
| "grad_norm": 1.4147919843537753, |
| "learning_rate": 4.700604660727726e-05, |
| "loss": 1.7721, |
| "step": 14800 |
| }, |
| { |
| "epoch": 559.2452830188679, |
| "grad_norm": 1.2297666792262925, |
| "learning_rate": 4.6932825971819285e-05, |
| "loss": 1.7923, |
| "step": 14820 |
| }, |
| { |
| "epoch": 560.0, |
| "grad_norm": 1.1416590332464547, |
| "learning_rate": 4.6859588192898365e-05, |
| "loss": 1.7709, |
| "step": 14840 |
| }, |
| { |
| "epoch": 560.7547169811321, |
| "grad_norm": 1.2633394473980435, |
| "learning_rate": 4.6786333553753454e-05, |
| "loss": 1.8265, |
| "step": 14860 |
| }, |
| { |
| "epoch": 561.5094339622641, |
| "grad_norm": 1.73410063706433, |
| "learning_rate": 4.671306233768877e-05, |
| "loss": 1.7935, |
| "step": 14880 |
| }, |
| { |
| "epoch": 562.2641509433962, |
| "grad_norm": 1.909552398589606, |
| "learning_rate": 4.663977482807263e-05, |
| "loss": 1.7928, |
| "step": 14900 |
| }, |
| { |
| "epoch": 563.0188679245283, |
| "grad_norm": 2.3340344731557505, |
| "learning_rate": 4.656647130833632e-05, |
| "loss": 1.8083, |
| "step": 14920 |
| }, |
| { |
| "epoch": 563.7735849056604, |
| "grad_norm": 1.5856106264075287, |
| "learning_rate": 4.64931520619731e-05, |
| "loss": 1.8345, |
| "step": 14940 |
| }, |
| { |
| "epoch": 564.5283018867924, |
| "grad_norm": 1.4125116448786768, |
| "learning_rate": 4.6419817372537015e-05, |
| "loss": 1.7764, |
| "step": 14960 |
| }, |
| { |
| "epoch": 565.2830188679245, |
| "grad_norm": 1.1720058705654566, |
| "learning_rate": 4.634646752364185e-05, |
| "loss": 1.7917, |
| "step": 14980 |
| }, |
| { |
| "epoch": 566.0377358490566, |
| "grad_norm": 1.1615325214837866, |
| "learning_rate": 4.627310279896001e-05, |
| "loss": 1.7916, |
| "step": 15000 |
| }, |
| { |
| "epoch": 566.7924528301887, |
| "grad_norm": 1.3392839325444756, |
| "learning_rate": 4.619972348222143e-05, |
| "loss": 1.7803, |
| "step": 15020 |
| }, |
| { |
| "epoch": 567.5471698113207, |
| "grad_norm": 1.3665016393198224, |
| "learning_rate": 4.6126329857212486e-05, |
| "loss": 1.7822, |
| "step": 15040 |
| }, |
| { |
| "epoch": 568.3018867924528, |
| "grad_norm": 1.6085820618369988, |
| "learning_rate": 4.605292220777489e-05, |
| "loss": 1.7889, |
| "step": 15060 |
| }, |
| { |
| "epoch": 569.0566037735849, |
| "grad_norm": 1.4230286645037085, |
| "learning_rate": 4.5979500817804594e-05, |
| "loss": 1.805, |
| "step": 15080 |
| }, |
| { |
| "epoch": 569.811320754717, |
| "grad_norm": 1.1596468566263143, |
| "learning_rate": 4.590606597125065e-05, |
| "loss": 1.7892, |
| "step": 15100 |
| }, |
| { |
| "epoch": 570.566037735849, |
| "grad_norm": 1.4539519726534167, |
| "learning_rate": 4.583261795211423e-05, |
| "loss": 1.7831, |
| "step": 15120 |
| }, |
| { |
| "epoch": 571.3207547169811, |
| "grad_norm": 1.2521318013943803, |
| "learning_rate": 4.575915704444736e-05, |
| "loss": 1.8018, |
| "step": 15140 |
| }, |
| { |
| "epoch": 572.0754716981132, |
| "grad_norm": 1.1819685518944387, |
| "learning_rate": 4.5685683532352e-05, |
| "loss": 1.7866, |
| "step": 15160 |
| }, |
| { |
| "epoch": 572.8301886792453, |
| "grad_norm": 1.1876204585927221, |
| "learning_rate": 4.5612197699978766e-05, |
| "loss": 1.7833, |
| "step": 15180 |
| }, |
| { |
| "epoch": 573.5849056603773, |
| "grad_norm": 1.1556357684763976, |
| "learning_rate": 4.5538699831526006e-05, |
| "loss": 1.8024, |
| "step": 15200 |
| }, |
| { |
| "epoch": 574.3396226415094, |
| "grad_norm": 1.3326980140111142, |
| "learning_rate": 4.5465190211238544e-05, |
| "loss": 1.7829, |
| "step": 15220 |
| }, |
| { |
| "epoch": 575.0943396226415, |
| "grad_norm": 1.308268798679134, |
| "learning_rate": 4.539166912340671e-05, |
| "loss": 1.7766, |
| "step": 15240 |
| }, |
| { |
| "epoch": 575.8490566037735, |
| "grad_norm": 1.1564791044184874, |
| "learning_rate": 4.531813685236516e-05, |
| "loss": 1.8021, |
| "step": 15260 |
| }, |
| { |
| "epoch": 576.6037735849056, |
| "grad_norm": 1.4187942127459952, |
| "learning_rate": 4.524459368249179e-05, |
| "loss": 1.7523, |
| "step": 15280 |
| }, |
| { |
| "epoch": 577.3584905660377, |
| "grad_norm": 1.1994628151621998, |
| "learning_rate": 4.5171039898206644e-05, |
| "loss": 1.7845, |
| "step": 15300 |
| }, |
| { |
| "epoch": 578.1132075471698, |
| "grad_norm": 1.172216325696233, |
| "learning_rate": 4.509747578397086e-05, |
| "loss": 1.7591, |
| "step": 15320 |
| }, |
| { |
| "epoch": 578.8679245283018, |
| "grad_norm": 1.1667988074546227, |
| "learning_rate": 4.5023901624285465e-05, |
| "loss": 1.7955, |
| "step": 15340 |
| }, |
| { |
| "epoch": 579.622641509434, |
| "grad_norm": 1.31427332849911, |
| "learning_rate": 4.495031770369038e-05, |
| "loss": 1.7605, |
| "step": 15360 |
| }, |
| { |
| "epoch": 580.377358490566, |
| "grad_norm": 1.2050607756000014, |
| "learning_rate": 4.487672430676325e-05, |
| "loss": 1.7673, |
| "step": 15380 |
| }, |
| { |
| "epoch": 581.1320754716982, |
| "grad_norm": 1.2087614153318165, |
| "learning_rate": 4.480312171811838e-05, |
| "loss": 1.7876, |
| "step": 15400 |
| }, |
| { |
| "epoch": 581.8867924528302, |
| "grad_norm": 1.3000620466205515, |
| "learning_rate": 4.472951022240562e-05, |
| "loss": 1.7611, |
| "step": 15420 |
| }, |
| { |
| "epoch": 582.6415094339623, |
| "grad_norm": 1.7966112906689369, |
| "learning_rate": 4.4655890104309254e-05, |
| "loss": 1.7702, |
| "step": 15440 |
| }, |
| { |
| "epoch": 583.3962264150944, |
| "grad_norm": 1.198242649687164, |
| "learning_rate": 4.458226164854697e-05, |
| "loss": 1.7942, |
| "step": 15460 |
| }, |
| { |
| "epoch": 584.1509433962265, |
| "grad_norm": 1.6859720478683236, |
| "learning_rate": 4.450862513986861e-05, |
| "loss": 1.758, |
| "step": 15480 |
| }, |
| { |
| "epoch": 584.9056603773585, |
| "grad_norm": 1.1440767805984655, |
| "learning_rate": 4.443498086305525e-05, |
| "loss": 1.7647, |
| "step": 15500 |
| }, |
| { |
| "epoch": 585.6603773584906, |
| "grad_norm": 1.2426581026511485, |
| "learning_rate": 4.436132910291792e-05, |
| "loss": 1.7468, |
| "step": 15520 |
| }, |
| { |
| "epoch": 586.4150943396227, |
| "grad_norm": 1.5652742956982049, |
| "learning_rate": 4.4287670144296675e-05, |
| "loss": 1.7733, |
| "step": 15540 |
| }, |
| { |
| "epoch": 587.1698113207547, |
| "grad_norm": 1.150105537080449, |
| "learning_rate": 4.421400427205934e-05, |
| "loss": 1.7878, |
| "step": 15560 |
| }, |
| { |
| "epoch": 587.9245283018868, |
| "grad_norm": 1.297179193085273, |
| "learning_rate": 4.4140331771100516e-05, |
| "loss": 1.7558, |
| "step": 15580 |
| }, |
| { |
| "epoch": 588.6792452830189, |
| "grad_norm": 1.4354989531166704, |
| "learning_rate": 4.406665292634046e-05, |
| "loss": 1.7652, |
| "step": 15600 |
| }, |
| { |
| "epoch": 589.433962264151, |
| "grad_norm": 1.3544454831633896, |
| "learning_rate": 4.399296802272388e-05, |
| "loss": 1.7695, |
| "step": 15620 |
| }, |
| { |
| "epoch": 590.188679245283, |
| "grad_norm": 1.356710977975809, |
| "learning_rate": 4.3919277345219033e-05, |
| "loss": 1.7317, |
| "step": 15640 |
| }, |
| { |
| "epoch": 590.9433962264151, |
| "grad_norm": 1.3504644293745585, |
| "learning_rate": 4.3845581178816394e-05, |
| "loss": 1.7784, |
| "step": 15660 |
| }, |
| { |
| "epoch": 591.6981132075472, |
| "grad_norm": 1.2934057468915228, |
| "learning_rate": 4.377187980852775e-05, |
| "loss": 1.7655, |
| "step": 15680 |
| }, |
| { |
| "epoch": 592.4528301886793, |
| "grad_norm": 2.7284471186236976, |
| "learning_rate": 4.369817351938495e-05, |
| "loss": 1.7617, |
| "step": 15700 |
| }, |
| { |
| "epoch": 593.2075471698113, |
| "grad_norm": 1.4587946653999224, |
| "learning_rate": 4.3624462596438926e-05, |
| "loss": 1.7675, |
| "step": 15720 |
| }, |
| { |
| "epoch": 593.9622641509434, |
| "grad_norm": 1.824543804524391, |
| "learning_rate": 4.3550747324758475e-05, |
| "loss": 1.7835, |
| "step": 15740 |
| }, |
| { |
| "epoch": 594.7169811320755, |
| "grad_norm": 1.1558960324762337, |
| "learning_rate": 4.3477027989429267e-05, |
| "loss": 1.7848, |
| "step": 15760 |
| }, |
| { |
| "epoch": 595.4716981132076, |
| "grad_norm": 1.3618125278208344, |
| "learning_rate": 4.340330487555261e-05, |
| "loss": 1.7717, |
| "step": 15780 |
| }, |
| { |
| "epoch": 596.2264150943396, |
| "grad_norm": 1.8336334887122832, |
| "learning_rate": 4.332957826824451e-05, |
| "loss": 1.7753, |
| "step": 15800 |
| }, |
| { |
| "epoch": 596.9811320754717, |
| "grad_norm": 1.6035556059617442, |
| "learning_rate": 4.325584845263445e-05, |
| "loss": 1.7507, |
| "step": 15820 |
| }, |
| { |
| "epoch": 597.7358490566038, |
| "grad_norm": 1.1021262642715972, |
| "learning_rate": 4.318211571386428e-05, |
| "loss": 1.7683, |
| "step": 15840 |
| }, |
| { |
| "epoch": 598.4905660377359, |
| "grad_norm": 1.3112589542500708, |
| "learning_rate": 4.310838033708722e-05, |
| "loss": 1.753, |
| "step": 15860 |
| }, |
| { |
| "epoch": 599.2452830188679, |
| "grad_norm": 1.5257318148219035, |
| "learning_rate": 4.303464260746667e-05, |
| "loss": 1.7446, |
| "step": 15880 |
| }, |
| { |
| "epoch": 600.0, |
| "grad_norm": 1.2648959489882874, |
| "learning_rate": 4.296090281017511e-05, |
| "loss": 1.7513, |
| "step": 15900 |
| }, |
| { |
| "epoch": 600.7547169811321, |
| "grad_norm": 1.4414622706601208, |
| "learning_rate": 4.2887161230393034e-05, |
| "loss": 1.7421, |
| "step": 15920 |
| }, |
| { |
| "epoch": 601.5094339622641, |
| "grad_norm": 1.60216933395765, |
| "learning_rate": 4.281341815330784e-05, |
| "loss": 1.7335, |
| "step": 15940 |
| }, |
| { |
| "epoch": 602.2641509433962, |
| "grad_norm": 1.438261210769706, |
| "learning_rate": 4.273967386411267e-05, |
| "loss": 1.7676, |
| "step": 15960 |
| }, |
| { |
| "epoch": 603.0188679245283, |
| "grad_norm": 1.61121062509495, |
| "learning_rate": 4.26659286480054e-05, |
| "loss": 1.7767, |
| "step": 15980 |
| }, |
| { |
| "epoch": 603.7735849056604, |
| "grad_norm": 1.3368219249794455, |
| "learning_rate": 4.2592182790187495e-05, |
| "loss": 1.7615, |
| "step": 16000 |
| }, |
| { |
| "epoch": 604.5283018867924, |
| "grad_norm": 1.1964899050496502, |
| "learning_rate": 4.251843657586285e-05, |
| "loss": 1.7909, |
| "step": 16020 |
| }, |
| { |
| "epoch": 605.2830188679245, |
| "grad_norm": 1.1409598499641234, |
| "learning_rate": 4.244469029023682e-05, |
| "loss": 1.7806, |
| "step": 16040 |
| }, |
| { |
| "epoch": 606.0377358490566, |
| "grad_norm": 1.0775618341358217, |
| "learning_rate": 4.237094421851494e-05, |
| "loss": 1.7696, |
| "step": 16060 |
| }, |
| { |
| "epoch": 606.7924528301887, |
| "grad_norm": 1.201425866436519, |
| "learning_rate": 4.2297198645901986e-05, |
| "loss": 1.7424, |
| "step": 16080 |
| }, |
| { |
| "epoch": 607.5471698113207, |
| "grad_norm": 1.29163631265219, |
| "learning_rate": 4.222345385760079e-05, |
| "loss": 1.749, |
| "step": 16100 |
| }, |
| { |
| "epoch": 608.3018867924528, |
| "grad_norm": 1.4158324908813191, |
| "learning_rate": 4.214971013881114e-05, |
| "loss": 1.7594, |
| "step": 16120 |
| }, |
| { |
| "epoch": 609.0566037735849, |
| "grad_norm": 1.2390733211978042, |
| "learning_rate": 4.2075967774728675e-05, |
| "loss": 1.7707, |
| "step": 16140 |
| }, |
| { |
| "epoch": 609.811320754717, |
| "grad_norm": 1.0960663109570459, |
| "learning_rate": 4.200222705054385e-05, |
| "loss": 1.7633, |
| "step": 16160 |
| }, |
| { |
| "epoch": 610.566037735849, |
| "grad_norm": 1.167381366879647, |
| "learning_rate": 4.1928488251440704e-05, |
| "loss": 1.7735, |
| "step": 16180 |
| }, |
| { |
| "epoch": 611.3207547169811, |
| "grad_norm": 1.468960912277373, |
| "learning_rate": 4.185475166259588e-05, |
| "loss": 1.7222, |
| "step": 16200 |
| }, |
| { |
| "epoch": 612.0754716981132, |
| "grad_norm": 1.2572603668608606, |
| "learning_rate": 4.178101756917746e-05, |
| "loss": 1.7477, |
| "step": 16220 |
| }, |
| { |
| "epoch": 612.8301886792453, |
| "grad_norm": 1.2661070355556836, |
| "learning_rate": 4.170728625634387e-05, |
| "loss": 1.7437, |
| "step": 16240 |
| }, |
| { |
| "epoch": 613.5849056603773, |
| "grad_norm": 1.6793862205908143, |
| "learning_rate": 4.16335580092428e-05, |
| "loss": 1.7518, |
| "step": 16260 |
| }, |
| { |
| "epoch": 614.3396226415094, |
| "grad_norm": 1.3347192318840417, |
| "learning_rate": 4.155983311301006e-05, |
| "loss": 1.7275, |
| "step": 16280 |
| }, |
| { |
| "epoch": 615.0943396226415, |
| "grad_norm": 1.146186653201129, |
| "learning_rate": 4.148611185276852e-05, |
| "loss": 1.7505, |
| "step": 16300 |
| }, |
| { |
| "epoch": 615.8490566037735, |
| "grad_norm": 1.2853858560898548, |
| "learning_rate": 4.1412394513626976e-05, |
| "loss": 1.7345, |
| "step": 16320 |
| }, |
| { |
| "epoch": 616.6037735849056, |
| "grad_norm": 1.3689931241044506, |
| "learning_rate": 4.1338681380679055e-05, |
| "loss": 1.7372, |
| "step": 16340 |
| }, |
| { |
| "epoch": 617.3584905660377, |
| "grad_norm": 1.2520152979412003, |
| "learning_rate": 4.126497273900214e-05, |
| "loss": 1.7749, |
| "step": 16360 |
| }, |
| { |
| "epoch": 618.1132075471698, |
| "grad_norm": 4.4664529214017685, |
| "learning_rate": 4.119126887365623e-05, |
| "loss": 1.7291, |
| "step": 16380 |
| }, |
| { |
| "epoch": 618.8679245283018, |
| "grad_norm": 1.4871942552231863, |
| "learning_rate": 4.111757006968283e-05, |
| "loss": 1.729, |
| "step": 16400 |
| }, |
| { |
| "epoch": 619.622641509434, |
| "grad_norm": 1.7327021169643824, |
| "learning_rate": 4.104387661210391e-05, |
| "loss": 1.7906, |
| "step": 16420 |
| }, |
| { |
| "epoch": 620.377358490566, |
| "grad_norm": 1.3011566548368803, |
| "learning_rate": 4.0970188785920764e-05, |
| "loss": 1.7498, |
| "step": 16440 |
| }, |
| { |
| "epoch": 621.1320754716982, |
| "grad_norm": 1.503913785893422, |
| "learning_rate": 4.0896506876112856e-05, |
| "loss": 1.7333, |
| "step": 16460 |
| }, |
| { |
| "epoch": 621.8867924528302, |
| "grad_norm": 1.2396874135815048, |
| "learning_rate": 4.082283116763683e-05, |
| "loss": 1.7474, |
| "step": 16480 |
| }, |
| { |
| "epoch": 622.6415094339623, |
| "grad_norm": 1.3186465498196096, |
| "learning_rate": 4.07491619454253e-05, |
| "loss": 1.7641, |
| "step": 16500 |
| }, |
| { |
| "epoch": 623.3962264150944, |
| "grad_norm": 1.2224446651472063, |
| "learning_rate": 4.067549949438583e-05, |
| "loss": 1.7596, |
| "step": 16520 |
| }, |
| { |
| "epoch": 624.1509433962265, |
| "grad_norm": 1.299102298479128, |
| "learning_rate": 4.060184409939977e-05, |
| "loss": 1.7399, |
| "step": 16540 |
| }, |
| { |
| "epoch": 624.9056603773585, |
| "grad_norm": 1.2080109960062584, |
| "learning_rate": 4.052819604532121e-05, |
| "loss": 1.7545, |
| "step": 16560 |
| }, |
| { |
| "epoch": 625.6603773584906, |
| "grad_norm": 1.1330156099339754, |
| "learning_rate": 4.04545556169758e-05, |
| "loss": 1.7514, |
| "step": 16580 |
| }, |
| { |
| "epoch": 626.4150943396227, |
| "grad_norm": 1.877556318395021, |
| "learning_rate": 4.038092309915976e-05, |
| "loss": 1.7495, |
| "step": 16600 |
| }, |
| { |
| "epoch": 627.1698113207547, |
| "grad_norm": 1.3430468095941768, |
| "learning_rate": 4.0307298776638696e-05, |
| "loss": 1.7387, |
| "step": 16620 |
| }, |
| { |
| "epoch": 627.9245283018868, |
| "grad_norm": 1.3456306138048115, |
| "learning_rate": 4.023368293414651e-05, |
| "loss": 1.7586, |
| "step": 16640 |
| }, |
| { |
| "epoch": 628.6792452830189, |
| "grad_norm": 1.2925035537026515, |
| "learning_rate": 4.016007585638428e-05, |
| "loss": 1.7222, |
| "step": 16660 |
| }, |
| { |
| "epoch": 629.433962264151, |
| "grad_norm": 1.5060755357936446, |
| "learning_rate": 4.0086477828019247e-05, |
| "loss": 1.734, |
| "step": 16680 |
| }, |
| { |
| "epoch": 630.188679245283, |
| "grad_norm": 1.2358138916528858, |
| "learning_rate": 4.001288913368361e-05, |
| "loss": 1.7585, |
| "step": 16700 |
| }, |
| { |
| "epoch": 630.9433962264151, |
| "grad_norm": 1.0536359575721053, |
| "learning_rate": 3.9939310057973496e-05, |
| "loss": 1.699, |
| "step": 16720 |
| }, |
| { |
| "epoch": 631.6981132075472, |
| "grad_norm": 1.3396521000709494, |
| "learning_rate": 3.986574088544782e-05, |
| "loss": 1.745, |
| "step": 16740 |
| }, |
| { |
| "epoch": 632.4528301886793, |
| "grad_norm": 1.1966711285530698, |
| "learning_rate": 3.979218190062718e-05, |
| "loss": 1.7049, |
| "step": 16760 |
| }, |
| { |
| "epoch": 633.2075471698113, |
| "grad_norm": 1.3511753835544016, |
| "learning_rate": 3.971863338799283e-05, |
| "loss": 1.7319, |
| "step": 16780 |
| }, |
| { |
| "epoch": 633.9622641509434, |
| "grad_norm": 1.2759632464750865, |
| "learning_rate": 3.964509563198547e-05, |
| "loss": 1.7431, |
| "step": 16800 |
| }, |
| { |
| "epoch": 634.7169811320755, |
| "grad_norm": 1.5118791481962728, |
| "learning_rate": 3.957156891700422e-05, |
| "loss": 1.7549, |
| "step": 16820 |
| }, |
| { |
| "epoch": 635.4716981132076, |
| "grad_norm": 1.358305138616916, |
| "learning_rate": 3.949805352740549e-05, |
| "loss": 1.7146, |
| "step": 16840 |
| }, |
| { |
| "epoch": 636.2264150943396, |
| "grad_norm": 1.2468444876323985, |
| "learning_rate": 3.9424549747501916e-05, |
| "loss": 1.6839, |
| "step": 16860 |
| }, |
| { |
| "epoch": 636.9811320754717, |
| "grad_norm": 1.6815476229074826, |
| "learning_rate": 3.9351057861561194e-05, |
| "loss": 1.7381, |
| "step": 16880 |
| }, |
| { |
| "epoch": 637.7358490566038, |
| "grad_norm": 1.3183944234813532, |
| "learning_rate": 3.927757815380507e-05, |
| "loss": 1.737, |
| "step": 16900 |
| }, |
| { |
| "epoch": 638.4905660377359, |
| "grad_norm": 1.2664716657296504, |
| "learning_rate": 3.920411090840813e-05, |
| "loss": 1.7552, |
| "step": 16920 |
| }, |
| { |
| "epoch": 639.2452830188679, |
| "grad_norm": 1.4316626122660758, |
| "learning_rate": 3.9130656409496826e-05, |
| "loss": 1.7035, |
| "step": 16940 |
| }, |
| { |
| "epoch": 640.0, |
| "grad_norm": 1.624465349724497, |
| "learning_rate": 3.90572149411483e-05, |
| "loss": 1.7349, |
| "step": 16960 |
| }, |
| { |
| "epoch": 640.7547169811321, |
| "grad_norm": 1.3525138710560463, |
| "learning_rate": 3.8983786787389264e-05, |
| "loss": 1.7196, |
| "step": 16980 |
| }, |
| { |
| "epoch": 641.5094339622641, |
| "grad_norm": 1.1968289253916946, |
| "learning_rate": 3.891037223219497e-05, |
| "loss": 1.7288, |
| "step": 17000 |
| }, |
| { |
| "epoch": 642.2641509433962, |
| "grad_norm": 1.3150467173282183, |
| "learning_rate": 3.883697155948808e-05, |
| "loss": 1.7478, |
| "step": 17020 |
| }, |
| { |
| "epoch": 643.0188679245283, |
| "grad_norm": 1.3494514082635618, |
| "learning_rate": 3.876358505313754e-05, |
| "loss": 1.7208, |
| "step": 17040 |
| }, |
| { |
| "epoch": 643.7735849056604, |
| "grad_norm": 1.5328078930199742, |
| "learning_rate": 3.869021299695754e-05, |
| "loss": 1.747, |
| "step": 17060 |
| }, |
| { |
| "epoch": 644.5283018867924, |
| "grad_norm": 1.2945392233470665, |
| "learning_rate": 3.8616855674706354e-05, |
| "loss": 1.7225, |
| "step": 17080 |
| }, |
| { |
| "epoch": 645.2830188679245, |
| "grad_norm": 1.2582163265054458, |
| "learning_rate": 3.854351337008532e-05, |
| "loss": 1.7428, |
| "step": 17100 |
| }, |
| { |
| "epoch": 646.0377358490566, |
| "grad_norm": 1.1370724946903576, |
| "learning_rate": 3.847018636673765e-05, |
| "loss": 1.704, |
| "step": 17120 |
| }, |
| { |
| "epoch": 646.7924528301887, |
| "grad_norm": 1.2161474947016768, |
| "learning_rate": 3.839687494824741e-05, |
| "loss": 1.7129, |
| "step": 17140 |
| }, |
| { |
| "epoch": 647.5471698113207, |
| "grad_norm": 1.1033819366614397, |
| "learning_rate": 3.832357939813837e-05, |
| "loss": 1.695, |
| "step": 17160 |
| }, |
| { |
| "epoch": 648.3018867924528, |
| "grad_norm": 1.2993665260901381, |
| "learning_rate": 3.825029999987296e-05, |
| "loss": 1.7022, |
| "step": 17180 |
| }, |
| { |
| "epoch": 649.0566037735849, |
| "grad_norm": 1.2577276608492982, |
| "learning_rate": 3.8177037036851115e-05, |
| "loss": 1.7029, |
| "step": 17200 |
| }, |
| { |
| "epoch": 649.811320754717, |
| "grad_norm": 1.2459092691964395, |
| "learning_rate": 3.810379079240922e-05, |
| "loss": 1.7139, |
| "step": 17220 |
| }, |
| { |
| "epoch": 650.566037735849, |
| "grad_norm": 1.3152629296897698, |
| "learning_rate": 3.8030561549819015e-05, |
| "loss": 1.7088, |
| "step": 17240 |
| }, |
| { |
| "epoch": 651.3207547169811, |
| "grad_norm": 1.2367123181404969, |
| "learning_rate": 3.795734959228645e-05, |
| "loss": 1.6936, |
| "step": 17260 |
| }, |
| { |
| "epoch": 652.0754716981132, |
| "grad_norm": 1.1338754969305556, |
| "learning_rate": 3.7884155202950696e-05, |
| "loss": 1.7151, |
| "step": 17280 |
| }, |
| { |
| "epoch": 652.8301886792453, |
| "grad_norm": 1.2942728726977033, |
| "learning_rate": 3.781097866488291e-05, |
| "loss": 1.712, |
| "step": 17300 |
| }, |
| { |
| "epoch": 653.5849056603773, |
| "grad_norm": 1.39400718208209, |
| "learning_rate": 3.773782026108526e-05, |
| "loss": 1.7181, |
| "step": 17320 |
| }, |
| { |
| "epoch": 654.3396226415094, |
| "grad_norm": 1.3198924641523746, |
| "learning_rate": 3.766468027448973e-05, |
| "loss": 1.6913, |
| "step": 17340 |
| }, |
| { |
| "epoch": 655.0943396226415, |
| "grad_norm": 1.1991934682117795, |
| "learning_rate": 3.759155898795714e-05, |
| "loss": 1.7093, |
| "step": 17360 |
| }, |
| { |
| "epoch": 655.8490566037735, |
| "grad_norm": 1.239259370659102, |
| "learning_rate": 3.751845668427593e-05, |
| "loss": 1.7009, |
| "step": 17380 |
| }, |
| { |
| "epoch": 656.6037735849056, |
| "grad_norm": 1.2833857218204128, |
| "learning_rate": 3.7445373646161176e-05, |
| "loss": 1.7005, |
| "step": 17400 |
| }, |
| { |
| "epoch": 657.3584905660377, |
| "grad_norm": 1.454767822481044, |
| "learning_rate": 3.737231015625341e-05, |
| "loss": 1.6906, |
| "step": 17420 |
| }, |
| { |
| "epoch": 658.1132075471698, |
| "grad_norm": 1.4542141511941185, |
| "learning_rate": 3.729926649711759e-05, |
| "loss": 1.7058, |
| "step": 17440 |
| }, |
| { |
| "epoch": 658.8679245283018, |
| "grad_norm": 1.3091035418860133, |
| "learning_rate": 3.722624295124197e-05, |
| "loss": 1.6885, |
| "step": 17460 |
| }, |
| { |
| "epoch": 659.622641509434, |
| "grad_norm": 1.2943161972236163, |
| "learning_rate": 3.7153239801037014e-05, |
| "loss": 1.714, |
| "step": 17480 |
| }, |
| { |
| "epoch": 660.377358490566, |
| "grad_norm": 1.3377320776810098, |
| "learning_rate": 3.708025732883431e-05, |
| "loss": 1.684, |
| "step": 17500 |
| }, |
| { |
| "epoch": 661.1320754716982, |
| "grad_norm": 1.2629223675934866, |
| "learning_rate": 3.700729581688547e-05, |
| "loss": 1.699, |
| "step": 17520 |
| }, |
| { |
| "epoch": 661.8867924528302, |
| "grad_norm": 1.2451499003174673, |
| "learning_rate": 3.693435554736107e-05, |
| "loss": 1.6818, |
| "step": 17540 |
| }, |
| { |
| "epoch": 662.6415094339623, |
| "grad_norm": 1.5331175213775703, |
| "learning_rate": 3.6861436802349504e-05, |
| "loss": 1.7177, |
| "step": 17560 |
| }, |
| { |
| "epoch": 663.3962264150944, |
| "grad_norm": 1.4360430543768725, |
| "learning_rate": 3.6788539863855925e-05, |
| "loss": 1.7119, |
| "step": 17580 |
| }, |
| { |
| "epoch": 664.1509433962265, |
| "grad_norm": 1.3816247903457854, |
| "learning_rate": 3.671566501380116e-05, |
| "loss": 1.7148, |
| "step": 17600 |
| }, |
| { |
| "epoch": 664.9056603773585, |
| "grad_norm": 1.2778334686031196, |
| "learning_rate": 3.6642812534020636e-05, |
| "loss": 1.6935, |
| "step": 17620 |
| }, |
| { |
| "epoch": 665.6603773584906, |
| "grad_norm": 1.552301737650962, |
| "learning_rate": 3.656998270626322e-05, |
| "loss": 1.6917, |
| "step": 17640 |
| }, |
| { |
| "epoch": 666.4150943396227, |
| "grad_norm": 1.1626344688263202, |
| "learning_rate": 3.649717581219022e-05, |
| "loss": 1.6869, |
| "step": 17660 |
| }, |
| { |
| "epoch": 667.1698113207547, |
| "grad_norm": 1.2478591651994395, |
| "learning_rate": 3.642439213337418e-05, |
| "loss": 1.6964, |
| "step": 17680 |
| }, |
| { |
| "epoch": 667.9245283018868, |
| "grad_norm": 1.1665269494870496, |
| "learning_rate": 3.635163195129796e-05, |
| "loss": 1.706, |
| "step": 17700 |
| }, |
| { |
| "epoch": 668.6792452830189, |
| "grad_norm": 1.2417440240279074, |
| "learning_rate": 3.627889554735346e-05, |
| "loss": 1.6607, |
| "step": 17720 |
| }, |
| { |
| "epoch": 669.433962264151, |
| "grad_norm": 1.4243990985436537, |
| "learning_rate": 3.620618320284067e-05, |
| "loss": 1.6874, |
| "step": 17740 |
| }, |
| { |
| "epoch": 670.188679245283, |
| "grad_norm": 1.4914544739718891, |
| "learning_rate": 3.613349519896652e-05, |
| "loss": 1.6908, |
| "step": 17760 |
| }, |
| { |
| "epoch": 670.9433962264151, |
| "grad_norm": 1.3300772606283862, |
| "learning_rate": 3.606083181684381e-05, |
| "loss": 1.688, |
| "step": 17780 |
| }, |
| { |
| "epoch": 671.6981132075472, |
| "grad_norm": 1.2461357748180606, |
| "learning_rate": 3.5988193337490116e-05, |
| "loss": 1.6547, |
| "step": 17800 |
| }, |
| { |
| "epoch": 672.4528301886793, |
| "grad_norm": 1.370151145210619, |
| "learning_rate": 3.5915580041826694e-05, |
| "loss": 1.7193, |
| "step": 17820 |
| }, |
| { |
| "epoch": 673.2075471698113, |
| "grad_norm": 1.2763659906881193, |
| "learning_rate": 3.5842992210677416e-05, |
| "loss": 1.6808, |
| "step": 17840 |
| }, |
| { |
| "epoch": 673.9622641509434, |
| "grad_norm": 1.2944519984940064, |
| "learning_rate": 3.577043012476768e-05, |
| "loss": 1.7, |
| "step": 17860 |
| }, |
| { |
| "epoch": 674.7169811320755, |
| "grad_norm": 1.3186599824633134, |
| "learning_rate": 3.56978940647233e-05, |
| "loss": 1.6954, |
| "step": 17880 |
| }, |
| { |
| "epoch": 675.4716981132076, |
| "grad_norm": 1.252700498164797, |
| "learning_rate": 3.5625384311069444e-05, |
| "loss": 1.6686, |
| "step": 17900 |
| }, |
| { |
| "epoch": 676.2264150943396, |
| "grad_norm": 1.5231032873107, |
| "learning_rate": 3.555290114422955e-05, |
| "loss": 1.6747, |
| "step": 17920 |
| }, |
| { |
| "epoch": 676.9811320754717, |
| "grad_norm": 1.2910659178037445, |
| "learning_rate": 3.548044484452421e-05, |
| "loss": 1.6778, |
| "step": 17940 |
| }, |
| { |
| "epoch": 677.7358490566038, |
| "grad_norm": 1.398570166804289, |
| "learning_rate": 3.540801569217016e-05, |
| "loss": 1.6949, |
| "step": 17960 |
| }, |
| { |
| "epoch": 678.4905660377359, |
| "grad_norm": 1.4283155036503146, |
| "learning_rate": 3.53356139672791e-05, |
| "loss": 1.682, |
| "step": 17980 |
| }, |
| { |
| "epoch": 679.2452830188679, |
| "grad_norm": 1.3275162110816598, |
| "learning_rate": 3.526323994985669e-05, |
| "loss": 1.695, |
| "step": 18000 |
| }, |
| { |
| "epoch": 680.0, |
| "grad_norm": 1.2754138886413842, |
| "learning_rate": 3.519089391980139e-05, |
| "loss": 1.6977, |
| "step": 18020 |
| }, |
| { |
| "epoch": 680.7547169811321, |
| "grad_norm": 1.3077633836764546, |
| "learning_rate": 3.511857615690347e-05, |
| "loss": 1.6811, |
| "step": 18040 |
| }, |
| { |
| "epoch": 681.5094339622641, |
| "grad_norm": 1.3473268942249876, |
| "learning_rate": 3.504628694084385e-05, |
| "loss": 1.6984, |
| "step": 18060 |
| }, |
| { |
| "epoch": 682.2641509433962, |
| "grad_norm": 1.3350261204503644, |
| "learning_rate": 3.497402655119306e-05, |
| "loss": 1.6567, |
| "step": 18080 |
| }, |
| { |
| "epoch": 683.0188679245283, |
| "grad_norm": 1.243885167646148, |
| "learning_rate": 3.490179526741014e-05, |
| "loss": 1.6837, |
| "step": 18100 |
| }, |
| { |
| "epoch": 683.7735849056604, |
| "grad_norm": 1.4293023473168278, |
| "learning_rate": 3.48295933688416e-05, |
| "loss": 1.7039, |
| "step": 18120 |
| }, |
| { |
| "epoch": 684.5283018867924, |
| "grad_norm": 1.3686594771374196, |
| "learning_rate": 3.4757421134720236e-05, |
| "loss": 1.7067, |
| "step": 18140 |
| }, |
| { |
| "epoch": 685.2830188679245, |
| "grad_norm": 1.6243192735337049, |
| "learning_rate": 3.46852788441642e-05, |
| "loss": 1.6661, |
| "step": 18160 |
| }, |
| { |
| "epoch": 686.0377358490566, |
| "grad_norm": 1.2075045336020302, |
| "learning_rate": 3.461316677617577e-05, |
| "loss": 1.6779, |
| "step": 18180 |
| }, |
| { |
| "epoch": 686.7924528301887, |
| "grad_norm": 1.348462905709941, |
| "learning_rate": 3.4541085209640396e-05, |
| "loss": 1.6962, |
| "step": 18200 |
| }, |
| { |
| "epoch": 687.5471698113207, |
| "grad_norm": 1.370184561468331, |
| "learning_rate": 3.446903442332552e-05, |
| "loss": 1.6819, |
| "step": 18220 |
| }, |
| { |
| "epoch": 688.3018867924528, |
| "grad_norm": 2.4058560541467537, |
| "learning_rate": 3.439701469587961e-05, |
| "loss": 1.6562, |
| "step": 18240 |
| }, |
| { |
| "epoch": 689.0566037735849, |
| "grad_norm": 1.2548392090130422, |
| "learning_rate": 3.4325026305830914e-05, |
| "loss": 1.662, |
| "step": 18260 |
| }, |
| { |
| "epoch": 689.811320754717, |
| "grad_norm": 1.2311253301629015, |
| "learning_rate": 3.4253069531586616e-05, |
| "loss": 1.6629, |
| "step": 18280 |
| }, |
| { |
| "epoch": 690.566037735849, |
| "grad_norm": 1.9966791662877068, |
| "learning_rate": 3.418114465143153e-05, |
| "loss": 1.6592, |
| "step": 18300 |
| }, |
| { |
| "epoch": 691.3207547169811, |
| "grad_norm": 1.2370362395857986, |
| "learning_rate": 3.410925194352715e-05, |
| "loss": 1.6806, |
| "step": 18320 |
| }, |
| { |
| "epoch": 692.0754716981132, |
| "grad_norm": 1.463146145452869, |
| "learning_rate": 3.4037391685910566e-05, |
| "loss": 1.6937, |
| "step": 18340 |
| }, |
| { |
| "epoch": 692.8301886792453, |
| "grad_norm": 1.2590469253316379, |
| "learning_rate": 3.396556415649336e-05, |
| "loss": 1.6746, |
| "step": 18360 |
| }, |
| { |
| "epoch": 693.5849056603773, |
| "grad_norm": 1.3472170619382864, |
| "learning_rate": 3.389376963306052e-05, |
| "loss": 1.681, |
| "step": 18380 |
| }, |
| { |
| "epoch": 694.3396226415094, |
| "grad_norm": 1.4907805923383493, |
| "learning_rate": 3.382200839326942e-05, |
| "loss": 1.6822, |
| "step": 18400 |
| }, |
| { |
| "epoch": 695.0943396226415, |
| "grad_norm": 1.3754366409172392, |
| "learning_rate": 3.375028071464869e-05, |
| "loss": 1.6819, |
| "step": 18420 |
| }, |
| { |
| "epoch": 695.8490566037735, |
| "grad_norm": 1.2854564980336112, |
| "learning_rate": 3.3678586874597176e-05, |
| "loss": 1.6712, |
| "step": 18440 |
| }, |
| { |
| "epoch": 696.6037735849056, |
| "grad_norm": 1.4614311570416143, |
| "learning_rate": 3.3606927150382865e-05, |
| "loss": 1.649, |
| "step": 18460 |
| }, |
| { |
| "epoch": 697.3584905660377, |
| "grad_norm": 1.3139946901519874, |
| "learning_rate": 3.353530181914178e-05, |
| "loss": 1.7062, |
| "step": 18480 |
| }, |
| { |
| "epoch": 698.1132075471698, |
| "grad_norm": 1.4895975475886944, |
| "learning_rate": 3.3463711157876966e-05, |
| "loss": 1.6841, |
| "step": 18500 |
| }, |
| { |
| "epoch": 698.8679245283018, |
| "grad_norm": 1.2111074764483576, |
| "learning_rate": 3.339215544345735e-05, |
| "loss": 1.6799, |
| "step": 18520 |
| }, |
| { |
| "epoch": 699.622641509434, |
| "grad_norm": 1.254964544152517, |
| "learning_rate": 3.3320634952616736e-05, |
| "loss": 1.6554, |
| "step": 18540 |
| }, |
| { |
| "epoch": 700.377358490566, |
| "grad_norm": 1.4098934710763775, |
| "learning_rate": 3.3249149961952686e-05, |
| "loss": 1.6821, |
| "step": 18560 |
| }, |
| { |
| "epoch": 701.1320754716982, |
| "grad_norm": 1.449098110180846, |
| "learning_rate": 3.3177700747925484e-05, |
| "loss": 1.6775, |
| "step": 18580 |
| }, |
| { |
| "epoch": 701.8867924528302, |
| "grad_norm": 1.4166300599178772, |
| "learning_rate": 3.310628758685702e-05, |
| "loss": 1.6647, |
| "step": 18600 |
| }, |
| { |
| "epoch": 702.6415094339623, |
| "grad_norm": 1.3321739096846923, |
| "learning_rate": 3.30349107549298e-05, |
| "loss": 1.6606, |
| "step": 18620 |
| }, |
| { |
| "epoch": 703.3962264150944, |
| "grad_norm": 1.3195021828180338, |
| "learning_rate": 3.2963570528185814e-05, |
| "loss": 1.6414, |
| "step": 18640 |
| }, |
| { |
| "epoch": 704.1509433962265, |
| "grad_norm": 1.2954808039261523, |
| "learning_rate": 3.2892267182525456e-05, |
| "loss": 1.6691, |
| "step": 18660 |
| }, |
| { |
| "epoch": 704.9056603773585, |
| "grad_norm": 1.3215765511079391, |
| "learning_rate": 3.2821000993706524e-05, |
| "loss": 1.6774, |
| "step": 18680 |
| }, |
| { |
| "epoch": 705.6603773584906, |
| "grad_norm": 1.3256079186058618, |
| "learning_rate": 3.2749772237343104e-05, |
| "loss": 1.6675, |
| "step": 18700 |
| }, |
| { |
| "epoch": 706.4150943396227, |
| "grad_norm": 1.3105427183809564, |
| "learning_rate": 3.26785811889045e-05, |
| "loss": 1.669, |
| "step": 18720 |
| }, |
| { |
| "epoch": 707.1698113207547, |
| "grad_norm": 1.1406031822674032, |
| "learning_rate": 3.26074281237142e-05, |
| "loss": 1.6528, |
| "step": 18740 |
| }, |
| { |
| "epoch": 707.9245283018868, |
| "grad_norm": 1.1721675684528943, |
| "learning_rate": 3.253631331694882e-05, |
| "loss": 1.6243, |
| "step": 18760 |
| }, |
| { |
| "epoch": 708.6792452830189, |
| "grad_norm": 1.262858428237141, |
| "learning_rate": 3.2465237043636945e-05, |
| "loss": 1.6811, |
| "step": 18780 |
| }, |
| { |
| "epoch": 709.433962264151, |
| "grad_norm": 1.3398257997775693, |
| "learning_rate": 3.239419957865822e-05, |
| "loss": 1.6531, |
| "step": 18800 |
| }, |
| { |
| "epoch": 710.188679245283, |
| "grad_norm": 1.3245763474105379, |
| "learning_rate": 3.2323201196742164e-05, |
| "loss": 1.6796, |
| "step": 18820 |
| }, |
| { |
| "epoch": 710.9433962264151, |
| "grad_norm": 1.3633874472219405, |
| "learning_rate": 3.225224217246712e-05, |
| "loss": 1.6544, |
| "step": 18840 |
| }, |
| { |
| "epoch": 711.6981132075472, |
| "grad_norm": 1.7407734601052158, |
| "learning_rate": 3.218132278025927e-05, |
| "loss": 1.6765, |
| "step": 18860 |
| }, |
| { |
| "epoch": 712.4528301886793, |
| "grad_norm": 1.4569167040451834, |
| "learning_rate": 3.2110443294391486e-05, |
| "loss": 1.6411, |
| "step": 18880 |
| }, |
| { |
| "epoch": 713.2075471698113, |
| "grad_norm": 1.3711197707215454, |
| "learning_rate": 3.203960398898234e-05, |
| "loss": 1.6385, |
| "step": 18900 |
| }, |
| { |
| "epoch": 713.9622641509434, |
| "grad_norm": 1.2731560765553942, |
| "learning_rate": 3.196880513799497e-05, |
| "loss": 1.6605, |
| "step": 18920 |
| }, |
| { |
| "epoch": 714.7169811320755, |
| "grad_norm": 1.3127125434194904, |
| "learning_rate": 3.189804701523608e-05, |
| "loss": 1.6774, |
| "step": 18940 |
| }, |
| { |
| "epoch": 715.4716981132076, |
| "grad_norm": 1.3249230445075728, |
| "learning_rate": 3.1827329894354874e-05, |
| "loss": 1.6753, |
| "step": 18960 |
| }, |
| { |
| "epoch": 716.2264150943396, |
| "grad_norm": 1.4612490587732805, |
| "learning_rate": 3.1756654048842e-05, |
| "loss": 1.655, |
| "step": 18980 |
| }, |
| { |
| "epoch": 716.9811320754717, |
| "grad_norm": 1.278645383417836, |
| "learning_rate": 3.1686019752028424e-05, |
| "loss": 1.6692, |
| "step": 19000 |
| }, |
| { |
| "epoch": 717.7358490566038, |
| "grad_norm": 1.3408714115191198, |
| "learning_rate": 3.161542727708446e-05, |
| "loss": 1.6448, |
| "step": 19020 |
| }, |
| { |
| "epoch": 718.4905660377359, |
| "grad_norm": 1.8695203026536409, |
| "learning_rate": 3.154487689701869e-05, |
| "loss": 1.6786, |
| "step": 19040 |
| }, |
| { |
| "epoch": 719.2452830188679, |
| "grad_norm": 1.3167685135936378, |
| "learning_rate": 3.147436888467689e-05, |
| "loss": 1.6625, |
| "step": 19060 |
| }, |
| { |
| "epoch": 720.0, |
| "grad_norm": 1.2539486625475944, |
| "learning_rate": 3.140390351274096e-05, |
| "loss": 1.6533, |
| "step": 19080 |
| }, |
| { |
| "epoch": 720.7547169811321, |
| "grad_norm": 1.3710221082026877, |
| "learning_rate": 3.133348105372793e-05, |
| "loss": 1.677, |
| "step": 19100 |
| }, |
| { |
| "epoch": 721.5094339622641, |
| "grad_norm": 1.528521350034396, |
| "learning_rate": 3.126310177998883e-05, |
| "loss": 1.6593, |
| "step": 19120 |
| }, |
| { |
| "epoch": 722.2641509433962, |
| "grad_norm": 1.2092386328287839, |
| "learning_rate": 3.1192765963707726e-05, |
| "loss": 1.669, |
| "step": 19140 |
| }, |
| { |
| "epoch": 723.0188679245283, |
| "grad_norm": 1.605845379972632, |
| "learning_rate": 3.1122473876900574e-05, |
| "loss": 1.6372, |
| "step": 19160 |
| }, |
| { |
| "epoch": 723.7735849056604, |
| "grad_norm": 1.5877097735994508, |
| "learning_rate": 3.105222579141423e-05, |
| "loss": 1.6557, |
| "step": 19180 |
| }, |
| { |
| "epoch": 724.5283018867924, |
| "grad_norm": 1.2516228941598748, |
| "learning_rate": 3.098202197892538e-05, |
| "loss": 1.6513, |
| "step": 19200 |
| }, |
| { |
| "epoch": 725.2830188679245, |
| "grad_norm": 1.2391402579938813, |
| "learning_rate": 3.091186271093947e-05, |
| "loss": 1.6526, |
| "step": 19220 |
| }, |
| { |
| "epoch": 726.0377358490566, |
| "grad_norm": 1.2782890497326889, |
| "learning_rate": 3.084174825878972e-05, |
| "loss": 1.6591, |
| "step": 19240 |
| }, |
| { |
| "epoch": 726.7924528301887, |
| "grad_norm": 1.2506962493164657, |
| "learning_rate": 3.0771678893635963e-05, |
| "loss": 1.65, |
| "step": 19260 |
| }, |
| { |
| "epoch": 727.5471698113207, |
| "grad_norm": 1.768116692306316, |
| "learning_rate": 3.070165488646371e-05, |
| "loss": 1.6516, |
| "step": 19280 |
| }, |
| { |
| "epoch": 728.3018867924528, |
| "grad_norm": 1.559057461009202, |
| "learning_rate": 3.063167650808307e-05, |
| "loss": 1.6616, |
| "step": 19300 |
| }, |
| { |
| "epoch": 729.0566037735849, |
| "grad_norm": 1.2888728962143756, |
| "learning_rate": 3.0561744029127636e-05, |
| "loss": 1.6574, |
| "step": 19320 |
| }, |
| { |
| "epoch": 729.811320754717, |
| "grad_norm": 1.2688734788741953, |
| "learning_rate": 3.049185772005353e-05, |
| "loss": 1.618, |
| "step": 19340 |
| }, |
| { |
| "epoch": 730.566037735849, |
| "grad_norm": 1.155730285013269, |
| "learning_rate": 3.0422017851138287e-05, |
| "loss": 1.6515, |
| "step": 19360 |
| }, |
| { |
| "epoch": 731.3207547169811, |
| "grad_norm": 1.7451043683696195, |
| "learning_rate": 3.0352224692479883e-05, |
| "loss": 1.6371, |
| "step": 19380 |
| }, |
| { |
| "epoch": 732.0754716981132, |
| "grad_norm": 1.526187340694129, |
| "learning_rate": 3.0282478513995598e-05, |
| "loss": 1.6523, |
| "step": 19400 |
| }, |
| { |
| "epoch": 732.8301886792453, |
| "grad_norm": 1.4075608712323138, |
| "learning_rate": 3.0212779585421064e-05, |
| "loss": 1.6335, |
| "step": 19420 |
| }, |
| { |
| "epoch": 733.5849056603773, |
| "grad_norm": 1.345293550699471, |
| "learning_rate": 3.0143128176309125e-05, |
| "loss": 1.6505, |
| "step": 19440 |
| }, |
| { |
| "epoch": 734.3396226415094, |
| "grad_norm": 1.3467855791600631, |
| "learning_rate": 3.007352455602892e-05, |
| "loss": 1.6591, |
| "step": 19460 |
| }, |
| { |
| "epoch": 735.0943396226415, |
| "grad_norm": 1.3667404544607202, |
| "learning_rate": 3.000396899376472e-05, |
| "loss": 1.6244, |
| "step": 19480 |
| }, |
| { |
| "epoch": 735.8490566037735, |
| "grad_norm": 1.2844014927173513, |
| "learning_rate": 2.9934461758514944e-05, |
| "loss": 1.6154, |
| "step": 19500 |
| }, |
| { |
| "epoch": 736.6037735849056, |
| "grad_norm": 1.46598947181564, |
| "learning_rate": 2.986500311909114e-05, |
| "loss": 1.6443, |
| "step": 19520 |
| }, |
| { |
| "epoch": 737.3584905660377, |
| "grad_norm": 1.2682755408237392, |
| "learning_rate": 2.9795593344116856e-05, |
| "loss": 1.6492, |
| "step": 19540 |
| }, |
| { |
| "epoch": 738.1132075471698, |
| "grad_norm": 1.4017683975117536, |
| "learning_rate": 2.972623270202674e-05, |
| "loss": 1.6614, |
| "step": 19560 |
| }, |
| { |
| "epoch": 738.8679245283018, |
| "grad_norm": 1.5142927604100354, |
| "learning_rate": 2.9656921461065357e-05, |
| "loss": 1.6357, |
| "step": 19580 |
| }, |
| { |
| "epoch": 739.622641509434, |
| "grad_norm": 1.2492564466728204, |
| "learning_rate": 2.958765988928627e-05, |
| "loss": 1.6468, |
| "step": 19600 |
| }, |
| { |
| "epoch": 740.377358490566, |
| "grad_norm": 1.4008655564779207, |
| "learning_rate": 2.951844825455089e-05, |
| "loss": 1.64, |
| "step": 19620 |
| }, |
| { |
| "epoch": 741.1320754716982, |
| "grad_norm": 1.2731601803567079, |
| "learning_rate": 2.944928682452759e-05, |
| "loss": 1.6324, |
| "step": 19640 |
| }, |
| { |
| "epoch": 741.8867924528302, |
| "grad_norm": 1.5569572939387173, |
| "learning_rate": 2.9380175866690493e-05, |
| "loss": 1.6368, |
| "step": 19660 |
| }, |
| { |
| "epoch": 742.6415094339623, |
| "grad_norm": 1.3215892057968033, |
| "learning_rate": 2.9311115648318603e-05, |
| "loss": 1.5918, |
| "step": 19680 |
| }, |
| { |
| "epoch": 743.3962264150944, |
| "grad_norm": 1.301974969557669, |
| "learning_rate": 2.924210643649462e-05, |
| "loss": 1.625, |
| "step": 19700 |
| }, |
| { |
| "epoch": 744.1509433962265, |
| "grad_norm": 1.245601615853851, |
| "learning_rate": 2.917314849810405e-05, |
| "loss": 1.6436, |
| "step": 19720 |
| }, |
| { |
| "epoch": 744.9056603773585, |
| "grad_norm": 1.4218013050424188, |
| "learning_rate": 2.9104242099834047e-05, |
| "loss": 1.633, |
| "step": 19740 |
| }, |
| { |
| "epoch": 745.6603773584906, |
| "grad_norm": 1.584425316406802, |
| "learning_rate": 2.9035387508172488e-05, |
| "loss": 1.654, |
| "step": 19760 |
| }, |
| { |
| "epoch": 746.4150943396227, |
| "grad_norm": 1.237326080185327, |
| "learning_rate": 2.896658498940685e-05, |
| "loss": 1.6417, |
| "step": 19780 |
| }, |
| { |
| "epoch": 747.1698113207547, |
| "grad_norm": 1.336327325511772, |
| "learning_rate": 2.8897834809623266e-05, |
| "loss": 1.6278, |
| "step": 19800 |
| }, |
| { |
| "epoch": 747.9245283018868, |
| "grad_norm": 1.3731531069367304, |
| "learning_rate": 2.8829137234705436e-05, |
| "loss": 1.6339, |
| "step": 19820 |
| }, |
| { |
| "epoch": 748.6792452830189, |
| "grad_norm": 1.4396961322439583, |
| "learning_rate": 2.8760492530333595e-05, |
| "loss": 1.6132, |
| "step": 19840 |
| }, |
| { |
| "epoch": 749.433962264151, |
| "grad_norm": 1.4566587475130242, |
| "learning_rate": 2.869190096198354e-05, |
| "loss": 1.6236, |
| "step": 19860 |
| }, |
| { |
| "epoch": 750.188679245283, |
| "grad_norm": 1.3378675846922892, |
| "learning_rate": 2.8623362794925554e-05, |
| "loss": 1.6407, |
| "step": 19880 |
| }, |
| { |
| "epoch": 750.9433962264151, |
| "grad_norm": 1.1248642736382553, |
| "learning_rate": 2.85548782942234e-05, |
| "loss": 1.6328, |
| "step": 19900 |
| }, |
| { |
| "epoch": 751.6981132075472, |
| "grad_norm": 1.281060533625914, |
| "learning_rate": 2.8486447724733283e-05, |
| "loss": 1.6288, |
| "step": 19920 |
| }, |
| { |
| "epoch": 752.4528301886793, |
| "grad_norm": 1.2477580789710936, |
| "learning_rate": 2.841807135110286e-05, |
| "loss": 1.6129, |
| "step": 19940 |
| }, |
| { |
| "epoch": 753.2075471698113, |
| "grad_norm": 1.3050801379092132, |
| "learning_rate": 2.8349749437770146e-05, |
| "loss": 1.6259, |
| "step": 19960 |
| }, |
| { |
| "epoch": 753.9622641509434, |
| "grad_norm": 1.6556396088385372, |
| "learning_rate": 2.8281482248962588e-05, |
| "loss": 1.6264, |
| "step": 19980 |
| }, |
| { |
| "epoch": 754.7169811320755, |
| "grad_norm": 1.442836668716919, |
| "learning_rate": 2.8213270048695976e-05, |
| "loss": 1.6286, |
| "step": 20000 |
| }, |
| { |
| "epoch": 755.4716981132076, |
| "grad_norm": 1.3276233208619523, |
| "learning_rate": 2.814511310077342e-05, |
| "loss": 1.6485, |
| "step": 20020 |
| }, |
| { |
| "epoch": 756.2264150943396, |
| "grad_norm": 1.2751456415696178, |
| "learning_rate": 2.807701166878436e-05, |
| "loss": 1.622, |
| "step": 20040 |
| }, |
| { |
| "epoch": 756.9811320754717, |
| "grad_norm": 1.2003976158870355, |
| "learning_rate": 2.8008966016103532e-05, |
| "loss": 1.6002, |
| "step": 20060 |
| }, |
| { |
| "epoch": 757.7358490566038, |
| "grad_norm": 1.3873947326300384, |
| "learning_rate": 2.7940976405889962e-05, |
| "loss": 1.5892, |
| "step": 20080 |
| }, |
| { |
| "epoch": 758.4905660377359, |
| "grad_norm": 1.6648131685984493, |
| "learning_rate": 2.787304310108591e-05, |
| "loss": 1.6496, |
| "step": 20100 |
| }, |
| { |
| "epoch": 759.2452830188679, |
| "grad_norm": 1.4092462550250433, |
| "learning_rate": 2.780516636441591e-05, |
| "loss": 1.6222, |
| "step": 20120 |
| }, |
| { |
| "epoch": 760.0, |
| "grad_norm": 1.3221797397344044, |
| "learning_rate": 2.7737346458385732e-05, |
| "loss": 1.6276, |
| "step": 20140 |
| }, |
| { |
| "epoch": 760.7547169811321, |
| "grad_norm": 1.2328101453363856, |
| "learning_rate": 2.766958364528132e-05, |
| "loss": 1.6199, |
| "step": 20160 |
| }, |
| { |
| "epoch": 761.5094339622641, |
| "grad_norm": 1.198723191362267, |
| "learning_rate": 2.7601878187167865e-05, |
| "loss": 1.6028, |
| "step": 20180 |
| }, |
| { |
| "epoch": 762.2641509433962, |
| "grad_norm": 1.9424131363478752, |
| "learning_rate": 2.7534230345888686e-05, |
| "loss": 1.6155, |
| "step": 20200 |
| }, |
| { |
| "epoch": 763.0188679245283, |
| "grad_norm": 1.3568601924624037, |
| "learning_rate": 2.7466640383064343e-05, |
| "loss": 1.615, |
| "step": 20220 |
| }, |
| { |
| "epoch": 763.7735849056604, |
| "grad_norm": 1.6734295204532768, |
| "learning_rate": 2.7399108560091492e-05, |
| "loss": 1.6127, |
| "step": 20240 |
| }, |
| { |
| "epoch": 764.5283018867924, |
| "grad_norm": 1.3054727154474908, |
| "learning_rate": 2.7331635138141997e-05, |
| "loss": 1.6121, |
| "step": 20260 |
| }, |
| { |
| "epoch": 765.2830188679245, |
| "grad_norm": 1.4085434131191898, |
| "learning_rate": 2.7264220378161817e-05, |
| "loss": 1.5995, |
| "step": 20280 |
| }, |
| { |
| "epoch": 766.0377358490566, |
| "grad_norm": 1.2882798163186127, |
| "learning_rate": 2.719686454087006e-05, |
| "loss": 1.6209, |
| "step": 20300 |
| }, |
| { |
| "epoch": 766.7924528301887, |
| "grad_norm": 1.3843343328010425, |
| "learning_rate": 2.712956788675799e-05, |
| "loss": 1.6253, |
| "step": 20320 |
| }, |
| { |
| "epoch": 767.5471698113207, |
| "grad_norm": 1.2235858453276647, |
| "learning_rate": 2.7062330676087928e-05, |
| "loss": 1.5965, |
| "step": 20340 |
| }, |
| { |
| "epoch": 768.3018867924528, |
| "grad_norm": 3.572459256976869, |
| "learning_rate": 2.6995153168892342e-05, |
| "loss": 1.6146, |
| "step": 20360 |
| }, |
| { |
| "epoch": 769.0566037735849, |
| "grad_norm": 1.6994398915504043, |
| "learning_rate": 2.692803562497278e-05, |
| "loss": 1.6034, |
| "step": 20380 |
| }, |
| { |
| "epoch": 769.811320754717, |
| "grad_norm": 1.2122097844602269, |
| "learning_rate": 2.6860978303898913e-05, |
| "loss": 1.6133, |
| "step": 20400 |
| }, |
| { |
| "epoch": 770.566037735849, |
| "grad_norm": 1.5836773539567761, |
| "learning_rate": 2.6793981465007477e-05, |
| "loss": 1.6149, |
| "step": 20420 |
| }, |
| { |
| "epoch": 771.3207547169811, |
| "grad_norm": 1.9577284294586506, |
| "learning_rate": 2.6727045367401357e-05, |
| "loss": 1.6038, |
| "step": 20440 |
| }, |
| { |
| "epoch": 772.0754716981132, |
| "grad_norm": 1.453554282623515, |
| "learning_rate": 2.6660170269948445e-05, |
| "loss": 1.6425, |
| "step": 20460 |
| }, |
| { |
| "epoch": 772.8301886792453, |
| "grad_norm": 1.3031139346537821, |
| "learning_rate": 2.65933564312808e-05, |
| "loss": 1.5996, |
| "step": 20480 |
| }, |
| { |
| "epoch": 773.5849056603773, |
| "grad_norm": 1.2921513380534098, |
| "learning_rate": 2.6526604109793517e-05, |
| "loss": 1.6097, |
| "step": 20500 |
| }, |
| { |
| "epoch": 774.3396226415094, |
| "grad_norm": 1.2706024142950736, |
| "learning_rate": 2.6459913563643797e-05, |
| "loss": 1.6151, |
| "step": 20520 |
| }, |
| { |
| "epoch": 775.0943396226415, |
| "grad_norm": 1.3170228555500274, |
| "learning_rate": 2.6393285050749948e-05, |
| "loss": 1.6117, |
| "step": 20540 |
| }, |
| { |
| "epoch": 775.8490566037735, |
| "grad_norm": 1.4811497809397014, |
| "learning_rate": 2.6326718828790347e-05, |
| "loss": 1.6065, |
| "step": 20560 |
| }, |
| { |
| "epoch": 776.6037735849056, |
| "grad_norm": 1.3171774089155976, |
| "learning_rate": 2.6260215155202478e-05, |
| "loss": 1.5846, |
| "step": 20580 |
| }, |
| { |
| "epoch": 777.3584905660377, |
| "grad_norm": 1.4867958271178354, |
| "learning_rate": 2.6193774287181905e-05, |
| "loss": 1.6182, |
| "step": 20600 |
| }, |
| { |
| "epoch": 778.1132075471698, |
| "grad_norm": 1.485845846341643, |
| "learning_rate": 2.612739648168134e-05, |
| "loss": 1.618, |
| "step": 20620 |
| }, |
| { |
| "epoch": 778.8679245283018, |
| "grad_norm": 1.3411546152150449, |
| "learning_rate": 2.6061081995409594e-05, |
| "loss": 1.5979, |
| "step": 20640 |
| }, |
| { |
| "epoch": 779.622641509434, |
| "grad_norm": 1.352180099861608, |
| "learning_rate": 2.5994831084830585e-05, |
| "loss": 1.607, |
| "step": 20660 |
| }, |
| { |
| "epoch": 780.377358490566, |
| "grad_norm": 1.4317453454675355, |
| "learning_rate": 2.5928644006162356e-05, |
| "loss": 1.63, |
| "step": 20680 |
| }, |
| { |
| "epoch": 781.1320754716982, |
| "grad_norm": 1.3910753254665948, |
| "learning_rate": 2.5862521015376083e-05, |
| "loss": 1.6066, |
| "step": 20700 |
| }, |
| { |
| "epoch": 781.8867924528302, |
| "grad_norm": 1.3073180519851255, |
| "learning_rate": 2.579646236819513e-05, |
| "loss": 1.6064, |
| "step": 20720 |
| }, |
| { |
| "epoch": 782.6415094339623, |
| "grad_norm": 1.2595549167905473, |
| "learning_rate": 2.5730468320093977e-05, |
| "loss": 1.5911, |
| "step": 20740 |
| }, |
| { |
| "epoch": 783.3962264150944, |
| "grad_norm": 1.2678103789921547, |
| "learning_rate": 2.566453912629729e-05, |
| "loss": 1.5817, |
| "step": 20760 |
| }, |
| { |
| "epoch": 784.1509433962265, |
| "grad_norm": 1.5236492215060178, |
| "learning_rate": 2.5598675041778895e-05, |
| "loss": 1.6007, |
| "step": 20780 |
| }, |
| { |
| "epoch": 784.9056603773585, |
| "grad_norm": 1.4661421776894412, |
| "learning_rate": 2.553287632126086e-05, |
| "loss": 1.5504, |
| "step": 20800 |
| }, |
| { |
| "epoch": 785.6603773584906, |
| "grad_norm": 2.1927891520328635, |
| "learning_rate": 2.5467143219212452e-05, |
| "loss": 1.5841, |
| "step": 20820 |
| }, |
| { |
| "epoch": 786.4150943396227, |
| "grad_norm": 1.3795117819084444, |
| "learning_rate": 2.5401475989849135e-05, |
| "loss": 1.6066, |
| "step": 20840 |
| }, |
| { |
| "epoch": 787.1698113207547, |
| "grad_norm": 1.4556438165329462, |
| "learning_rate": 2.5335874887131648e-05, |
| "loss": 1.5968, |
| "step": 20860 |
| }, |
| { |
| "epoch": 787.9245283018868, |
| "grad_norm": 1.4073316916031215, |
| "learning_rate": 2.5270340164764954e-05, |
| "loss": 1.5903, |
| "step": 20880 |
| }, |
| { |
| "epoch": 788.6792452830189, |
| "grad_norm": 1.519045194155026, |
| "learning_rate": 2.5204872076197373e-05, |
| "loss": 1.6143, |
| "step": 20900 |
| }, |
| { |
| "epoch": 789.433962264151, |
| "grad_norm": 1.371854180935982, |
| "learning_rate": 2.513947087461945e-05, |
| "loss": 1.5956, |
| "step": 20920 |
| }, |
| { |
| "epoch": 790.188679245283, |
| "grad_norm": 1.3445443793198255, |
| "learning_rate": 2.5074136812963086e-05, |
| "loss": 1.6161, |
| "step": 20940 |
| }, |
| { |
| "epoch": 790.9433962264151, |
| "grad_norm": 1.3427364962397694, |
| "learning_rate": 2.5008870143900505e-05, |
| "loss": 1.5568, |
| "step": 20960 |
| }, |
| { |
| "epoch": 791.6981132075472, |
| "grad_norm": 1.2656549996025988, |
| "learning_rate": 2.4943671119843328e-05, |
| "loss": 1.5955, |
| "step": 20980 |
| }, |
| { |
| "epoch": 792.4528301886793, |
| "grad_norm": 1.4205258402430134, |
| "learning_rate": 2.4878539992941564e-05, |
| "loss": 1.5806, |
| "step": 21000 |
| }, |
| { |
| "epoch": 793.2075471698113, |
| "grad_norm": 1.6035321030423435, |
| "learning_rate": 2.4813477015082614e-05, |
| "loss": 1.6141, |
| "step": 21020 |
| }, |
| { |
| "epoch": 793.9622641509434, |
| "grad_norm": 1.411461969155631, |
| "learning_rate": 2.4748482437890327e-05, |
| "loss": 1.613, |
| "step": 21040 |
| }, |
| { |
| "epoch": 794.7169811320755, |
| "grad_norm": 1.5232357865305386, |
| "learning_rate": 2.4683556512724013e-05, |
| "loss": 1.5999, |
| "step": 21060 |
| }, |
| { |
| "epoch": 795.4716981132076, |
| "grad_norm": 1.423060839013135, |
| "learning_rate": 2.4618699490677522e-05, |
| "loss": 1.6014, |
| "step": 21080 |
| }, |
| { |
| "epoch": 796.2264150943396, |
| "grad_norm": 1.3310370009240546, |
| "learning_rate": 2.4553911622578173e-05, |
| "loss": 1.5633, |
| "step": 21100 |
| }, |
| { |
| "epoch": 796.9811320754717, |
| "grad_norm": 1.5449295211536895, |
| "learning_rate": 2.4489193158985862e-05, |
| "loss": 1.5948, |
| "step": 21120 |
| }, |
| { |
| "epoch": 797.7358490566038, |
| "grad_norm": 1.4953862144554202, |
| "learning_rate": 2.4424544350192054e-05, |
| "loss": 1.5576, |
| "step": 21140 |
| }, |
| { |
| "epoch": 798.4905660377359, |
| "grad_norm": 1.4322654299272977, |
| "learning_rate": 2.4359965446218893e-05, |
| "loss": 1.6043, |
| "step": 21160 |
| }, |
| { |
| "epoch": 799.2452830188679, |
| "grad_norm": 1.230403444648656, |
| "learning_rate": 2.4295456696818116e-05, |
| "loss": 1.5875, |
| "step": 21180 |
| }, |
| { |
| "epoch": 800.0, |
| "grad_norm": 1.2803680639521113, |
| "learning_rate": 2.423101835147014e-05, |
| "loss": 1.5929, |
| "step": 21200 |
| }, |
| { |
| "epoch": 800.7547169811321, |
| "grad_norm": 1.3170298641719804, |
| "learning_rate": 2.4166650659383118e-05, |
| "loss": 1.5807, |
| "step": 21220 |
| }, |
| { |
| "epoch": 801.5094339622641, |
| "grad_norm": 1.6269742919477346, |
| "learning_rate": 2.410235386949199e-05, |
| "loss": 1.6065, |
| "step": 21240 |
| }, |
| { |
| "epoch": 802.2641509433962, |
| "grad_norm": 1.5458337442207868, |
| "learning_rate": 2.4038128230457458e-05, |
| "loss": 1.5717, |
| "step": 21260 |
| }, |
| { |
| "epoch": 803.0188679245283, |
| "grad_norm": 2.161638931230412, |
| "learning_rate": 2.3973973990665043e-05, |
| "loss": 1.5762, |
| "step": 21280 |
| }, |
| { |
| "epoch": 803.7735849056604, |
| "grad_norm": 1.4046972399313973, |
| "learning_rate": 2.3909891398224146e-05, |
| "loss": 1.5661, |
| "step": 21300 |
| }, |
| { |
| "epoch": 804.5283018867924, |
| "grad_norm": 1.35050834441664, |
| "learning_rate": 2.3845880700967103e-05, |
| "loss": 1.5706, |
| "step": 21320 |
| }, |
| { |
| "epoch": 805.2830188679245, |
| "grad_norm": 1.5896148693041472, |
| "learning_rate": 2.3781942146448204e-05, |
| "loss": 1.5729, |
| "step": 21340 |
| }, |
| { |
| "epoch": 806.0377358490566, |
| "grad_norm": 1.5191801749378997, |
| "learning_rate": 2.3718075981942708e-05, |
| "loss": 1.5602, |
| "step": 21360 |
| }, |
| { |
| "epoch": 806.7924528301887, |
| "grad_norm": 1.1849456023631704, |
| "learning_rate": 2.3654282454445914e-05, |
| "loss": 1.5577, |
| "step": 21380 |
| }, |
| { |
| "epoch": 807.5471698113207, |
| "grad_norm": 1.6435607024595327, |
| "learning_rate": 2.3590561810672222e-05, |
| "loss": 1.5806, |
| "step": 21400 |
| }, |
| { |
| "epoch": 808.3018867924528, |
| "grad_norm": 2.2764964715817153, |
| "learning_rate": 2.3526914297054165e-05, |
| "loss": 1.5465, |
| "step": 21420 |
| }, |
| { |
| "epoch": 809.0566037735849, |
| "grad_norm": 1.143095634553467, |
| "learning_rate": 2.3463340159741438e-05, |
| "loss": 1.5608, |
| "step": 21440 |
| }, |
| { |
| "epoch": 809.811320754717, |
| "grad_norm": 1.786405965035776, |
| "learning_rate": 2.3399839644599966e-05, |
| "loss": 1.5685, |
| "step": 21460 |
| }, |
| { |
| "epoch": 810.566037735849, |
| "grad_norm": 1.7826688318895536, |
| "learning_rate": 2.3336412997210945e-05, |
| "loss": 1.5673, |
| "step": 21480 |
| }, |
| { |
| "epoch": 811.3207547169811, |
| "grad_norm": 1.9973557900265262, |
| "learning_rate": 2.3273060462869915e-05, |
| "loss": 1.58, |
| "step": 21500 |
| }, |
| { |
| "epoch": 812.0754716981132, |
| "grad_norm": 2.725826480276118, |
| "learning_rate": 2.320978228658578e-05, |
| "loss": 1.5798, |
| "step": 21520 |
| }, |
| { |
| "epoch": 812.8301886792453, |
| "grad_norm": 1.3785885365626125, |
| "learning_rate": 2.3146578713079873e-05, |
| "loss": 1.584, |
| "step": 21540 |
| }, |
| { |
| "epoch": 813.5849056603773, |
| "grad_norm": 1.3942585361528321, |
| "learning_rate": 2.308344998678499e-05, |
| "loss": 1.5801, |
| "step": 21560 |
| }, |
| { |
| "epoch": 814.3396226415094, |
| "grad_norm": 1.3778634108496939, |
| "learning_rate": 2.3020396351844476e-05, |
| "loss": 1.587, |
| "step": 21580 |
| }, |
| { |
| "epoch": 815.0943396226415, |
| "grad_norm": 1.3226870132637325, |
| "learning_rate": 2.2957418052111304e-05, |
| "loss": 1.5666, |
| "step": 21600 |
| }, |
| { |
| "epoch": 815.8490566037735, |
| "grad_norm": 1.4483636324260303, |
| "learning_rate": 2.2894515331147043e-05, |
| "loss": 1.5721, |
| "step": 21620 |
| }, |
| { |
| "epoch": 816.6037735849056, |
| "grad_norm": 1.2510733452498213, |
| "learning_rate": 2.2831688432220988e-05, |
| "loss": 1.5909, |
| "step": 21640 |
| }, |
| { |
| "epoch": 817.3584905660377, |
| "grad_norm": 1.4064726551185514, |
| "learning_rate": 2.2768937598309226e-05, |
| "loss": 1.5581, |
| "step": 21660 |
| }, |
| { |
| "epoch": 818.1132075471698, |
| "grad_norm": 1.3598025710319712, |
| "learning_rate": 2.2706263072093622e-05, |
| "loss": 1.5798, |
| "step": 21680 |
| }, |
| { |
| "epoch": 818.8679245283018, |
| "grad_norm": 1.4055798705480538, |
| "learning_rate": 2.2643665095960992e-05, |
| "loss": 1.5376, |
| "step": 21700 |
| }, |
| { |
| "epoch": 819.622641509434, |
| "grad_norm": 1.3886244820387288, |
| "learning_rate": 2.258114391200204e-05, |
| "loss": 1.588, |
| "step": 21720 |
| }, |
| { |
| "epoch": 820.377358490566, |
| "grad_norm": 1.672353853467523, |
| "learning_rate": 2.2518699762010527e-05, |
| "loss": 1.5771, |
| "step": 21740 |
| }, |
| { |
| "epoch": 821.1320754716982, |
| "grad_norm": 1.6122695109482281, |
| "learning_rate": 2.245633288748226e-05, |
| "loss": 1.5744, |
| "step": 21760 |
| }, |
| { |
| "epoch": 821.8867924528302, |
| "grad_norm": 1.2184243938930763, |
| "learning_rate": 2.239404352961424e-05, |
| "loss": 1.5579, |
| "step": 21780 |
| }, |
| { |
| "epoch": 822.6415094339623, |
| "grad_norm": 2.6739030707563383, |
| "learning_rate": 2.233183192930362e-05, |
| "loss": 1.5742, |
| "step": 21800 |
| }, |
| { |
| "epoch": 823.3962264150944, |
| "grad_norm": 1.513583565471533, |
| "learning_rate": 2.22696983271469e-05, |
| "loss": 1.5543, |
| "step": 21820 |
| }, |
| { |
| "epoch": 824.1509433962265, |
| "grad_norm": 1.5062076381870015, |
| "learning_rate": 2.2207642963438875e-05, |
| "loss": 1.5578, |
| "step": 21840 |
| }, |
| { |
| "epoch": 824.9056603773585, |
| "grad_norm": 1.4515191009181103, |
| "learning_rate": 2.2145666078171794e-05, |
| "loss": 1.5599, |
| "step": 21860 |
| }, |
| { |
| "epoch": 825.6603773584906, |
| "grad_norm": 1.7800885670540134, |
| "learning_rate": 2.2083767911034394e-05, |
| "loss": 1.5724, |
| "step": 21880 |
| }, |
| { |
| "epoch": 826.4150943396227, |
| "grad_norm": 1.655570469233021, |
| "learning_rate": 2.2021948701410956e-05, |
| "loss": 1.5722, |
| "step": 21900 |
| }, |
| { |
| "epoch": 827.1698113207547, |
| "grad_norm": 1.682338091450034, |
| "learning_rate": 2.1960208688380426e-05, |
| "loss": 1.5289, |
| "step": 21920 |
| }, |
| { |
| "epoch": 827.9245283018868, |
| "grad_norm": 1.3769805944636337, |
| "learning_rate": 2.189854811071546e-05, |
| "loss": 1.5523, |
| "step": 21940 |
| }, |
| { |
| "epoch": 828.6792452830189, |
| "grad_norm": 1.2988448014856364, |
| "learning_rate": 2.183696720688152e-05, |
| "loss": 1.5493, |
| "step": 21960 |
| }, |
| { |
| "epoch": 829.433962264151, |
| "grad_norm": 1.352528590030774, |
| "learning_rate": 2.1775466215035887e-05, |
| "loss": 1.5505, |
| "step": 21980 |
| }, |
| { |
| "epoch": 830.188679245283, |
| "grad_norm": 1.9587571716355492, |
| "learning_rate": 2.1714045373026878e-05, |
| "loss": 1.5611, |
| "step": 22000 |
| }, |
| { |
| "epoch": 830.9433962264151, |
| "grad_norm": 1.4092678213797292, |
| "learning_rate": 2.165270491839274e-05, |
| "loss": 1.5799, |
| "step": 22020 |
| }, |
| { |
| "epoch": 831.6981132075472, |
| "grad_norm": 1.2980309483736483, |
| "learning_rate": 2.159144508836092e-05, |
| "loss": 1.5409, |
| "step": 22040 |
| }, |
| { |
| "epoch": 832.4528301886793, |
| "grad_norm": 2.367411086569801, |
| "learning_rate": 2.1530266119847e-05, |
| "loss": 1.5565, |
| "step": 22060 |
| }, |
| { |
| "epoch": 833.2075471698113, |
| "grad_norm": 1.4677294354247894, |
| "learning_rate": 2.146916824945386e-05, |
| "loss": 1.567, |
| "step": 22080 |
| }, |
| { |
| "epoch": 833.9622641509434, |
| "grad_norm": 1.2034171336508228, |
| "learning_rate": 2.1408151713470727e-05, |
| "loss": 1.5324, |
| "step": 22100 |
| }, |
| { |
| "epoch": 834.7169811320755, |
| "grad_norm": 1.6112142759671855, |
| "learning_rate": 2.1347216747872316e-05, |
| "loss": 1.5728, |
| "step": 22120 |
| }, |
| { |
| "epoch": 835.4716981132076, |
| "grad_norm": 2.3612009460762025, |
| "learning_rate": 2.1286363588317815e-05, |
| "loss": 1.5777, |
| "step": 22140 |
| }, |
| { |
| "epoch": 836.2264150943396, |
| "grad_norm": 1.3794177780422423, |
| "learning_rate": 2.122559247015011e-05, |
| "loss": 1.5337, |
| "step": 22160 |
| }, |
| { |
| "epoch": 836.9811320754717, |
| "grad_norm": 1.4913217058342938, |
| "learning_rate": 2.116490362839475e-05, |
| "loss": 1.5712, |
| "step": 22180 |
| }, |
| { |
| "epoch": 837.7358490566038, |
| "grad_norm": 1.393269094002593, |
| "learning_rate": 2.1104297297759077e-05, |
| "loss": 1.56, |
| "step": 22200 |
| }, |
| { |
| "epoch": 838.4905660377359, |
| "grad_norm": 1.5277254368751014, |
| "learning_rate": 2.104377371263138e-05, |
| "loss": 1.564, |
| "step": 22220 |
| }, |
| { |
| "epoch": 839.2452830188679, |
| "grad_norm": 1.8220574387124733, |
| "learning_rate": 2.0983333107079923e-05, |
| "loss": 1.593, |
| "step": 22240 |
| }, |
| { |
| "epoch": 840.0, |
| "grad_norm": 1.4636327213867844, |
| "learning_rate": 2.0922975714852024e-05, |
| "loss": 1.5482, |
| "step": 22260 |
| }, |
| { |
| "epoch": 840.7547169811321, |
| "grad_norm": 1.374724993121681, |
| "learning_rate": 2.0862701769373194e-05, |
| "loss": 1.5386, |
| "step": 22280 |
| }, |
| { |
| "epoch": 841.5094339622641, |
| "grad_norm": 1.3056844963466483, |
| "learning_rate": 2.0802511503746282e-05, |
| "loss": 1.5499, |
| "step": 22300 |
| }, |
| { |
| "epoch": 842.2641509433962, |
| "grad_norm": 1.8941001751457995, |
| "learning_rate": 2.074240515075041e-05, |
| "loss": 1.5556, |
| "step": 22320 |
| }, |
| { |
| "epoch": 843.0188679245283, |
| "grad_norm": 1.5811456544096827, |
| "learning_rate": 2.0682382942840276e-05, |
| "loss": 1.5301, |
| "step": 22340 |
| }, |
| { |
| "epoch": 843.7735849056604, |
| "grad_norm": 1.6509929914813097, |
| "learning_rate": 2.062244511214511e-05, |
| "loss": 1.5114, |
| "step": 22360 |
| }, |
| { |
| "epoch": 844.5283018867924, |
| "grad_norm": 1.7262725135545645, |
| "learning_rate": 2.0562591890467795e-05, |
| "loss": 1.5771, |
| "step": 22380 |
| }, |
| { |
| "epoch": 845.2830188679245, |
| "grad_norm": 2.3494461416325176, |
| "learning_rate": 2.050282350928407e-05, |
| "loss": 1.5355, |
| "step": 22400 |
| }, |
| { |
| "epoch": 846.0377358490566, |
| "grad_norm": 1.5449531783263548, |
| "learning_rate": 2.0443140199741506e-05, |
| "loss": 1.5322, |
| "step": 22420 |
| }, |
| { |
| "epoch": 846.7924528301887, |
| "grad_norm": 1.6993440968380624, |
| "learning_rate": 2.0383542192658678e-05, |
| "loss": 1.5595, |
| "step": 22440 |
| }, |
| { |
| "epoch": 847.5471698113207, |
| "grad_norm": 1.4219970620295765, |
| "learning_rate": 2.0324029718524266e-05, |
| "loss": 1.544, |
| "step": 22460 |
| }, |
| { |
| "epoch": 848.3018867924528, |
| "grad_norm": 1.4581628071481192, |
| "learning_rate": 2.0264603007496174e-05, |
| "loss": 1.5504, |
| "step": 22480 |
| }, |
| { |
| "epoch": 849.0566037735849, |
| "grad_norm": 1.7218288706081564, |
| "learning_rate": 2.0205262289400635e-05, |
| "loss": 1.5329, |
| "step": 22500 |
| }, |
| { |
| "epoch": 849.811320754717, |
| "grad_norm": 1.557573117936356, |
| "learning_rate": 2.0146007793731277e-05, |
| "loss": 1.5413, |
| "step": 22520 |
| }, |
| { |
| "epoch": 850.566037735849, |
| "grad_norm": 1.556424340318002, |
| "learning_rate": 2.0086839749648294e-05, |
| "loss": 1.585, |
| "step": 22540 |
| }, |
| { |
| "epoch": 851.3207547169811, |
| "grad_norm": 1.5130697235799593, |
| "learning_rate": 2.002775838597753e-05, |
| "loss": 1.5365, |
| "step": 22560 |
| }, |
| { |
| "epoch": 852.0754716981132, |
| "grad_norm": 1.8393652727073544, |
| "learning_rate": 1.9968763931209628e-05, |
| "loss": 1.5459, |
| "step": 22580 |
| }, |
| { |
| "epoch": 852.8301886792453, |
| "grad_norm": 1.5587158507011118, |
| "learning_rate": 1.9909856613499096e-05, |
| "loss": 1.5429, |
| "step": 22600 |
| }, |
| { |
| "epoch": 853.5849056603773, |
| "grad_norm": 1.5786253886757977, |
| "learning_rate": 1.9851036660663427e-05, |
| "loss": 1.5293, |
| "step": 22620 |
| }, |
| { |
| "epoch": 854.3396226415094, |
| "grad_norm": 1.6955187366248636, |
| "learning_rate": 1.9792304300182305e-05, |
| "loss": 1.5488, |
| "step": 22640 |
| }, |
| { |
| "epoch": 855.0943396226415, |
| "grad_norm": 1.429545844614554, |
| "learning_rate": 1.9733659759196588e-05, |
| "loss": 1.533, |
| "step": 22660 |
| }, |
| { |
| "epoch": 855.8490566037735, |
| "grad_norm": 1.3624588099774164, |
| "learning_rate": 1.967510326450757e-05, |
| "loss": 1.5257, |
| "step": 22680 |
| }, |
| { |
| "epoch": 856.6037735849056, |
| "grad_norm": 1.4701659884745055, |
| "learning_rate": 1.9616635042575986e-05, |
| "loss": 1.5579, |
| "step": 22700 |
| }, |
| { |
| "epoch": 857.3584905660377, |
| "grad_norm": 1.258458227155755, |
| "learning_rate": 1.9558255319521186e-05, |
| "loss": 1.5174, |
| "step": 22720 |
| }, |
| { |
| "epoch": 858.1132075471698, |
| "grad_norm": 1.147380018733113, |
| "learning_rate": 1.9499964321120298e-05, |
| "loss": 1.5483, |
| "step": 22740 |
| }, |
| { |
| "epoch": 858.8679245283018, |
| "grad_norm": 1.427160544906616, |
| "learning_rate": 1.9441762272807296e-05, |
| "loss": 1.53, |
| "step": 22760 |
| }, |
| { |
| "epoch": 859.622641509434, |
| "grad_norm": 1.480555621655005, |
| "learning_rate": 1.9383649399672136e-05, |
| "loss": 1.5431, |
| "step": 22780 |
| }, |
| { |
| "epoch": 860.377358490566, |
| "grad_norm": 1.6140763796883943, |
| "learning_rate": 1.9325625926459906e-05, |
| "loss": 1.5372, |
| "step": 22800 |
| }, |
| { |
| "epoch": 861.1320754716982, |
| "grad_norm": 1.3659868727706357, |
| "learning_rate": 1.9267692077569966e-05, |
| "loss": 1.5693, |
| "step": 22820 |
| }, |
| { |
| "epoch": 861.8867924528302, |
| "grad_norm": 1.668704322839176, |
| "learning_rate": 1.9209848077055063e-05, |
| "loss": 1.5491, |
| "step": 22840 |
| }, |
| { |
| "epoch": 862.6415094339623, |
| "grad_norm": 1.6416845244091214, |
| "learning_rate": 1.915209414862045e-05, |
| "loss": 1.5449, |
| "step": 22860 |
| }, |
| { |
| "epoch": 863.3962264150944, |
| "grad_norm": 1.5619688603918687, |
| "learning_rate": 1.9094430515623036e-05, |
| "loss": 1.5109, |
| "step": 22880 |
| }, |
| { |
| "epoch": 864.1509433962265, |
| "grad_norm": 1.5251429637162535, |
| "learning_rate": 1.9036857401070517e-05, |
| "loss": 1.5358, |
| "step": 22900 |
| }, |
| { |
| "epoch": 864.9056603773585, |
| "grad_norm": 1.6195136008209567, |
| "learning_rate": 1.8979375027620553e-05, |
| "loss": 1.5167, |
| "step": 22920 |
| }, |
| { |
| "epoch": 865.6603773584906, |
| "grad_norm": 1.4453402703839808, |
| "learning_rate": 1.8921983617579843e-05, |
| "loss": 1.5345, |
| "step": 22940 |
| }, |
| { |
| "epoch": 866.4150943396227, |
| "grad_norm": 1.6142287693511135, |
| "learning_rate": 1.8864683392903296e-05, |
| "loss": 1.5427, |
| "step": 22960 |
| }, |
| { |
| "epoch": 867.1698113207547, |
| "grad_norm": 1.4589091367603184, |
| "learning_rate": 1.880747457519317e-05, |
| "loss": 1.4945, |
| "step": 22980 |
| }, |
| { |
| "epoch": 867.9245283018868, |
| "grad_norm": 1.485668957375296, |
| "learning_rate": 1.8750357385698233e-05, |
| "loss": 1.5278, |
| "step": 23000 |
| }, |
| { |
| "epoch": 868.6792452830189, |
| "grad_norm": 1.4865684774055008, |
| "learning_rate": 1.8693332045312905e-05, |
| "loss": 1.5178, |
| "step": 23020 |
| }, |
| { |
| "epoch": 869.433962264151, |
| "grad_norm": 1.6955473002125137, |
| "learning_rate": 1.8636398774576337e-05, |
| "loss": 1.5485, |
| "step": 23040 |
| }, |
| { |
| "epoch": 870.188679245283, |
| "grad_norm": 1.5715186186512253, |
| "learning_rate": 1.857955779367166e-05, |
| "loss": 1.5192, |
| "step": 23060 |
| }, |
| { |
| "epoch": 870.9433962264151, |
| "grad_norm": 1.5717069835325073, |
| "learning_rate": 1.8522809322425036e-05, |
| "loss": 1.5106, |
| "step": 23080 |
| }, |
| { |
| "epoch": 871.6981132075472, |
| "grad_norm": 1.3775027498551788, |
| "learning_rate": 1.8466153580304923e-05, |
| "loss": 1.5255, |
| "step": 23100 |
| }, |
| { |
| "epoch": 872.4528301886793, |
| "grad_norm": 1.7060704667189681, |
| "learning_rate": 1.8409590786421106e-05, |
| "loss": 1.5152, |
| "step": 23120 |
| }, |
| { |
| "epoch": 873.2075471698113, |
| "grad_norm": 1.3772746674273528, |
| "learning_rate": 1.8353121159523913e-05, |
| "loss": 1.4952, |
| "step": 23140 |
| }, |
| { |
| "epoch": 873.9622641509434, |
| "grad_norm": 1.6021480905291907, |
| "learning_rate": 1.8296744918003365e-05, |
| "loss": 1.5548, |
| "step": 23160 |
| }, |
| { |
| "epoch": 874.7169811320755, |
| "grad_norm": 1.6510954563611369, |
| "learning_rate": 1.8240462279888328e-05, |
| "loss": 1.5341, |
| "step": 23180 |
| }, |
| { |
| "epoch": 875.4716981132076, |
| "grad_norm": 1.5525128595509998, |
| "learning_rate": 1.8184273462845678e-05, |
| "loss": 1.5399, |
| "step": 23200 |
| }, |
| { |
| "epoch": 876.2264150943396, |
| "grad_norm": 1.3584051699815205, |
| "learning_rate": 1.812817868417943e-05, |
| "loss": 1.5245, |
| "step": 23220 |
| }, |
| { |
| "epoch": 876.9811320754717, |
| "grad_norm": 1.909931733744526, |
| "learning_rate": 1.8072178160829906e-05, |
| "loss": 1.5333, |
| "step": 23240 |
| }, |
| { |
| "epoch": 877.7358490566038, |
| "grad_norm": 1.7102569423853409, |
| "learning_rate": 1.8016272109372925e-05, |
| "loss": 1.5131, |
| "step": 23260 |
| }, |
| { |
| "epoch": 878.4905660377359, |
| "grad_norm": 2.4326218341752384, |
| "learning_rate": 1.7960460746018958e-05, |
| "loss": 1.4983, |
| "step": 23280 |
| }, |
| { |
| "epoch": 879.2452830188679, |
| "grad_norm": 1.6888708257619338, |
| "learning_rate": 1.790474428661225e-05, |
| "loss": 1.5268, |
| "step": 23300 |
| }, |
| { |
| "epoch": 880.0, |
| "grad_norm": 1.4793278776392822, |
| "learning_rate": 1.784912294663003e-05, |
| "loss": 1.5144, |
| "step": 23320 |
| }, |
| { |
| "epoch": 880.7547169811321, |
| "grad_norm": 1.3797110952325906, |
| "learning_rate": 1.7793596941181667e-05, |
| "loss": 1.5224, |
| "step": 23340 |
| }, |
| { |
| "epoch": 881.5094339622641, |
| "grad_norm": 1.5055338530715117, |
| "learning_rate": 1.7738166485007843e-05, |
| "loss": 1.5276, |
| "step": 23360 |
| }, |
| { |
| "epoch": 882.2641509433962, |
| "grad_norm": 1.3850071229139178, |
| "learning_rate": 1.768283179247969e-05, |
| "loss": 1.5216, |
| "step": 23380 |
| }, |
| { |
| "epoch": 883.0188679245283, |
| "grad_norm": 1.4681066166997387, |
| "learning_rate": 1.7627593077597997e-05, |
| "loss": 1.534, |
| "step": 23400 |
| }, |
| { |
| "epoch": 883.7735849056604, |
| "grad_norm": 1.5242995737679692, |
| "learning_rate": 1.7572450553992356e-05, |
| "loss": 1.4992, |
| "step": 23420 |
| }, |
| { |
| "epoch": 884.5283018867924, |
| "grad_norm": 1.642787390621851, |
| "learning_rate": 1.751740443492039e-05, |
| "loss": 1.5002, |
| "step": 23440 |
| }, |
| { |
| "epoch": 885.2830188679245, |
| "grad_norm": 1.490074296578881, |
| "learning_rate": 1.7462454933266846e-05, |
| "loss": 1.5211, |
| "step": 23460 |
| }, |
| { |
| "epoch": 886.0377358490566, |
| "grad_norm": 1.5694629977285655, |
| "learning_rate": 1.740760226154283e-05, |
| "loss": 1.5335, |
| "step": 23480 |
| }, |
| { |
| "epoch": 886.7924528301887, |
| "grad_norm": 1.6846894322403163, |
| "learning_rate": 1.7352846631884956e-05, |
| "loss": 1.4995, |
| "step": 23500 |
| }, |
| { |
| "epoch": 887.5471698113207, |
| "grad_norm": 1.4525398790667088, |
| "learning_rate": 1.7298188256054564e-05, |
| "loss": 1.4957, |
| "step": 23520 |
| }, |
| { |
| "epoch": 888.3018867924528, |
| "grad_norm": 1.635106498771857, |
| "learning_rate": 1.7243627345436874e-05, |
| "loss": 1.5271, |
| "step": 23540 |
| }, |
| { |
| "epoch": 889.0566037735849, |
| "grad_norm": 1.4587656230559394, |
| "learning_rate": 1.7189164111040147e-05, |
| "loss": 1.501, |
| "step": 23560 |
| }, |
| { |
| "epoch": 889.811320754717, |
| "grad_norm": 1.5410070982779924, |
| "learning_rate": 1.71347987634949e-05, |
| "loss": 1.4982, |
| "step": 23580 |
| }, |
| { |
| "epoch": 890.566037735849, |
| "grad_norm": 1.5645035336411055, |
| "learning_rate": 1.708053151305308e-05, |
| "loss": 1.5002, |
| "step": 23600 |
| }, |
| { |
| "epoch": 891.3207547169811, |
| "grad_norm": 1.3307742805961782, |
| "learning_rate": 1.702636256958728e-05, |
| "loss": 1.5184, |
| "step": 23620 |
| }, |
| { |
| "epoch": 892.0754716981132, |
| "grad_norm": 1.6962843737118656, |
| "learning_rate": 1.6972292142589877e-05, |
| "loss": 1.5107, |
| "step": 23640 |
| }, |
| { |
| "epoch": 892.8301886792453, |
| "grad_norm": 1.8950680189724871, |
| "learning_rate": 1.6918320441172233e-05, |
| "loss": 1.517, |
| "step": 23660 |
| }, |
| { |
| "epoch": 893.5849056603773, |
| "grad_norm": 1.7479434721374532, |
| "learning_rate": 1.686444767406395e-05, |
| "loss": 1.5051, |
| "step": 23680 |
| }, |
| { |
| "epoch": 894.3396226415094, |
| "grad_norm": 1.8611101959164753, |
| "learning_rate": 1.6810674049611953e-05, |
| "loss": 1.5063, |
| "step": 23700 |
| }, |
| { |
| "epoch": 895.0943396226415, |
| "grad_norm": 1.5841028344361991, |
| "learning_rate": 1.67569997757798e-05, |
| "loss": 1.481, |
| "step": 23720 |
| }, |
| { |
| "epoch": 895.8490566037735, |
| "grad_norm": 1.5025051335412982, |
| "learning_rate": 1.6703425060146778e-05, |
| "loss": 1.5253, |
| "step": 23740 |
| }, |
| { |
| "epoch": 896.6037735849056, |
| "grad_norm": 2.8439948944917757, |
| "learning_rate": 1.6649950109907165e-05, |
| "loss": 1.5216, |
| "step": 23760 |
| }, |
| { |
| "epoch": 897.3584905660377, |
| "grad_norm": 1.6268608502019901, |
| "learning_rate": 1.6596575131869387e-05, |
| "loss": 1.5334, |
| "step": 23780 |
| }, |
| { |
| "epoch": 898.1132075471698, |
| "grad_norm": 1.4759450457116179, |
| "learning_rate": 1.6543300332455273e-05, |
| "loss": 1.5007, |
| "step": 23800 |
| }, |
| { |
| "epoch": 898.8679245283018, |
| "grad_norm": 1.4818248018036755, |
| "learning_rate": 1.6490125917699203e-05, |
| "loss": 1.4973, |
| "step": 23820 |
| }, |
| { |
| "epoch": 899.622641509434, |
| "grad_norm": 1.548616527993675, |
| "learning_rate": 1.6437052093247303e-05, |
| "loss": 1.517, |
| "step": 23840 |
| }, |
| { |
| "epoch": 900.377358490566, |
| "grad_norm": 1.5445734121981956, |
| "learning_rate": 1.6384079064356744e-05, |
| "loss": 1.521, |
| "step": 23860 |
| }, |
| { |
| "epoch": 901.1320754716982, |
| "grad_norm": 1.5970555623190617, |
| "learning_rate": 1.6331207035894806e-05, |
| "loss": 1.5172, |
| "step": 23880 |
| }, |
| { |
| "epoch": 901.8867924528302, |
| "grad_norm": 1.389904429038452, |
| "learning_rate": 1.6278436212338226e-05, |
| "loss": 1.4987, |
| "step": 23900 |
| }, |
| { |
| "epoch": 902.6415094339623, |
| "grad_norm": 1.3455191149235926, |
| "learning_rate": 1.62257667977723e-05, |
| "loss": 1.5047, |
| "step": 23920 |
| }, |
| { |
| "epoch": 903.3962264150944, |
| "grad_norm": 1.4729168638466097, |
| "learning_rate": 1.6173198995890152e-05, |
| "loss": 1.5032, |
| "step": 23940 |
| }, |
| { |
| "epoch": 904.1509433962265, |
| "grad_norm": 1.5230989764955487, |
| "learning_rate": 1.612073300999191e-05, |
| "loss": 1.5244, |
| "step": 23960 |
| }, |
| { |
| "epoch": 904.9056603773585, |
| "grad_norm": 1.4504907356107584, |
| "learning_rate": 1.6068369042983987e-05, |
| "loss": 1.5072, |
| "step": 23980 |
| }, |
| { |
| "epoch": 905.6603773584906, |
| "grad_norm": 1.3570035581449431, |
| "learning_rate": 1.601610729737819e-05, |
| "loss": 1.5002, |
| "step": 24000 |
| }, |
| { |
| "epoch": 906.4150943396227, |
| "grad_norm": 1.408532335123701, |
| "learning_rate": 1.5963947975291056e-05, |
| "loss": 1.4974, |
| "step": 24020 |
| }, |
| { |
| "epoch": 907.1698113207547, |
| "grad_norm": 1.6703383627319723, |
| "learning_rate": 1.591189127844295e-05, |
| "loss": 1.5056, |
| "step": 24040 |
| }, |
| { |
| "epoch": 907.9245283018868, |
| "grad_norm": 1.4548307957349456, |
| "learning_rate": 1.5859937408157403e-05, |
| "loss": 1.4836, |
| "step": 24060 |
| }, |
| { |
| "epoch": 908.6792452830189, |
| "grad_norm": 1.622725332424491, |
| "learning_rate": 1.5808086565360235e-05, |
| "loss": 1.4652, |
| "step": 24080 |
| }, |
| { |
| "epoch": 909.433962264151, |
| "grad_norm": 1.9382762093036214, |
| "learning_rate": 1.575633895057883e-05, |
| "loss": 1.507, |
| "step": 24100 |
| }, |
| { |
| "epoch": 910.188679245283, |
| "grad_norm": 5.171486198720905, |
| "learning_rate": 1.5704694763941345e-05, |
| "loss": 1.4918, |
| "step": 24120 |
| }, |
| { |
| "epoch": 910.9433962264151, |
| "grad_norm": 1.318697524518072, |
| "learning_rate": 1.5653154205175963e-05, |
| "loss": 1.485, |
| "step": 24140 |
| }, |
| { |
| "epoch": 911.6981132075472, |
| "grad_norm": 1.640456368314345, |
| "learning_rate": 1.5601717473610066e-05, |
| "loss": 1.493, |
| "step": 24160 |
| }, |
| { |
| "epoch": 912.4528301886793, |
| "grad_norm": 1.7783411819352481, |
| "learning_rate": 1.555038476816951e-05, |
| "loss": 1.5233, |
| "step": 24180 |
| }, |
| { |
| "epoch": 913.2075471698113, |
| "grad_norm": 1.8560943552673308, |
| "learning_rate": 1.5499156287377857e-05, |
| "loss": 1.4845, |
| "step": 24200 |
| }, |
| { |
| "epoch": 913.9622641509434, |
| "grad_norm": 1.3922157561757162, |
| "learning_rate": 1.544803222935555e-05, |
| "loss": 1.513, |
| "step": 24220 |
| }, |
| { |
| "epoch": 914.7169811320755, |
| "grad_norm": 1.5964166307266414, |
| "learning_rate": 1.5397012791819248e-05, |
| "loss": 1.5029, |
| "step": 24240 |
| }, |
| { |
| "epoch": 915.4716981132076, |
| "grad_norm": 1.581271765982569, |
| "learning_rate": 1.5346098172080947e-05, |
| "loss": 1.5139, |
| "step": 24260 |
| }, |
| { |
| "epoch": 916.2264150943396, |
| "grad_norm": 1.3829789961056094, |
| "learning_rate": 1.5295288567047304e-05, |
| "loss": 1.4727, |
| "step": 24280 |
| }, |
| { |
| "epoch": 916.9811320754717, |
| "grad_norm": 1.595484488791353, |
| "learning_rate": 1.5244584173218816e-05, |
| "loss": 1.4764, |
| "step": 24300 |
| }, |
| { |
| "epoch": 917.7358490566038, |
| "grad_norm": 1.9817110984943331, |
| "learning_rate": 1.5193985186689126e-05, |
| "loss": 1.488, |
| "step": 24320 |
| }, |
| { |
| "epoch": 918.4905660377359, |
| "grad_norm": 1.5041365073617188, |
| "learning_rate": 1.5143491803144183e-05, |
| "loss": 1.4823, |
| "step": 24340 |
| }, |
| { |
| "epoch": 919.2452830188679, |
| "grad_norm": 1.623717820636255, |
| "learning_rate": 1.5093104217861574e-05, |
| "loss": 1.4711, |
| "step": 24360 |
| }, |
| { |
| "epoch": 920.0, |
| "grad_norm": 1.4153896302283269, |
| "learning_rate": 1.5042822625709687e-05, |
| "loss": 1.4729, |
| "step": 24380 |
| }, |
| { |
| "epoch": 920.7547169811321, |
| "grad_norm": 1.8914526627670851, |
| "learning_rate": 1.499264722114699e-05, |
| "loss": 1.4744, |
| "step": 24400 |
| }, |
| { |
| "epoch": 921.5094339622641, |
| "grad_norm": 1.3579367015171855, |
| "learning_rate": 1.494257819822132e-05, |
| "loss": 1.5068, |
| "step": 24420 |
| }, |
| { |
| "epoch": 922.2641509433962, |
| "grad_norm": 1.7241565511209502, |
| "learning_rate": 1.4892615750569062e-05, |
| "loss": 1.4629, |
| "step": 24440 |
| }, |
| { |
| "epoch": 923.0188679245283, |
| "grad_norm": 1.6169769566812962, |
| "learning_rate": 1.4842760071414446e-05, |
| "loss": 1.4987, |
| "step": 24460 |
| }, |
| { |
| "epoch": 923.7735849056604, |
| "grad_norm": 1.9954016377464863, |
| "learning_rate": 1.4793011353568764e-05, |
| "loss": 1.5263, |
| "step": 24480 |
| }, |
| { |
| "epoch": 924.5283018867924, |
| "grad_norm": 1.4779174235189176, |
| "learning_rate": 1.4743369789429686e-05, |
| "loss": 1.4769, |
| "step": 24500 |
| }, |
| { |
| "epoch": 925.2830188679245, |
| "grad_norm": 1.7019641943900714, |
| "learning_rate": 1.4693835570980468e-05, |
| "loss": 1.4749, |
| "step": 24520 |
| }, |
| { |
| "epoch": 926.0377358490566, |
| "grad_norm": 1.5323014302848716, |
| "learning_rate": 1.4644408889789189e-05, |
| "loss": 1.4984, |
| "step": 24540 |
| }, |
| { |
| "epoch": 926.7924528301887, |
| "grad_norm": 1.446942162217049, |
| "learning_rate": 1.4595089937008062e-05, |
| "loss": 1.4998, |
| "step": 24560 |
| }, |
| { |
| "epoch": 927.5471698113207, |
| "grad_norm": 1.3609927181175356, |
| "learning_rate": 1.4545878903372663e-05, |
| "loss": 1.4765, |
| "step": 24580 |
| }, |
| { |
| "epoch": 928.3018867924528, |
| "grad_norm": 1.4584582755904496, |
| "learning_rate": 1.4496775979201224e-05, |
| "loss": 1.4828, |
| "step": 24600 |
| }, |
| { |
| "epoch": 929.0566037735849, |
| "grad_norm": 1.4254389674669559, |
| "learning_rate": 1.444778135439385e-05, |
| "loss": 1.5041, |
| "step": 24620 |
| }, |
| { |
| "epoch": 929.811320754717, |
| "grad_norm": 1.5655038573484212, |
| "learning_rate": 1.4398895218431825e-05, |
| "loss": 1.4995, |
| "step": 24640 |
| }, |
| { |
| "epoch": 930.566037735849, |
| "grad_norm": 1.623569066402965, |
| "learning_rate": 1.4350117760376843e-05, |
| "loss": 1.4966, |
| "step": 24660 |
| }, |
| { |
| "epoch": 931.3207547169811, |
| "grad_norm": 1.594778698950599, |
| "learning_rate": 1.4301449168870325e-05, |
| "loss": 1.4899, |
| "step": 24680 |
| }, |
| { |
| "epoch": 932.0754716981132, |
| "grad_norm": 1.7627482209727463, |
| "learning_rate": 1.4252889632132667e-05, |
| "loss": 1.4784, |
| "step": 24700 |
| }, |
| { |
| "epoch": 932.8301886792453, |
| "grad_norm": 1.5595702425460922, |
| "learning_rate": 1.4204439337962486e-05, |
| "loss": 1.4962, |
| "step": 24720 |
| }, |
| { |
| "epoch": 933.5849056603773, |
| "grad_norm": 1.6175712268221147, |
| "learning_rate": 1.4156098473735903e-05, |
| "loss": 1.4858, |
| "step": 24740 |
| }, |
| { |
| "epoch": 934.3396226415094, |
| "grad_norm": 1.5528087670883148, |
| "learning_rate": 1.4107867226405882e-05, |
| "loss": 1.4959, |
| "step": 24760 |
| }, |
| { |
| "epoch": 935.0943396226415, |
| "grad_norm": 1.5105693139489524, |
| "learning_rate": 1.4059745782501403e-05, |
| "loss": 1.4694, |
| "step": 24780 |
| }, |
| { |
| "epoch": 935.8490566037735, |
| "grad_norm": 1.424625384350829, |
| "learning_rate": 1.4011734328126825e-05, |
| "loss": 1.4531, |
| "step": 24800 |
| }, |
| { |
| "epoch": 936.6037735849056, |
| "grad_norm": 1.921412092336305, |
| "learning_rate": 1.3963833048961103e-05, |
| "loss": 1.5003, |
| "step": 24820 |
| }, |
| { |
| "epoch": 937.3584905660377, |
| "grad_norm": 1.5289456190701718, |
| "learning_rate": 1.3916042130257145e-05, |
| "loss": 1.5177, |
| "step": 24840 |
| }, |
| { |
| "epoch": 938.1132075471698, |
| "grad_norm": 1.410017115369323, |
| "learning_rate": 1.3868361756841036e-05, |
| "loss": 1.4957, |
| "step": 24860 |
| }, |
| { |
| "epoch": 938.8679245283018, |
| "grad_norm": 1.3741594118478162, |
| "learning_rate": 1.3820792113111323e-05, |
| "loss": 1.4876, |
| "step": 24880 |
| }, |
| { |
| "epoch": 939.622641509434, |
| "grad_norm": 1.5111524219290895, |
| "learning_rate": 1.377333338303833e-05, |
| "loss": 1.4789, |
| "step": 24900 |
| }, |
| { |
| "epoch": 940.377358490566, |
| "grad_norm": 1.2690279082779223, |
| "learning_rate": 1.3725985750163418e-05, |
| "loss": 1.4851, |
| "step": 24920 |
| }, |
| { |
| "epoch": 941.1320754716982, |
| "grad_norm": 1.5760629816984877, |
| "learning_rate": 1.3678749397598337e-05, |
| "loss": 1.4993, |
| "step": 24940 |
| }, |
| { |
| "epoch": 941.8867924528302, |
| "grad_norm": 1.5719387109025893, |
| "learning_rate": 1.363162450802443e-05, |
| "loss": 1.4654, |
| "step": 24960 |
| }, |
| { |
| "epoch": 942.6415094339623, |
| "grad_norm": 1.51578687737706, |
| "learning_rate": 1.3584611263691974e-05, |
| "loss": 1.4985, |
| "step": 24980 |
| }, |
| { |
| "epoch": 943.3962264150944, |
| "grad_norm": 1.5864417766142165, |
| "learning_rate": 1.353770984641948e-05, |
| "loss": 1.4891, |
| "step": 25000 |
| }, |
| { |
| "epoch": 944.1509433962265, |
| "grad_norm": 1.5330683898736195, |
| "learning_rate": 1.3490920437592985e-05, |
| "loss": 1.4928, |
| "step": 25020 |
| }, |
| { |
| "epoch": 944.9056603773585, |
| "grad_norm": 1.8666313722767156, |
| "learning_rate": 1.344424321816535e-05, |
| "loss": 1.4558, |
| "step": 25040 |
| }, |
| { |
| "epoch": 945.6603773584906, |
| "grad_norm": 1.4103376741909914, |
| "learning_rate": 1.3397678368655534e-05, |
| "loss": 1.467, |
| "step": 25060 |
| }, |
| { |
| "epoch": 946.4150943396227, |
| "grad_norm": 1.6978974580611665, |
| "learning_rate": 1.3351226069147934e-05, |
| "loss": 1.4586, |
| "step": 25080 |
| }, |
| { |
| "epoch": 947.1698113207547, |
| "grad_norm": 1.3043741098462962, |
| "learning_rate": 1.3304886499291653e-05, |
| "loss": 1.4651, |
| "step": 25100 |
| }, |
| { |
| "epoch": 947.9245283018868, |
| "grad_norm": 1.5721530761043376, |
| "learning_rate": 1.3258659838299863e-05, |
| "loss": 1.4851, |
| "step": 25120 |
| }, |
| { |
| "epoch": 948.6792452830189, |
| "grad_norm": 2.445174125656233, |
| "learning_rate": 1.3212546264949038e-05, |
| "loss": 1.4861, |
| "step": 25140 |
| }, |
| { |
| "epoch": 949.433962264151, |
| "grad_norm": 3.0455557993861584, |
| "learning_rate": 1.3166545957578312e-05, |
| "loss": 1.4956, |
| "step": 25160 |
| }, |
| { |
| "epoch": 950.188679245283, |
| "grad_norm": 1.481231036001675, |
| "learning_rate": 1.3120659094088763e-05, |
| "loss": 1.4786, |
| "step": 25180 |
| }, |
| { |
| "epoch": 950.9433962264151, |
| "grad_norm": 1.6177001101633584, |
| "learning_rate": 1.3074885851942757e-05, |
| "loss": 1.4691, |
| "step": 25200 |
| }, |
| { |
| "epoch": 951.6981132075472, |
| "grad_norm": 1.7370265253795278, |
| "learning_rate": 1.3029226408163237e-05, |
| "loss": 1.456, |
| "step": 25220 |
| }, |
| { |
| "epoch": 952.4528301886793, |
| "grad_norm": 1.476098649785593, |
| "learning_rate": 1.2983680939333043e-05, |
| "loss": 1.457, |
| "step": 25240 |
| }, |
| { |
| "epoch": 953.2075471698113, |
| "grad_norm": 1.9700691780666086, |
| "learning_rate": 1.2938249621594219e-05, |
| "loss": 1.4916, |
| "step": 25260 |
| }, |
| { |
| "epoch": 953.9622641509434, |
| "grad_norm": 1.4124078828516038, |
| "learning_rate": 1.289293263064734e-05, |
| "loss": 1.4442, |
| "step": 25280 |
| }, |
| { |
| "epoch": 954.7169811320755, |
| "grad_norm": 1.609015057343637, |
| "learning_rate": 1.284773014175086e-05, |
| "loss": 1.4808, |
| "step": 25300 |
| }, |
| { |
| "epoch": 955.4716981132076, |
| "grad_norm": 1.545457288749583, |
| "learning_rate": 1.2802642329720385e-05, |
| "loss": 1.4388, |
| "step": 25320 |
| }, |
| { |
| "epoch": 956.2264150943396, |
| "grad_norm": 1.4137648487617847, |
| "learning_rate": 1.275766936892803e-05, |
| "loss": 1.4558, |
| "step": 25340 |
| }, |
| { |
| "epoch": 956.9811320754717, |
| "grad_norm": 1.7375121010804517, |
| "learning_rate": 1.2712811433301723e-05, |
| "loss": 1.4864, |
| "step": 25360 |
| }, |
| { |
| "epoch": 957.7358490566038, |
| "grad_norm": 2.170614678870875, |
| "learning_rate": 1.2668068696324572e-05, |
| "loss": 1.4668, |
| "step": 25380 |
| }, |
| { |
| "epoch": 958.4905660377359, |
| "grad_norm": 1.3921099231821001, |
| "learning_rate": 1.2623441331034153e-05, |
| "loss": 1.466, |
| "step": 25400 |
| }, |
| { |
| "epoch": 959.2452830188679, |
| "grad_norm": 1.763881906266782, |
| "learning_rate": 1.2578929510021851e-05, |
| "loss": 1.4556, |
| "step": 25420 |
| }, |
| { |
| "epoch": 960.0, |
| "grad_norm": 1.6251732366885816, |
| "learning_rate": 1.2534533405432192e-05, |
| "loss": 1.4831, |
| "step": 25440 |
| }, |
| { |
| "epoch": 960.7547169811321, |
| "grad_norm": 1.35568804382613, |
| "learning_rate": 1.2490253188962184e-05, |
| "loss": 1.4637, |
| "step": 25460 |
| }, |
| { |
| "epoch": 961.5094339622641, |
| "grad_norm": 1.5192686857357145, |
| "learning_rate": 1.2446089031860666e-05, |
| "loss": 1.5039, |
| "step": 25480 |
| }, |
| { |
| "epoch": 962.2641509433962, |
| "grad_norm": 1.645823339942095, |
| "learning_rate": 1.2402041104927622e-05, |
| "loss": 1.4643, |
| "step": 25500 |
| }, |
| { |
| "epoch": 963.0188679245283, |
| "grad_norm": 1.5266645922223165, |
| "learning_rate": 1.2358109578513502e-05, |
| "loss": 1.4609, |
| "step": 25520 |
| }, |
| { |
| "epoch": 963.7735849056604, |
| "grad_norm": 2.012096934939658, |
| "learning_rate": 1.2314294622518637e-05, |
| "loss": 1.4707, |
| "step": 25540 |
| }, |
| { |
| "epoch": 964.5283018867924, |
| "grad_norm": 1.6019652732905527, |
| "learning_rate": 1.227059640639251e-05, |
| "loss": 1.4624, |
| "step": 25560 |
| }, |
| { |
| "epoch": 965.2830188679245, |
| "grad_norm": 1.5459039987734797, |
| "learning_rate": 1.2227015099133119e-05, |
| "loss": 1.4462, |
| "step": 25580 |
| }, |
| { |
| "epoch": 966.0377358490566, |
| "grad_norm": 1.4581354369376407, |
| "learning_rate": 1.2183550869286346e-05, |
| "loss": 1.4602, |
| "step": 25600 |
| }, |
| { |
| "epoch": 966.7924528301887, |
| "grad_norm": 1.5627139982974774, |
| "learning_rate": 1.2140203884945257e-05, |
| "loss": 1.4558, |
| "step": 25620 |
| }, |
| { |
| "epoch": 967.5471698113207, |
| "grad_norm": 1.6163383081813927, |
| "learning_rate": 1.2096974313749544e-05, |
| "loss": 1.442, |
| "step": 25640 |
| }, |
| { |
| "epoch": 968.3018867924528, |
| "grad_norm": 1.4708485221948149, |
| "learning_rate": 1.2053862322884756e-05, |
| "loss": 1.4449, |
| "step": 25660 |
| }, |
| { |
| "epoch": 969.0566037735849, |
| "grad_norm": 1.457232110275896, |
| "learning_rate": 1.2010868079081735e-05, |
| "loss": 1.4714, |
| "step": 25680 |
| }, |
| { |
| "epoch": 969.811320754717, |
| "grad_norm": 2.130030633684405, |
| "learning_rate": 1.1967991748615972e-05, |
| "loss": 1.4672, |
| "step": 25700 |
| }, |
| { |
| "epoch": 970.566037735849, |
| "grad_norm": 1.6585416945015101, |
| "learning_rate": 1.1925233497306898e-05, |
| "loss": 1.4582, |
| "step": 25720 |
| }, |
| { |
| "epoch": 971.3207547169811, |
| "grad_norm": 1.696646559562477, |
| "learning_rate": 1.1882593490517333e-05, |
| "loss": 1.4616, |
| "step": 25740 |
| }, |
| { |
| "epoch": 972.0754716981132, |
| "grad_norm": 1.8347228047889477, |
| "learning_rate": 1.1840071893152767e-05, |
| "loss": 1.4412, |
| "step": 25760 |
| }, |
| { |
| "epoch": 972.8301886792453, |
| "grad_norm": 1.5105738469091443, |
| "learning_rate": 1.1797668869660753e-05, |
| "loss": 1.4476, |
| "step": 25780 |
| }, |
| { |
| "epoch": 973.5849056603773, |
| "grad_norm": 1.6402649798470197, |
| "learning_rate": 1.1755384584030287e-05, |
| "loss": 1.4458, |
| "step": 25800 |
| }, |
| { |
| "epoch": 974.3396226415094, |
| "grad_norm": 1.4580507747280478, |
| "learning_rate": 1.171321919979116e-05, |
| "loss": 1.4414, |
| "step": 25820 |
| }, |
| { |
| "epoch": 975.0943396226415, |
| "grad_norm": 1.8999226743757298, |
| "learning_rate": 1.1671172880013328e-05, |
| "loss": 1.4501, |
| "step": 25840 |
| }, |
| { |
| "epoch": 975.8490566037735, |
| "grad_norm": 1.3767670402035495, |
| "learning_rate": 1.1629245787306247e-05, |
| "loss": 1.4422, |
| "step": 25860 |
| }, |
| { |
| "epoch": 976.6037735849056, |
| "grad_norm": 1.3303378991562944, |
| "learning_rate": 1.158743808381832e-05, |
| "loss": 1.437, |
| "step": 25880 |
| }, |
| { |
| "epoch": 977.3584905660377, |
| "grad_norm": 1.5011235086965091, |
| "learning_rate": 1.1545749931236199e-05, |
| "loss": 1.4225, |
| "step": 25900 |
| }, |
| { |
| "epoch": 978.1132075471698, |
| "grad_norm": 1.7853875208460404, |
| "learning_rate": 1.1504181490784197e-05, |
| "loss": 1.4405, |
| "step": 25920 |
| }, |
| { |
| "epoch": 978.8679245283018, |
| "grad_norm": 1.4852022947554018, |
| "learning_rate": 1.1462732923223643e-05, |
| "loss": 1.4197, |
| "step": 25940 |
| }, |
| { |
| "epoch": 979.622641509434, |
| "grad_norm": 1.492057926353613, |
| "learning_rate": 1.1421404388852275e-05, |
| "loss": 1.4516, |
| "step": 25960 |
| }, |
| { |
| "epoch": 980.377358490566, |
| "grad_norm": 1.8767944270145316, |
| "learning_rate": 1.1380196047503614e-05, |
| "loss": 1.4613, |
| "step": 25980 |
| }, |
| { |
| "epoch": 981.1320754716982, |
| "grad_norm": 1.5723288438267475, |
| "learning_rate": 1.1339108058546365e-05, |
| "loss": 1.4636, |
| "step": 26000 |
| }, |
| { |
| "epoch": 981.8867924528302, |
| "grad_norm": 1.4572390965943247, |
| "learning_rate": 1.1298140580883752e-05, |
| "loss": 1.4291, |
| "step": 26020 |
| }, |
| { |
| "epoch": 982.6415094339623, |
| "grad_norm": 2.0340602707703566, |
| "learning_rate": 1.1257293772952971e-05, |
| "loss": 1.4342, |
| "step": 26040 |
| }, |
| { |
| "epoch": 983.3962264150944, |
| "grad_norm": 1.7563358001308935, |
| "learning_rate": 1.1216567792724513e-05, |
| "loss": 1.44, |
| "step": 26060 |
| }, |
| { |
| "epoch": 984.1509433962265, |
| "grad_norm": 1.7195863256249895, |
| "learning_rate": 1.1175962797701585e-05, |
| "loss": 1.473, |
| "step": 26080 |
| }, |
| { |
| "epoch": 984.9056603773585, |
| "grad_norm": 1.5325109929141458, |
| "learning_rate": 1.1135478944919515e-05, |
| "loss": 1.4537, |
| "step": 26100 |
| }, |
| { |
| "epoch": 985.6603773584906, |
| "grad_norm": 1.4246338183010563, |
| "learning_rate": 1.1095116390945116e-05, |
| "loss": 1.4576, |
| "step": 26120 |
| }, |
| { |
| "epoch": 986.4150943396227, |
| "grad_norm": 1.5264334254918077, |
| "learning_rate": 1.1054875291876081e-05, |
| "loss": 1.4355, |
| "step": 26140 |
| }, |
| { |
| "epoch": 987.1698113207547, |
| "grad_norm": 1.7871427472844674, |
| "learning_rate": 1.101475580334039e-05, |
| "loss": 1.4285, |
| "step": 26160 |
| }, |
| { |
| "epoch": 987.9245283018868, |
| "grad_norm": 1.628111810825388, |
| "learning_rate": 1.0974758080495742e-05, |
| "loss": 1.432, |
| "step": 26180 |
| }, |
| { |
| "epoch": 988.6792452830189, |
| "grad_norm": 1.6079918141380485, |
| "learning_rate": 1.0934882278028875e-05, |
| "loss": 1.473, |
| "step": 26200 |
| }, |
| { |
| "epoch": 989.433962264151, |
| "grad_norm": 1.9227955059143975, |
| "learning_rate": 1.0895128550155048e-05, |
| "loss": 1.4319, |
| "step": 26220 |
| }, |
| { |
| "epoch": 990.188679245283, |
| "grad_norm": 1.4777834491856459, |
| "learning_rate": 1.0855497050617383e-05, |
| "loss": 1.4715, |
| "step": 26240 |
| }, |
| { |
| "epoch": 990.9433962264151, |
| "grad_norm": 1.752347342407413, |
| "learning_rate": 1.0815987932686322e-05, |
| "loss": 1.4483, |
| "step": 26260 |
| }, |
| { |
| "epoch": 991.6981132075472, |
| "grad_norm": 1.7965242738400287, |
| "learning_rate": 1.0776601349158992e-05, |
| "loss": 1.445, |
| "step": 26280 |
| }, |
| { |
| "epoch": 992.4528301886793, |
| "grad_norm": 1.6880482866877031, |
| "learning_rate": 1.0737337452358643e-05, |
| "loss": 1.4289, |
| "step": 26300 |
| }, |
| { |
| "epoch": 993.2075471698113, |
| "grad_norm": 1.3587051959850933, |
| "learning_rate": 1.0698196394134027e-05, |
| "loss": 1.4248, |
| "step": 26320 |
| }, |
| { |
| "epoch": 993.9622641509434, |
| "grad_norm": 1.6893835419836905, |
| "learning_rate": 1.0659178325858868e-05, |
| "loss": 1.4593, |
| "step": 26340 |
| }, |
| { |
| "epoch": 994.7169811320755, |
| "grad_norm": 1.6372424305822535, |
| "learning_rate": 1.0620283398431196e-05, |
| "loss": 1.4248, |
| "step": 26360 |
| }, |
| { |
| "epoch": 995.4716981132076, |
| "grad_norm": 1.628959331603337, |
| "learning_rate": 1.0581511762272856e-05, |
| "loss": 1.459, |
| "step": 26380 |
| }, |
| { |
| "epoch": 996.2264150943396, |
| "grad_norm": 1.9899303146490552, |
| "learning_rate": 1.0542863567328837e-05, |
| "loss": 1.4608, |
| "step": 26400 |
| }, |
| { |
| "epoch": 996.9811320754717, |
| "grad_norm": 1.6980987241375505, |
| "learning_rate": 1.0504338963066745e-05, |
| "loss": 1.4489, |
| "step": 26420 |
| }, |
| { |
| "epoch": 997.7358490566038, |
| "grad_norm": 1.791483449843248, |
| "learning_rate": 1.0465938098476226e-05, |
| "loss": 1.4647, |
| "step": 26440 |
| }, |
| { |
| "epoch": 998.4905660377359, |
| "grad_norm": 1.3823874629634854, |
| "learning_rate": 1.0427661122068363e-05, |
| "loss": 1.431, |
| "step": 26460 |
| }, |
| { |
| "epoch": 999.2452830188679, |
| "grad_norm": 1.7547951381187532, |
| "learning_rate": 1.0389508181875114e-05, |
| "loss": 1.4374, |
| "step": 26480 |
| }, |
| { |
| "epoch": 1000.0, |
| "grad_norm": 1.6329317283212297, |
| "learning_rate": 1.035147942544874e-05, |
| "loss": 1.4436, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1000.7547169811321, |
| "grad_norm": 1.482848334089, |
| "learning_rate": 1.0313574999861255e-05, |
| "loss": 1.4263, |
| "step": 26520 |
| }, |
| { |
| "epoch": 1001.5094339622641, |
| "grad_norm": 1.4085297987389735, |
| "learning_rate": 1.027579505170381e-05, |
| "loss": 1.4423, |
| "step": 26540 |
| }, |
| { |
| "epoch": 1002.2641509433962, |
| "grad_norm": 1.586157768854042, |
| "learning_rate": 1.0238139727086178e-05, |
| "loss": 1.4289, |
| "step": 26560 |
| }, |
| { |
| "epoch": 1003.0188679245283, |
| "grad_norm": 1.4910507620311724, |
| "learning_rate": 1.020060917163614e-05, |
| "loss": 1.4555, |
| "step": 26580 |
| }, |
| { |
| "epoch": 1003.7735849056604, |
| "grad_norm": 1.7298473240434828, |
| "learning_rate": 1.0163203530498955e-05, |
| "loss": 1.4176, |
| "step": 26600 |
| }, |
| { |
| "epoch": 1004.5283018867924, |
| "grad_norm": 1.9395741512745615, |
| "learning_rate": 1.0125922948336813e-05, |
| "loss": 1.4297, |
| "step": 26620 |
| }, |
| { |
| "epoch": 1005.2830188679245, |
| "grad_norm": 1.3752095871887702, |
| "learning_rate": 1.0088767569328215e-05, |
| "loss": 1.4224, |
| "step": 26640 |
| }, |
| { |
| "epoch": 1006.0377358490566, |
| "grad_norm": 1.6566420053219757, |
| "learning_rate": 1.0051737537167479e-05, |
| "loss": 1.4416, |
| "step": 26660 |
| }, |
| { |
| "epoch": 1006.7924528301887, |
| "grad_norm": 1.8401842062612699, |
| "learning_rate": 1.001483299506413e-05, |
| "loss": 1.4406, |
| "step": 26680 |
| }, |
| { |
| "epoch": 1007.5471698113207, |
| "grad_norm": 1.5895021822365676, |
| "learning_rate": 9.978054085742407e-06, |
| "loss": 1.4104, |
| "step": 26700 |
| }, |
| { |
| "epoch": 1008.3018867924528, |
| "grad_norm": 1.5495688189805843, |
| "learning_rate": 9.941400951440674e-06, |
| "loss": 1.4446, |
| "step": 26720 |
| }, |
| { |
| "epoch": 1009.0566037735849, |
| "grad_norm": 1.6376917222270109, |
| "learning_rate": 9.904873733910852e-06, |
| "loss": 1.4023, |
| "step": 26740 |
| }, |
| { |
| "epoch": 1009.811320754717, |
| "grad_norm": 1.7729521919831477, |
| "learning_rate": 9.868472574417906e-06, |
| "loss": 1.4409, |
| "step": 26760 |
| }, |
| { |
| "epoch": 1010.566037735849, |
| "grad_norm": 1.5909106157325896, |
| "learning_rate": 9.832197613739278e-06, |
| "loss": 1.4284, |
| "step": 26780 |
| }, |
| { |
| "epoch": 1011.3207547169811, |
| "grad_norm": 1.5416992698357255, |
| "learning_rate": 9.79604899216437e-06, |
| "loss": 1.4165, |
| "step": 26800 |
| }, |
| { |
| "epoch": 1012.0754716981132, |
| "grad_norm": 1.7245150906399498, |
| "learning_rate": 9.760026849493962e-06, |
| "loss": 1.4281, |
| "step": 26820 |
| }, |
| { |
| "epoch": 1012.8301886792453, |
| "grad_norm": 1.8518007110272525, |
| "learning_rate": 9.7241313250397e-06, |
| "loss": 1.4223, |
| "step": 26840 |
| }, |
| { |
| "epoch": 1013.5849056603773, |
| "grad_norm": 1.593106128312966, |
| "learning_rate": 9.688362557623527e-06, |
| "loss": 1.4377, |
| "step": 26860 |
| }, |
| { |
| "epoch": 1014.3396226415094, |
| "grad_norm": 1.6557177655883284, |
| "learning_rate": 9.6527206855772e-06, |
| "loss": 1.4394, |
| "step": 26880 |
| }, |
| { |
| "epoch": 1015.0943396226415, |
| "grad_norm": 1.5950355314495743, |
| "learning_rate": 9.617205846741719e-06, |
| "loss": 1.4506, |
| "step": 26900 |
| }, |
| { |
| "epoch": 1015.8490566037735, |
| "grad_norm": 1.7685274450403552, |
| "learning_rate": 9.58181817846677e-06, |
| "loss": 1.4484, |
| "step": 26920 |
| }, |
| { |
| "epoch": 1016.6037735849056, |
| "grad_norm": 1.4639040403309866, |
| "learning_rate": 9.54655781761023e-06, |
| "loss": 1.4043, |
| "step": 26940 |
| }, |
| { |
| "epoch": 1017.3584905660377, |
| "grad_norm": 1.6074583945207908, |
| "learning_rate": 9.511424900537656e-06, |
| "loss": 1.4197, |
| "step": 26960 |
| }, |
| { |
| "epoch": 1018.1132075471698, |
| "grad_norm": 1.5459146912367183, |
| "learning_rate": 9.476419563121698e-06, |
| "loss": 1.4232, |
| "step": 26980 |
| }, |
| { |
| "epoch": 1018.8679245283018, |
| "grad_norm": 1.6166722954994783, |
| "learning_rate": 9.441541940741613e-06, |
| "loss": 1.4407, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1019.622641509434, |
| "grad_norm": 1.6533674302686083, |
| "learning_rate": 9.406792168282739e-06, |
| "loss": 1.4393, |
| "step": 27020 |
| }, |
| { |
| "epoch": 1020.377358490566, |
| "grad_norm": 2.1409264555789123, |
| "learning_rate": 9.37217038013597e-06, |
| "loss": 1.4507, |
| "step": 27040 |
| }, |
| { |
| "epoch": 1021.1320754716982, |
| "grad_norm": 1.9876202106584275, |
| "learning_rate": 9.337676710197243e-06, |
| "loss": 1.4486, |
| "step": 27060 |
| }, |
| { |
| "epoch": 1021.8867924528302, |
| "grad_norm": 1.6321392819191982, |
| "learning_rate": 9.303311291866996e-06, |
| "loss": 1.4337, |
| "step": 27080 |
| }, |
| { |
| "epoch": 1022.6415094339623, |
| "grad_norm": 1.5614664744291826, |
| "learning_rate": 9.269074258049671e-06, |
| "loss": 1.4245, |
| "step": 27100 |
| }, |
| { |
| "epoch": 1023.3962264150944, |
| "grad_norm": 1.775529049395487, |
| "learning_rate": 9.234965741153195e-06, |
| "loss": 1.4284, |
| "step": 27120 |
| }, |
| { |
| "epoch": 1024.1509433962265, |
| "grad_norm": 1.4430739083306536, |
| "learning_rate": 9.200985873088487e-06, |
| "loss": 1.4235, |
| "step": 27140 |
| }, |
| { |
| "epoch": 1024.9056603773586, |
| "grad_norm": 2.0811882500763255, |
| "learning_rate": 9.167134785268918e-06, |
| "loss": 1.402, |
| "step": 27160 |
| }, |
| { |
| "epoch": 1025.6603773584907, |
| "grad_norm": 1.5403915703954525, |
| "learning_rate": 9.133412608609811e-06, |
| "loss": 1.4302, |
| "step": 27180 |
| }, |
| { |
| "epoch": 1026.4150943396226, |
| "grad_norm": 1.9685065156678565, |
| "learning_rate": 9.099819473527936e-06, |
| "loss": 1.3969, |
| "step": 27200 |
| }, |
| { |
| "epoch": 1027.1698113207547, |
| "grad_norm": 1.5336587010545035, |
| "learning_rate": 9.066355509941036e-06, |
| "loss": 1.428, |
| "step": 27220 |
| }, |
| { |
| "epoch": 1027.9245283018868, |
| "grad_norm": 1.9045363331404057, |
| "learning_rate": 9.033020847267277e-06, |
| "loss": 1.4521, |
| "step": 27240 |
| }, |
| { |
| "epoch": 1028.6792452830189, |
| "grad_norm": 1.7010720746106325, |
| "learning_rate": 8.999815614424768e-06, |
| "loss": 1.4408, |
| "step": 27260 |
| }, |
| { |
| "epoch": 1029.433962264151, |
| "grad_norm": 1.6652770284797922, |
| "learning_rate": 8.966739939831065e-06, |
| "loss": 1.4275, |
| "step": 27280 |
| }, |
| { |
| "epoch": 1030.188679245283, |
| "grad_norm": 1.438920885601344, |
| "learning_rate": 8.933793951402666e-06, |
| "loss": 1.4363, |
| "step": 27300 |
| }, |
| { |
| "epoch": 1030.9433962264152, |
| "grad_norm": 1.523374273868093, |
| "learning_rate": 8.900977776554543e-06, |
| "loss": 1.4178, |
| "step": 27320 |
| }, |
| { |
| "epoch": 1031.698113207547, |
| "grad_norm": 1.9388166404138083, |
| "learning_rate": 8.868291542199601e-06, |
| "loss": 1.4339, |
| "step": 27340 |
| }, |
| { |
| "epoch": 1032.4528301886792, |
| "grad_norm": 1.910046684059762, |
| "learning_rate": 8.835735374748235e-06, |
| "loss": 1.407, |
| "step": 27360 |
| }, |
| { |
| "epoch": 1033.2075471698113, |
| "grad_norm": 1.5548634820286755, |
| "learning_rate": 8.803309400107802e-06, |
| "loss": 1.4183, |
| "step": 27380 |
| }, |
| { |
| "epoch": 1033.9622641509434, |
| "grad_norm": 1.5932417218331991, |
| "learning_rate": 8.771013743682171e-06, |
| "loss": 1.4447, |
| "step": 27400 |
| }, |
| { |
| "epoch": 1034.7169811320755, |
| "grad_norm": 1.4796581852592556, |
| "learning_rate": 8.738848530371221e-06, |
| "loss": 1.3946, |
| "step": 27420 |
| }, |
| { |
| "epoch": 1035.4716981132076, |
| "grad_norm": 1.6106803868616077, |
| "learning_rate": 8.706813884570337e-06, |
| "loss": 1.4152, |
| "step": 27440 |
| }, |
| { |
| "epoch": 1036.2264150943397, |
| "grad_norm": 1.5383725584269896, |
| "learning_rate": 8.674909930169968e-06, |
| "loss": 1.4344, |
| "step": 27460 |
| }, |
| { |
| "epoch": 1036.9811320754718, |
| "grad_norm": 1.6971458233324348, |
| "learning_rate": 8.643136790555101e-06, |
| "loss": 1.42, |
| "step": 27480 |
| }, |
| { |
| "epoch": 1037.7358490566037, |
| "grad_norm": 1.7975384013574476, |
| "learning_rate": 8.61149458860486e-06, |
| "loss": 1.4456, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1038.4905660377358, |
| "grad_norm": 1.5540181334521903, |
| "learning_rate": 8.579983446691931e-06, |
| "loss": 1.3976, |
| "step": 27520 |
| }, |
| { |
| "epoch": 1039.245283018868, |
| "grad_norm": 1.7107813027346386, |
| "learning_rate": 8.548603486682165e-06, |
| "loss": 1.4119, |
| "step": 27540 |
| }, |
| { |
| "epoch": 1040.0, |
| "grad_norm": 1.7225563012589893, |
| "learning_rate": 8.517354829934086e-06, |
| "loss": 1.4347, |
| "step": 27560 |
| }, |
| { |
| "epoch": 1040.754716981132, |
| "grad_norm": 1.6396983385388997, |
| "learning_rate": 8.486237597298396e-06, |
| "loss": 1.4076, |
| "step": 27580 |
| }, |
| { |
| "epoch": 1041.5094339622642, |
| "grad_norm": 1.59607993020723, |
| "learning_rate": 8.455251909117562e-06, |
| "loss": 1.391, |
| "step": 27600 |
| }, |
| { |
| "epoch": 1042.2641509433963, |
| "grad_norm": 1.6787714792885464, |
| "learning_rate": 8.424397885225284e-06, |
| "loss": 1.4319, |
| "step": 27620 |
| }, |
| { |
| "epoch": 1043.0188679245282, |
| "grad_norm": 1.514103336557697, |
| "learning_rate": 8.39367564494608e-06, |
| "loss": 1.4282, |
| "step": 27640 |
| }, |
| { |
| "epoch": 1043.7735849056603, |
| "grad_norm": 1.6827281624065857, |
| "learning_rate": 8.3630853070948e-06, |
| "loss": 1.4268, |
| "step": 27660 |
| }, |
| { |
| "epoch": 1044.5283018867924, |
| "grad_norm": 1.5242384493420091, |
| "learning_rate": 8.332626989976201e-06, |
| "loss": 1.394, |
| "step": 27680 |
| }, |
| { |
| "epoch": 1045.2830188679245, |
| "grad_norm": 1.5477899241579378, |
| "learning_rate": 8.302300811384443e-06, |
| "loss": 1.4188, |
| "step": 27700 |
| }, |
| { |
| "epoch": 1046.0377358490566, |
| "grad_norm": 1.7533265453937938, |
| "learning_rate": 8.272106888602644e-06, |
| "loss": 1.4147, |
| "step": 27720 |
| }, |
| { |
| "epoch": 1046.7924528301887, |
| "grad_norm": 1.7810905836721207, |
| "learning_rate": 8.242045338402464e-06, |
| "loss": 1.4249, |
| "step": 27740 |
| }, |
| { |
| "epoch": 1047.5471698113208, |
| "grad_norm": 1.6994451629715164, |
| "learning_rate": 8.212116277043624e-06, |
| "loss": 1.4087, |
| "step": 27760 |
| }, |
| { |
| "epoch": 1048.301886792453, |
| "grad_norm": 1.5273771258038336, |
| "learning_rate": 8.18231982027344e-06, |
| "loss": 1.4105, |
| "step": 27780 |
| }, |
| { |
| "epoch": 1049.0566037735848, |
| "grad_norm": 1.7986470388936215, |
| "learning_rate": 8.15265608332641e-06, |
| "loss": 1.417, |
| "step": 27800 |
| }, |
| { |
| "epoch": 1049.811320754717, |
| "grad_norm": 3.7362962798847605, |
| "learning_rate": 8.123125180923732e-06, |
| "loss": 1.4428, |
| "step": 27820 |
| }, |
| { |
| "epoch": 1050.566037735849, |
| "grad_norm": 1.4871345729412693, |
| "learning_rate": 8.093727227272918e-06, |
| "loss": 1.3913, |
| "step": 27840 |
| }, |
| { |
| "epoch": 1051.3207547169811, |
| "grad_norm": 1.6862935331038202, |
| "learning_rate": 8.064462336067288e-06, |
| "loss": 1.4099, |
| "step": 27860 |
| }, |
| { |
| "epoch": 1052.0754716981132, |
| "grad_norm": 1.5729155867984972, |
| "learning_rate": 8.03533062048555e-06, |
| "loss": 1.3896, |
| "step": 27880 |
| }, |
| { |
| "epoch": 1052.8301886792453, |
| "grad_norm": 1.7312033654611378, |
| "learning_rate": 8.006332193191406e-06, |
| "loss": 1.4183, |
| "step": 27900 |
| }, |
| { |
| "epoch": 1053.5849056603774, |
| "grad_norm": 1.737310060702965, |
| "learning_rate": 7.977467166333041e-06, |
| "loss": 1.4098, |
| "step": 27920 |
| }, |
| { |
| "epoch": 1054.3396226415093, |
| "grad_norm": 1.787345801838152, |
| "learning_rate": 7.948735651542762e-06, |
| "loss": 1.4472, |
| "step": 27940 |
| }, |
| { |
| "epoch": 1055.0943396226414, |
| "grad_norm": 1.6643759736424013, |
| "learning_rate": 7.920137759936503e-06, |
| "loss": 1.4248, |
| "step": 27960 |
| }, |
| { |
| "epoch": 1055.8490566037735, |
| "grad_norm": 1.665184448890738, |
| "learning_rate": 7.891673602113444e-06, |
| "loss": 1.4184, |
| "step": 27980 |
| }, |
| { |
| "epoch": 1056.6037735849056, |
| "grad_norm": 1.4651905410431068, |
| "learning_rate": 7.863343288155553e-06, |
| "loss": 1.4117, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1057.3584905660377, |
| "grad_norm": 1.761583496091816, |
| "learning_rate": 7.835146927627195e-06, |
| "loss": 1.4173, |
| "step": 28020 |
| }, |
| { |
| "epoch": 1058.1132075471698, |
| "grad_norm": 1.4468036902445778, |
| "learning_rate": 7.807084629574648e-06, |
| "loss": 1.3899, |
| "step": 28040 |
| }, |
| { |
| "epoch": 1058.867924528302, |
| "grad_norm": 1.9317915574764288, |
| "learning_rate": 7.779156502525752e-06, |
| "loss": 1.4283, |
| "step": 28060 |
| }, |
| { |
| "epoch": 1059.622641509434, |
| "grad_norm": 1.6586645034969292, |
| "learning_rate": 7.751362654489442e-06, |
| "loss": 1.3729, |
| "step": 28080 |
| }, |
| { |
| "epoch": 1060.377358490566, |
| "grad_norm": 1.54736903517111, |
| "learning_rate": 7.72370319295533e-06, |
| "loss": 1.4323, |
| "step": 28100 |
| }, |
| { |
| "epoch": 1061.132075471698, |
| "grad_norm": 1.7410908156190221, |
| "learning_rate": 7.696178224893333e-06, |
| "loss": 1.4446, |
| "step": 28120 |
| }, |
| { |
| "epoch": 1061.8867924528302, |
| "grad_norm": 1.5846972848377703, |
| "learning_rate": 7.668787856753206e-06, |
| "loss": 1.4069, |
| "step": 28140 |
| }, |
| { |
| "epoch": 1062.6415094339623, |
| "grad_norm": 2.0032825052950005, |
| "learning_rate": 7.641532194464159e-06, |
| "loss": 1.4091, |
| "step": 28160 |
| }, |
| { |
| "epoch": 1063.3962264150944, |
| "grad_norm": 1.5526416600245057, |
| "learning_rate": 7.6144113434344445e-06, |
| "loss": 1.3988, |
| "step": 28180 |
| }, |
| { |
| "epoch": 1064.1509433962265, |
| "grad_norm": 1.6399869572854062, |
| "learning_rate": 7.587425408550953e-06, |
| "loss": 1.4317, |
| "step": 28200 |
| }, |
| { |
| "epoch": 1064.9056603773586, |
| "grad_norm": 2.218545819761043, |
| "learning_rate": 7.560574494178785e-06, |
| "loss": 1.4166, |
| "step": 28220 |
| }, |
| { |
| "epoch": 1065.6603773584907, |
| "grad_norm": 1.610893838079929, |
| "learning_rate": 7.5338587041608855e-06, |
| "loss": 1.4034, |
| "step": 28240 |
| }, |
| { |
| "epoch": 1066.4150943396226, |
| "grad_norm": 1.901849515787354, |
| "learning_rate": 7.507278141817603e-06, |
| "loss": 1.4082, |
| "step": 28260 |
| }, |
| { |
| "epoch": 1067.1698113207547, |
| "grad_norm": 1.9915752693535391, |
| "learning_rate": 7.4808329099463165e-06, |
| "loss": 1.4202, |
| "step": 28280 |
| }, |
| { |
| "epoch": 1067.9245283018868, |
| "grad_norm": 2.337231756702343, |
| "learning_rate": 7.454523110821034e-06, |
| "loss": 1.4033, |
| "step": 28300 |
| }, |
| { |
| "epoch": 1068.6792452830189, |
| "grad_norm": 1.4499700621594815, |
| "learning_rate": 7.428348846191982e-06, |
| "loss": 1.4106, |
| "step": 28320 |
| }, |
| { |
| "epoch": 1069.433962264151, |
| "grad_norm": 1.7981102056016145, |
| "learning_rate": 7.402310217285226e-06, |
| "loss": 1.4061, |
| "step": 28340 |
| }, |
| { |
| "epoch": 1070.188679245283, |
| "grad_norm": 1.7129433355903898, |
| "learning_rate": 7.376407324802275e-06, |
| "loss": 1.4019, |
| "step": 28360 |
| }, |
| { |
| "epoch": 1070.9433962264152, |
| "grad_norm": 1.5382026111028457, |
| "learning_rate": 7.350640268919691e-06, |
| "loss": 1.4197, |
| "step": 28380 |
| }, |
| { |
| "epoch": 1071.698113207547, |
| "grad_norm": 1.7225324354326523, |
| "learning_rate": 7.325009149288721e-06, |
| "loss": 1.4061, |
| "step": 28400 |
| }, |
| { |
| "epoch": 1072.4528301886792, |
| "grad_norm": 1.9701222408661871, |
| "learning_rate": 7.299514065034864e-06, |
| "loss": 1.399, |
| "step": 28420 |
| }, |
| { |
| "epoch": 1073.2075471698113, |
| "grad_norm": 2.560013262107365, |
| "learning_rate": 7.2741551147575365e-06, |
| "loss": 1.4011, |
| "step": 28440 |
| }, |
| { |
| "epoch": 1073.9622641509434, |
| "grad_norm": 1.7468598350718882, |
| "learning_rate": 7.248932396529666e-06, |
| "loss": 1.3906, |
| "step": 28460 |
| }, |
| { |
| "epoch": 1074.7169811320755, |
| "grad_norm": 1.5217037013529344, |
| "learning_rate": 7.223846007897321e-06, |
| "loss": 1.3824, |
| "step": 28480 |
| }, |
| { |
| "epoch": 1075.4716981132076, |
| "grad_norm": 1.9246360758156291, |
| "learning_rate": 7.198896045879323e-06, |
| "loss": 1.401, |
| "step": 28500 |
| }, |
| { |
| "epoch": 1076.2264150943397, |
| "grad_norm": 1.6887933139540061, |
| "learning_rate": 7.174082606966883e-06, |
| "loss": 1.4025, |
| "step": 28520 |
| }, |
| { |
| "epoch": 1076.9811320754718, |
| "grad_norm": 1.6294766788073725, |
| "learning_rate": 7.149405787123236e-06, |
| "loss": 1.3986, |
| "step": 28540 |
| }, |
| { |
| "epoch": 1077.7358490566037, |
| "grad_norm": 1.5618807274404587, |
| "learning_rate": 7.124865681783234e-06, |
| "loss": 1.4005, |
| "step": 28560 |
| }, |
| { |
| "epoch": 1078.4905660377358, |
| "grad_norm": 1.6678211596916697, |
| "learning_rate": 7.100462385853021e-06, |
| "loss": 1.4071, |
| "step": 28580 |
| }, |
| { |
| "epoch": 1079.245283018868, |
| "grad_norm": 1.9223978868928677, |
| "learning_rate": 7.07619599370964e-06, |
| "loss": 1.4135, |
| "step": 28600 |
| }, |
| { |
| "epoch": 1080.0, |
| "grad_norm": 1.6632265815235145, |
| "learning_rate": 7.052066599200659e-06, |
| "loss": 1.3882, |
| "step": 28620 |
| }, |
| { |
| "epoch": 1080.754716981132, |
| "grad_norm": 1.6022030717394165, |
| "learning_rate": 7.028074295643851e-06, |
| "loss": 1.3972, |
| "step": 28640 |
| }, |
| { |
| "epoch": 1081.5094339622642, |
| "grad_norm": 1.4991746539828543, |
| "learning_rate": 7.004219175826785e-06, |
| "loss": 1.382, |
| "step": 28660 |
| }, |
| { |
| "epoch": 1082.2641509433963, |
| "grad_norm": 1.6838520383575963, |
| "learning_rate": 6.9805013320064956e-06, |
| "loss": 1.4146, |
| "step": 28680 |
| }, |
| { |
| "epoch": 1083.0188679245282, |
| "grad_norm": 1.8350778781710608, |
| "learning_rate": 6.9569208559091e-06, |
| "loss": 1.4138, |
| "step": 28700 |
| }, |
| { |
| "epoch": 1083.7735849056603, |
| "grad_norm": 1.5249940477637465, |
| "learning_rate": 6.9334778387294835e-06, |
| "loss": 1.403, |
| "step": 28720 |
| }, |
| { |
| "epoch": 1084.5283018867924, |
| "grad_norm": 1.4543697117371763, |
| "learning_rate": 6.910172371130925e-06, |
| "loss": 1.4115, |
| "step": 28740 |
| }, |
| { |
| "epoch": 1085.2830188679245, |
| "grad_norm": 1.8878771205671918, |
| "learning_rate": 6.8870045432447285e-06, |
| "loss": 1.3783, |
| "step": 28760 |
| }, |
| { |
| "epoch": 1086.0377358490566, |
| "grad_norm": 1.6650946199070653, |
| "learning_rate": 6.8639744446698945e-06, |
| "loss": 1.4065, |
| "step": 28780 |
| }, |
| { |
| "epoch": 1086.7924528301887, |
| "grad_norm": 1.9063799347508024, |
| "learning_rate": 6.84108216447278e-06, |
| "loss": 1.3896, |
| "step": 28800 |
| }, |
| { |
| "epoch": 1087.5471698113208, |
| "grad_norm": 1.7745103676453513, |
| "learning_rate": 6.818327791186747e-06, |
| "loss": 1.4068, |
| "step": 28820 |
| }, |
| { |
| "epoch": 1088.301886792453, |
| "grad_norm": 1.6208415487366228, |
| "learning_rate": 6.795711412811805e-06, |
| "loss": 1.3827, |
| "step": 28840 |
| }, |
| { |
| "epoch": 1089.0566037735848, |
| "grad_norm": 1.4568669649899233, |
| "learning_rate": 6.773233116814289e-06, |
| "loss": 1.3918, |
| "step": 28860 |
| }, |
| { |
| "epoch": 1089.811320754717, |
| "grad_norm": 1.861515176168054, |
| "learning_rate": 6.750892990126514e-06, |
| "loss": 1.3901, |
| "step": 28880 |
| }, |
| { |
| "epoch": 1090.566037735849, |
| "grad_norm": 1.7283660067362911, |
| "learning_rate": 6.728691119146446e-06, |
| "loss": 1.4157, |
| "step": 28900 |
| }, |
| { |
| "epoch": 1091.3207547169811, |
| "grad_norm": 1.679598340558233, |
| "learning_rate": 6.706627589737369e-06, |
| "loss": 1.3938, |
| "step": 28920 |
| }, |
| { |
| "epoch": 1092.0754716981132, |
| "grad_norm": 1.5691857730547452, |
| "learning_rate": 6.6847024872275215e-06, |
| "loss": 1.4176, |
| "step": 28940 |
| }, |
| { |
| "epoch": 1092.8301886792453, |
| "grad_norm": 1.5537251935711112, |
| "learning_rate": 6.66291589640982e-06, |
| "loss": 1.3967, |
| "step": 28960 |
| }, |
| { |
| "epoch": 1093.5849056603774, |
| "grad_norm": 1.8881979410475171, |
| "learning_rate": 6.641267901541472e-06, |
| "loss": 1.418, |
| "step": 28980 |
| }, |
| { |
| "epoch": 1094.3396226415093, |
| "grad_norm": 1.613241830342873, |
| "learning_rate": 6.619758586343714e-06, |
| "loss": 1.3901, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1095.0943396226414, |
| "grad_norm": 1.5946632443607534, |
| "learning_rate": 6.598388034001433e-06, |
| "loss": 1.3634, |
| "step": 29020 |
| }, |
| { |
| "epoch": 1095.8490566037735, |
| "grad_norm": 1.8962995366661943, |
| "learning_rate": 6.577156327162867e-06, |
| "loss": 1.392, |
| "step": 29040 |
| }, |
| { |
| "epoch": 1096.6037735849056, |
| "grad_norm": 1.629681556076702, |
| "learning_rate": 6.55606354793928e-06, |
| "loss": 1.4078, |
| "step": 29060 |
| }, |
| { |
| "epoch": 1097.3584905660377, |
| "grad_norm": 1.6952819453222434, |
| "learning_rate": 6.535109777904677e-06, |
| "loss": 1.4017, |
| "step": 29080 |
| }, |
| { |
| "epoch": 1098.1132075471698, |
| "grad_norm": 2.5813616029432267, |
| "learning_rate": 6.514295098095432e-06, |
| "loss": 1.3986, |
| "step": 29100 |
| }, |
| { |
| "epoch": 1098.867924528302, |
| "grad_norm": 1.5192224713062508, |
| "learning_rate": 6.493619589010008e-06, |
| "loss": 1.3995, |
| "step": 29120 |
| }, |
| { |
| "epoch": 1099.622641509434, |
| "grad_norm": 1.5723195273483208, |
| "learning_rate": 6.4730833306086425e-06, |
| "loss": 1.3804, |
| "step": 29140 |
| }, |
| { |
| "epoch": 1100.377358490566, |
| "grad_norm": 1.6397338659549336, |
| "learning_rate": 6.452686402313042e-06, |
| "loss": 1.386, |
| "step": 29160 |
| }, |
| { |
| "epoch": 1101.132075471698, |
| "grad_norm": 1.5791257173150743, |
| "learning_rate": 6.43242888300607e-06, |
| "loss": 1.3847, |
| "step": 29180 |
| }, |
| { |
| "epoch": 1101.8867924528302, |
| "grad_norm": 1.5559887095506482, |
| "learning_rate": 6.412310851031428e-06, |
| "loss": 1.393, |
| "step": 29200 |
| }, |
| { |
| "epoch": 1102.6415094339623, |
| "grad_norm": 1.6663466000474887, |
| "learning_rate": 6.392332384193371e-06, |
| "loss": 1.3896, |
| "step": 29220 |
| }, |
| { |
| "epoch": 1103.3962264150944, |
| "grad_norm": 1.9956674599720932, |
| "learning_rate": 6.372493559756415e-06, |
| "loss": 1.378, |
| "step": 29240 |
| }, |
| { |
| "epoch": 1104.1509433962265, |
| "grad_norm": 1.787105155690102, |
| "learning_rate": 6.352794454445007e-06, |
| "loss": 1.3879, |
| "step": 29260 |
| }, |
| { |
| "epoch": 1104.9056603773586, |
| "grad_norm": 1.561482889041861, |
| "learning_rate": 6.333235144443262e-06, |
| "loss": 1.402, |
| "step": 29280 |
| }, |
| { |
| "epoch": 1105.6603773584907, |
| "grad_norm": 1.8736117457797759, |
| "learning_rate": 6.31381570539463e-06, |
| "loss": 1.3879, |
| "step": 29300 |
| }, |
| { |
| "epoch": 1106.4150943396226, |
| "grad_norm": 1.4692581652153442, |
| "learning_rate": 6.294536212401641e-06, |
| "loss": 1.3914, |
| "step": 29320 |
| }, |
| { |
| "epoch": 1107.1698113207547, |
| "grad_norm": 1.4908544439114542, |
| "learning_rate": 6.275396740025605e-06, |
| "loss": 1.4028, |
| "step": 29340 |
| }, |
| { |
| "epoch": 1107.9245283018868, |
| "grad_norm": 1.488666750171173, |
| "learning_rate": 6.256397362286306e-06, |
| "loss": 1.3799, |
| "step": 29360 |
| }, |
| { |
| "epoch": 1108.6792452830189, |
| "grad_norm": 1.517431762228245, |
| "learning_rate": 6.237538152661723e-06, |
| "loss": 1.3765, |
| "step": 29380 |
| }, |
| { |
| "epoch": 1109.433962264151, |
| "grad_norm": 2.2381909450089803, |
| "learning_rate": 6.218819184087767e-06, |
| "loss": 1.4079, |
| "step": 29400 |
| }, |
| { |
| "epoch": 1110.188679245283, |
| "grad_norm": 1.7858504458920295, |
| "learning_rate": 6.200240528957965e-06, |
| "loss": 1.3554, |
| "step": 29420 |
| }, |
| { |
| "epoch": 1110.9433962264152, |
| "grad_norm": 1.7350524849254911, |
| "learning_rate": 6.181802259123219e-06, |
| "loss": 1.3967, |
| "step": 29440 |
| }, |
| { |
| "epoch": 1111.698113207547, |
| "grad_norm": 1.702971597589678, |
| "learning_rate": 6.163504445891484e-06, |
| "loss": 1.3671, |
| "step": 29460 |
| }, |
| { |
| "epoch": 1112.4528301886792, |
| "grad_norm": 1.7712134929173684, |
| "learning_rate": 6.145347160027524e-06, |
| "loss": 1.3829, |
| "step": 29480 |
| }, |
| { |
| "epoch": 1113.2075471698113, |
| "grad_norm": 1.4073555395505457, |
| "learning_rate": 6.1273304717526284e-06, |
| "loss": 1.4108, |
| "step": 29500 |
| }, |
| { |
| "epoch": 1113.9622641509434, |
| "grad_norm": 1.6527537265171588, |
| "learning_rate": 6.10945445074435e-06, |
| "loss": 1.4068, |
| "step": 29520 |
| }, |
| { |
| "epoch": 1114.7169811320755, |
| "grad_norm": 1.6866987009556351, |
| "learning_rate": 6.091719166136209e-06, |
| "loss": 1.3793, |
| "step": 29540 |
| }, |
| { |
| "epoch": 1115.4716981132076, |
| "grad_norm": 1.7073159356044332, |
| "learning_rate": 6.074124686517448e-06, |
| "loss": 1.3826, |
| "step": 29560 |
| }, |
| { |
| "epoch": 1116.2264150943397, |
| "grad_norm": 1.8230785653176147, |
| "learning_rate": 6.056671079932781e-06, |
| "loss": 1.4153, |
| "step": 29580 |
| }, |
| { |
| "epoch": 1116.9811320754718, |
| "grad_norm": 1.6857598634250675, |
| "learning_rate": 6.0393584138820814e-06, |
| "loss": 1.3887, |
| "step": 29600 |
| }, |
| { |
| "epoch": 1117.7358490566037, |
| "grad_norm": 1.5568678463492682, |
| "learning_rate": 6.022186755320181e-06, |
| "loss": 1.3901, |
| "step": 29620 |
| }, |
| { |
| "epoch": 1118.4905660377358, |
| "grad_norm": 1.8571545157336313, |
| "learning_rate": 6.0051561706565545e-06, |
| "loss": 1.4013, |
| "step": 29640 |
| }, |
| { |
| "epoch": 1119.245283018868, |
| "grad_norm": 2.413996452708785, |
| "learning_rate": 5.988266725755103e-06, |
| "loss": 1.3613, |
| "step": 29660 |
| }, |
| { |
| "epoch": 1120.0, |
| "grad_norm": 1.687989711452293, |
| "learning_rate": 5.9715184859338745e-06, |
| "loss": 1.4031, |
| "step": 29680 |
| }, |
| { |
| "epoch": 1120.754716981132, |
| "grad_norm": 1.7351377623187432, |
| "learning_rate": 5.9549115159648416e-06, |
| "loss": 1.3949, |
| "step": 29700 |
| }, |
| { |
| "epoch": 1121.5094339622642, |
| "grad_norm": 1.6317556572084198, |
| "learning_rate": 5.9384458800736175e-06, |
| "loss": 1.3769, |
| "step": 29720 |
| }, |
| { |
| "epoch": 1122.2641509433963, |
| "grad_norm": 1.5268456230996348, |
| "learning_rate": 5.922121641939213e-06, |
| "loss": 1.3816, |
| "step": 29740 |
| }, |
| { |
| "epoch": 1123.0188679245282, |
| "grad_norm": 1.712558259908726, |
| "learning_rate": 5.905938864693819e-06, |
| "loss": 1.3798, |
| "step": 29760 |
| }, |
| { |
| "epoch": 1123.7735849056603, |
| "grad_norm": 2.381990895927805, |
| "learning_rate": 5.889897610922528e-06, |
| "loss": 1.3607, |
| "step": 29780 |
| }, |
| { |
| "epoch": 1124.5283018867924, |
| "grad_norm": 1.880675021280631, |
| "learning_rate": 5.873997942663118e-06, |
| "loss": 1.3886, |
| "step": 29800 |
| }, |
| { |
| "epoch": 1125.2830188679245, |
| "grad_norm": 1.7160648060328811, |
| "learning_rate": 5.858239921405781e-06, |
| "loss": 1.4049, |
| "step": 29820 |
| }, |
| { |
| "epoch": 1126.0377358490566, |
| "grad_norm": 2.0253315053102656, |
| "learning_rate": 5.842623608092928e-06, |
| "loss": 1.393, |
| "step": 29840 |
| }, |
| { |
| "epoch": 1126.7924528301887, |
| "grad_norm": 1.7870648066969081, |
| "learning_rate": 5.8271490631189085e-06, |
| "loss": 1.3654, |
| "step": 29860 |
| }, |
| { |
| "epoch": 1127.5471698113208, |
| "grad_norm": 2.0620223544323393, |
| "learning_rate": 5.811816346329819e-06, |
| "loss": 1.3776, |
| "step": 29880 |
| }, |
| { |
| "epoch": 1128.301886792453, |
| "grad_norm": 1.892915815700359, |
| "learning_rate": 5.796625517023236e-06, |
| "loss": 1.377, |
| "step": 29900 |
| }, |
| { |
| "epoch": 1129.0566037735848, |
| "grad_norm": 1.6134589423454577, |
| "learning_rate": 5.781576633948012e-06, |
| "loss": 1.3958, |
| "step": 29920 |
| }, |
| { |
| "epoch": 1129.811320754717, |
| "grad_norm": 1.8880173462636753, |
| "learning_rate": 5.766669755304027e-06, |
| "loss": 1.3707, |
| "step": 29940 |
| }, |
| { |
| "epoch": 1130.566037735849, |
| "grad_norm": 1.899687605902805, |
| "learning_rate": 5.75190493874199e-06, |
| "loss": 1.3648, |
| "step": 29960 |
| }, |
| { |
| "epoch": 1131.3207547169811, |
| "grad_norm": 2.0484945041635143, |
| "learning_rate": 5.737282241363189e-06, |
| "loss": 1.3689, |
| "step": 29980 |
| }, |
| { |
| "epoch": 1132.0754716981132, |
| "grad_norm": 1.676321084433534, |
| "learning_rate": 5.72280171971928e-06, |
| "loss": 1.4161, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1132.8301886792453, |
| "grad_norm": 1.7718376566707665, |
| "learning_rate": 5.708463429812077e-06, |
| "loss": 1.3427, |
| "step": 30020 |
| }, |
| { |
| "epoch": 1133.5849056603774, |
| "grad_norm": 1.9751240318001524, |
| "learning_rate": 5.694267427093333e-06, |
| "loss": 1.3674, |
| "step": 30040 |
| }, |
| { |
| "epoch": 1134.3396226415093, |
| "grad_norm": 2.3259508666245754, |
| "learning_rate": 5.680213766464505e-06, |
| "loss": 1.3815, |
| "step": 30060 |
| }, |
| { |
| "epoch": 1135.0943396226414, |
| "grad_norm": 1.7499567507331477, |
| "learning_rate": 5.6663025022765734e-06, |
| "loss": 1.3898, |
| "step": 30080 |
| }, |
| { |
| "epoch": 1135.8490566037735, |
| "grad_norm": 1.7700410283415744, |
| "learning_rate": 5.652533688329809e-06, |
| "loss": 1.3801, |
| "step": 30100 |
| }, |
| { |
| "epoch": 1136.6037735849056, |
| "grad_norm": 2.4028113618062843, |
| "learning_rate": 5.638907377873572e-06, |
| "loss": 1.4025, |
| "step": 30120 |
| }, |
| { |
| "epoch": 1137.3584905660377, |
| "grad_norm": 1.7119758682153656, |
| "learning_rate": 5.625423623606109e-06, |
| "loss": 1.3933, |
| "step": 30140 |
| }, |
| { |
| "epoch": 1138.1132075471698, |
| "grad_norm": 1.6434771622606816, |
| "learning_rate": 5.612082477674341e-06, |
| "loss": 1.3723, |
| "step": 30160 |
| }, |
| { |
| "epoch": 1138.867924528302, |
| "grad_norm": 1.6260264586830788, |
| "learning_rate": 5.598883991673678e-06, |
| "loss": 1.4009, |
| "step": 30180 |
| }, |
| { |
| "epoch": 1139.622641509434, |
| "grad_norm": 3.7353731641696166, |
| "learning_rate": 5.58582821664779e-06, |
| "loss": 1.3621, |
| "step": 30200 |
| }, |
| { |
| "epoch": 1140.377358490566, |
| "grad_norm": 1.567966811159742, |
| "learning_rate": 5.572915203088453e-06, |
| "loss": 1.3679, |
| "step": 30220 |
| }, |
| { |
| "epoch": 1141.132075471698, |
| "grad_norm": 1.7536276327044822, |
| "learning_rate": 5.560145000935302e-06, |
| "loss": 1.3899, |
| "step": 30240 |
| }, |
| { |
| "epoch": 1141.8867924528302, |
| "grad_norm": 1.6246811713037859, |
| "learning_rate": 5.547517659575683e-06, |
| "loss": 1.3754, |
| "step": 30260 |
| }, |
| { |
| "epoch": 1142.6415094339623, |
| "grad_norm": 1.5935354859602073, |
| "learning_rate": 5.535033227844446e-06, |
| "loss": 1.3783, |
| "step": 30280 |
| }, |
| { |
| "epoch": 1143.3962264150944, |
| "grad_norm": 1.5837499746804282, |
| "learning_rate": 5.522691754023736e-06, |
| "loss": 1.3664, |
| "step": 30300 |
| }, |
| { |
| "epoch": 1144.1509433962265, |
| "grad_norm": 1.5561292753074283, |
| "learning_rate": 5.5104932858428386e-06, |
| "loss": 1.3934, |
| "step": 30320 |
| }, |
| { |
| "epoch": 1144.9056603773586, |
| "grad_norm": 1.5051486824601223, |
| "learning_rate": 5.498437870477979e-06, |
| "loss": 1.3569, |
| "step": 30340 |
| }, |
| { |
| "epoch": 1145.6603773584907, |
| "grad_norm": 1.5724530317281036, |
| "learning_rate": 5.48652555455214e-06, |
| "loss": 1.384, |
| "step": 30360 |
| }, |
| { |
| "epoch": 1146.4150943396226, |
| "grad_norm": 1.7499070562961392, |
| "learning_rate": 5.474756384134872e-06, |
| "loss": 1.3661, |
| "step": 30380 |
| }, |
| { |
| "epoch": 1147.1698113207547, |
| "grad_norm": 1.682172454392295, |
| "learning_rate": 5.46313040474215e-06, |
| "loss": 1.3668, |
| "step": 30400 |
| }, |
| { |
| "epoch": 1147.9245283018868, |
| "grad_norm": 1.6400451026874565, |
| "learning_rate": 5.4516476613361565e-06, |
| "loss": 1.3605, |
| "step": 30420 |
| }, |
| { |
| "epoch": 1148.6792452830189, |
| "grad_norm": 1.952384343786011, |
| "learning_rate": 5.440308198325125e-06, |
| "loss": 1.388, |
| "step": 30440 |
| }, |
| { |
| "epoch": 1149.433962264151, |
| "grad_norm": 2.052044266530817, |
| "learning_rate": 5.4291120595631796e-06, |
| "loss": 1.3699, |
| "step": 30460 |
| }, |
| { |
| "epoch": 1150.188679245283, |
| "grad_norm": 1.868354121694302, |
| "learning_rate": 5.4180592883501325e-06, |
| "loss": 1.4099, |
| "step": 30480 |
| }, |
| { |
| "epoch": 1150.9433962264152, |
| "grad_norm": 1.650613691746538, |
| "learning_rate": 5.40714992743136e-06, |
| "loss": 1.3788, |
| "step": 30500 |
| }, |
| { |
| "epoch": 1151.698113207547, |
| "grad_norm": 1.48074352750423, |
| "learning_rate": 5.3963840189976066e-06, |
| "loss": 1.3587, |
| "step": 30520 |
| }, |
| { |
| "epoch": 1152.4528301886792, |
| "grad_norm": 1.914894176993607, |
| "learning_rate": 5.385761604684826e-06, |
| "loss": 1.3622, |
| "step": 30540 |
| }, |
| { |
| "epoch": 1153.2075471698113, |
| "grad_norm": 1.736154691724524, |
| "learning_rate": 5.375282725574028e-06, |
| "loss": 1.3451, |
| "step": 30560 |
| }, |
| { |
| "epoch": 1153.9622641509434, |
| "grad_norm": 1.7175923216328703, |
| "learning_rate": 5.364947422191111e-06, |
| "loss": 1.385, |
| "step": 30580 |
| }, |
| { |
| "epoch": 1154.7169811320755, |
| "grad_norm": 1.8230347081955776, |
| "learning_rate": 5.3547557345067295e-06, |
| "loss": 1.3797, |
| "step": 30600 |
| }, |
| { |
| "epoch": 1155.4716981132076, |
| "grad_norm": 1.4897355923840079, |
| "learning_rate": 5.344707701936093e-06, |
| "loss": 1.3812, |
| "step": 30620 |
| }, |
| { |
| "epoch": 1156.2264150943397, |
| "grad_norm": 1.7795720356372806, |
| "learning_rate": 5.334803363338855e-06, |
| "loss": 1.3508, |
| "step": 30640 |
| }, |
| { |
| "epoch": 1156.9811320754718, |
| "grad_norm": 2.461699887903762, |
| "learning_rate": 5.325042757018952e-06, |
| "loss": 1.3904, |
| "step": 30660 |
| }, |
| { |
| "epoch": 1157.7358490566037, |
| "grad_norm": 1.7684288169829847, |
| "learning_rate": 5.315425920724443e-06, |
| "loss": 1.362, |
| "step": 30680 |
| }, |
| { |
| "epoch": 1158.4905660377358, |
| "grad_norm": 1.9326301215722892, |
| "learning_rate": 5.3059528916473754e-06, |
| "loss": 1.3764, |
| "step": 30700 |
| }, |
| { |
| "epoch": 1159.245283018868, |
| "grad_norm": 1.7547993585411785, |
| "learning_rate": 5.296623706423637e-06, |
| "loss": 1.3624, |
| "step": 30720 |
| }, |
| { |
| "epoch": 1160.0, |
| "grad_norm": 2.2647989876543897, |
| "learning_rate": 5.2874384011328235e-06, |
| "loss": 1.3804, |
| "step": 30740 |
| }, |
| { |
| "epoch": 1160.754716981132, |
| "grad_norm": 1.897412746168143, |
| "learning_rate": 5.278397011298081e-06, |
| "loss": 1.3882, |
| "step": 30760 |
| }, |
| { |
| "epoch": 1161.5094339622642, |
| "grad_norm": 1.5286725772277845, |
| "learning_rate": 5.269499571885985e-06, |
| "loss": 1.381, |
| "step": 30780 |
| }, |
| { |
| "epoch": 1162.2641509433963, |
| "grad_norm": 1.6848292059915215, |
| "learning_rate": 5.260746117306394e-06, |
| "loss": 1.361, |
| "step": 30800 |
| }, |
| { |
| "epoch": 1163.0188679245282, |
| "grad_norm": 1.4576957104143031, |
| "learning_rate": 5.25213668141232e-06, |
| "loss": 1.3773, |
| "step": 30820 |
| }, |
| { |
| "epoch": 1163.7735849056603, |
| "grad_norm": 1.6655981961615232, |
| "learning_rate": 5.243671297499806e-06, |
| "loss": 1.3403, |
| "step": 30840 |
| }, |
| { |
| "epoch": 1164.5283018867924, |
| "grad_norm": 6.016182274377044, |
| "learning_rate": 5.235349998307786e-06, |
| "loss": 1.3994, |
| "step": 30860 |
| }, |
| { |
| "epoch": 1165.2830188679245, |
| "grad_norm": 1.7659588641922745, |
| "learning_rate": 5.227172816017956e-06, |
| "loss": 1.3507, |
| "step": 30880 |
| }, |
| { |
| "epoch": 1166.0377358490566, |
| "grad_norm": 2.0037468459561962, |
| "learning_rate": 5.219139782254665e-06, |
| "loss": 1.3703, |
| "step": 30900 |
| }, |
| { |
| "epoch": 1166.7924528301887, |
| "grad_norm": 2.15024644673786, |
| "learning_rate": 5.211250928084786e-06, |
| "loss": 1.3473, |
| "step": 30920 |
| }, |
| { |
| "epoch": 1167.5471698113208, |
| "grad_norm": 2.5013172573697466, |
| "learning_rate": 5.203506284017583e-06, |
| "loss": 1.3814, |
| "step": 30940 |
| }, |
| { |
| "epoch": 1168.301886792453, |
| "grad_norm": 1.5816513523971083, |
| "learning_rate": 5.195905880004609e-06, |
| "loss": 1.3668, |
| "step": 30960 |
| }, |
| { |
| "epoch": 1169.0566037735848, |
| "grad_norm": 1.512996764161357, |
| "learning_rate": 5.188449745439581e-06, |
| "loss": 1.3581, |
| "step": 30980 |
| }, |
| { |
| "epoch": 1169.811320754717, |
| "grad_norm": 1.536263448282502, |
| "learning_rate": 5.181137909158276e-06, |
| "loss": 1.3277, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1170.566037735849, |
| "grad_norm": 1.6755767673451942, |
| "learning_rate": 5.1739703994384105e-06, |
| "loss": 1.3923, |
| "step": 31020 |
| }, |
| { |
| "epoch": 1171.3207547169811, |
| "grad_norm": 1.7976047665675525, |
| "learning_rate": 5.166947243999532e-06, |
| "loss": 1.3671, |
| "step": 31040 |
| }, |
| { |
| "epoch": 1172.0754716981132, |
| "grad_norm": 1.5604607884699584, |
| "learning_rate": 5.1600684700029165e-06, |
| "loss": 1.3613, |
| "step": 31060 |
| }, |
| { |
| "epoch": 1172.8301886792453, |
| "grad_norm": 1.5133379987405895, |
| "learning_rate": 5.1533341040514576e-06, |
| "loss": 1.3696, |
| "step": 31080 |
| }, |
| { |
| "epoch": 1173.5849056603774, |
| "grad_norm": 1.8992042289915705, |
| "learning_rate": 5.146744172189571e-06, |
| "loss": 1.3464, |
| "step": 31100 |
| }, |
| { |
| "epoch": 1174.3396226415093, |
| "grad_norm": 1.8549085471784923, |
| "learning_rate": 5.140298699903085e-06, |
| "loss": 1.3478, |
| "step": 31120 |
| }, |
| { |
| "epoch": 1175.0943396226414, |
| "grad_norm": 1.6926406458235648, |
| "learning_rate": 5.133997712119152e-06, |
| "loss": 1.3526, |
| "step": 31140 |
| }, |
| { |
| "epoch": 1175.8490566037735, |
| "grad_norm": 1.9538672940442745, |
| "learning_rate": 5.127841233206144e-06, |
| "loss": 1.3686, |
| "step": 31160 |
| }, |
| { |
| "epoch": 1176.6037735849056, |
| "grad_norm": 1.850655603319905, |
| "learning_rate": 5.1218292869735606e-06, |
| "loss": 1.3906, |
| "step": 31180 |
| }, |
| { |
| "epoch": 1177.3584905660377, |
| "grad_norm": 1.7127479688627378, |
| "learning_rate": 5.115961896671935e-06, |
| "loss": 1.3703, |
| "step": 31200 |
| }, |
| { |
| "epoch": 1178.1132075471698, |
| "grad_norm": 1.556614260381109, |
| "learning_rate": 5.110239084992749e-06, |
| "loss": 1.3532, |
| "step": 31220 |
| }, |
| { |
| "epoch": 1178.867924528302, |
| "grad_norm": 2.001126139034296, |
| "learning_rate": 5.1046608740683435e-06, |
| "loss": 1.3929, |
| "step": 31240 |
| }, |
| { |
| "epoch": 1179.622641509434, |
| "grad_norm": 2.127747604876417, |
| "learning_rate": 5.09922728547183e-06, |
| "loss": 1.3657, |
| "step": 31260 |
| }, |
| { |
| "epoch": 1180.377358490566, |
| "grad_norm": 1.8364327564945553, |
| "learning_rate": 5.093938340217008e-06, |
| "loss": 1.3426, |
| "step": 31280 |
| }, |
| { |
| "epoch": 1181.132075471698, |
| "grad_norm": 1.9292610849222944, |
| "learning_rate": 5.088794058758295e-06, |
| "loss": 1.368, |
| "step": 31300 |
| }, |
| { |
| "epoch": 1181.8867924528302, |
| "grad_norm": 2.0114024877177505, |
| "learning_rate": 5.083794460990618e-06, |
| "loss": 1.39, |
| "step": 31320 |
| }, |
| { |
| "epoch": 1182.6415094339623, |
| "grad_norm": 1.5735214803674382, |
| "learning_rate": 5.078939566249372e-06, |
| "loss": 1.3632, |
| "step": 31340 |
| }, |
| { |
| "epoch": 1183.3962264150944, |
| "grad_norm": 1.8428642902345547, |
| "learning_rate": 5.074229393310324e-06, |
| "loss": 1.3757, |
| "step": 31360 |
| }, |
| { |
| "epoch": 1184.1509433962265, |
| "grad_norm": 1.697897177712772, |
| "learning_rate": 5.06966396038955e-06, |
| "loss": 1.354, |
| "step": 31380 |
| }, |
| { |
| "epoch": 1184.9056603773586, |
| "grad_norm": 1.807086734591878, |
| "learning_rate": 5.065243285143349e-06, |
| "loss": 1.3757, |
| "step": 31400 |
| }, |
| { |
| "epoch": 1185.6603773584907, |
| "grad_norm": 1.743179055242126, |
| "learning_rate": 5.0609673846681936e-06, |
| "loss": 1.3819, |
| "step": 31420 |
| }, |
| { |
| "epoch": 1186.4150943396226, |
| "grad_norm": 1.8735264452983302, |
| "learning_rate": 5.056836275500658e-06, |
| "loss": 1.3579, |
| "step": 31440 |
| }, |
| { |
| "epoch": 1187.1698113207547, |
| "grad_norm": 1.5862970321945447, |
| "learning_rate": 5.052849973617347e-06, |
| "loss": 1.3445, |
| "step": 31460 |
| }, |
| { |
| "epoch": 1187.9245283018868, |
| "grad_norm": 1.692517823714256, |
| "learning_rate": 5.049008494434844e-06, |
| "loss": 1.3694, |
| "step": 31480 |
| }, |
| { |
| "epoch": 1188.6792452830189, |
| "grad_norm": 1.6212477472255649, |
| "learning_rate": 5.045311852809638e-06, |
| "loss": 1.3929, |
| "step": 31500 |
| }, |
| { |
| "epoch": 1189.433962264151, |
| "grad_norm": 1.52306373987035, |
| "learning_rate": 5.041760063038081e-06, |
| "loss": 1.3579, |
| "step": 31520 |
| }, |
| { |
| "epoch": 1190.188679245283, |
| "grad_norm": 1.7830544839573095, |
| "learning_rate": 5.038353138856331e-06, |
| "loss": 1.348, |
| "step": 31540 |
| }, |
| { |
| "epoch": 1190.9433962264152, |
| "grad_norm": 1.7203728735463606, |
| "learning_rate": 5.035091093440292e-06, |
| "loss": 1.37, |
| "step": 31560 |
| }, |
| { |
| "epoch": 1191.698113207547, |
| "grad_norm": 1.9298089743408848, |
| "learning_rate": 5.0319739394055525e-06, |
| "loss": 1.3627, |
| "step": 31580 |
| }, |
| { |
| "epoch": 1192.4528301886792, |
| "grad_norm": 1.9488940650586162, |
| "learning_rate": 5.029001688807368e-06, |
| "loss": 1.3537, |
| "step": 31600 |
| }, |
| { |
| "epoch": 1193.2075471698113, |
| "grad_norm": 2.0609178957358667, |
| "learning_rate": 5.026174353140584e-06, |
| "loss": 1.3521, |
| "step": 31620 |
| }, |
| { |
| "epoch": 1193.9622641509434, |
| "grad_norm": 1.710559073613117, |
| "learning_rate": 5.0234919433396115e-06, |
| "loss": 1.3768, |
| "step": 31640 |
| }, |
| { |
| "epoch": 1194.7169811320755, |
| "grad_norm": 1.5082465689013147, |
| "learning_rate": 5.02095446977837e-06, |
| "loss": 1.3893, |
| "step": 31660 |
| }, |
| { |
| "epoch": 1195.4716981132076, |
| "grad_norm": 2.4105153089947526, |
| "learning_rate": 5.018561942270259e-06, |
| "loss": 1.3532, |
| "step": 31680 |
| }, |
| { |
| "epoch": 1196.2264150943397, |
| "grad_norm": 1.5148689250273666, |
| "learning_rate": 5.016314370068112e-06, |
| "loss": 1.3429, |
| "step": 31700 |
| }, |
| { |
| "epoch": 1196.9811320754718, |
| "grad_norm": 1.7305388649029056, |
| "learning_rate": 5.014211761864169e-06, |
| "loss": 1.3559, |
| "step": 31720 |
| }, |
| { |
| "epoch": 1197.7358490566037, |
| "grad_norm": 3.661229816284544, |
| "learning_rate": 5.012254125790028e-06, |
| "loss": 1.37, |
| "step": 31740 |
| }, |
| { |
| "epoch": 1198.4905660377358, |
| "grad_norm": 1.9493540072501139, |
| "learning_rate": 5.010441469416635e-06, |
| "loss": 1.3808, |
| "step": 31760 |
| }, |
| { |
| "epoch": 1199.245283018868, |
| "grad_norm": 1.6896444872077154, |
| "learning_rate": 5.008773799754234e-06, |
| "loss": 1.3631, |
| "step": 31780 |
| }, |
| { |
| "epoch": 1200.0, |
| "grad_norm": 1.884439542410789, |
| "learning_rate": 5.007251123252356e-06, |
| "loss": 1.3638, |
| "step": 31800 |
| }, |
| { |
| "epoch": 1200.754716981132, |
| "grad_norm": 1.98761366434412, |
| "learning_rate": 5.005873445799779e-06, |
| "loss": 1.35, |
| "step": 31820 |
| }, |
| { |
| "epoch": 1201.5094339622642, |
| "grad_norm": 1.8352779283455332, |
| "learning_rate": 5.004640772724519e-06, |
| "loss": 1.3369, |
| "step": 31840 |
| }, |
| { |
| "epoch": 1202.2641509433963, |
| "grad_norm": 1.712020294826759, |
| "learning_rate": 5.003553108793802e-06, |
| "loss": 1.3511, |
| "step": 31860 |
| }, |
| { |
| "epoch": 1203.0188679245282, |
| "grad_norm": 1.6743616923339946, |
| "learning_rate": 5.002610458214054e-06, |
| "loss": 1.3259, |
| "step": 31880 |
| }, |
| { |
| "epoch": 1203.7735849056603, |
| "grad_norm": 1.8393462234102256, |
| "learning_rate": 5.001812824630864e-06, |
| "loss": 1.3646, |
| "step": 31900 |
| }, |
| { |
| "epoch": 1204.5283018867924, |
| "grad_norm": 1.7631293985305598, |
| "learning_rate": 5.001160211128995e-06, |
| "loss": 1.3384, |
| "step": 31920 |
| }, |
| { |
| "epoch": 1205.2830188679245, |
| "grad_norm": 1.6536424071703635, |
| "learning_rate": 5.0006526202323554e-06, |
| "loss": 1.3605, |
| "step": 31940 |
| }, |
| { |
| "epoch": 1206.0377358490566, |
| "grad_norm": 1.5387931434470863, |
| "learning_rate": 5.000290053904e-06, |
| "loss": 1.3892, |
| "step": 31960 |
| }, |
| { |
| "epoch": 1206.7924528301887, |
| "grad_norm": 1.948827205429464, |
| "learning_rate": 5.0000725135461104e-06, |
| "loss": 1.3541, |
| "step": 31980 |
| }, |
| { |
| "epoch": 1207.5471698113208, |
| "grad_norm": 1.595259284912312, |
| "learning_rate": 5e-06, |
| "loss": 1.3478, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1231.5094339622642, |
| "grad_norm": 1.9859843003442184, |
| "learning_rate": 3.1745653570607866e-05, |
| "loss": 1.4161, |
| "step": 32020 |
| }, |
| { |
| "epoch": 1232.2641509433963, |
| "grad_norm": 2.399291840461689, |
| "learning_rate": 3.170382168563073e-05, |
| "loss": 1.4292, |
| "step": 32040 |
| }, |
| { |
| "epoch": 1233.0188679245282, |
| "grad_norm": 2.2207067356830597, |
| "learning_rate": 3.166200444421923e-05, |
| "loss": 1.4248, |
| "step": 32060 |
| }, |
| { |
| "epoch": 1233.7735849056603, |
| "grad_norm": 1.7538222025729717, |
| "learning_rate": 3.1620201903092876e-05, |
| "loss": 1.4549, |
| "step": 32080 |
| }, |
| { |
| "epoch": 1234.5283018867924, |
| "grad_norm": 1.7296401624898199, |
| "learning_rate": 3.157841411895116e-05, |
| "loss": 1.4544, |
| "step": 32100 |
| }, |
| { |
| "epoch": 1235.2830188679245, |
| "grad_norm": 1.6657757057870137, |
| "learning_rate": 3.153664114847362e-05, |
| "loss": 1.4734, |
| "step": 32120 |
| }, |
| { |
| "epoch": 1236.0377358490566, |
| "grad_norm": 1.7240277610891936, |
| "learning_rate": 3.149488304831967e-05, |
| "loss": 1.451, |
| "step": 32140 |
| }, |
| { |
| "epoch": 1236.7924528301887, |
| "grad_norm": 1.6885797820089437, |
| "learning_rate": 3.145313987512854e-05, |
| "loss": 1.4366, |
| "step": 32160 |
| }, |
| { |
| "epoch": 1237.5471698113208, |
| "grad_norm": 1.4963776794399322, |
| "learning_rate": 3.141141168551928e-05, |
| "loss": 1.4652, |
| "step": 32180 |
| }, |
| { |
| "epoch": 1238.301886792453, |
| "grad_norm": 1.4609983523815115, |
| "learning_rate": 3.1369698536090554e-05, |
| "loss": 1.4648, |
| "step": 32200 |
| }, |
| { |
| "epoch": 1239.0566037735848, |
| "grad_norm": 1.9029419687473905, |
| "learning_rate": 3.132800048342065e-05, |
| "loss": 1.4664, |
| "step": 32220 |
| }, |
| { |
| "epoch": 1239.811320754717, |
| "grad_norm": 1.7932066669770592, |
| "learning_rate": 3.128631758406736e-05, |
| "loss": 1.4585, |
| "step": 32240 |
| }, |
| { |
| "epoch": 1240.566037735849, |
| "grad_norm": 1.6253328044167166, |
| "learning_rate": 3.1244649894567945e-05, |
| "loss": 1.4492, |
| "step": 32260 |
| }, |
| { |
| "epoch": 1241.3207547169811, |
| "grad_norm": 1.507775786714413, |
| "learning_rate": 3.120299747143905e-05, |
| "loss": 1.4934, |
| "step": 32280 |
| }, |
| { |
| "epoch": 1242.0754716981132, |
| "grad_norm": 1.7801850010709415, |
| "learning_rate": 3.1161360371176566e-05, |
| "loss": 1.4486, |
| "step": 32300 |
| }, |
| { |
| "epoch": 1242.8301886792453, |
| "grad_norm": 1.6106209389195743, |
| "learning_rate": 3.111973865025564e-05, |
| "loss": 1.4468, |
| "step": 32320 |
| }, |
| { |
| "epoch": 1243.5849056603774, |
| "grad_norm": 1.8027839874458171, |
| "learning_rate": 3.107813236513054e-05, |
| "loss": 1.477, |
| "step": 32340 |
| }, |
| { |
| "epoch": 1244.3396226415093, |
| "grad_norm": 1.883131295400716, |
| "learning_rate": 3.1036541572234594e-05, |
| "loss": 1.4555, |
| "step": 32360 |
| }, |
| { |
| "epoch": 1245.0943396226414, |
| "grad_norm": 1.591157945654413, |
| "learning_rate": 3.099496632798014e-05, |
| "loss": 1.4708, |
| "step": 32380 |
| }, |
| { |
| "epoch": 1245.8490566037735, |
| "grad_norm": 1.6694778342522842, |
| "learning_rate": 3.095340668875842e-05, |
| "loss": 1.4639, |
| "step": 32400 |
| }, |
| { |
| "epoch": 1246.6037735849056, |
| "grad_norm": 1.6841562206011031, |
| "learning_rate": 3.091186271093947e-05, |
| "loss": 1.5116, |
| "step": 32420 |
| }, |
| { |
| "epoch": 1247.3584905660377, |
| "grad_norm": 2.3369379900409943, |
| "learning_rate": 3.0870334450872156e-05, |
| "loss": 1.4754, |
| "step": 32440 |
| }, |
| { |
| "epoch": 1248.1132075471698, |
| "grad_norm": 1.720534890104194, |
| "learning_rate": 3.0828821964883944e-05, |
| "loss": 1.4941, |
| "step": 32460 |
| }, |
| { |
| "epoch": 1248.867924528302, |
| "grad_norm": 1.7549772489735695, |
| "learning_rate": 3.0787325309280966e-05, |
| "loss": 1.4799, |
| "step": 32480 |
| }, |
| { |
| "epoch": 1249.622641509434, |
| "grad_norm": 1.8182084066575632, |
| "learning_rate": 3.074584454034788e-05, |
| "loss": 1.4715, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1250.377358490566, |
| "grad_norm": 1.5605662428278646, |
| "learning_rate": 3.0704379714347736e-05, |
| "loss": 1.4783, |
| "step": 32520 |
| }, |
| { |
| "epoch": 1251.132075471698, |
| "grad_norm": 1.569853865239183, |
| "learning_rate": 3.066293088752203e-05, |
| "loss": 1.4638, |
| "step": 32540 |
| }, |
| { |
| "epoch": 1251.8867924528302, |
| "grad_norm": 1.704579985134968, |
| "learning_rate": 3.062149811609051e-05, |
| "loss": 1.492, |
| "step": 32560 |
| }, |
| { |
| "epoch": 1252.6415094339623, |
| "grad_norm": 1.7794864973864697, |
| "learning_rate": 3.058008145625118e-05, |
| "loss": 1.4705, |
| "step": 32580 |
| }, |
| { |
| "epoch": 1253.3962264150944, |
| "grad_norm": 1.8222736973302784, |
| "learning_rate": 3.053868096418017e-05, |
| "loss": 1.4893, |
| "step": 32600 |
| }, |
| { |
| "epoch": 1254.1509433962265, |
| "grad_norm": 1.5789611538013155, |
| "learning_rate": 3.0497296696031678e-05, |
| "loss": 1.4665, |
| "step": 32620 |
| }, |
| { |
| "epoch": 1254.9056603773586, |
| "grad_norm": 1.657785958532039, |
| "learning_rate": 3.0455928707937924e-05, |
| "loss": 1.491, |
| "step": 32640 |
| }, |
| { |
| "epoch": 1255.6603773584907, |
| "grad_norm": 1.3254023383839637, |
| "learning_rate": 3.0414577056008995e-05, |
| "loss": 1.4823, |
| "step": 32660 |
| }, |
| { |
| "epoch": 1256.4150943396226, |
| "grad_norm": 1.5602437010509045, |
| "learning_rate": 3.0373241796332887e-05, |
| "loss": 1.4704, |
| "step": 32680 |
| }, |
| { |
| "epoch": 1257.1698113207547, |
| "grad_norm": 2.029474586920305, |
| "learning_rate": 3.0331922984975316e-05, |
| "loss": 1.4765, |
| "step": 32700 |
| }, |
| { |
| "epoch": 1257.9245283018868, |
| "grad_norm": 1.8553896972815955, |
| "learning_rate": 3.0290620677979688e-05, |
| "loss": 1.5096, |
| "step": 32720 |
| }, |
| { |
| "epoch": 1258.6792452830189, |
| "grad_norm": 1.4989759048156965, |
| "learning_rate": 3.0249334931367046e-05, |
| "loss": 1.5122, |
| "step": 32740 |
| }, |
| { |
| "epoch": 1259.433962264151, |
| "grad_norm": 1.6763111597334728, |
| "learning_rate": 3.0208065801135942e-05, |
| "loss": 1.4787, |
| "step": 32760 |
| }, |
| { |
| "epoch": 1260.188679245283, |
| "grad_norm": 1.469251133196546, |
| "learning_rate": 3.016681334326244e-05, |
| "loss": 1.4854, |
| "step": 32780 |
| }, |
| { |
| "epoch": 1260.9433962264152, |
| "grad_norm": 1.8501919367454238, |
| "learning_rate": 3.0125577613699926e-05, |
| "loss": 1.4929, |
| "step": 32800 |
| }, |
| { |
| "epoch": 1261.698113207547, |
| "grad_norm": 1.5790438820656068, |
| "learning_rate": 3.0084358668379155e-05, |
| "loss": 1.5055, |
| "step": 32820 |
| }, |
| { |
| "epoch": 1262.4528301886792, |
| "grad_norm": 1.5952733717783116, |
| "learning_rate": 3.004315656320806e-05, |
| "loss": 1.4907, |
| "step": 32840 |
| }, |
| { |
| "epoch": 1263.2075471698113, |
| "grad_norm": 1.6182930520428953, |
| "learning_rate": 3.0001971354071772e-05, |
| "loss": 1.4909, |
| "step": 32860 |
| }, |
| { |
| "epoch": 1263.9622641509434, |
| "grad_norm": 2.2886630268428663, |
| "learning_rate": 2.996080309683252e-05, |
| "loss": 1.4992, |
| "step": 32880 |
| }, |
| { |
| "epoch": 1264.7169811320755, |
| "grad_norm": 1.3793974197803296, |
| "learning_rate": 2.9919651847329483e-05, |
| "loss": 1.5061, |
| "step": 32900 |
| }, |
| { |
| "epoch": 1265.4716981132076, |
| "grad_norm": 1.39182833894468, |
| "learning_rate": 2.9878517661378828e-05, |
| "loss": 1.4591, |
| "step": 32920 |
| }, |
| { |
| "epoch": 1266.2264150943397, |
| "grad_norm": 1.6904437738848905, |
| "learning_rate": 2.9837400594773515e-05, |
| "loss": 1.5118, |
| "step": 32940 |
| }, |
| { |
| "epoch": 1266.9811320754718, |
| "grad_norm": 1.6447748796714898, |
| "learning_rate": 2.979630070328336e-05, |
| "loss": 1.4881, |
| "step": 32960 |
| }, |
| { |
| "epoch": 1267.7358490566037, |
| "grad_norm": 1.3512114550316146, |
| "learning_rate": 2.975521804265484e-05, |
| "loss": 1.4719, |
| "step": 32980 |
| }, |
| { |
| "epoch": 1268.4905660377358, |
| "grad_norm": 1.6317892668767962, |
| "learning_rate": 2.971415266861105e-05, |
| "loss": 1.5057, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1269.245283018868, |
| "grad_norm": 1.6596450520295813, |
| "learning_rate": 2.967310463685166e-05, |
| "loss": 1.481, |
| "step": 33020 |
| }, |
| { |
| "epoch": 1270.0, |
| "grad_norm": 1.6548890468178368, |
| "learning_rate": 2.9632074003052808e-05, |
| "loss": 1.5136, |
| "step": 33040 |
| }, |
| { |
| "epoch": 1270.754716981132, |
| "grad_norm": 1.5074284840254797, |
| "learning_rate": 2.9591060822867042e-05, |
| "loss": 1.4971, |
| "step": 33060 |
| }, |
| { |
| "epoch": 1271.5094339622642, |
| "grad_norm": 1.5075074748556512, |
| "learning_rate": 2.9550065151923238e-05, |
| "loss": 1.4647, |
| "step": 33080 |
| }, |
| { |
| "epoch": 1272.2641509433963, |
| "grad_norm": 1.7144775848474376, |
| "learning_rate": 2.9509087045826505e-05, |
| "loss": 1.5145, |
| "step": 33100 |
| }, |
| { |
| "epoch": 1273.0188679245282, |
| "grad_norm": 1.5547570517351919, |
| "learning_rate": 2.946812656015815e-05, |
| "loss": 1.4806, |
| "step": 33120 |
| }, |
| { |
| "epoch": 1273.7735849056603, |
| "grad_norm": 1.91096744807036, |
| "learning_rate": 2.942718375047554e-05, |
| "loss": 1.4953, |
| "step": 33140 |
| }, |
| { |
| "epoch": 1274.5283018867924, |
| "grad_norm": 1.690681911094072, |
| "learning_rate": 2.9386258672312143e-05, |
| "loss": 1.5043, |
| "step": 33160 |
| }, |
| { |
| "epoch": 1275.2830188679245, |
| "grad_norm": 1.6094990513366627, |
| "learning_rate": 2.93453513811773e-05, |
| "loss": 1.4656, |
| "step": 33180 |
| }, |
| { |
| "epoch": 1276.0377358490566, |
| "grad_norm": 1.7166760221415358, |
| "learning_rate": 2.9304461932556262e-05, |
| "loss": 1.5049, |
| "step": 33200 |
| }, |
| { |
| "epoch": 1276.7924528301887, |
| "grad_norm": 1.4781436729661779, |
| "learning_rate": 2.9263590381910078e-05, |
| "loss": 1.4901, |
| "step": 33220 |
| }, |
| { |
| "epoch": 1277.5471698113208, |
| "grad_norm": 1.6055713664381628, |
| "learning_rate": 2.9222736784675506e-05, |
| "loss": 1.4744, |
| "step": 33240 |
| }, |
| { |
| "epoch": 1278.301886792453, |
| "grad_norm": 1.6185246350349134, |
| "learning_rate": 2.9181901196264983e-05, |
| "loss": 1.4809, |
| "step": 33260 |
| }, |
| { |
| "epoch": 1279.0566037735848, |
| "grad_norm": 1.876852753612874, |
| "learning_rate": 2.9141083672066472e-05, |
| "loss": 1.4737, |
| "step": 33280 |
| }, |
| { |
| "epoch": 1279.811320754717, |
| "grad_norm": 1.646333221814719, |
| "learning_rate": 2.910028426744349e-05, |
| "loss": 1.4807, |
| "step": 33300 |
| }, |
| { |
| "epoch": 1280.566037735849, |
| "grad_norm": 1.4950158846180641, |
| "learning_rate": 2.9059503037734925e-05, |
| "loss": 1.4871, |
| "step": 33320 |
| }, |
| { |
| "epoch": 1281.3207547169811, |
| "grad_norm": 2.5440304246025702, |
| "learning_rate": 2.9018740038255044e-05, |
| "loss": 1.4869, |
| "step": 33340 |
| }, |
| { |
| "epoch": 1282.0754716981132, |
| "grad_norm": 1.5221803613837093, |
| "learning_rate": 2.897799532429339e-05, |
| "loss": 1.4756, |
| "step": 33360 |
| }, |
| { |
| "epoch": 1282.8301886792453, |
| "grad_norm": 1.459833552438949, |
| "learning_rate": 2.8937268951114686e-05, |
| "loss": 1.4782, |
| "step": 33380 |
| }, |
| { |
| "epoch": 1283.5849056603774, |
| "grad_norm": 1.5193291412259906, |
| "learning_rate": 2.8896560973958796e-05, |
| "loss": 1.4925, |
| "step": 33400 |
| }, |
| { |
| "epoch": 1284.3396226415093, |
| "grad_norm": 1.457579254538571, |
| "learning_rate": 2.88558714480406e-05, |
| "loss": 1.4865, |
| "step": 33420 |
| }, |
| { |
| "epoch": 1285.0943396226414, |
| "grad_norm": 2.116390864572185, |
| "learning_rate": 2.8815200428549985e-05, |
| "loss": 1.4823, |
| "step": 33440 |
| }, |
| { |
| "epoch": 1285.8490566037735, |
| "grad_norm": 2.333973476529065, |
| "learning_rate": 2.8774547970651747e-05, |
| "loss": 1.4701, |
| "step": 33460 |
| }, |
| { |
| "epoch": 1286.6037735849056, |
| "grad_norm": 1.4347402180741313, |
| "learning_rate": 2.8733914129485457e-05, |
| "loss": 1.4964, |
| "step": 33480 |
| }, |
| { |
| "epoch": 1287.3584905660377, |
| "grad_norm": 1.5219049837257324, |
| "learning_rate": 2.8693298960165473e-05, |
| "loss": 1.4845, |
| "step": 33500 |
| }, |
| { |
| "epoch": 1288.1132075471698, |
| "grad_norm": 1.7295744561903763, |
| "learning_rate": 2.8652702517780815e-05, |
| "loss": 1.4729, |
| "step": 33520 |
| }, |
| { |
| "epoch": 1288.867924528302, |
| "grad_norm": 1.3491913340767474, |
| "learning_rate": 2.8612124857395097e-05, |
| "loss": 1.4734, |
| "step": 33540 |
| }, |
| { |
| "epoch": 1289.622641509434, |
| "grad_norm": 1.612399971127458, |
| "learning_rate": 2.8571566034046486e-05, |
| "loss": 1.4717, |
| "step": 33560 |
| }, |
| { |
| "epoch": 1290.377358490566, |
| "grad_norm": 1.523340229132746, |
| "learning_rate": 2.8531026102747552e-05, |
| "loss": 1.4784, |
| "step": 33580 |
| }, |
| { |
| "epoch": 1291.132075471698, |
| "grad_norm": 1.391650177787444, |
| "learning_rate": 2.849050511848529e-05, |
| "loss": 1.4968, |
| "step": 33600 |
| }, |
| { |
| "epoch": 1291.8867924528302, |
| "grad_norm": 1.631972432390494, |
| "learning_rate": 2.845000313622095e-05, |
| "loss": 1.4783, |
| "step": 33620 |
| }, |
| { |
| "epoch": 1292.6415094339623, |
| "grad_norm": 1.4676382942374402, |
| "learning_rate": 2.840952021089003e-05, |
| "loss": 1.4724, |
| "step": 33640 |
| }, |
| { |
| "epoch": 1293.3962264150944, |
| "grad_norm": 1.5025191965428788, |
| "learning_rate": 2.83690563974022e-05, |
| "loss": 1.4958, |
| "step": 33660 |
| }, |
| { |
| "epoch": 1294.1509433962265, |
| "grad_norm": 1.6379644083109945, |
| "learning_rate": 2.832861175064119e-05, |
| "loss": 1.4834, |
| "step": 33680 |
| }, |
| { |
| "epoch": 1294.9056603773586, |
| "grad_norm": 1.7312099049664693, |
| "learning_rate": 2.8288186325464705e-05, |
| "loss": 1.4941, |
| "step": 33700 |
| }, |
| { |
| "epoch": 1295.6603773584907, |
| "grad_norm": 1.5113721107585405, |
| "learning_rate": 2.8247780176704408e-05, |
| "loss": 1.4863, |
| "step": 33720 |
| }, |
| { |
| "epoch": 1296.4150943396226, |
| "grad_norm": 1.4187238404455875, |
| "learning_rate": 2.8207393359165837e-05, |
| "loss": 1.4635, |
| "step": 33740 |
| }, |
| { |
| "epoch": 1297.1698113207547, |
| "grad_norm": 1.5036198246572734, |
| "learning_rate": 2.8167025927628266e-05, |
| "loss": 1.4663, |
| "step": 33760 |
| }, |
| { |
| "epoch": 1297.9245283018868, |
| "grad_norm": 1.656299435435026, |
| "learning_rate": 2.8126677936844698e-05, |
| "loss": 1.4809, |
| "step": 33780 |
| }, |
| { |
| "epoch": 1298.6792452830189, |
| "grad_norm": 1.7227294745544, |
| "learning_rate": 2.808634944154176e-05, |
| "loss": 1.4518, |
| "step": 33800 |
| }, |
| { |
| "epoch": 1299.433962264151, |
| "grad_norm": 1.554440422068932, |
| "learning_rate": 2.8046040496419622e-05, |
| "loss": 1.4858, |
| "step": 33820 |
| }, |
| { |
| "epoch": 1300.188679245283, |
| "grad_norm": 1.5684395687858594, |
| "learning_rate": 2.8005751156151996e-05, |
| "loss": 1.4939, |
| "step": 33840 |
| }, |
| { |
| "epoch": 1300.9433962264152, |
| "grad_norm": 1.4791453327586883, |
| "learning_rate": 2.7965481475385922e-05, |
| "loss": 1.4981, |
| "step": 33860 |
| }, |
| { |
| "epoch": 1301.698113207547, |
| "grad_norm": 1.8682361890592045, |
| "learning_rate": 2.792523150874184e-05, |
| "loss": 1.485, |
| "step": 33880 |
| }, |
| { |
| "epoch": 1302.4528301886792, |
| "grad_norm": 1.4376784070576631, |
| "learning_rate": 2.7885001310813394e-05, |
| "loss": 1.4771, |
| "step": 33900 |
| }, |
| { |
| "epoch": 1303.2075471698113, |
| "grad_norm": 1.4919487782728726, |
| "learning_rate": 2.7844790936167448e-05, |
| "loss": 1.4818, |
| "step": 33920 |
| }, |
| { |
| "epoch": 1303.9622641509434, |
| "grad_norm": 1.5926644935407461, |
| "learning_rate": 2.7804600439344004e-05, |
| "loss": 1.481, |
| "step": 33940 |
| }, |
| { |
| "epoch": 1304.7169811320755, |
| "grad_norm": 2.129672326977145, |
| "learning_rate": 2.776442987485605e-05, |
| "loss": 1.4809, |
| "step": 33960 |
| }, |
| { |
| "epoch": 1305.4716981132076, |
| "grad_norm": 1.4661184798946012, |
| "learning_rate": 2.7724279297189564e-05, |
| "loss": 1.4734, |
| "step": 33980 |
| }, |
| { |
| "epoch": 1306.2264150943397, |
| "grad_norm": 1.6422416038082728, |
| "learning_rate": 2.7684148760803404e-05, |
| "loss": 1.4706, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1306.9811320754718, |
| "grad_norm": 1.6788541325557527, |
| "learning_rate": 2.7644038320129247e-05, |
| "loss": 1.4734, |
| "step": 34020 |
| }, |
| { |
| "epoch": 1307.7358490566037, |
| "grad_norm": 1.5820996412366164, |
| "learning_rate": 2.7603948029571546e-05, |
| "loss": 1.4731, |
| "step": 34040 |
| }, |
| { |
| "epoch": 1308.4905660377358, |
| "grad_norm": 1.8093817496261688, |
| "learning_rate": 2.756387794350737e-05, |
| "loss": 1.4876, |
| "step": 34060 |
| }, |
| { |
| "epoch": 1309.245283018868, |
| "grad_norm": 1.4611414622430816, |
| "learning_rate": 2.7523828116286425e-05, |
| "loss": 1.4958, |
| "step": 34080 |
| }, |
| { |
| "epoch": 1310.0, |
| "grad_norm": 1.4982681857066789, |
| "learning_rate": 2.7483798602230905e-05, |
| "loss": 1.4713, |
| "step": 34100 |
| }, |
| { |
| "epoch": 1310.754716981132, |
| "grad_norm": 1.7049190400136933, |
| "learning_rate": 2.744378945563547e-05, |
| "loss": 1.4698, |
| "step": 34120 |
| }, |
| { |
| "epoch": 1311.5094339622642, |
| "grad_norm": 1.465072325468145, |
| "learning_rate": 2.7403800730767165e-05, |
| "loss": 1.4814, |
| "step": 34140 |
| }, |
| { |
| "epoch": 1312.2641509433963, |
| "grad_norm": 1.6806290813940998, |
| "learning_rate": 2.7363832481865326e-05, |
| "loss": 1.4623, |
| "step": 34160 |
| }, |
| { |
| "epoch": 1313.0188679245282, |
| "grad_norm": 1.422949129304357, |
| "learning_rate": 2.7323884763141494e-05, |
| "loss": 1.4798, |
| "step": 34180 |
| }, |
| { |
| "epoch": 1313.7735849056603, |
| "grad_norm": 1.5386955048633302, |
| "learning_rate": 2.728395762877941e-05, |
| "loss": 1.4588, |
| "step": 34200 |
| }, |
| { |
| "epoch": 1314.5283018867924, |
| "grad_norm": 1.5472770555424338, |
| "learning_rate": 2.7244051132934836e-05, |
| "loss": 1.451, |
| "step": 34220 |
| }, |
| { |
| "epoch": 1315.2830188679245, |
| "grad_norm": 1.435168914934391, |
| "learning_rate": 2.72041653297356e-05, |
| "loss": 1.4943, |
| "step": 34240 |
| }, |
| { |
| "epoch": 1316.0377358490566, |
| "grad_norm": 1.4183350034608622, |
| "learning_rate": 2.716430027328143e-05, |
| "loss": 1.4519, |
| "step": 34260 |
| }, |
| { |
| "epoch": 1316.7924528301887, |
| "grad_norm": 1.7134876611489063, |
| "learning_rate": 2.7124456017643914e-05, |
| "loss": 1.4658, |
| "step": 34280 |
| }, |
| { |
| "epoch": 1317.5471698113208, |
| "grad_norm": 1.4042660927164932, |
| "learning_rate": 2.7084632616866437e-05, |
| "loss": 1.4665, |
| "step": 34300 |
| }, |
| { |
| "epoch": 1318.301886792453, |
| "grad_norm": 1.7236176772036846, |
| "learning_rate": 2.7044830124964073e-05, |
| "loss": 1.4598, |
| "step": 34320 |
| }, |
| { |
| "epoch": 1319.0566037735848, |
| "grad_norm": 1.7345912564178498, |
| "learning_rate": 2.7005048595923597e-05, |
| "loss": 1.4941, |
| "step": 34340 |
| }, |
| { |
| "epoch": 1319.811320754717, |
| "grad_norm": 1.6553359599381614, |
| "learning_rate": 2.696528808370328e-05, |
| "loss": 1.448, |
| "step": 34360 |
| }, |
| { |
| "epoch": 1320.566037735849, |
| "grad_norm": 1.654924545197036, |
| "learning_rate": 2.6925548642232916e-05, |
| "loss": 1.453, |
| "step": 34380 |
| }, |
| { |
| "epoch": 1321.3207547169811, |
| "grad_norm": 1.966241914838029, |
| "learning_rate": 2.6885830325413732e-05, |
| "loss": 1.4791, |
| "step": 34400 |
| }, |
| { |
| "epoch": 1322.0754716981132, |
| "grad_norm": 1.613098173730771, |
| "learning_rate": 2.6846133187118266e-05, |
| "loss": 1.4456, |
| "step": 34420 |
| }, |
| { |
| "epoch": 1322.8301886792453, |
| "grad_norm": 1.694164161340185, |
| "learning_rate": 2.6806457281190392e-05, |
| "loss": 1.4697, |
| "step": 34440 |
| }, |
| { |
| "epoch": 1323.5849056603774, |
| "grad_norm": 1.7709910517494127, |
| "learning_rate": 2.6766802661445123e-05, |
| "loss": 1.4767, |
| "step": 34460 |
| }, |
| { |
| "epoch": 1324.3396226415093, |
| "grad_norm": 2.1757270130771547, |
| "learning_rate": 2.672716938166863e-05, |
| "loss": 1.5023, |
| "step": 34480 |
| }, |
| { |
| "epoch": 1325.0943396226414, |
| "grad_norm": 1.618966012864335, |
| "learning_rate": 2.66875574956181e-05, |
| "loss": 1.4459, |
| "step": 34500 |
| }, |
| { |
| "epoch": 1325.8490566037735, |
| "grad_norm": 1.6395370020860782, |
| "learning_rate": 2.6647967057021783e-05, |
| "loss": 1.4716, |
| "step": 34520 |
| }, |
| { |
| "epoch": 1326.6037735849056, |
| "grad_norm": 1.458865429611614, |
| "learning_rate": 2.6608398119578777e-05, |
| "loss": 1.4509, |
| "step": 34540 |
| }, |
| { |
| "epoch": 1327.3584905660377, |
| "grad_norm": 1.8785096977087146, |
| "learning_rate": 2.656885073695903e-05, |
| "loss": 1.4563, |
| "step": 34560 |
| }, |
| { |
| "epoch": 1328.1132075471698, |
| "grad_norm": 1.9390316222323336, |
| "learning_rate": 2.652932496280323e-05, |
| "loss": 1.4851, |
| "step": 34580 |
| }, |
| { |
| "epoch": 1328.867924528302, |
| "grad_norm": 5.310289949887802, |
| "learning_rate": 2.6489820850722802e-05, |
| "loss": 1.4768, |
| "step": 34600 |
| }, |
| { |
| "epoch": 1329.622641509434, |
| "grad_norm": 1.4684731158219795, |
| "learning_rate": 2.6450338454299786e-05, |
| "loss": 1.4516, |
| "step": 34620 |
| }, |
| { |
| "epoch": 1330.377358490566, |
| "grad_norm": 1.639577731583303, |
| "learning_rate": 2.641087782708672e-05, |
| "loss": 1.4654, |
| "step": 34640 |
| }, |
| { |
| "epoch": 1331.132075471698, |
| "grad_norm": 1.6849901015256106, |
| "learning_rate": 2.6371439022606665e-05, |
| "loss": 1.4615, |
| "step": 34660 |
| }, |
| { |
| "epoch": 1331.8867924528302, |
| "grad_norm": 1.619952725687253, |
| "learning_rate": 2.6332022094353024e-05, |
| "loss": 1.4461, |
| "step": 34680 |
| }, |
| { |
| "epoch": 1332.6415094339623, |
| "grad_norm": 1.5608967063706551, |
| "learning_rate": 2.6292627095789594e-05, |
| "loss": 1.4523, |
| "step": 34700 |
| }, |
| { |
| "epoch": 1333.3962264150944, |
| "grad_norm": 1.7568408459896505, |
| "learning_rate": 2.625325408035041e-05, |
| "loss": 1.4758, |
| "step": 34720 |
| }, |
| { |
| "epoch": 1334.1509433962265, |
| "grad_norm": 1.5186845485994895, |
| "learning_rate": 2.6213903101439668e-05, |
| "loss": 1.4527, |
| "step": 34740 |
| }, |
| { |
| "epoch": 1334.9056603773586, |
| "grad_norm": 1.9016010055715276, |
| "learning_rate": 2.6174574212431673e-05, |
| "loss": 1.4708, |
| "step": 34760 |
| }, |
| { |
| "epoch": 1335.6603773584907, |
| "grad_norm": 1.3914584691450766, |
| "learning_rate": 2.6135267466670776e-05, |
| "loss": 1.4519, |
| "step": 34780 |
| }, |
| { |
| "epoch": 1336.4150943396226, |
| "grad_norm": 1.7920706183325235, |
| "learning_rate": 2.6095982917471312e-05, |
| "loss": 1.4551, |
| "step": 34800 |
| }, |
| { |
| "epoch": 1337.1698113207547, |
| "grad_norm": 1.7415199040517522, |
| "learning_rate": 2.6056720618117508e-05, |
| "loss": 1.4618, |
| "step": 34820 |
| }, |
| { |
| "epoch": 1337.9245283018868, |
| "grad_norm": 2.0387577968023423, |
| "learning_rate": 2.6017480621863382e-05, |
| "loss": 1.4336, |
| "step": 34840 |
| }, |
| { |
| "epoch": 1338.6792452830189, |
| "grad_norm": 1.7452335041516622, |
| "learning_rate": 2.5978262981932716e-05, |
| "loss": 1.4845, |
| "step": 34860 |
| }, |
| { |
| "epoch": 1339.433962264151, |
| "grad_norm": 1.8221491527113842, |
| "learning_rate": 2.5939067751518968e-05, |
| "loss": 1.4509, |
| "step": 34880 |
| }, |
| { |
| "epoch": 1340.188679245283, |
| "grad_norm": 1.573534969706598, |
| "learning_rate": 2.58998949837852e-05, |
| "loss": 1.4597, |
| "step": 34900 |
| }, |
| { |
| "epoch": 1340.9433962264152, |
| "grad_norm": 1.7418894779202971, |
| "learning_rate": 2.5860744731864037e-05, |
| "loss": 1.4509, |
| "step": 34920 |
| }, |
| { |
| "epoch": 1341.698113207547, |
| "grad_norm": 2.3533748801857612, |
| "learning_rate": 2.5821617048857514e-05, |
| "loss": 1.4707, |
| "step": 34940 |
| }, |
| { |
| "epoch": 1342.4528301886792, |
| "grad_norm": 1.6384303594662744, |
| "learning_rate": 2.5782511987837087e-05, |
| "loss": 1.4483, |
| "step": 34960 |
| }, |
| { |
| "epoch": 1343.2075471698113, |
| "grad_norm": 1.7437935503570192, |
| "learning_rate": 2.5743429601843493e-05, |
| "loss": 1.4708, |
| "step": 34980 |
| }, |
| { |
| "epoch": 1343.9622641509434, |
| "grad_norm": 1.6299173329516294, |
| "learning_rate": 2.5704369943886763e-05, |
| "loss": 1.4487, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1344.7169811320755, |
| "grad_norm": 1.5340708358576824, |
| "learning_rate": 2.5665333066946082e-05, |
| "loss": 1.4659, |
| "step": 35020 |
| }, |
| { |
| "epoch": 1345.4716981132076, |
| "grad_norm": 1.70280338168885, |
| "learning_rate": 2.5626319023969715e-05, |
| "loss": 1.4547, |
| "step": 35040 |
| }, |
| { |
| "epoch": 1346.2264150943397, |
| "grad_norm": 1.6585665666032239, |
| "learning_rate": 2.558732786787497e-05, |
| "loss": 1.4514, |
| "step": 35060 |
| }, |
| { |
| "epoch": 1346.9811320754718, |
| "grad_norm": 1.562613257380082, |
| "learning_rate": 2.5548359651548126e-05, |
| "loss": 1.4661, |
| "step": 35080 |
| }, |
| { |
| "epoch": 1347.7358490566037, |
| "grad_norm": 1.7392138600300024, |
| "learning_rate": 2.550941442784431e-05, |
| "loss": 1.4546, |
| "step": 35100 |
| }, |
| { |
| "epoch": 1348.4905660377358, |
| "grad_norm": 1.9111375288571992, |
| "learning_rate": 2.5470492249587522e-05, |
| "loss": 1.4478, |
| "step": 35120 |
| }, |
| { |
| "epoch": 1349.245283018868, |
| "grad_norm": 1.4950805686503206, |
| "learning_rate": 2.5431593169570446e-05, |
| "loss": 1.4535, |
| "step": 35140 |
| }, |
| { |
| "epoch": 1350.0, |
| "grad_norm": 2.553809298230812, |
| "learning_rate": 2.539271724055444e-05, |
| "loss": 1.464, |
| "step": 35160 |
| }, |
| { |
| "epoch": 1350.754716981132, |
| "grad_norm": 1.562798272416066, |
| "learning_rate": 2.5353864515269525e-05, |
| "loss": 1.4665, |
| "step": 35180 |
| }, |
| { |
| "epoch": 1351.5094339622642, |
| "grad_norm": 1.5956415565820565, |
| "learning_rate": 2.531503504641416e-05, |
| "loss": 1.4174, |
| "step": 35200 |
| }, |
| { |
| "epoch": 1352.2641509433963, |
| "grad_norm": 1.745867042261029, |
| "learning_rate": 2.5276228886655333e-05, |
| "loss": 1.4738, |
| "step": 35220 |
| }, |
| { |
| "epoch": 1353.0188679245282, |
| "grad_norm": 1.8454370598772634, |
| "learning_rate": 2.5237446088628384e-05, |
| "loss": 1.4407, |
| "step": 35240 |
| }, |
| { |
| "epoch": 1353.7735849056603, |
| "grad_norm": 1.70704191729437, |
| "learning_rate": 2.5198686704936945e-05, |
| "loss": 1.4617, |
| "step": 35260 |
| }, |
| { |
| "epoch": 1354.5283018867924, |
| "grad_norm": 1.410719238952515, |
| "learning_rate": 2.5159950788152942e-05, |
| "loss": 1.4397, |
| "step": 35280 |
| }, |
| { |
| "epoch": 1355.2830188679245, |
| "grad_norm": 1.811804083528806, |
| "learning_rate": 2.512123839081642e-05, |
| "loss": 1.443, |
| "step": 35300 |
| }, |
| { |
| "epoch": 1356.0377358490566, |
| "grad_norm": 1.659319824434148, |
| "learning_rate": 2.508254956543557e-05, |
| "loss": 1.4577, |
| "step": 35320 |
| }, |
| { |
| "epoch": 1356.7924528301887, |
| "grad_norm": 1.5084615900612242, |
| "learning_rate": 2.504388436448657e-05, |
| "loss": 1.4702, |
| "step": 35340 |
| }, |
| { |
| "epoch": 1357.5471698113208, |
| "grad_norm": 1.6272545133599885, |
| "learning_rate": 2.500524284041357e-05, |
| "loss": 1.4397, |
| "step": 35360 |
| }, |
| { |
| "epoch": 1358.301886792453, |
| "grad_norm": 1.470645864952112, |
| "learning_rate": 2.4966625045628615e-05, |
| "loss": 1.4435, |
| "step": 35380 |
| }, |
| { |
| "epoch": 1359.0566037735848, |
| "grad_norm": 1.455775463587072, |
| "learning_rate": 2.4928031032511544e-05, |
| "loss": 1.4554, |
| "step": 35400 |
| }, |
| { |
| "epoch": 1359.811320754717, |
| "grad_norm": 1.6787988136879601, |
| "learning_rate": 2.4889460853409974e-05, |
| "loss": 1.4692, |
| "step": 35420 |
| }, |
| { |
| "epoch": 1360.566037735849, |
| "grad_norm": 1.5640507822516196, |
| "learning_rate": 2.485091456063916e-05, |
| "loss": 1.4528, |
| "step": 35440 |
| }, |
| { |
| "epoch": 1361.3207547169811, |
| "grad_norm": 1.396621608357886, |
| "learning_rate": 2.4812392206481945e-05, |
| "loss": 1.4371, |
| "step": 35460 |
| }, |
| { |
| "epoch": 1362.0754716981132, |
| "grad_norm": 1.8537494645554213, |
| "learning_rate": 2.477389384318876e-05, |
| "loss": 1.4395, |
| "step": 35480 |
| }, |
| { |
| "epoch": 1362.8301886792453, |
| "grad_norm": 1.512928732642698, |
| "learning_rate": 2.4735419522977467e-05, |
| "loss": 1.4914, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1363.5849056603774, |
| "grad_norm": 1.6340922613193214, |
| "learning_rate": 2.46969692980333e-05, |
| "loss": 1.4654, |
| "step": 35520 |
| }, |
| { |
| "epoch": 1364.3396226415093, |
| "grad_norm": 1.5378015561259157, |
| "learning_rate": 2.465854322050881e-05, |
| "loss": 1.4246, |
| "step": 35540 |
| }, |
| { |
| "epoch": 1365.0943396226414, |
| "grad_norm": 1.8471949838761705, |
| "learning_rate": 2.462014134252384e-05, |
| "loss": 1.4386, |
| "step": 35560 |
| }, |
| { |
| "epoch": 1365.8490566037735, |
| "grad_norm": 2.139477793232749, |
| "learning_rate": 2.4581763716165345e-05, |
| "loss": 1.4314, |
| "step": 35580 |
| }, |
| { |
| "epoch": 1366.6037735849056, |
| "grad_norm": 1.5366713623147805, |
| "learning_rate": 2.454341039348746e-05, |
| "loss": 1.4514, |
| "step": 35600 |
| }, |
| { |
| "epoch": 1367.3584905660377, |
| "grad_norm": 2.6632963736452018, |
| "learning_rate": 2.4505081426511286e-05, |
| "loss": 1.4244, |
| "step": 35620 |
| }, |
| { |
| "epoch": 1368.1132075471698, |
| "grad_norm": 1.7507517183403924, |
| "learning_rate": 2.4466776867224914e-05, |
| "loss": 1.4401, |
| "step": 35640 |
| }, |
| { |
| "epoch": 1368.867924528302, |
| "grad_norm": 1.7263277038654796, |
| "learning_rate": 2.4428496767583355e-05, |
| "loss": 1.4569, |
| "step": 35660 |
| }, |
| { |
| "epoch": 1369.622641509434, |
| "grad_norm": 1.664393561735168, |
| "learning_rate": 2.4390241179508404e-05, |
| "loss": 1.4387, |
| "step": 35680 |
| }, |
| { |
| "epoch": 1370.377358490566, |
| "grad_norm": 1.723479394345894, |
| "learning_rate": 2.435201015488865e-05, |
| "loss": 1.4411, |
| "step": 35700 |
| }, |
| { |
| "epoch": 1371.132075471698, |
| "grad_norm": 1.434976866992101, |
| "learning_rate": 2.4313803745579318e-05, |
| "loss": 1.4284, |
| "step": 35720 |
| }, |
| { |
| "epoch": 1371.8867924528302, |
| "grad_norm": 1.4785579710843697, |
| "learning_rate": 2.4275622003402272e-05, |
| "loss": 1.442, |
| "step": 35740 |
| }, |
| { |
| "epoch": 1372.6415094339623, |
| "grad_norm": 1.4377021405339876, |
| "learning_rate": 2.4237464980145938e-05, |
| "loss": 1.4585, |
| "step": 35760 |
| }, |
| { |
| "epoch": 1373.3962264150944, |
| "grad_norm": 1.4439423657468624, |
| "learning_rate": 2.4199332727565162e-05, |
| "loss": 1.4415, |
| "step": 35780 |
| }, |
| { |
| "epoch": 1374.1509433962265, |
| "grad_norm": 1.6286933767716432, |
| "learning_rate": 2.4161225297381257e-05, |
| "loss": 1.4191, |
| "step": 35800 |
| }, |
| { |
| "epoch": 1374.9056603773586, |
| "grad_norm": 1.8061947503706157, |
| "learning_rate": 2.412314274128181e-05, |
| "loss": 1.4328, |
| "step": 35820 |
| }, |
| { |
| "epoch": 1375.6603773584907, |
| "grad_norm": 1.4892866827277318, |
| "learning_rate": 2.408508511092069e-05, |
| "loss": 1.426, |
| "step": 35840 |
| }, |
| { |
| "epoch": 1376.4150943396226, |
| "grad_norm": 2.1944517889347206, |
| "learning_rate": 2.4047052457917976e-05, |
| "loss": 1.4383, |
| "step": 35860 |
| }, |
| { |
| "epoch": 1377.1698113207547, |
| "grad_norm": 1.657764612011157, |
| "learning_rate": 2.4009044833859837e-05, |
| "loss": 1.4335, |
| "step": 35880 |
| }, |
| { |
| "epoch": 1377.9245283018868, |
| "grad_norm": 1.6641457685651413, |
| "learning_rate": 2.397106229029853e-05, |
| "loss": 1.449, |
| "step": 35900 |
| }, |
| { |
| "epoch": 1378.6792452830189, |
| "grad_norm": 1.6180638342974163, |
| "learning_rate": 2.3933104878752255e-05, |
| "loss": 1.4531, |
| "step": 35920 |
| }, |
| { |
| "epoch": 1379.433962264151, |
| "grad_norm": 1.4294375910343768, |
| "learning_rate": 2.3895172650705135e-05, |
| "loss": 1.394, |
| "step": 35940 |
| }, |
| { |
| "epoch": 1380.188679245283, |
| "grad_norm": 1.8277501896092694, |
| "learning_rate": 2.3857265657607175e-05, |
| "loss": 1.3907, |
| "step": 35960 |
| }, |
| { |
| "epoch": 1380.9433962264152, |
| "grad_norm": 1.498142714401942, |
| "learning_rate": 2.381938395087408e-05, |
| "loss": 1.427, |
| "step": 35980 |
| }, |
| { |
| "epoch": 1381.698113207547, |
| "grad_norm": 1.6446695245077154, |
| "learning_rate": 2.3781527581887328e-05, |
| "loss": 1.4267, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1382.4528301886792, |
| "grad_norm": 2.126047948088478, |
| "learning_rate": 2.3743696601993973e-05, |
| "loss": 1.4513, |
| "step": 36020 |
| }, |
| { |
| "epoch": 1383.2075471698113, |
| "grad_norm": 1.5906073184513956, |
| "learning_rate": 2.3705891062506686e-05, |
| "loss": 1.4468, |
| "step": 36040 |
| }, |
| { |
| "epoch": 1383.9622641509434, |
| "grad_norm": 1.6659051387541641, |
| "learning_rate": 2.366811101470359e-05, |
| "loss": 1.4397, |
| "step": 36060 |
| }, |
| { |
| "epoch": 1384.7169811320755, |
| "grad_norm": 1.7950603394090476, |
| "learning_rate": 2.363035650982822e-05, |
| "loss": 1.4314, |
| "step": 36080 |
| }, |
| { |
| "epoch": 1385.4716981132076, |
| "grad_norm": 1.7227503126171113, |
| "learning_rate": 2.359262759908953e-05, |
| "loss": 1.4305, |
| "step": 36100 |
| }, |
| { |
| "epoch": 1386.2264150943397, |
| "grad_norm": 1.5686879263532916, |
| "learning_rate": 2.355492433366169e-05, |
| "loss": 1.4606, |
| "step": 36120 |
| }, |
| { |
| "epoch": 1386.9811320754718, |
| "grad_norm": 1.6010165898998077, |
| "learning_rate": 2.3517246764684138e-05, |
| "loss": 1.441, |
| "step": 36140 |
| }, |
| { |
| "epoch": 1387.7358490566037, |
| "grad_norm": 3.491710911332113, |
| "learning_rate": 2.3479594943261428e-05, |
| "loss": 1.4341, |
| "step": 36160 |
| }, |
| { |
| "epoch": 1388.4905660377358, |
| "grad_norm": 1.6931483101249463, |
| "learning_rate": 2.3441968920463175e-05, |
| "loss": 1.4059, |
| "step": 36180 |
| }, |
| { |
| "epoch": 1389.245283018868, |
| "grad_norm": 1.5814288881168233, |
| "learning_rate": 2.340436874732406e-05, |
| "loss": 1.4494, |
| "step": 36200 |
| }, |
| { |
| "epoch": 1390.0, |
| "grad_norm": 1.7550476965929234, |
| "learning_rate": 2.3366794474843636e-05, |
| "loss": 1.4461, |
| "step": 36220 |
| }, |
| { |
| "epoch": 1390.754716981132, |
| "grad_norm": 1.6037325519139611, |
| "learning_rate": 2.332924615398638e-05, |
| "loss": 1.4324, |
| "step": 36240 |
| }, |
| { |
| "epoch": 1391.5094339622642, |
| "grad_norm": 1.5872440902961078, |
| "learning_rate": 2.3291723835681542e-05, |
| "loss": 1.4229, |
| "step": 36260 |
| }, |
| { |
| "epoch": 1392.2641509433963, |
| "grad_norm": 1.6075974238110624, |
| "learning_rate": 2.3254227570823088e-05, |
| "loss": 1.4319, |
| "step": 36280 |
| }, |
| { |
| "epoch": 1393.0188679245282, |
| "grad_norm": 1.664082496030561, |
| "learning_rate": 2.3216757410269688e-05, |
| "loss": 1.4133, |
| "step": 36300 |
| }, |
| { |
| "epoch": 1393.7735849056603, |
| "grad_norm": 1.868185444331913, |
| "learning_rate": 2.3179313404844556e-05, |
| "loss": 1.4303, |
| "step": 36320 |
| }, |
| { |
| "epoch": 1394.5283018867924, |
| "grad_norm": 1.5709216565532331, |
| "learning_rate": 2.314189560533549e-05, |
| "loss": 1.4136, |
| "step": 36340 |
| }, |
| { |
| "epoch": 1395.2830188679245, |
| "grad_norm": 1.6461901097795721, |
| "learning_rate": 2.3104504062494673e-05, |
| "loss": 1.4359, |
| "step": 36360 |
| }, |
| { |
| "epoch": 1396.0377358490566, |
| "grad_norm": 1.4737937485692245, |
| "learning_rate": 2.306713882703874e-05, |
| "loss": 1.4417, |
| "step": 36380 |
| }, |
| { |
| "epoch": 1396.7924528301887, |
| "grad_norm": 1.62600468664324, |
| "learning_rate": 2.3029799949648578e-05, |
| "loss": 1.4471, |
| "step": 36400 |
| }, |
| { |
| "epoch": 1397.5471698113208, |
| "grad_norm": 2.4473530264247914, |
| "learning_rate": 2.2992487480969405e-05, |
| "loss": 1.4239, |
| "step": 36420 |
| }, |
| { |
| "epoch": 1398.301886792453, |
| "grad_norm": 1.451788707298732, |
| "learning_rate": 2.295520147161054e-05, |
| "loss": 1.4213, |
| "step": 36440 |
| }, |
| { |
| "epoch": 1399.0566037735848, |
| "grad_norm": 1.6561495842890779, |
| "learning_rate": 2.2917941972145448e-05, |
| "loss": 1.4289, |
| "step": 36460 |
| }, |
| { |
| "epoch": 1399.811320754717, |
| "grad_norm": 1.7199804756862742, |
| "learning_rate": 2.288070903311165e-05, |
| "loss": 1.4089, |
| "step": 36480 |
| }, |
| { |
| "epoch": 1400.566037735849, |
| "grad_norm": 1.3767860468778748, |
| "learning_rate": 2.2843502705010602e-05, |
| "loss": 1.43, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1401.3207547169811, |
| "grad_norm": 1.629044071752712, |
| "learning_rate": 2.2806323038307724e-05, |
| "loss": 1.4353, |
| "step": 36520 |
| }, |
| { |
| "epoch": 1402.0754716981132, |
| "grad_norm": 1.5402931594748135, |
| "learning_rate": 2.2769170083432224e-05, |
| "loss": 1.4002, |
| "step": 36540 |
| }, |
| { |
| "epoch": 1402.8301886792453, |
| "grad_norm": 1.6851610727649395, |
| "learning_rate": 2.273204389077707e-05, |
| "loss": 1.4303, |
| "step": 36560 |
| }, |
| { |
| "epoch": 1403.5849056603774, |
| "grad_norm": 1.6351932980143555, |
| "learning_rate": 2.2694944510698992e-05, |
| "loss": 1.4324, |
| "step": 36580 |
| }, |
| { |
| "epoch": 1404.3396226415093, |
| "grad_norm": 1.3360407707287731, |
| "learning_rate": 2.265787199351829e-05, |
| "loss": 1.4296, |
| "step": 36600 |
| }, |
| { |
| "epoch": 1405.0943396226414, |
| "grad_norm": 1.6229856547835415, |
| "learning_rate": 2.2620826389518878e-05, |
| "loss": 1.4132, |
| "step": 36620 |
| }, |
| { |
| "epoch": 1405.8490566037735, |
| "grad_norm": 1.5762261444691155, |
| "learning_rate": 2.258380774894813e-05, |
| "loss": 1.4189, |
| "step": 36640 |
| }, |
| { |
| "epoch": 1406.6037735849056, |
| "grad_norm": 1.6330786646124598, |
| "learning_rate": 2.254681612201684e-05, |
| "loss": 1.4229, |
| "step": 36660 |
| }, |
| { |
| "epoch": 1407.3584905660377, |
| "grad_norm": 1.6074464661210397, |
| "learning_rate": 2.2509851558899212e-05, |
| "loss": 1.4438, |
| "step": 36680 |
| }, |
| { |
| "epoch": 1408.1132075471698, |
| "grad_norm": 2.912277484153031, |
| "learning_rate": 2.2472914109732686e-05, |
| "loss": 1.4195, |
| "step": 36700 |
| }, |
| { |
| "epoch": 1408.867924528302, |
| "grad_norm": 1.6223740817719732, |
| "learning_rate": 2.2436003824617963e-05, |
| "loss": 1.4099, |
| "step": 36720 |
| }, |
| { |
| "epoch": 1409.622641509434, |
| "grad_norm": 1.766781857646511, |
| "learning_rate": 2.2399120753618896e-05, |
| "loss": 1.4168, |
| "step": 36740 |
| }, |
| { |
| "epoch": 1410.377358490566, |
| "grad_norm": 1.5296965456959557, |
| "learning_rate": 2.2362264946762392e-05, |
| "loss": 1.4118, |
| "step": 36760 |
| }, |
| { |
| "epoch": 1411.132075471698, |
| "grad_norm": 1.6610041335566879, |
| "learning_rate": 2.232543645403842e-05, |
| "loss": 1.4166, |
| "step": 36780 |
| }, |
| { |
| "epoch": 1411.8867924528302, |
| "grad_norm": 1.5205836616470723, |
| "learning_rate": 2.228863532539987e-05, |
| "loss": 1.4246, |
| "step": 36800 |
| }, |
| { |
| "epoch": 1412.6415094339623, |
| "grad_norm": 2.018497485986653, |
| "learning_rate": 2.2251861610762556e-05, |
| "loss": 1.4219, |
| "step": 36820 |
| }, |
| { |
| "epoch": 1413.3962264150944, |
| "grad_norm": 1.495393210690481, |
| "learning_rate": 2.221511536000505e-05, |
| "loss": 1.4201, |
| "step": 36840 |
| }, |
| { |
| "epoch": 1414.1509433962265, |
| "grad_norm": 1.5817177891641536, |
| "learning_rate": 2.2178396622968714e-05, |
| "loss": 1.4301, |
| "step": 36860 |
| }, |
| { |
| "epoch": 1414.9056603773586, |
| "grad_norm": 1.5602680564678848, |
| "learning_rate": 2.2141705449457588e-05, |
| "loss": 1.4246, |
| "step": 36880 |
| }, |
| { |
| "epoch": 1415.6603773584907, |
| "grad_norm": 1.5687723652001904, |
| "learning_rate": 2.2105041889238327e-05, |
| "loss": 1.4291, |
| "step": 36900 |
| }, |
| { |
| "epoch": 1416.4150943396226, |
| "grad_norm": 1.6516438298835592, |
| "learning_rate": 2.2068405992040127e-05, |
| "loss": 1.4186, |
| "step": 36920 |
| }, |
| { |
| "epoch": 1417.1698113207547, |
| "grad_norm": 1.6972963029742167, |
| "learning_rate": 2.2031797807554646e-05, |
| "loss": 1.4026, |
| "step": 36940 |
| }, |
| { |
| "epoch": 1417.9245283018868, |
| "grad_norm": 1.6936263753645908, |
| "learning_rate": 2.1995217385435962e-05, |
| "loss": 1.3882, |
| "step": 36960 |
| }, |
| { |
| "epoch": 1418.6792452830189, |
| "grad_norm": 1.697372534880421, |
| "learning_rate": 2.1958664775300517e-05, |
| "loss": 1.4228, |
| "step": 36980 |
| }, |
| { |
| "epoch": 1419.433962264151, |
| "grad_norm": 1.4972148012217616, |
| "learning_rate": 2.192214002672703e-05, |
| "loss": 1.3961, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1420.188679245283, |
| "grad_norm": 1.6926137674291781, |
| "learning_rate": 2.1885643189256404e-05, |
| "loss": 1.4005, |
| "step": 37020 |
| }, |
| { |
| "epoch": 1420.9433962264152, |
| "grad_norm": 1.6182171449313734, |
| "learning_rate": 2.1849174312391693e-05, |
| "loss": 1.3939, |
| "step": 37040 |
| }, |
| { |
| "epoch": 1421.698113207547, |
| "grad_norm": 1.6235165658387523, |
| "learning_rate": 2.181273344559802e-05, |
| "loss": 1.414, |
| "step": 37060 |
| }, |
| { |
| "epoch": 1422.4528301886792, |
| "grad_norm": 1.7354641628437306, |
| "learning_rate": 2.1776320638302533e-05, |
| "loss": 1.4039, |
| "step": 37080 |
| }, |
| { |
| "epoch": 1423.2075471698113, |
| "grad_norm": 1.7598777416483105, |
| "learning_rate": 2.1739935939894332e-05, |
| "loss": 1.4319, |
| "step": 37100 |
| }, |
| { |
| "epoch": 1423.9622641509434, |
| "grad_norm": 1.6119817066147992, |
| "learning_rate": 2.170357939972436e-05, |
| "loss": 1.4083, |
| "step": 37120 |
| }, |
| { |
| "epoch": 1424.7169811320755, |
| "grad_norm": 1.5177195143064601, |
| "learning_rate": 2.1667251067105383e-05, |
| "loss": 1.4084, |
| "step": 37140 |
| }, |
| { |
| "epoch": 1425.4716981132076, |
| "grad_norm": 1.6531623474873094, |
| "learning_rate": 2.1630950991311884e-05, |
| "loss": 1.3961, |
| "step": 37160 |
| }, |
| { |
| "epoch": 1426.2264150943397, |
| "grad_norm": 1.9866189092494402, |
| "learning_rate": 2.159467922158006e-05, |
| "loss": 1.4205, |
| "step": 37180 |
| }, |
| { |
| "epoch": 1426.9811320754718, |
| "grad_norm": 1.6409536663163726, |
| "learning_rate": 2.15584358071077e-05, |
| "loss": 1.4065, |
| "step": 37200 |
| }, |
| { |
| "epoch": 1427.7358490566037, |
| "grad_norm": 1.5972136032609723, |
| "learning_rate": 2.1522220797054117e-05, |
| "loss": 1.3999, |
| "step": 37220 |
| }, |
| { |
| "epoch": 1428.4905660377358, |
| "grad_norm": 1.7176147072411343, |
| "learning_rate": 2.1486034240540095e-05, |
| "loss": 1.4077, |
| "step": 37240 |
| }, |
| { |
| "epoch": 1429.245283018868, |
| "grad_norm": 2.2258677114656655, |
| "learning_rate": 2.1449876186647868e-05, |
| "loss": 1.4174, |
| "step": 37260 |
| }, |
| { |
| "epoch": 1430.0, |
| "grad_norm": 1.702909141767608, |
| "learning_rate": 2.1413746684420938e-05, |
| "loss": 1.3745, |
| "step": 37280 |
| }, |
| { |
| "epoch": 1430.754716981132, |
| "grad_norm": 2.1998514759828915, |
| "learning_rate": 2.1377645782864164e-05, |
| "loss": 1.421, |
| "step": 37300 |
| }, |
| { |
| "epoch": 1431.5094339622642, |
| "grad_norm": 1.4634078104497494, |
| "learning_rate": 2.134157353094355e-05, |
| "loss": 1.4219, |
| "step": 37320 |
| }, |
| { |
| "epoch": 1432.2641509433963, |
| "grad_norm": 1.7232746233155163, |
| "learning_rate": 2.1305529977586244e-05, |
| "loss": 1.4236, |
| "step": 37340 |
| }, |
| { |
| "epoch": 1433.0188679245282, |
| "grad_norm": 1.7372503788909404, |
| "learning_rate": 2.1269515171680505e-05, |
| "loss": 1.391, |
| "step": 37360 |
| }, |
| { |
| "epoch": 1433.7735849056603, |
| "grad_norm": 1.8471672382610358, |
| "learning_rate": 2.1233529162075586e-05, |
| "loss": 1.4087, |
| "step": 37380 |
| }, |
| { |
| "epoch": 1434.5283018867924, |
| "grad_norm": 1.6217649320497987, |
| "learning_rate": 2.1197571997581665e-05, |
| "loss": 1.4239, |
| "step": 37400 |
| }, |
| { |
| "epoch": 1435.2830188679245, |
| "grad_norm": 1.5296478429731253, |
| "learning_rate": 2.1161643726969807e-05, |
| "loss": 1.3958, |
| "step": 37420 |
| }, |
| { |
| "epoch": 1436.0377358490566, |
| "grad_norm": 1.560888539193858, |
| "learning_rate": 2.1125744398971865e-05, |
| "loss": 1.3979, |
| "step": 37440 |
| }, |
| { |
| "epoch": 1436.7924528301887, |
| "grad_norm": 4.524184692042414, |
| "learning_rate": 2.1089874062280467e-05, |
| "loss": 1.4068, |
| "step": 37460 |
| }, |
| { |
| "epoch": 1437.5471698113208, |
| "grad_norm": 1.9907802095010148, |
| "learning_rate": 2.1054032765548943e-05, |
| "loss": 1.4128, |
| "step": 37480 |
| }, |
| { |
| "epoch": 1438.301886792453, |
| "grad_norm": 1.5158745007996666, |
| "learning_rate": 2.1018220557391152e-05, |
| "loss": 1.4206, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1439.0566037735848, |
| "grad_norm": 1.922346737191028, |
| "learning_rate": 2.0982437486381567e-05, |
| "loss": 1.4155, |
| "step": 37520 |
| }, |
| { |
| "epoch": 1439.811320754717, |
| "grad_norm": 1.6671399833246607, |
| "learning_rate": 2.094668360105509e-05, |
| "loss": 1.4052, |
| "step": 37540 |
| }, |
| { |
| "epoch": 1440.566037735849, |
| "grad_norm": 1.6551308179910114, |
| "learning_rate": 2.0910958949907086e-05, |
| "loss": 1.3986, |
| "step": 37560 |
| }, |
| { |
| "epoch": 1441.3207547169811, |
| "grad_norm": 1.5091823329163863, |
| "learning_rate": 2.087526358139325e-05, |
| "loss": 1.3842, |
| "step": 37580 |
| }, |
| { |
| "epoch": 1442.0754716981132, |
| "grad_norm": 1.5775979624954766, |
| "learning_rate": 2.0839597543929547e-05, |
| "loss": 1.3695, |
| "step": 37600 |
| }, |
| { |
| "epoch": 1442.8301886792453, |
| "grad_norm": 1.65888589339979, |
| "learning_rate": 2.0803960885892166e-05, |
| "loss": 1.4212, |
| "step": 37620 |
| }, |
| { |
| "epoch": 1443.5849056603774, |
| "grad_norm": 1.7548961067858515, |
| "learning_rate": 2.0768353655617437e-05, |
| "loss": 1.4113, |
| "step": 37640 |
| }, |
| { |
| "epoch": 1444.3396226415093, |
| "grad_norm": 1.6568444527605615, |
| "learning_rate": 2.0732775901401787e-05, |
| "loss": 1.4097, |
| "step": 37660 |
| }, |
| { |
| "epoch": 1445.0943396226414, |
| "grad_norm": 1.569007851847122, |
| "learning_rate": 2.0697227671501686e-05, |
| "loss": 1.4025, |
| "step": 37680 |
| }, |
| { |
| "epoch": 1445.8490566037735, |
| "grad_norm": 2.054795832820314, |
| "learning_rate": 2.0661709014133507e-05, |
| "loss": 1.379, |
| "step": 37700 |
| }, |
| { |
| "epoch": 1446.6037735849056, |
| "grad_norm": 1.825419627550906, |
| "learning_rate": 2.0626219977473546e-05, |
| "loss": 1.4141, |
| "step": 37720 |
| }, |
| { |
| "epoch": 1447.3584905660377, |
| "grad_norm": 1.5477059924334846, |
| "learning_rate": 2.05907606096579e-05, |
| "loss": 1.3764, |
| "step": 37740 |
| }, |
| { |
| "epoch": 1448.1132075471698, |
| "grad_norm": 1.6756003225697567, |
| "learning_rate": 2.0555330958782456e-05, |
| "loss": 1.3943, |
| "step": 37760 |
| }, |
| { |
| "epoch": 1448.867924528302, |
| "grad_norm": 1.6181024178942431, |
| "learning_rate": 2.0519931072902775e-05, |
| "loss": 1.3828, |
| "step": 37780 |
| }, |
| { |
| "epoch": 1449.622641509434, |
| "grad_norm": 1.5075137093108786, |
| "learning_rate": 2.0484561000034048e-05, |
| "loss": 1.3993, |
| "step": 37800 |
| }, |
| { |
| "epoch": 1450.377358490566, |
| "grad_norm": 1.7611806649373956, |
| "learning_rate": 2.0449220788151017e-05, |
| "loss": 1.4025, |
| "step": 37820 |
| }, |
| { |
| "epoch": 1451.132075471698, |
| "grad_norm": 1.5899695714047575, |
| "learning_rate": 2.0413910485187918e-05, |
| "loss": 1.4011, |
| "step": 37840 |
| }, |
| { |
| "epoch": 1451.8867924528302, |
| "grad_norm": 1.9368764939203147, |
| "learning_rate": 2.0378630139038477e-05, |
| "loss": 1.3914, |
| "step": 37860 |
| }, |
| { |
| "epoch": 1452.6415094339623, |
| "grad_norm": 2.2298811573087938, |
| "learning_rate": 2.0343379797555718e-05, |
| "loss": 1.4096, |
| "step": 37880 |
| }, |
| { |
| "epoch": 1453.3962264150944, |
| "grad_norm": 1.8812446095800621, |
| "learning_rate": 2.0308159508552003e-05, |
| "loss": 1.3994, |
| "step": 37900 |
| }, |
| { |
| "epoch": 1454.1509433962265, |
| "grad_norm": 1.546640573436516, |
| "learning_rate": 2.0272969319798898e-05, |
| "loss": 1.3901, |
| "step": 37920 |
| }, |
| { |
| "epoch": 1454.9056603773586, |
| "grad_norm": 1.6385943250375863, |
| "learning_rate": 2.0237809279027187e-05, |
| "loss": 1.3954, |
| "step": 37940 |
| }, |
| { |
| "epoch": 1455.6603773584907, |
| "grad_norm": 1.5716563974399815, |
| "learning_rate": 2.0202679433926757e-05, |
| "loss": 1.3935, |
| "step": 37960 |
| }, |
| { |
| "epoch": 1456.4150943396226, |
| "grad_norm": 1.735984609092, |
| "learning_rate": 2.0167579832146505e-05, |
| "loss": 1.4118, |
| "step": 37980 |
| }, |
| { |
| "epoch": 1457.1698113207547, |
| "grad_norm": 1.7313373844094564, |
| "learning_rate": 2.013251052129433e-05, |
| "loss": 1.3767, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1457.9245283018868, |
| "grad_norm": 1.7960534040522838, |
| "learning_rate": 2.0097471548937024e-05, |
| "loss": 1.3803, |
| "step": 38020 |
| }, |
| { |
| "epoch": 1458.6792452830189, |
| "grad_norm": 2.5283310401144434, |
| "learning_rate": 2.0062462962600258e-05, |
| "loss": 1.3763, |
| "step": 38040 |
| }, |
| { |
| "epoch": 1459.433962264151, |
| "grad_norm": 1.5401697781512245, |
| "learning_rate": 2.0027484809768506e-05, |
| "loss": 1.3768, |
| "step": 38060 |
| }, |
| { |
| "epoch": 1460.188679245283, |
| "grad_norm": 2.291171375246112, |
| "learning_rate": 1.9992537137884905e-05, |
| "loss": 1.389, |
| "step": 38080 |
| }, |
| { |
| "epoch": 1460.9433962264152, |
| "grad_norm": 1.6878149956470094, |
| "learning_rate": 1.9957619994351278e-05, |
| "loss": 1.3978, |
| "step": 38100 |
| }, |
| { |
| "epoch": 1461.698113207547, |
| "grad_norm": 1.9279881821004916, |
| "learning_rate": 1.9922733426528033e-05, |
| "loss": 1.3576, |
| "step": 38120 |
| }, |
| { |
| "epoch": 1462.4528301886792, |
| "grad_norm": 1.6593792348690906, |
| "learning_rate": 1.9887877481734122e-05, |
| "loss": 1.3827, |
| "step": 38140 |
| }, |
| { |
| "epoch": 1463.2075471698113, |
| "grad_norm": 1.6870370599265458, |
| "learning_rate": 1.9853052207246967e-05, |
| "loss": 1.3498, |
| "step": 38160 |
| }, |
| { |
| "epoch": 1463.9622641509434, |
| "grad_norm": 1.5201114526632646, |
| "learning_rate": 1.981825765030236e-05, |
| "loss": 1.3972, |
| "step": 38180 |
| }, |
| { |
| "epoch": 1464.7169811320755, |
| "grad_norm": 1.676738216954013, |
| "learning_rate": 1.9783493858094444e-05, |
| "loss": 1.3751, |
| "step": 38200 |
| }, |
| { |
| "epoch": 1465.4716981132076, |
| "grad_norm": 1.7174190166635537, |
| "learning_rate": 1.9748760877775622e-05, |
| "loss": 1.3723, |
| "step": 38220 |
| }, |
| { |
| "epoch": 1466.2264150943397, |
| "grad_norm": 1.6832142484740018, |
| "learning_rate": 1.9714058756456533e-05, |
| "loss": 1.383, |
| "step": 38240 |
| }, |
| { |
| "epoch": 1466.9811320754718, |
| "grad_norm": 1.6409050107329164, |
| "learning_rate": 1.9679387541205946e-05, |
| "loss": 1.3868, |
| "step": 38260 |
| }, |
| { |
| "epoch": 1467.7358490566037, |
| "grad_norm": 1.870348325922022, |
| "learning_rate": 1.96447472790507e-05, |
| "loss": 1.4093, |
| "step": 38280 |
| }, |
| { |
| "epoch": 1468.4905660377358, |
| "grad_norm": 1.622451000807429, |
| "learning_rate": 1.9610138016975643e-05, |
| "loss": 1.3908, |
| "step": 38300 |
| }, |
| { |
| "epoch": 1469.245283018868, |
| "grad_norm": 1.5580142525601877, |
| "learning_rate": 1.9575559801923602e-05, |
| "loss": 1.3519, |
| "step": 38320 |
| }, |
| { |
| "epoch": 1470.0, |
| "grad_norm": 5.308564996067893, |
| "learning_rate": 1.95410126807953e-05, |
| "loss": 1.3806, |
| "step": 38340 |
| }, |
| { |
| "epoch": 1470.754716981132, |
| "grad_norm": 1.697128624848967, |
| "learning_rate": 1.9506496700449247e-05, |
| "loss": 1.4021, |
| "step": 38360 |
| }, |
| { |
| "epoch": 1471.5094339622642, |
| "grad_norm": 1.6407506564774348, |
| "learning_rate": 1.9472011907701736e-05, |
| "loss": 1.3889, |
| "step": 38380 |
| }, |
| { |
| "epoch": 1472.2641509433963, |
| "grad_norm": 2.5227247039872567, |
| "learning_rate": 1.9437558349326745e-05, |
| "loss": 1.3656, |
| "step": 38400 |
| }, |
| { |
| "epoch": 1473.0188679245282, |
| "grad_norm": 2.0102914189329155, |
| "learning_rate": 1.9403136072055903e-05, |
| "loss": 1.3631, |
| "step": 38420 |
| }, |
| { |
| "epoch": 1473.7735849056603, |
| "grad_norm": 1.8549253610315775, |
| "learning_rate": 1.9368745122578427e-05, |
| "loss": 1.3835, |
| "step": 38440 |
| }, |
| { |
| "epoch": 1474.5283018867924, |
| "grad_norm": 1.7474837425802672, |
| "learning_rate": 1.9334385547541004e-05, |
| "loss": 1.3876, |
| "step": 38460 |
| }, |
| { |
| "epoch": 1475.2830188679245, |
| "grad_norm": 1.4221446206180013, |
| "learning_rate": 1.930005739354778e-05, |
| "loss": 1.3875, |
| "step": 38480 |
| }, |
| { |
| "epoch": 1476.0377358490566, |
| "grad_norm": 1.567704112230289, |
| "learning_rate": 1.926576070716028e-05, |
| "loss": 1.3787, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1476.7924528301887, |
| "grad_norm": 1.8092743011121888, |
| "learning_rate": 1.9231495534897356e-05, |
| "loss": 1.3746, |
| "step": 38520 |
| }, |
| { |
| "epoch": 1477.5471698113208, |
| "grad_norm": 1.9871542434365639, |
| "learning_rate": 1.919726192323512e-05, |
| "loss": 1.4062, |
| "step": 38540 |
| }, |
| { |
| "epoch": 1478.301886792453, |
| "grad_norm": 2.100192891688555, |
| "learning_rate": 1.916305991860687e-05, |
| "loss": 1.372, |
| "step": 38560 |
| }, |
| { |
| "epoch": 1479.0566037735848, |
| "grad_norm": 1.689968827696773, |
| "learning_rate": 1.912888956740302e-05, |
| "loss": 1.3994, |
| "step": 38580 |
| }, |
| { |
| "epoch": 1479.811320754717, |
| "grad_norm": 1.59619952456533, |
| "learning_rate": 1.9094750915971053e-05, |
| "loss": 1.3547, |
| "step": 38600 |
| }, |
| { |
| "epoch": 1480.566037735849, |
| "grad_norm": 3.206605320072948, |
| "learning_rate": 1.9060644010615473e-05, |
| "loss": 1.4052, |
| "step": 38620 |
| }, |
| { |
| "epoch": 1481.3207547169811, |
| "grad_norm": 1.5795369303879008, |
| "learning_rate": 1.9026568897597735e-05, |
| "loss": 1.3921, |
| "step": 38640 |
| }, |
| { |
| "epoch": 1482.0754716981132, |
| "grad_norm": 2.1910690965934467, |
| "learning_rate": 1.8992525623136132e-05, |
| "loss": 1.3563, |
| "step": 38660 |
| }, |
| { |
| "epoch": 1482.8301886792453, |
| "grad_norm": 1.5353645456337577, |
| "learning_rate": 1.8958514233405793e-05, |
| "loss": 1.4077, |
| "step": 38680 |
| }, |
| { |
| "epoch": 1483.5849056603774, |
| "grad_norm": 1.7836996022414107, |
| "learning_rate": 1.8924534774538593e-05, |
| "loss": 1.3824, |
| "step": 38700 |
| }, |
| { |
| "epoch": 1484.3396226415093, |
| "grad_norm": 1.6136317181444138, |
| "learning_rate": 1.8890587292623113e-05, |
| "loss": 1.3511, |
| "step": 38720 |
| }, |
| { |
| "epoch": 1485.0943396226414, |
| "grad_norm": 1.8211866581007339, |
| "learning_rate": 1.8856671833704565e-05, |
| "loss": 1.3725, |
| "step": 38740 |
| }, |
| { |
| "epoch": 1485.8490566037735, |
| "grad_norm": 1.5979573815344084, |
| "learning_rate": 1.8822788443784704e-05, |
| "loss": 1.3571, |
| "step": 38760 |
| }, |
| { |
| "epoch": 1486.6037735849056, |
| "grad_norm": 1.6365464316772047, |
| "learning_rate": 1.878893716882177e-05, |
| "loss": 1.3588, |
| "step": 38780 |
| }, |
| { |
| "epoch": 1487.3584905660377, |
| "grad_norm": 2.811912712405292, |
| "learning_rate": 1.8755118054730514e-05, |
| "loss": 1.3823, |
| "step": 38800 |
| }, |
| { |
| "epoch": 1488.1132075471698, |
| "grad_norm": 1.684830593576563, |
| "learning_rate": 1.8721331147381986e-05, |
| "loss": 1.3604, |
| "step": 38820 |
| }, |
| { |
| "epoch": 1488.867924528302, |
| "grad_norm": 1.5101192259883982, |
| "learning_rate": 1.868757649260362e-05, |
| "loss": 1.3712, |
| "step": 38840 |
| }, |
| { |
| "epoch": 1489.622641509434, |
| "grad_norm": 1.8516527403548584, |
| "learning_rate": 1.8653854136179047e-05, |
| "loss": 1.3576, |
| "step": 38860 |
| }, |
| { |
| "epoch": 1490.377358490566, |
| "grad_norm": 1.5630443819078437, |
| "learning_rate": 1.8620164123848113e-05, |
| "loss": 1.3729, |
| "step": 38880 |
| }, |
| { |
| "epoch": 1491.132075471698, |
| "grad_norm": 1.9558078371048477, |
| "learning_rate": 1.8586506501306792e-05, |
| "loss": 1.3466, |
| "step": 38900 |
| }, |
| { |
| "epoch": 1491.8867924528302, |
| "grad_norm": 1.6581722869425195, |
| "learning_rate": 1.8552881314207158e-05, |
| "loss": 1.3547, |
| "step": 38920 |
| }, |
| { |
| "epoch": 1492.6415094339623, |
| "grad_norm": 1.9162311420660751, |
| "learning_rate": 1.8519288608157236e-05, |
| "loss": 1.3995, |
| "step": 38940 |
| }, |
| { |
| "epoch": 1493.3962264150944, |
| "grad_norm": 2.8463480242853874, |
| "learning_rate": 1.8485728428721025e-05, |
| "loss": 1.3609, |
| "step": 38960 |
| }, |
| { |
| "epoch": 1494.1509433962265, |
| "grad_norm": 1.7832047879021928, |
| "learning_rate": 1.845220082141838e-05, |
| "loss": 1.3966, |
| "step": 38980 |
| }, |
| { |
| "epoch": 1494.9056603773586, |
| "grad_norm": 1.628697490406908, |
| "learning_rate": 1.841870583172502e-05, |
| "loss": 1.3577, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1495.6603773584907, |
| "grad_norm": 1.7499682485349517, |
| "learning_rate": 1.8385243505072403e-05, |
| "loss": 1.3634, |
| "step": 39020 |
| }, |
| { |
| "epoch": 1496.4150943396226, |
| "grad_norm": 2.369232208734949, |
| "learning_rate": 1.835181388684767e-05, |
| "loss": 1.3804, |
| "step": 39040 |
| }, |
| { |
| "epoch": 1497.1698113207547, |
| "grad_norm": 2.002186669217615, |
| "learning_rate": 1.8318417022393614e-05, |
| "loss": 1.3775, |
| "step": 39060 |
| }, |
| { |
| "epoch": 1497.9245283018868, |
| "grad_norm": 1.7745981101584183, |
| "learning_rate": 1.8285052957008572e-05, |
| "loss": 1.3678, |
| "step": 39080 |
| }, |
| { |
| "epoch": 1498.6792452830189, |
| "grad_norm": 2.3506034698380027, |
| "learning_rate": 1.825172173594644e-05, |
| "loss": 1.3819, |
| "step": 39100 |
| }, |
| { |
| "epoch": 1499.433962264151, |
| "grad_norm": 1.5587811175176152, |
| "learning_rate": 1.8218423404416543e-05, |
| "loss": 1.3623, |
| "step": 39120 |
| }, |
| { |
| "epoch": 1500.188679245283, |
| "grad_norm": 1.5407388891782507, |
| "learning_rate": 1.818515800758359e-05, |
| "loss": 1.3737, |
| "step": 39140 |
| }, |
| { |
| "epoch": 1500.9433962264152, |
| "grad_norm": 1.7105290658502008, |
| "learning_rate": 1.8151925590567624e-05, |
| "loss": 1.3416, |
| "step": 39160 |
| }, |
| { |
| "epoch": 1501.698113207547, |
| "grad_norm": 2.1160472894699973, |
| "learning_rate": 1.811872619844394e-05, |
| "loss": 1.3596, |
| "step": 39180 |
| }, |
| { |
| "epoch": 1502.4528301886792, |
| "grad_norm": 1.7134114803577327, |
| "learning_rate": 1.8085559876243068e-05, |
| "loss": 1.3486, |
| "step": 39200 |
| }, |
| { |
| "epoch": 1503.2075471698113, |
| "grad_norm": 1.5742520539626361, |
| "learning_rate": 1.805242666895068e-05, |
| "loss": 1.3737, |
| "step": 39220 |
| }, |
| { |
| "epoch": 1503.9622641509434, |
| "grad_norm": 1.6841300143409803, |
| "learning_rate": 1.8019326621507504e-05, |
| "loss": 1.3593, |
| "step": 39240 |
| }, |
| { |
| "epoch": 1504.7169811320755, |
| "grad_norm": 1.4678089134086005, |
| "learning_rate": 1.7986259778809304e-05, |
| "loss": 1.3332, |
| "step": 39260 |
| }, |
| { |
| "epoch": 1505.4716981132076, |
| "grad_norm": 1.5583137022685134, |
| "learning_rate": 1.7953226185706828e-05, |
| "loss": 1.3532, |
| "step": 39280 |
| }, |
| { |
| "epoch": 1506.2264150943397, |
| "grad_norm": 1.754522974870956, |
| "learning_rate": 1.7920225887005686e-05, |
| "loss": 1.3969, |
| "step": 39300 |
| }, |
| { |
| "epoch": 1506.9811320754718, |
| "grad_norm": 2.329959975485945, |
| "learning_rate": 1.788725892746638e-05, |
| "loss": 1.3693, |
| "step": 39320 |
| }, |
| { |
| "epoch": 1507.7358490566037, |
| "grad_norm": 1.9736365954487893, |
| "learning_rate": 1.7854325351804138e-05, |
| "loss": 1.3545, |
| "step": 39340 |
| }, |
| { |
| "epoch": 1508.4905660377358, |
| "grad_norm": 2.0873147005956274, |
| "learning_rate": 1.782142520468893e-05, |
| "loss": 1.357, |
| "step": 39360 |
| }, |
| { |
| "epoch": 1509.245283018868, |
| "grad_norm": 1.8565982391914584, |
| "learning_rate": 1.7788558530745406e-05, |
| "loss": 1.3574, |
| "step": 39380 |
| }, |
| { |
| "epoch": 1510.0, |
| "grad_norm": 1.3940404402455406, |
| "learning_rate": 1.7755725374552767e-05, |
| "loss": 1.3322, |
| "step": 39400 |
| }, |
| { |
| "epoch": 1510.754716981132, |
| "grad_norm": 1.6757375238937267, |
| "learning_rate": 1.772292578064481e-05, |
| "loss": 1.3562, |
| "step": 39420 |
| }, |
| { |
| "epoch": 1511.5094339622642, |
| "grad_norm": 2.021397727623104, |
| "learning_rate": 1.769015979350977e-05, |
| "loss": 1.3494, |
| "step": 39440 |
| }, |
| { |
| "epoch": 1512.2641509433963, |
| "grad_norm": 1.9073518931594837, |
| "learning_rate": 1.7657427457590277e-05, |
| "loss": 1.3469, |
| "step": 39460 |
| }, |
| { |
| "epoch": 1513.0188679245282, |
| "grad_norm": 1.5299281651503949, |
| "learning_rate": 1.7624728817283386e-05, |
| "loss": 1.3347, |
| "step": 39480 |
| }, |
| { |
| "epoch": 1513.7735849056603, |
| "grad_norm": 1.6171286973533487, |
| "learning_rate": 1.7592063916940385e-05, |
| "loss": 1.3781, |
| "step": 39500 |
| }, |
| { |
| "epoch": 1514.5283018867924, |
| "grad_norm": 2.3151167774892283, |
| "learning_rate": 1.7559432800866844e-05, |
| "loss": 1.3389, |
| "step": 39520 |
| }, |
| { |
| "epoch": 1515.2830188679245, |
| "grad_norm": 1.8404517143557557, |
| "learning_rate": 1.752683551332248e-05, |
| "loss": 1.3809, |
| "step": 39540 |
| }, |
| { |
| "epoch": 1516.0377358490566, |
| "grad_norm": 1.80165740372062, |
| "learning_rate": 1.749427209852112e-05, |
| "loss": 1.3647, |
| "step": 39560 |
| }, |
| { |
| "epoch": 1516.7924528301887, |
| "grad_norm": 1.5503005965319303, |
| "learning_rate": 1.7461742600630684e-05, |
| "loss": 1.3553, |
| "step": 39580 |
| }, |
| { |
| "epoch": 1517.5471698113208, |
| "grad_norm": 1.7389286642537964, |
| "learning_rate": 1.7429247063773047e-05, |
| "loss": 1.3566, |
| "step": 39600 |
| }, |
| { |
| "epoch": 1518.301886792453, |
| "grad_norm": 1.5514338805704833, |
| "learning_rate": 1.7396785532024062e-05, |
| "loss": 1.3771, |
| "step": 39620 |
| }, |
| { |
| "epoch": 1519.0566037735848, |
| "grad_norm": 1.738553891820026, |
| "learning_rate": 1.7364358049413427e-05, |
| "loss": 1.3608, |
| "step": 39640 |
| }, |
| { |
| "epoch": 1519.811320754717, |
| "grad_norm": 2.2590021667446476, |
| "learning_rate": 1.7331964659924647e-05, |
| "loss": 1.3594, |
| "step": 39660 |
| }, |
| { |
| "epoch": 1520.566037735849, |
| "grad_norm": 1.8008534873454645, |
| "learning_rate": 1.729960540749503e-05, |
| "loss": 1.3446, |
| "step": 39680 |
| }, |
| { |
| "epoch": 1521.3207547169811, |
| "grad_norm": 1.9823359457338208, |
| "learning_rate": 1.7267280336015543e-05, |
| "loss": 1.3604, |
| "step": 39700 |
| }, |
| { |
| "epoch": 1522.0754716981132, |
| "grad_norm": 1.6630862297023916, |
| "learning_rate": 1.723498948933081e-05, |
| "loss": 1.3831, |
| "step": 39720 |
| }, |
| { |
| "epoch": 1522.8301886792453, |
| "grad_norm": 1.9271729195919085, |
| "learning_rate": 1.720273291123901e-05, |
| "loss": 1.3571, |
| "step": 39740 |
| }, |
| { |
| "epoch": 1523.5849056603774, |
| "grad_norm": 1.6944904437475812, |
| "learning_rate": 1.7170510645491884e-05, |
| "loss": 1.3845, |
| "step": 39760 |
| }, |
| { |
| "epoch": 1524.3396226415093, |
| "grad_norm": 2.0111059446030164, |
| "learning_rate": 1.7138322735794582e-05, |
| "loss": 1.3464, |
| "step": 39780 |
| }, |
| { |
| "epoch": 1525.0943396226414, |
| "grad_norm": 1.6636863494806655, |
| "learning_rate": 1.7106169225805703e-05, |
| "loss": 1.3472, |
| "step": 39800 |
| }, |
| { |
| "epoch": 1525.8490566037735, |
| "grad_norm": 1.654778862655826, |
| "learning_rate": 1.7074050159137155e-05, |
| "loss": 1.3517, |
| "step": 39820 |
| }, |
| { |
| "epoch": 1526.6037735849056, |
| "grad_norm": 1.9255620043148591, |
| "learning_rate": 1.7041965579354115e-05, |
| "loss": 1.359, |
| "step": 39840 |
| }, |
| { |
| "epoch": 1527.3584905660377, |
| "grad_norm": 1.7612527068488755, |
| "learning_rate": 1.7009915529975046e-05, |
| "loss": 1.3535, |
| "step": 39860 |
| }, |
| { |
| "epoch": 1528.1132075471698, |
| "grad_norm": 1.6440495946289901, |
| "learning_rate": 1.69779000544715e-05, |
| "loss": 1.3275, |
| "step": 39880 |
| }, |
| { |
| "epoch": 1528.867924528302, |
| "grad_norm": 1.8436152132956103, |
| "learning_rate": 1.6945919196268195e-05, |
| "loss": 1.3269, |
| "step": 39900 |
| }, |
| { |
| "epoch": 1529.622641509434, |
| "grad_norm": 1.5249165119761414, |
| "learning_rate": 1.6913972998742855e-05, |
| "loss": 1.3528, |
| "step": 39920 |
| }, |
| { |
| "epoch": 1530.377358490566, |
| "grad_norm": 1.7116936271233165, |
| "learning_rate": 1.6882061505226197e-05, |
| "loss": 1.3351, |
| "step": 39940 |
| }, |
| { |
| "epoch": 1531.132075471698, |
| "grad_norm": 1.9127818443391411, |
| "learning_rate": 1.68501847590019e-05, |
| "loss": 1.3649, |
| "step": 39960 |
| }, |
| { |
| "epoch": 1531.8867924528302, |
| "grad_norm": 1.7968703663627887, |
| "learning_rate": 1.681834280330646e-05, |
| "loss": 1.3664, |
| "step": 39980 |
| }, |
| { |
| "epoch": 1532.6415094339623, |
| "grad_norm": 2.01086719476703, |
| "learning_rate": 1.6786535681329242e-05, |
| "loss": 1.3354, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1533.3962264150944, |
| "grad_norm": 1.8971146877595166, |
| "learning_rate": 1.6754763436212318e-05, |
| "loss": 1.3459, |
| "step": 40020 |
| }, |
| { |
| "epoch": 1534.1509433962265, |
| "grad_norm": 1.5538558777058122, |
| "learning_rate": 1.6723026111050465e-05, |
| "loss": 1.348, |
| "step": 40040 |
| }, |
| { |
| "epoch": 1534.9056603773586, |
| "grad_norm": 1.6899684943437072, |
| "learning_rate": 1.6691323748891116e-05, |
| "loss": 1.3219, |
| "step": 40060 |
| }, |
| { |
| "epoch": 1535.6603773584907, |
| "grad_norm": 1.5696983044378243, |
| "learning_rate": 1.6659656392734248e-05, |
| "loss": 1.3523, |
| "step": 40080 |
| }, |
| { |
| "epoch": 1536.4150943396226, |
| "grad_norm": 2.008558955924781, |
| "learning_rate": 1.6628024085532394e-05, |
| "loss": 1.3507, |
| "step": 40100 |
| }, |
| { |
| "epoch": 1537.1698113207547, |
| "grad_norm": 1.6814099912171956, |
| "learning_rate": 1.6596426870190517e-05, |
| "loss": 1.3271, |
| "step": 40120 |
| }, |
| { |
| "epoch": 1537.9245283018868, |
| "grad_norm": 1.6392939792056798, |
| "learning_rate": 1.6564864789566017e-05, |
| "loss": 1.3628, |
| "step": 40140 |
| }, |
| { |
| "epoch": 1538.6792452830189, |
| "grad_norm": 1.6937327457602671, |
| "learning_rate": 1.6533337886468593e-05, |
| "loss": 1.3457, |
| "step": 40160 |
| }, |
| { |
| "epoch": 1539.433962264151, |
| "grad_norm": 2.0312461746808674, |
| "learning_rate": 1.650184620366025e-05, |
| "loss": 1.345, |
| "step": 40180 |
| }, |
| { |
| "epoch": 1540.188679245283, |
| "grad_norm": 1.6324403361347462, |
| "learning_rate": 1.647038978385525e-05, |
| "loss": 1.3614, |
| "step": 40200 |
| }, |
| { |
| "epoch": 1540.9433962264152, |
| "grad_norm": 1.8937794347785448, |
| "learning_rate": 1.643896866971998e-05, |
| "loss": 1.3485, |
| "step": 40220 |
| }, |
| { |
| "epoch": 1541.698113207547, |
| "grad_norm": 1.66458657626364, |
| "learning_rate": 1.6407582903872977e-05, |
| "loss": 1.3201, |
| "step": 40240 |
| }, |
| { |
| "epoch": 1542.4528301886792, |
| "grad_norm": 1.637256903291043, |
| "learning_rate": 1.637623252888481e-05, |
| "loss": 1.3287, |
| "step": 40260 |
| }, |
| { |
| "epoch": 1543.2075471698113, |
| "grad_norm": 1.771255607485422, |
| "learning_rate": 1.634491758727804e-05, |
| "loss": 1.3386, |
| "step": 40280 |
| }, |
| { |
| "epoch": 1543.9622641509434, |
| "grad_norm": 2.294826209947056, |
| "learning_rate": 1.6313638121527195e-05, |
| "loss": 1.3443, |
| "step": 40300 |
| }, |
| { |
| "epoch": 1544.7169811320755, |
| "grad_norm": 1.5369973618999444, |
| "learning_rate": 1.6282394174058652e-05, |
| "loss": 1.3199, |
| "step": 40320 |
| }, |
| { |
| "epoch": 1545.4716981132076, |
| "grad_norm": 1.7805574251016163, |
| "learning_rate": 1.6251185787250646e-05, |
| "loss": 1.3427, |
| "step": 40340 |
| }, |
| { |
| "epoch": 1546.2264150943397, |
| "grad_norm": 1.7055546669575088, |
| "learning_rate": 1.6220013003433163e-05, |
| "loss": 1.3595, |
| "step": 40360 |
| }, |
| { |
| "epoch": 1546.9811320754718, |
| "grad_norm": 1.6493151345521173, |
| "learning_rate": 1.618887586488787e-05, |
| "loss": 1.3417, |
| "step": 40380 |
| }, |
| { |
| "epoch": 1547.7358490566037, |
| "grad_norm": 1.7099299752279526, |
| "learning_rate": 1.6157774413848147e-05, |
| "loss": 1.3286, |
| "step": 40400 |
| }, |
| { |
| "epoch": 1548.4905660377358, |
| "grad_norm": 1.6461054638879455, |
| "learning_rate": 1.61267086924989e-05, |
| "loss": 1.3651, |
| "step": 40420 |
| }, |
| { |
| "epoch": 1549.245283018868, |
| "grad_norm": 2.239209937375333, |
| "learning_rate": 1.6095678742976643e-05, |
| "loss": 1.3402, |
| "step": 40440 |
| }, |
| { |
| "epoch": 1550.0, |
| "grad_norm": 1.9293560078530108, |
| "learning_rate": 1.6064684607369317e-05, |
| "loss": 1.3566, |
| "step": 40460 |
| }, |
| { |
| "epoch": 1550.754716981132, |
| "grad_norm": 1.7850904902946119, |
| "learning_rate": 1.603372632771629e-05, |
| "loss": 1.3522, |
| "step": 40480 |
| }, |
| { |
| "epoch": 1551.5094339622642, |
| "grad_norm": 1.8694667431709797, |
| "learning_rate": 1.6002803946008334e-05, |
| "loss": 1.3254, |
| "step": 40500 |
| }, |
| { |
| "epoch": 1552.2641509433963, |
| "grad_norm": 1.8970095488709016, |
| "learning_rate": 1.5971917504187483e-05, |
| "loss": 1.3456, |
| "step": 40520 |
| }, |
| { |
| "epoch": 1553.0188679245282, |
| "grad_norm": 1.473985828394077, |
| "learning_rate": 1.5941067044147068e-05, |
| "loss": 1.3425, |
| "step": 40540 |
| }, |
| { |
| "epoch": 1553.7735849056603, |
| "grad_norm": 2.4173810451888436, |
| "learning_rate": 1.591025260773159e-05, |
| "loss": 1.3616, |
| "step": 40560 |
| }, |
| { |
| "epoch": 1554.5283018867924, |
| "grad_norm": 4.978545098723231, |
| "learning_rate": 1.587947423673667e-05, |
| "loss": 1.3302, |
| "step": 40580 |
| }, |
| { |
| "epoch": 1555.2830188679245, |
| "grad_norm": 1.8210531663627934, |
| "learning_rate": 1.5848731972909058e-05, |
| "loss": 1.3208, |
| "step": 40600 |
| }, |
| { |
| "epoch": 1556.0377358490566, |
| "grad_norm": 1.7214332519076236, |
| "learning_rate": 1.5818025857946504e-05, |
| "loss": 1.3429, |
| "step": 40620 |
| }, |
| { |
| "epoch": 1556.7924528301887, |
| "grad_norm": 1.7715531344419837, |
| "learning_rate": 1.5787355933497722e-05, |
| "loss": 1.3236, |
| "step": 40640 |
| }, |
| { |
| "epoch": 1557.5471698113208, |
| "grad_norm": 1.6774415891925254, |
| "learning_rate": 1.5756722241162336e-05, |
| "loss": 1.3038, |
| "step": 40660 |
| }, |
| { |
| "epoch": 1558.301886792453, |
| "grad_norm": 1.7378768400910978, |
| "learning_rate": 1.5726124822490856e-05, |
| "loss": 1.3393, |
| "step": 40680 |
| }, |
| { |
| "epoch": 1559.0566037735848, |
| "grad_norm": 1.5050468895919773, |
| "learning_rate": 1.569556371898455e-05, |
| "loss": 1.3169, |
| "step": 40700 |
| }, |
| { |
| "epoch": 1559.811320754717, |
| "grad_norm": 1.7494465787076923, |
| "learning_rate": 1.5665038972095462e-05, |
| "loss": 1.3219, |
| "step": 40720 |
| }, |
| { |
| "epoch": 1560.566037735849, |
| "grad_norm": 1.942070137365104, |
| "learning_rate": 1.563455062322631e-05, |
| "loss": 1.3331, |
| "step": 40740 |
| }, |
| { |
| "epoch": 1561.3207547169811, |
| "grad_norm": 1.4760834184650184, |
| "learning_rate": 1.560409871373043e-05, |
| "loss": 1.3371, |
| "step": 40760 |
| }, |
| { |
| "epoch": 1562.0754716981132, |
| "grad_norm": 1.794169465456889, |
| "learning_rate": 1.5573683284911766e-05, |
| "loss": 1.361, |
| "step": 40780 |
| }, |
| { |
| "epoch": 1562.8301886792453, |
| "grad_norm": 1.5717564295595021, |
| "learning_rate": 1.5543304378024745e-05, |
| "loss": 1.3198, |
| "step": 40800 |
| }, |
| { |
| "epoch": 1563.5849056603774, |
| "grad_norm": 1.679663629392091, |
| "learning_rate": 1.5512962034274292e-05, |
| "loss": 1.3225, |
| "step": 40820 |
| }, |
| { |
| "epoch": 1564.3396226415093, |
| "grad_norm": 1.5963236435216681, |
| "learning_rate": 1.5482656294815706e-05, |
| "loss": 1.3475, |
| "step": 40840 |
| }, |
| { |
| "epoch": 1565.0943396226414, |
| "grad_norm": 1.6282234240786269, |
| "learning_rate": 1.5452387200754648e-05, |
| "loss": 1.341, |
| "step": 40860 |
| }, |
| { |
| "epoch": 1565.8490566037735, |
| "grad_norm": 1.8356401444891661, |
| "learning_rate": 1.542215479314709e-05, |
| "loss": 1.3093, |
| "step": 40880 |
| }, |
| { |
| "epoch": 1566.6037735849056, |
| "grad_norm": 1.802086287293627, |
| "learning_rate": 1.5391959112999222e-05, |
| "loss": 1.3234, |
| "step": 40900 |
| }, |
| { |
| "epoch": 1567.3584905660377, |
| "grad_norm": 1.596543322520551, |
| "learning_rate": 1.536180020126744e-05, |
| "loss": 1.3207, |
| "step": 40920 |
| }, |
| { |
| "epoch": 1568.1132075471698, |
| "grad_norm": 1.5925040590351016, |
| "learning_rate": 1.5331678098858253e-05, |
| "loss": 1.3434, |
| "step": 40940 |
| }, |
| { |
| "epoch": 1568.867924528302, |
| "grad_norm": 1.7805635796964523, |
| "learning_rate": 1.5301592846628236e-05, |
| "loss": 1.3189, |
| "step": 40960 |
| }, |
| { |
| "epoch": 1569.622641509434, |
| "grad_norm": 2.270882122194477, |
| "learning_rate": 1.5271544485384005e-05, |
| "loss": 1.3331, |
| "step": 40980 |
| }, |
| { |
| "epoch": 1570.377358490566, |
| "grad_norm": 1.5460374945916004, |
| "learning_rate": 1.524153305588211e-05, |
| "loss": 1.3307, |
| "step": 41000 |
| }, |
| { |
| "epoch": 1571.132075471698, |
| "grad_norm": 1.5240728933202146, |
| "learning_rate": 1.5211558598829046e-05, |
| "loss": 1.3261, |
| "step": 41020 |
| }, |
| { |
| "epoch": 1571.8867924528302, |
| "grad_norm": 1.6551356652204947, |
| "learning_rate": 1.518162115488113e-05, |
| "loss": 1.3444, |
| "step": 41040 |
| }, |
| { |
| "epoch": 1572.6415094339623, |
| "grad_norm": 2.3662389207443897, |
| "learning_rate": 1.5151720764644462e-05, |
| "loss": 1.3078, |
| "step": 41060 |
| }, |
| { |
| "epoch": 1573.3962264150944, |
| "grad_norm": 2.0201453815678336, |
| "learning_rate": 1.5121857468674923e-05, |
| "loss": 1.2931, |
| "step": 41080 |
| }, |
| { |
| "epoch": 1574.1509433962265, |
| "grad_norm": 2.1833616270471428, |
| "learning_rate": 1.509203130747807e-05, |
| "loss": 1.3113, |
| "step": 41100 |
| }, |
| { |
| "epoch": 1574.9056603773586, |
| "grad_norm": 1.6606152578025972, |
| "learning_rate": 1.506224232150908e-05, |
| "loss": 1.3488, |
| "step": 41120 |
| }, |
| { |
| "epoch": 1575.6603773584907, |
| "grad_norm": 2.2621888669728776, |
| "learning_rate": 1.5032490551172706e-05, |
| "loss": 1.293, |
| "step": 41140 |
| }, |
| { |
| "epoch": 1576.4150943396226, |
| "grad_norm": 1.8118753564672168, |
| "learning_rate": 1.5002776036823215e-05, |
| "loss": 1.3288, |
| "step": 41160 |
| }, |
| { |
| "epoch": 1577.1698113207547, |
| "grad_norm": 1.4675478833771125, |
| "learning_rate": 1.4973098818764368e-05, |
| "loss": 1.3181, |
| "step": 41180 |
| }, |
| { |
| "epoch": 1577.9245283018868, |
| "grad_norm": 1.5426718546178322, |
| "learning_rate": 1.4943458937249337e-05, |
| "loss": 1.3041, |
| "step": 41200 |
| }, |
| { |
| "epoch": 1578.6792452830189, |
| "grad_norm": 1.7915801444691424, |
| "learning_rate": 1.4913856432480624e-05, |
| "loss": 1.3244, |
| "step": 41220 |
| }, |
| { |
| "epoch": 1579.433962264151, |
| "grad_norm": 1.6284356095147676, |
| "learning_rate": 1.4884291344610055e-05, |
| "loss": 1.3623, |
| "step": 41240 |
| }, |
| { |
| "epoch": 1580.188679245283, |
| "grad_norm": 2.018808164067539, |
| "learning_rate": 1.4854763713738692e-05, |
| "loss": 1.3265, |
| "step": 41260 |
| }, |
| { |
| "epoch": 1580.9433962264152, |
| "grad_norm": 1.9982741446146173, |
| "learning_rate": 1.48252735799168e-05, |
| "loss": 1.3174, |
| "step": 41280 |
| }, |
| { |
| "epoch": 1581.698113207547, |
| "grad_norm": 1.6089408891188777, |
| "learning_rate": 1.4795820983143804e-05, |
| "loss": 1.3054, |
| "step": 41300 |
| }, |
| { |
| "epoch": 1582.4528301886792, |
| "grad_norm": 1.9322291254142352, |
| "learning_rate": 1.4766405963368183e-05, |
| "loss": 1.3288, |
| "step": 41320 |
| }, |
| { |
| "epoch": 1583.2075471698113, |
| "grad_norm": 2.1500428010731105, |
| "learning_rate": 1.4737028560487459e-05, |
| "loss": 1.3251, |
| "step": 41340 |
| }, |
| { |
| "epoch": 1583.9622641509434, |
| "grad_norm": 1.6754742169090076, |
| "learning_rate": 1.470768881434812e-05, |
| "loss": 1.3111, |
| "step": 41360 |
| }, |
| { |
| "epoch": 1584.7169811320755, |
| "grad_norm": 2.0456793462392864, |
| "learning_rate": 1.4678386764745604e-05, |
| "loss": 1.2852, |
| "step": 41380 |
| }, |
| { |
| "epoch": 1585.4716981132076, |
| "grad_norm": 1.6265186141557229, |
| "learning_rate": 1.4649122451424216e-05, |
| "loss": 1.3246, |
| "step": 41400 |
| }, |
| { |
| "epoch": 1586.2264150943397, |
| "grad_norm": 1.7034501168928484, |
| "learning_rate": 1.4619895914077052e-05, |
| "loss": 1.3061, |
| "step": 41420 |
| }, |
| { |
| "epoch": 1586.9811320754718, |
| "grad_norm": 2.5172683919046834, |
| "learning_rate": 1.459070719234599e-05, |
| "loss": 1.3287, |
| "step": 41440 |
| }, |
| { |
| "epoch": 1587.7358490566037, |
| "grad_norm": 1.681004851075849, |
| "learning_rate": 1.4561556325821593e-05, |
| "loss": 1.307, |
| "step": 41460 |
| }, |
| { |
| "epoch": 1588.4905660377358, |
| "grad_norm": 1.9545864689840218, |
| "learning_rate": 1.4532443354043108e-05, |
| "loss": 1.3015, |
| "step": 41480 |
| }, |
| { |
| "epoch": 1589.245283018868, |
| "grad_norm": 2.2238938165489186, |
| "learning_rate": 1.4503368316498385e-05, |
| "loss": 1.3323, |
| "step": 41500 |
| }, |
| { |
| "epoch": 1590.0, |
| "grad_norm": 2.0846781514374704, |
| "learning_rate": 1.4474331252623795e-05, |
| "loss": 1.3273, |
| "step": 41520 |
| }, |
| { |
| "epoch": 1590.754716981132, |
| "grad_norm": 1.677709576953602, |
| "learning_rate": 1.44453322018042e-05, |
| "loss": 1.3035, |
| "step": 41540 |
| }, |
| { |
| "epoch": 1591.5094339622642, |
| "grad_norm": 1.8283093943528037, |
| "learning_rate": 1.4416371203372931e-05, |
| "loss": 1.3261, |
| "step": 41560 |
| }, |
| { |
| "epoch": 1592.2641509433963, |
| "grad_norm": 1.5077647603013566, |
| "learning_rate": 1.4387448296611699e-05, |
| "loss": 1.3039, |
| "step": 41580 |
| }, |
| { |
| "epoch": 1593.0188679245282, |
| "grad_norm": 1.7410910545917078, |
| "learning_rate": 1.4358563520750539e-05, |
| "loss": 1.3073, |
| "step": 41600 |
| }, |
| { |
| "epoch": 1593.7735849056603, |
| "grad_norm": 1.7856680678107866, |
| "learning_rate": 1.4329716914967761e-05, |
| "loss": 1.3128, |
| "step": 41620 |
| }, |
| { |
| "epoch": 1594.5283018867924, |
| "grad_norm": 2.291591913470402, |
| "learning_rate": 1.4300908518389904e-05, |
| "loss": 1.3067, |
| "step": 41640 |
| }, |
| { |
| "epoch": 1595.2830188679245, |
| "grad_norm": 1.785529683540441, |
| "learning_rate": 1.42721383700917e-05, |
| "loss": 1.3338, |
| "step": 41660 |
| }, |
| { |
| "epoch": 1596.0377358490566, |
| "grad_norm": 1.5806566985978232, |
| "learning_rate": 1.4243406509096e-05, |
| "loss": 1.3212, |
| "step": 41680 |
| }, |
| { |
| "epoch": 1596.7924528301887, |
| "grad_norm": 1.6779162023356309, |
| "learning_rate": 1.4214712974373703e-05, |
| "loss": 1.304, |
| "step": 41700 |
| }, |
| { |
| "epoch": 1597.5471698113208, |
| "grad_norm": 1.7562533688231816, |
| "learning_rate": 1.418605780484373e-05, |
| "loss": 1.2875, |
| "step": 41720 |
| }, |
| { |
| "epoch": 1598.301886792453, |
| "grad_norm": 1.7567734296175508, |
| "learning_rate": 1.4157441039372966e-05, |
| "loss": 1.3145, |
| "step": 41740 |
| }, |
| { |
| "epoch": 1599.0566037735848, |
| "grad_norm": 1.722702892703264, |
| "learning_rate": 1.4128862716776218e-05, |
| "loss": 1.3019, |
| "step": 41760 |
| }, |
| { |
| "epoch": 1599.811320754717, |
| "grad_norm": 1.9956191020255551, |
| "learning_rate": 1.4100322875816148e-05, |
| "loss": 1.3114, |
| "step": 41780 |
| }, |
| { |
| "epoch": 1600.566037735849, |
| "grad_norm": 1.6531331340969986, |
| "learning_rate": 1.4071821555203213e-05, |
| "loss": 1.2966, |
| "step": 41800 |
| }, |
| { |
| "epoch": 1601.3207547169811, |
| "grad_norm": 2.0591826060061376, |
| "learning_rate": 1.4043358793595621e-05, |
| "loss": 1.3015, |
| "step": 41820 |
| }, |
| { |
| "epoch": 1602.0754716981132, |
| "grad_norm": 1.6984765409093496, |
| "learning_rate": 1.4014934629599273e-05, |
| "loss": 1.2995, |
| "step": 41840 |
| }, |
| { |
| "epoch": 1602.8301886792453, |
| "grad_norm": 2.2995147164644165, |
| "learning_rate": 1.3986549101767747e-05, |
| "loss": 1.3184, |
| "step": 41860 |
| }, |
| { |
| "epoch": 1603.5849056603774, |
| "grad_norm": 1.6563391450177631, |
| "learning_rate": 1.39582022486022e-05, |
| "loss": 1.3379, |
| "step": 41880 |
| }, |
| { |
| "epoch": 1604.3396226415093, |
| "grad_norm": 1.8150129139182571, |
| "learning_rate": 1.3929894108551327e-05, |
| "loss": 1.2831, |
| "step": 41900 |
| }, |
| { |
| "epoch": 1605.0943396226414, |
| "grad_norm": 1.6835080581169954, |
| "learning_rate": 1.390162472001131e-05, |
| "loss": 1.3275, |
| "step": 41920 |
| }, |
| { |
| "epoch": 1605.8490566037735, |
| "grad_norm": 1.6294691180875247, |
| "learning_rate": 1.3873394121325766e-05, |
| "loss": 1.2913, |
| "step": 41940 |
| }, |
| { |
| "epoch": 1606.6037735849056, |
| "grad_norm": 1.7180885898850626, |
| "learning_rate": 1.3845202350785745e-05, |
| "loss": 1.2965, |
| "step": 41960 |
| }, |
| { |
| "epoch": 1607.3584905660377, |
| "grad_norm": 1.5583008263797746, |
| "learning_rate": 1.3817049446629576e-05, |
| "loss": 1.2832, |
| "step": 41980 |
| }, |
| { |
| "epoch": 1608.1132075471698, |
| "grad_norm": 1.8079371040717394, |
| "learning_rate": 1.3788935447042895e-05, |
| "loss": 1.2954, |
| "step": 42000 |
| }, |
| { |
| "epoch": 1608.867924528302, |
| "grad_norm": 1.8337600854116936, |
| "learning_rate": 1.3760860390158554e-05, |
| "loss": 1.309, |
| "step": 42020 |
| }, |
| { |
| "epoch": 1609.622641509434, |
| "grad_norm": 1.6419903042017507, |
| "learning_rate": 1.3732824314056604e-05, |
| "loss": 1.3068, |
| "step": 42040 |
| }, |
| { |
| "epoch": 1610.377358490566, |
| "grad_norm": 1.948536117708095, |
| "learning_rate": 1.370482725676423e-05, |
| "loss": 1.3399, |
| "step": 42060 |
| }, |
| { |
| "epoch": 1611.132075471698, |
| "grad_norm": 1.9254279275726736, |
| "learning_rate": 1.3676869256255669e-05, |
| "loss": 1.3151, |
| "step": 42080 |
| }, |
| { |
| "epoch": 1611.8867924528302, |
| "grad_norm": 1.802137918813917, |
| "learning_rate": 1.3648950350452192e-05, |
| "loss": 1.2844, |
| "step": 42100 |
| }, |
| { |
| "epoch": 1612.6415094339623, |
| "grad_norm": 3.3181585562433358, |
| "learning_rate": 1.3621070577222036e-05, |
| "loss": 1.3125, |
| "step": 42120 |
| }, |
| { |
| "epoch": 1613.3962264150944, |
| "grad_norm": 1.6631495329844195, |
| "learning_rate": 1.3593229974380375e-05, |
| "loss": 1.2908, |
| "step": 42140 |
| }, |
| { |
| "epoch": 1614.1509433962265, |
| "grad_norm": 1.8984095064618975, |
| "learning_rate": 1.3565428579689256e-05, |
| "loss": 1.2937, |
| "step": 42160 |
| }, |
| { |
| "epoch": 1614.9056603773586, |
| "grad_norm": 1.8422792147059388, |
| "learning_rate": 1.3537666430857535e-05, |
| "loss": 1.284, |
| "step": 42180 |
| }, |
| { |
| "epoch": 1615.6603773584907, |
| "grad_norm": 1.6231527761557085, |
| "learning_rate": 1.3509943565540833e-05, |
| "loss": 1.297, |
| "step": 42200 |
| }, |
| { |
| "epoch": 1616.4150943396226, |
| "grad_norm": 1.9453735113539294, |
| "learning_rate": 1.3482260021341475e-05, |
| "loss": 1.2902, |
| "step": 42220 |
| }, |
| { |
| "epoch": 1617.1698113207547, |
| "grad_norm": 1.7641921684369601, |
| "learning_rate": 1.345461583580849e-05, |
| "loss": 1.282, |
| "step": 42240 |
| }, |
| { |
| "epoch": 1617.9245283018868, |
| "grad_norm": 2.0387885601326228, |
| "learning_rate": 1.3427011046437513e-05, |
| "loss": 1.2898, |
| "step": 42260 |
| }, |
| { |
| "epoch": 1618.6792452830189, |
| "grad_norm": 1.777342395003277, |
| "learning_rate": 1.3399445690670713e-05, |
| "loss": 1.3168, |
| "step": 42280 |
| }, |
| { |
| "epoch": 1619.433962264151, |
| "grad_norm": 1.6370450829023924, |
| "learning_rate": 1.33719198058968e-05, |
| "loss": 1.3075, |
| "step": 42300 |
| }, |
| { |
| "epoch": 1620.188679245283, |
| "grad_norm": 1.779662328060948, |
| "learning_rate": 1.334443342945093e-05, |
| "loss": 1.2919, |
| "step": 42320 |
| }, |
| { |
| "epoch": 1620.9433962264152, |
| "grad_norm": 1.7581747425039895, |
| "learning_rate": 1.3316986598614685e-05, |
| "loss": 1.3074, |
| "step": 42340 |
| }, |
| { |
| "epoch": 1621.698113207547, |
| "grad_norm": 1.8150800521846453, |
| "learning_rate": 1.3289579350616015e-05, |
| "loss": 1.2807, |
| "step": 42360 |
| }, |
| { |
| "epoch": 1622.4528301886792, |
| "grad_norm": 1.7354020247655273, |
| "learning_rate": 1.3262211722629166e-05, |
| "loss": 1.2826, |
| "step": 42380 |
| }, |
| { |
| "epoch": 1623.2075471698113, |
| "grad_norm": 1.468102174253192, |
| "learning_rate": 1.3234883751774644e-05, |
| "loss": 1.288, |
| "step": 42400 |
| }, |
| { |
| "epoch": 1623.9622641509434, |
| "grad_norm": 1.657454196093378, |
| "learning_rate": 1.3207595475119152e-05, |
| "loss": 1.3106, |
| "step": 42420 |
| }, |
| { |
| "epoch": 1624.7169811320755, |
| "grad_norm": 1.6557640300068772, |
| "learning_rate": 1.3180346929675611e-05, |
| "loss": 1.2731, |
| "step": 42440 |
| }, |
| { |
| "epoch": 1625.4716981132076, |
| "grad_norm": 1.7735353149818989, |
| "learning_rate": 1.3153138152402996e-05, |
| "loss": 1.2763, |
| "step": 42460 |
| }, |
| { |
| "epoch": 1626.2264150943397, |
| "grad_norm": 1.9382385391993158, |
| "learning_rate": 1.3125969180206349e-05, |
| "loss": 1.319, |
| "step": 42480 |
| }, |
| { |
| "epoch": 1626.9811320754718, |
| "grad_norm": 2.136894507958651, |
| "learning_rate": 1.3098840049936733e-05, |
| "loss": 1.2805, |
| "step": 42500 |
| }, |
| { |
| "epoch": 1627.7358490566037, |
| "grad_norm": 2.1483143587792366, |
| "learning_rate": 1.3071750798391171e-05, |
| "loss": 1.2853, |
| "step": 42520 |
| }, |
| { |
| "epoch": 1628.4905660377358, |
| "grad_norm": 1.710874022513826, |
| "learning_rate": 1.304470146231261e-05, |
| "loss": 1.2806, |
| "step": 42540 |
| }, |
| { |
| "epoch": 1629.245283018868, |
| "grad_norm": 1.8951457389499449, |
| "learning_rate": 1.3017692078389823e-05, |
| "loss": 1.2932, |
| "step": 42560 |
| }, |
| { |
| "epoch": 1630.0, |
| "grad_norm": 1.5984669511746095, |
| "learning_rate": 1.299072268325742e-05, |
| "loss": 1.2931, |
| "step": 42580 |
| }, |
| { |
| "epoch": 1630.754716981132, |
| "grad_norm": 1.8192427048671964, |
| "learning_rate": 1.2963793313495747e-05, |
| "loss": 1.2736, |
| "step": 42600 |
| }, |
| { |
| "epoch": 1631.5094339622642, |
| "grad_norm": 1.7556743408681688, |
| "learning_rate": 1.2936904005630886e-05, |
| "loss": 1.2844, |
| "step": 42620 |
| }, |
| { |
| "epoch": 1632.2641509433963, |
| "grad_norm": 1.9584621506348525, |
| "learning_rate": 1.2910054796134588e-05, |
| "loss": 1.2903, |
| "step": 42640 |
| }, |
| { |
| "epoch": 1633.0188679245282, |
| "grad_norm": 1.818910778704821, |
| "learning_rate": 1.2883245721424182e-05, |
| "loss": 1.2982, |
| "step": 42660 |
| }, |
| { |
| "epoch": 1633.7735849056603, |
| "grad_norm": 1.6564371207191282, |
| "learning_rate": 1.2856476817862578e-05, |
| "loss": 1.2719, |
| "step": 42680 |
| }, |
| { |
| "epoch": 1634.5283018867924, |
| "grad_norm": 2.176105338983291, |
| "learning_rate": 1.2829748121758186e-05, |
| "loss": 1.2703, |
| "step": 42700 |
| }, |
| { |
| "epoch": 1635.2830188679245, |
| "grad_norm": 2.053268530527867, |
| "learning_rate": 1.280305966936491e-05, |
| "loss": 1.2745, |
| "step": 42720 |
| }, |
| { |
| "epoch": 1636.0377358490566, |
| "grad_norm": 1.9471448193829715, |
| "learning_rate": 1.2776411496882053e-05, |
| "loss": 1.2924, |
| "step": 42740 |
| }, |
| { |
| "epoch": 1636.7924528301887, |
| "grad_norm": 1.7439666627316233, |
| "learning_rate": 1.2749803640454274e-05, |
| "loss": 1.2883, |
| "step": 42760 |
| }, |
| { |
| "epoch": 1637.5471698113208, |
| "grad_norm": 1.7616849064246298, |
| "learning_rate": 1.2723236136171557e-05, |
| "loss": 1.2901, |
| "step": 42780 |
| }, |
| { |
| "epoch": 1638.301886792453, |
| "grad_norm": 1.9785596426579124, |
| "learning_rate": 1.2696709020069137e-05, |
| "loss": 1.2806, |
| "step": 42800 |
| }, |
| { |
| "epoch": 1639.0566037735848, |
| "grad_norm": 1.8267869166287358, |
| "learning_rate": 1.2670222328127502e-05, |
| "loss": 1.2915, |
| "step": 42820 |
| }, |
| { |
| "epoch": 1639.811320754717, |
| "grad_norm": 1.8394119712662444, |
| "learning_rate": 1.2643776096272298e-05, |
| "loss": 1.2959, |
| "step": 42840 |
| }, |
| { |
| "epoch": 1640.566037735849, |
| "grad_norm": 1.5745567144671013, |
| "learning_rate": 1.2617370360374272e-05, |
| "loss": 1.2542, |
| "step": 42860 |
| }, |
| { |
| "epoch": 1641.3207547169811, |
| "grad_norm": 1.759927673486542, |
| "learning_rate": 1.2591005156249265e-05, |
| "loss": 1.2957, |
| "step": 42880 |
| }, |
| { |
| "epoch": 1642.0754716981132, |
| "grad_norm": 1.8212375700098509, |
| "learning_rate": 1.2564680519658124e-05, |
| "loss": 1.2911, |
| "step": 42900 |
| }, |
| { |
| "epoch": 1642.8301886792453, |
| "grad_norm": 1.7170892376270965, |
| "learning_rate": 1.2538396486306685e-05, |
| "loss": 1.2815, |
| "step": 42920 |
| }, |
| { |
| "epoch": 1643.5849056603774, |
| "grad_norm": 1.9707121868823774, |
| "learning_rate": 1.2512153091845724e-05, |
| "loss": 1.2817, |
| "step": 42940 |
| }, |
| { |
| "epoch": 1644.3396226415093, |
| "grad_norm": 1.6048036207691687, |
| "learning_rate": 1.2485950371870873e-05, |
| "loss": 1.2318, |
| "step": 42960 |
| }, |
| { |
| "epoch": 1645.0943396226414, |
| "grad_norm": 1.7616652416059821, |
| "learning_rate": 1.2459788361922582e-05, |
| "loss": 1.2482, |
| "step": 42980 |
| }, |
| { |
| "epoch": 1645.8490566037735, |
| "grad_norm": 1.971912319619838, |
| "learning_rate": 1.2433667097486137e-05, |
| "loss": 1.2732, |
| "step": 43000 |
| }, |
| { |
| "epoch": 1646.6037735849056, |
| "grad_norm": 1.8554872676424141, |
| "learning_rate": 1.2407586613991493e-05, |
| "loss": 1.2862, |
| "step": 43020 |
| }, |
| { |
| "epoch": 1647.3584905660377, |
| "grad_norm": 1.699680830714332, |
| "learning_rate": 1.2381546946813345e-05, |
| "loss": 1.2783, |
| "step": 43040 |
| }, |
| { |
| "epoch": 1648.1132075471698, |
| "grad_norm": 2.0246817924294174, |
| "learning_rate": 1.2355548131271e-05, |
| "loss": 1.2913, |
| "step": 43060 |
| }, |
| { |
| "epoch": 1648.867924528302, |
| "grad_norm": 1.8248553236680727, |
| "learning_rate": 1.2329590202628339e-05, |
| "loss": 1.2982, |
| "step": 43080 |
| }, |
| { |
| "epoch": 1649.622641509434, |
| "grad_norm": 1.5949265423747017, |
| "learning_rate": 1.2303673196093838e-05, |
| "loss": 1.2682, |
| "step": 43100 |
| }, |
| { |
| "epoch": 1650.377358490566, |
| "grad_norm": 1.8162411778047456, |
| "learning_rate": 1.2277797146820398e-05, |
| "loss": 1.2775, |
| "step": 43120 |
| }, |
| { |
| "epoch": 1651.132075471698, |
| "grad_norm": 1.8713447665462608, |
| "learning_rate": 1.225196208990544e-05, |
| "loss": 1.2816, |
| "step": 43140 |
| }, |
| { |
| "epoch": 1651.8867924528302, |
| "grad_norm": 1.7680462074180785, |
| "learning_rate": 1.2226168060390733e-05, |
| "loss": 1.2583, |
| "step": 43160 |
| }, |
| { |
| "epoch": 1652.6415094339623, |
| "grad_norm": 1.7924027918393708, |
| "learning_rate": 1.2200415093262394e-05, |
| "loss": 1.2631, |
| "step": 43180 |
| }, |
| { |
| "epoch": 1653.3962264150944, |
| "grad_norm": 1.8736863718504475, |
| "learning_rate": 1.2174703223450895e-05, |
| "loss": 1.2841, |
| "step": 43200 |
| }, |
| { |
| "epoch": 1654.1509433962265, |
| "grad_norm": 2.4263803178613257, |
| "learning_rate": 1.2149032485830917e-05, |
| "loss": 1.2549, |
| "step": 43220 |
| }, |
| { |
| "epoch": 1654.9056603773586, |
| "grad_norm": 2.0941904006879746, |
| "learning_rate": 1.212340291522137e-05, |
| "loss": 1.2723, |
| "step": 43240 |
| }, |
| { |
| "epoch": 1655.6603773584907, |
| "grad_norm": 1.8402209538224543, |
| "learning_rate": 1.2097814546385328e-05, |
| "loss": 1.2974, |
| "step": 43260 |
| }, |
| { |
| "epoch": 1656.4150943396226, |
| "grad_norm": 1.6993013644974577, |
| "learning_rate": 1.2072267414029963e-05, |
| "loss": 1.2513, |
| "step": 43280 |
| }, |
| { |
| "epoch": 1657.1698113207547, |
| "grad_norm": 1.7170522867791609, |
| "learning_rate": 1.2046761552806534e-05, |
| "loss": 1.2805, |
| "step": 43300 |
| }, |
| { |
| "epoch": 1657.9245283018868, |
| "grad_norm": 2.0350355441018007, |
| "learning_rate": 1.2021296997310335e-05, |
| "loss": 1.2705, |
| "step": 43320 |
| }, |
| { |
| "epoch": 1658.6792452830189, |
| "grad_norm": 2.4347287101096127, |
| "learning_rate": 1.1995873782080597e-05, |
| "loss": 1.3121, |
| "step": 43340 |
| }, |
| { |
| "epoch": 1659.433962264151, |
| "grad_norm": 2.4941517416955423, |
| "learning_rate": 1.1970491941600483e-05, |
| "loss": 1.283, |
| "step": 43360 |
| }, |
| { |
| "epoch": 1660.188679245283, |
| "grad_norm": 1.5936769777901518, |
| "learning_rate": 1.1945151510297077e-05, |
| "loss": 1.3007, |
| "step": 43380 |
| }, |
| { |
| "epoch": 1660.9433962264152, |
| "grad_norm": 1.7694343184984302, |
| "learning_rate": 1.191985252254125e-05, |
| "loss": 1.2624, |
| "step": 43400 |
| }, |
| { |
| "epoch": 1661.698113207547, |
| "grad_norm": 1.795958862054882, |
| "learning_rate": 1.1894595012647705e-05, |
| "loss": 1.2845, |
| "step": 43420 |
| }, |
| { |
| "epoch": 1662.4528301886792, |
| "grad_norm": 1.7885172471497286, |
| "learning_rate": 1.1869379014874838e-05, |
| "loss": 1.2917, |
| "step": 43440 |
| }, |
| { |
| "epoch": 1663.2075471698113, |
| "grad_norm": 2.120293693613178, |
| "learning_rate": 1.1844204563424761e-05, |
| "loss": 1.2772, |
| "step": 43460 |
| }, |
| { |
| "epoch": 1663.9622641509434, |
| "grad_norm": 1.9597227989228387, |
| "learning_rate": 1.1819071692443259e-05, |
| "loss": 1.2795, |
| "step": 43480 |
| }, |
| { |
| "epoch": 1664.7169811320755, |
| "grad_norm": 1.6400049564311938, |
| "learning_rate": 1.1793980436019665e-05, |
| "loss": 1.2698, |
| "step": 43500 |
| }, |
| { |
| "epoch": 1665.4716981132076, |
| "grad_norm": 1.8644080059944403, |
| "learning_rate": 1.1768930828186929e-05, |
| "loss": 1.2587, |
| "step": 43520 |
| }, |
| { |
| "epoch": 1666.2264150943397, |
| "grad_norm": 1.8282116731915254, |
| "learning_rate": 1.1743922902921463e-05, |
| "loss": 1.3132, |
| "step": 43540 |
| }, |
| { |
| "epoch": 1666.9811320754718, |
| "grad_norm": 1.7249443500233614, |
| "learning_rate": 1.1718956694143148e-05, |
| "loss": 1.2723, |
| "step": 43560 |
| }, |
| { |
| "epoch": 1667.7358490566037, |
| "grad_norm": 2.2697663843146665, |
| "learning_rate": 1.1694032235715316e-05, |
| "loss": 1.2568, |
| "step": 43580 |
| }, |
| { |
| "epoch": 1668.4905660377358, |
| "grad_norm": 1.530647627622187, |
| "learning_rate": 1.1669149561444626e-05, |
| "loss": 1.2717, |
| "step": 43600 |
| }, |
| { |
| "epoch": 1669.245283018868, |
| "grad_norm": 1.9077370898342472, |
| "learning_rate": 1.1644308705081098e-05, |
| "loss": 1.252, |
| "step": 43620 |
| }, |
| { |
| "epoch": 1670.0, |
| "grad_norm": 2.44039353912842, |
| "learning_rate": 1.1619509700318012e-05, |
| "loss": 1.2829, |
| "step": 43640 |
| }, |
| { |
| "epoch": 1670.754716981132, |
| "grad_norm": 1.7743292552737207, |
| "learning_rate": 1.159475258079188e-05, |
| "loss": 1.2831, |
| "step": 43660 |
| }, |
| { |
| "epoch": 1671.5094339622642, |
| "grad_norm": 1.747479187441525, |
| "learning_rate": 1.1570037380082422e-05, |
| "loss": 1.2933, |
| "step": 43680 |
| }, |
| { |
| "epoch": 1672.2641509433963, |
| "grad_norm": 1.6300209643052874, |
| "learning_rate": 1.154536413171247e-05, |
| "loss": 1.262, |
| "step": 43700 |
| }, |
| { |
| "epoch": 1673.0188679245282, |
| "grad_norm": 1.6492759638436003, |
| "learning_rate": 1.1520732869147992e-05, |
| "loss": 1.2733, |
| "step": 43720 |
| }, |
| { |
| "epoch": 1673.7735849056603, |
| "grad_norm": 1.8650633960066672, |
| "learning_rate": 1.149614362579798e-05, |
| "loss": 1.2536, |
| "step": 43740 |
| }, |
| { |
| "epoch": 1674.5283018867924, |
| "grad_norm": 2.1343348774015154, |
| "learning_rate": 1.1471596435014422e-05, |
| "loss": 1.2813, |
| "step": 43760 |
| }, |
| { |
| "epoch": 1675.2830188679245, |
| "grad_norm": 1.8893198944715273, |
| "learning_rate": 1.144709133009231e-05, |
| "loss": 1.2563, |
| "step": 43780 |
| }, |
| { |
| "epoch": 1676.0377358490566, |
| "grad_norm": 2.1120967291275416, |
| "learning_rate": 1.1422628344269509e-05, |
| "loss": 1.2821, |
| "step": 43800 |
| }, |
| { |
| "epoch": 1676.7924528301887, |
| "grad_norm": 1.9495522685303381, |
| "learning_rate": 1.1398207510726789e-05, |
| "loss": 1.2517, |
| "step": 43820 |
| }, |
| { |
| "epoch": 1677.5471698113208, |
| "grad_norm": 2.467904007991342, |
| "learning_rate": 1.1373828862587707e-05, |
| "loss": 1.2561, |
| "step": 43840 |
| }, |
| { |
| "epoch": 1678.301886792453, |
| "grad_norm": 2.0894802109018364, |
| "learning_rate": 1.1349492432918656e-05, |
| "loss": 1.2435, |
| "step": 43860 |
| }, |
| { |
| "epoch": 1679.0566037735848, |
| "grad_norm": 1.828171013483477, |
| "learning_rate": 1.1325198254728714e-05, |
| "loss": 1.2622, |
| "step": 43880 |
| }, |
| { |
| "epoch": 1679.811320754717, |
| "grad_norm": 1.801681773387057, |
| "learning_rate": 1.1300946360969663e-05, |
| "loss": 1.2558, |
| "step": 43900 |
| }, |
| { |
| "epoch": 1680.566037735849, |
| "grad_norm": 1.6735648229781173, |
| "learning_rate": 1.127673678453596e-05, |
| "loss": 1.2758, |
| "step": 43920 |
| }, |
| { |
| "epoch": 1681.3207547169811, |
| "grad_norm": 1.8439021982026453, |
| "learning_rate": 1.1252569558264623e-05, |
| "loss": 1.2711, |
| "step": 43940 |
| }, |
| { |
| "epoch": 1682.0754716981132, |
| "grad_norm": 2.1505206283933016, |
| "learning_rate": 1.1228444714935267e-05, |
| "loss": 1.2654, |
| "step": 43960 |
| }, |
| { |
| "epoch": 1682.8301886792453, |
| "grad_norm": 2.3535064036412763, |
| "learning_rate": 1.1204362287269989e-05, |
| "loss": 1.2864, |
| "step": 43980 |
| }, |
| { |
| "epoch": 1683.5849056603774, |
| "grad_norm": 1.9848669397447662, |
| "learning_rate": 1.1180322307933367e-05, |
| "loss": 1.2678, |
| "step": 44000 |
| }, |
| { |
| "epoch": 1684.3396226415093, |
| "grad_norm": 1.9913924876704123, |
| "learning_rate": 1.1156324809532414e-05, |
| "loss": 1.2676, |
| "step": 44020 |
| }, |
| { |
| "epoch": 1685.0943396226414, |
| "grad_norm": 1.7514208349630622, |
| "learning_rate": 1.1132369824616499e-05, |
| "loss": 1.2616, |
| "step": 44040 |
| }, |
| { |
| "epoch": 1685.8490566037735, |
| "grad_norm": 2.2320848012235888, |
| "learning_rate": 1.1108457385677357e-05, |
| "loss": 1.2342, |
| "step": 44060 |
| }, |
| { |
| "epoch": 1686.6037735849056, |
| "grad_norm": 1.5942465466549, |
| "learning_rate": 1.1084587525148977e-05, |
| "loss": 1.2645, |
| "step": 44080 |
| }, |
| { |
| "epoch": 1687.3584905660377, |
| "grad_norm": 1.7930621881455668, |
| "learning_rate": 1.1060760275407643e-05, |
| "loss": 1.2534, |
| "step": 44100 |
| }, |
| { |
| "epoch": 1688.1132075471698, |
| "grad_norm": 1.9526833117644506, |
| "learning_rate": 1.1036975668771807e-05, |
| "loss": 1.2609, |
| "step": 44120 |
| }, |
| { |
| "epoch": 1688.867924528302, |
| "grad_norm": 2.7437511360247084, |
| "learning_rate": 1.1013233737502087e-05, |
| "loss": 1.2343, |
| "step": 44140 |
| }, |
| { |
| "epoch": 1689.622641509434, |
| "grad_norm": 1.9223463912813794, |
| "learning_rate": 1.098953451380124e-05, |
| "loss": 1.2442, |
| "step": 44160 |
| }, |
| { |
| "epoch": 1690.377358490566, |
| "grad_norm": 1.8610095181805815, |
| "learning_rate": 1.0965878029814056e-05, |
| "loss": 1.2754, |
| "step": 44180 |
| }, |
| { |
| "epoch": 1691.132075471698, |
| "grad_norm": 1.8877853703015002, |
| "learning_rate": 1.0942264317627406e-05, |
| "loss": 1.2491, |
| "step": 44200 |
| }, |
| { |
| "epoch": 1691.8867924528302, |
| "grad_norm": 1.7714097265467896, |
| "learning_rate": 1.09186934092701e-05, |
| "loss": 1.2405, |
| "step": 44220 |
| }, |
| { |
| "epoch": 1692.6415094339623, |
| "grad_norm": 1.7637174019223203, |
| "learning_rate": 1.0895165336712904e-05, |
| "loss": 1.2829, |
| "step": 44240 |
| }, |
| { |
| "epoch": 1693.3962264150944, |
| "grad_norm": 1.7656304793121242, |
| "learning_rate": 1.087168013186851e-05, |
| "loss": 1.2702, |
| "step": 44260 |
| }, |
| { |
| "epoch": 1694.1509433962265, |
| "grad_norm": 1.7808808271916323, |
| "learning_rate": 1.0848237826591417e-05, |
| "loss": 1.2587, |
| "step": 44280 |
| }, |
| { |
| "epoch": 1694.9056603773586, |
| "grad_norm": 2.0266053428110538, |
| "learning_rate": 1.0824838452677987e-05, |
| "loss": 1.2926, |
| "step": 44300 |
| }, |
| { |
| "epoch": 1695.6603773584907, |
| "grad_norm": 1.8115058261411354, |
| "learning_rate": 1.0801482041866307e-05, |
| "loss": 1.2694, |
| "step": 44320 |
| }, |
| { |
| "epoch": 1696.4150943396226, |
| "grad_norm": 2.4945233871026526, |
| "learning_rate": 1.0778168625836231e-05, |
| "loss": 1.2699, |
| "step": 44340 |
| }, |
| { |
| "epoch": 1697.1698113207547, |
| "grad_norm": 1.6502310134668141, |
| "learning_rate": 1.0754898236209268e-05, |
| "loss": 1.2614, |
| "step": 44360 |
| }, |
| { |
| "epoch": 1697.9245283018868, |
| "grad_norm": 1.8453532789577662, |
| "learning_rate": 1.0731670904548564e-05, |
| "loss": 1.2823, |
| "step": 44380 |
| }, |
| { |
| "epoch": 1698.6792452830189, |
| "grad_norm": 1.860653653518071, |
| "learning_rate": 1.070848666235889e-05, |
| "loss": 1.2641, |
| "step": 44400 |
| }, |
| { |
| "epoch": 1699.433962264151, |
| "grad_norm": 2.5399359657927856, |
| "learning_rate": 1.0685345541086543e-05, |
| "loss": 1.2654, |
| "step": 44420 |
| }, |
| { |
| "epoch": 1700.188679245283, |
| "grad_norm": 2.0018966910355798, |
| "learning_rate": 1.0662247572119366e-05, |
| "loss": 1.2504, |
| "step": 44440 |
| }, |
| { |
| "epoch": 1700.9433962264152, |
| "grad_norm": 1.9371452956267547, |
| "learning_rate": 1.0639192786786632e-05, |
| "loss": 1.2599, |
| "step": 44460 |
| }, |
| { |
| "epoch": 1701.698113207547, |
| "grad_norm": 1.6521207456435931, |
| "learning_rate": 1.061618121635906e-05, |
| "loss": 1.2391, |
| "step": 44480 |
| }, |
| { |
| "epoch": 1702.4528301886792, |
| "grad_norm": 1.8656871033709692, |
| "learning_rate": 1.0593212892048769e-05, |
| "loss": 1.2724, |
| "step": 44500 |
| }, |
| { |
| "epoch": 1703.2075471698113, |
| "grad_norm": 1.8848695534792095, |
| "learning_rate": 1.0570287845009191e-05, |
| "loss": 1.2528, |
| "step": 44520 |
| }, |
| { |
| "epoch": 1703.9622641509434, |
| "grad_norm": 1.9187788004054305, |
| "learning_rate": 1.0547406106335084e-05, |
| "loss": 1.2518, |
| "step": 44540 |
| }, |
| { |
| "epoch": 1704.7169811320755, |
| "grad_norm": 1.6729690153958676, |
| "learning_rate": 1.0524567707062449e-05, |
| "loss": 1.2437, |
| "step": 44560 |
| }, |
| { |
| "epoch": 1705.4716981132076, |
| "grad_norm": 1.8430409315959264, |
| "learning_rate": 1.0501772678168493e-05, |
| "loss": 1.2467, |
| "step": 44580 |
| }, |
| { |
| "epoch": 1706.2264150943397, |
| "grad_norm": 1.8213698778380842, |
| "learning_rate": 1.0479021050571638e-05, |
| "loss": 1.261, |
| "step": 44600 |
| }, |
| { |
| "epoch": 1706.9811320754718, |
| "grad_norm": 1.7756575044684015, |
| "learning_rate": 1.0456312855131388e-05, |
| "loss": 1.2278, |
| "step": 44620 |
| }, |
| { |
| "epoch": 1707.7358490566037, |
| "grad_norm": 1.4850849315300283, |
| "learning_rate": 1.0433648122648373e-05, |
| "loss": 1.242, |
| "step": 44640 |
| }, |
| { |
| "epoch": 1708.4905660377358, |
| "grad_norm": 1.9352490992820244, |
| "learning_rate": 1.0411026883864254e-05, |
| "loss": 1.2507, |
| "step": 44660 |
| }, |
| { |
| "epoch": 1709.245283018868, |
| "grad_norm": 2.2842368933958634, |
| "learning_rate": 1.0388449169461693e-05, |
| "loss": 1.2614, |
| "step": 44680 |
| }, |
| { |
| "epoch": 1710.0, |
| "grad_norm": 1.716195015782983, |
| "learning_rate": 1.0365915010064342e-05, |
| "loss": 1.2467, |
| "step": 44700 |
| }, |
| { |
| "epoch": 1710.754716981132, |
| "grad_norm": 2.1393035099583524, |
| "learning_rate": 1.0343424436236746e-05, |
| "loss": 1.2697, |
| "step": 44720 |
| }, |
| { |
| "epoch": 1711.5094339622642, |
| "grad_norm": 2.0773856374828354, |
| "learning_rate": 1.0320977478484364e-05, |
| "loss": 1.2642, |
| "step": 44740 |
| }, |
| { |
| "epoch": 1712.2641509433963, |
| "grad_norm": 1.862877983575214, |
| "learning_rate": 1.0298574167253475e-05, |
| "loss": 1.2269, |
| "step": 44760 |
| }, |
| { |
| "epoch": 1713.0188679245282, |
| "grad_norm": 1.8342101414521328, |
| "learning_rate": 1.0276214532931146e-05, |
| "loss": 1.2535, |
| "step": 44780 |
| }, |
| { |
| "epoch": 1713.7735849056603, |
| "grad_norm": 1.9601396356391216, |
| "learning_rate": 1.0253898605845225e-05, |
| "loss": 1.2327, |
| "step": 44800 |
| }, |
| { |
| "epoch": 1714.5283018867924, |
| "grad_norm": 4.66393766300096, |
| "learning_rate": 1.0231626416264286e-05, |
| "loss": 1.2503, |
| "step": 44820 |
| }, |
| { |
| "epoch": 1715.2830188679245, |
| "grad_norm": 1.8180258292414466, |
| "learning_rate": 1.020939799439755e-05, |
| "loss": 1.2401, |
| "step": 44840 |
| }, |
| { |
| "epoch": 1716.0377358490566, |
| "grad_norm": 2.106671537780403, |
| "learning_rate": 1.0187213370394877e-05, |
| "loss": 1.2536, |
| "step": 44860 |
| }, |
| { |
| "epoch": 1716.7924528301887, |
| "grad_norm": 2.006353528787222, |
| "learning_rate": 1.016507257434674e-05, |
| "loss": 1.2669, |
| "step": 44880 |
| }, |
| { |
| "epoch": 1717.5471698113208, |
| "grad_norm": 1.9080849159374786, |
| "learning_rate": 1.0142975636284143e-05, |
| "loss": 1.2509, |
| "step": 44900 |
| }, |
| { |
| "epoch": 1718.301886792453, |
| "grad_norm": 1.8773707581872159, |
| "learning_rate": 1.0120922586178633e-05, |
| "loss": 1.2675, |
| "step": 44920 |
| }, |
| { |
| "epoch": 1719.0566037735848, |
| "grad_norm": 2.0076660138565647, |
| "learning_rate": 1.00989134539422e-05, |
| "loss": 1.2534, |
| "step": 44940 |
| }, |
| { |
| "epoch": 1719.811320754717, |
| "grad_norm": 2.33419651636862, |
| "learning_rate": 1.0076948269427267e-05, |
| "loss": 1.2397, |
| "step": 44960 |
| }, |
| { |
| "epoch": 1720.566037735849, |
| "grad_norm": 2.1404808355187552, |
| "learning_rate": 1.0055027062426677e-05, |
| "loss": 1.2533, |
| "step": 44980 |
| }, |
| { |
| "epoch": 1721.3207547169811, |
| "grad_norm": 1.8480400039657447, |
| "learning_rate": 1.003314986267358e-05, |
| "loss": 1.2493, |
| "step": 45000 |
| }, |
| { |
| "epoch": 1722.0754716981132, |
| "grad_norm": 1.8827968491873732, |
| "learning_rate": 1.0011316699841473e-05, |
| "loss": 1.2622, |
| "step": 45020 |
| }, |
| { |
| "epoch": 1722.8301886792453, |
| "grad_norm": 1.805703534242214, |
| "learning_rate": 9.989527603544106e-06, |
| "loss": 1.2363, |
| "step": 45040 |
| }, |
| { |
| "epoch": 1723.5849056603774, |
| "grad_norm": 1.808082360236483, |
| "learning_rate": 9.967782603335458e-06, |
| "loss": 1.2487, |
| "step": 45060 |
| }, |
| { |
| "epoch": 1724.3396226415093, |
| "grad_norm": 1.7375867158357146, |
| "learning_rate": 9.946081728709704e-06, |
| "loss": 1.2495, |
| "step": 45080 |
| }, |
| { |
| "epoch": 1725.0943396226414, |
| "grad_norm": 1.9612535567440743, |
| "learning_rate": 9.92442500910116e-06, |
| "loss": 1.229, |
| "step": 45100 |
| }, |
| { |
| "epoch": 1725.8490566037735, |
| "grad_norm": 1.7862147453258874, |
| "learning_rate": 9.902812473884265e-06, |
| "loss": 1.257, |
| "step": 45120 |
| }, |
| { |
| "epoch": 1726.6037735849056, |
| "grad_norm": 1.9883007002332853, |
| "learning_rate": 9.881244152373517e-06, |
| "loss": 1.2449, |
| "step": 45140 |
| }, |
| { |
| "epoch": 1727.3584905660377, |
| "grad_norm": 1.9502309547963228, |
| "learning_rate": 9.859720073823439e-06, |
| "loss": 1.224, |
| "step": 45160 |
| }, |
| { |
| "epoch": 1728.1132075471698, |
| "grad_norm": 1.896645829195727, |
| "learning_rate": 9.838240267428569e-06, |
| "loss": 1.2396, |
| "step": 45180 |
| }, |
| { |
| "epoch": 1728.867924528302, |
| "grad_norm": 1.629877819405046, |
| "learning_rate": 9.816804762323362e-06, |
| "loss": 1.2227, |
| "step": 45200 |
| }, |
| { |
| "epoch": 1729.622641509434, |
| "grad_norm": 1.929708983579025, |
| "learning_rate": 9.795413587582212e-06, |
| "loss": 1.2516, |
| "step": 45220 |
| }, |
| { |
| "epoch": 1730.377358490566, |
| "grad_norm": 2.0413627464070543, |
| "learning_rate": 9.77406677221937e-06, |
| "loss": 1.2514, |
| "step": 45240 |
| }, |
| { |
| "epoch": 1731.132075471698, |
| "grad_norm": 1.9912183520226578, |
| "learning_rate": 9.75276434518892e-06, |
| "loss": 1.2414, |
| "step": 45260 |
| }, |
| { |
| "epoch": 1731.8867924528302, |
| "grad_norm": 1.9880956960393557, |
| "learning_rate": 9.731506335384743e-06, |
| "loss": 1.2419, |
| "step": 45280 |
| }, |
| { |
| "epoch": 1732.6415094339623, |
| "grad_norm": 1.7890362722548563, |
| "learning_rate": 9.710292771640488e-06, |
| "loss": 1.2369, |
| "step": 45300 |
| }, |
| { |
| "epoch": 1733.3962264150944, |
| "grad_norm": 1.7651257661243038, |
| "learning_rate": 9.689123682729494e-06, |
| "loss": 1.2311, |
| "step": 45320 |
| }, |
| { |
| "epoch": 1734.1509433962265, |
| "grad_norm": 2.2714752518928596, |
| "learning_rate": 9.667999097364786e-06, |
| "loss": 1.2367, |
| "step": 45340 |
| }, |
| { |
| "epoch": 1734.9056603773586, |
| "grad_norm": 1.6713670044384341, |
| "learning_rate": 9.646919044199022e-06, |
| "loss": 1.2404, |
| "step": 45360 |
| }, |
| { |
| "epoch": 1735.6603773584907, |
| "grad_norm": 1.6792236680717407, |
| "learning_rate": 9.625883551824463e-06, |
| "loss": 1.2196, |
| "step": 45380 |
| }, |
| { |
| "epoch": 1736.4150943396226, |
| "grad_norm": 2.115412691262234, |
| "learning_rate": 9.604892648772943e-06, |
| "loss": 1.266, |
| "step": 45400 |
| }, |
| { |
| "epoch": 1737.1698113207547, |
| "grad_norm": 1.8051416403865777, |
| "learning_rate": 9.583946363515793e-06, |
| "loss": 1.2226, |
| "step": 45420 |
| }, |
| { |
| "epoch": 1737.9245283018868, |
| "grad_norm": 1.6009792370635079, |
| "learning_rate": 9.563044724463834e-06, |
| "loss": 1.2201, |
| "step": 45440 |
| }, |
| { |
| "epoch": 1738.6792452830189, |
| "grad_norm": 1.6768644470720357, |
| "learning_rate": 9.542187759967324e-06, |
| "loss": 1.2421, |
| "step": 45460 |
| }, |
| { |
| "epoch": 1739.433962264151, |
| "grad_norm": 1.8206836356027367, |
| "learning_rate": 9.521375498315946e-06, |
| "loss": 1.2329, |
| "step": 45480 |
| }, |
| { |
| "epoch": 1740.188679245283, |
| "grad_norm": 1.938456827512391, |
| "learning_rate": 9.500607967738736e-06, |
| "loss": 1.2325, |
| "step": 45500 |
| }, |
| { |
| "epoch": 1740.9433962264152, |
| "grad_norm": 2.2438430820956277, |
| "learning_rate": 9.47988519640406e-06, |
| "loss": 1.2354, |
| "step": 45520 |
| }, |
| { |
| "epoch": 1741.698113207547, |
| "grad_norm": 1.8583994537682718, |
| "learning_rate": 9.459207212419571e-06, |
| "loss": 1.235, |
| "step": 45540 |
| }, |
| { |
| "epoch": 1742.4528301886792, |
| "grad_norm": 1.917428400078255, |
| "learning_rate": 9.438574043832166e-06, |
| "loss": 1.224, |
| "step": 45560 |
| }, |
| { |
| "epoch": 1743.2075471698113, |
| "grad_norm": 1.9824261322614047, |
| "learning_rate": 9.417985718627978e-06, |
| "loss": 1.2129, |
| "step": 45580 |
| }, |
| { |
| "epoch": 1743.9622641509434, |
| "grad_norm": 1.612460364379856, |
| "learning_rate": 9.397442264732312e-06, |
| "loss": 1.2377, |
| "step": 45600 |
| }, |
| { |
| "epoch": 1744.7169811320755, |
| "grad_norm": 1.887246888937515, |
| "learning_rate": 9.376943710009596e-06, |
| "loss": 1.239, |
| "step": 45620 |
| }, |
| { |
| "epoch": 1745.4716981132076, |
| "grad_norm": 1.8662303877058588, |
| "learning_rate": 9.35649008226336e-06, |
| "loss": 1.2157, |
| "step": 45640 |
| }, |
| { |
| "epoch": 1746.2264150943397, |
| "grad_norm": 1.8071426126559238, |
| "learning_rate": 9.336081409236198e-06, |
| "loss": 1.2428, |
| "step": 45660 |
| }, |
| { |
| "epoch": 1746.9811320754718, |
| "grad_norm": 2.0833393122383828, |
| "learning_rate": 9.315717718609757e-06, |
| "loss": 1.2492, |
| "step": 45680 |
| }, |
| { |
| "epoch": 1747.7358490566037, |
| "grad_norm": 1.6715168273471837, |
| "learning_rate": 9.295399038004633e-06, |
| "loss": 1.2266, |
| "step": 45700 |
| }, |
| { |
| "epoch": 1748.4905660377358, |
| "grad_norm": 2.195357416639307, |
| "learning_rate": 9.275125394980386e-06, |
| "loss": 1.2253, |
| "step": 45720 |
| }, |
| { |
| "epoch": 1749.245283018868, |
| "grad_norm": 1.6874680664457093, |
| "learning_rate": 9.254896817035483e-06, |
| "loss": 1.2173, |
| "step": 45740 |
| }, |
| { |
| "epoch": 1750.0, |
| "grad_norm": 1.711392020263029, |
| "learning_rate": 9.234713331607285e-06, |
| "loss": 1.2454, |
| "step": 45760 |
| }, |
| { |
| "epoch": 1750.754716981132, |
| "grad_norm": 2.3706684674067713, |
| "learning_rate": 9.214574966071978e-06, |
| "loss": 1.2308, |
| "step": 45780 |
| }, |
| { |
| "epoch": 1751.5094339622642, |
| "grad_norm": 1.6753069249066932, |
| "learning_rate": 9.19448174774455e-06, |
| "loss": 1.2413, |
| "step": 45800 |
| }, |
| { |
| "epoch": 1752.2641509433963, |
| "grad_norm": 1.769095060241516, |
| "learning_rate": 9.174433703878748e-06, |
| "loss": 1.2319, |
| "step": 45820 |
| }, |
| { |
| "epoch": 1753.0188679245282, |
| "grad_norm": 2.3962310618426756, |
| "learning_rate": 9.154430861667043e-06, |
| "loss": 1.2352, |
| "step": 45840 |
| }, |
| { |
| "epoch": 1753.7735849056603, |
| "grad_norm": 1.9412455526945074, |
| "learning_rate": 9.134473248240613e-06, |
| "loss": 1.2102, |
| "step": 45860 |
| }, |
| { |
| "epoch": 1754.5283018867924, |
| "grad_norm": 1.8463733972517142, |
| "learning_rate": 9.114560890669284e-06, |
| "loss": 1.2332, |
| "step": 45880 |
| }, |
| { |
| "epoch": 1755.2830188679245, |
| "grad_norm": 2.04310291045162, |
| "learning_rate": 9.094693815961489e-06, |
| "loss": 1.214, |
| "step": 45900 |
| }, |
| { |
| "epoch": 1756.0377358490566, |
| "grad_norm": 1.6171015922091012, |
| "learning_rate": 9.074872051064247e-06, |
| "loss": 1.2699, |
| "step": 45920 |
| }, |
| { |
| "epoch": 1756.7924528301887, |
| "grad_norm": 1.9834386299608673, |
| "learning_rate": 9.05509562286311e-06, |
| "loss": 1.2278, |
| "step": 45940 |
| }, |
| { |
| "epoch": 1757.5471698113208, |
| "grad_norm": 1.821750331122801, |
| "learning_rate": 9.035364558182156e-06, |
| "loss": 1.2209, |
| "step": 45960 |
| }, |
| { |
| "epoch": 1758.301886792453, |
| "grad_norm": 2.014697149659363, |
| "learning_rate": 9.01567888378393e-06, |
| "loss": 1.2467, |
| "step": 45980 |
| }, |
| { |
| "epoch": 1759.0566037735848, |
| "grad_norm": 1.7691564521949696, |
| "learning_rate": 8.9960386263694e-06, |
| "loss": 1.2387, |
| "step": 46000 |
| }, |
| { |
| "epoch": 1759.811320754717, |
| "grad_norm": 2.3970514513874353, |
| "learning_rate": 8.976443812577933e-06, |
| "loss": 1.2356, |
| "step": 46020 |
| }, |
| { |
| "epoch": 1760.566037735849, |
| "grad_norm": 1.7866985162824316, |
| "learning_rate": 8.956894468987255e-06, |
| "loss": 1.2192, |
| "step": 46040 |
| }, |
| { |
| "epoch": 1761.3207547169811, |
| "grad_norm": 1.4793276251372218, |
| "learning_rate": 8.93739062211343e-06, |
| "loss": 1.2255, |
| "step": 46060 |
| }, |
| { |
| "epoch": 1762.0754716981132, |
| "grad_norm": 1.629080653433639, |
| "learning_rate": 8.917932298410821e-06, |
| "loss": 1.2293, |
| "step": 46080 |
| }, |
| { |
| "epoch": 1762.8301886792453, |
| "grad_norm": 1.9159436924110016, |
| "learning_rate": 8.898519524272015e-06, |
| "loss": 1.2401, |
| "step": 46100 |
| }, |
| { |
| "epoch": 1763.5849056603774, |
| "grad_norm": 2.067014892731833, |
| "learning_rate": 8.879152326027837e-06, |
| "loss": 1.2344, |
| "step": 46120 |
| }, |
| { |
| "epoch": 1764.3396226415093, |
| "grad_norm": 1.8696210113324339, |
| "learning_rate": 8.859830729947271e-06, |
| "loss": 1.2223, |
| "step": 46140 |
| }, |
| { |
| "epoch": 1765.0943396226414, |
| "grad_norm": 2.110486612271203, |
| "learning_rate": 8.840554762237504e-06, |
| "loss": 1.243, |
| "step": 46160 |
| }, |
| { |
| "epoch": 1765.8490566037735, |
| "grad_norm": 2.123761822878677, |
| "learning_rate": 8.821324449043775e-06, |
| "loss": 1.219, |
| "step": 46180 |
| }, |
| { |
| "epoch": 1766.6037735849056, |
| "grad_norm": 1.9704006034099235, |
| "learning_rate": 8.802139816449425e-06, |
| "loss": 1.2274, |
| "step": 46200 |
| }, |
| { |
| "epoch": 1767.3584905660377, |
| "grad_norm": 2.577400619765411, |
| "learning_rate": 8.783000890475817e-06, |
| "loss": 1.2215, |
| "step": 46220 |
| }, |
| { |
| "epoch": 1768.1132075471698, |
| "grad_norm": 1.9304613762583265, |
| "learning_rate": 8.763907697082349e-06, |
| "loss": 1.2278, |
| "step": 46240 |
| }, |
| { |
| "epoch": 1768.867924528302, |
| "grad_norm": 9.077022357816322, |
| "learning_rate": 8.744860262166374e-06, |
| "loss": 1.2376, |
| "step": 46260 |
| }, |
| { |
| "epoch": 1769.622641509434, |
| "grad_norm": 1.7013870498396941, |
| "learning_rate": 8.72585861156318e-06, |
| "loss": 1.2435, |
| "step": 46280 |
| }, |
| { |
| "epoch": 1770.377358490566, |
| "grad_norm": 2.2733345450497597, |
| "learning_rate": 8.706902771045942e-06, |
| "loss": 1.2491, |
| "step": 46300 |
| }, |
| { |
| "epoch": 1771.132075471698, |
| "grad_norm": 1.7197101765888114, |
| "learning_rate": 8.687992766325712e-06, |
| "loss": 1.2308, |
| "step": 46320 |
| }, |
| { |
| "epoch": 1771.8867924528302, |
| "grad_norm": 1.722161318565123, |
| "learning_rate": 8.669128623051374e-06, |
| "loss": 1.2153, |
| "step": 46340 |
| }, |
| { |
| "epoch": 1772.6415094339623, |
| "grad_norm": 1.768434935423491, |
| "learning_rate": 8.650310366809618e-06, |
| "loss": 1.231, |
| "step": 46360 |
| }, |
| { |
| "epoch": 1773.3962264150944, |
| "grad_norm": 1.754239611346281, |
| "learning_rate": 8.631538023124864e-06, |
| "loss": 1.2132, |
| "step": 46380 |
| }, |
| { |
| "epoch": 1774.1509433962265, |
| "grad_norm": 1.8552614353082573, |
| "learning_rate": 8.612811617459285e-06, |
| "loss": 1.2112, |
| "step": 46400 |
| }, |
| { |
| "epoch": 1774.9056603773586, |
| "grad_norm": 2.0773862798469467, |
| "learning_rate": 8.594131175212718e-06, |
| "loss": 1.2189, |
| "step": 46420 |
| }, |
| { |
| "epoch": 1775.6603773584907, |
| "grad_norm": 2.3280607387947905, |
| "learning_rate": 8.57549672172269e-06, |
| "loss": 1.238, |
| "step": 46440 |
| }, |
| { |
| "epoch": 1776.4150943396226, |
| "grad_norm": 2.1755774262596717, |
| "learning_rate": 8.556908282264332e-06, |
| "loss": 1.2024, |
| "step": 46460 |
| }, |
| { |
| "epoch": 1777.1698113207547, |
| "grad_norm": 1.7187738055157478, |
| "learning_rate": 8.538365882050364e-06, |
| "loss": 1.2234, |
| "step": 46480 |
| }, |
| { |
| "epoch": 1777.9245283018868, |
| "grad_norm": 3.1309450039543165, |
| "learning_rate": 8.51986954623106e-06, |
| "loss": 1.2081, |
| "step": 46500 |
| }, |
| { |
| "epoch": 1778.6792452830189, |
| "grad_norm": 2.1042177578345567, |
| "learning_rate": 8.501419299894205e-06, |
| "loss": 1.1976, |
| "step": 46520 |
| }, |
| { |
| "epoch": 1779.433962264151, |
| "grad_norm": 2.4039696113928586, |
| "learning_rate": 8.483015168065095e-06, |
| "loss": 1.2068, |
| "step": 46540 |
| }, |
| { |
| "epoch": 1780.188679245283, |
| "grad_norm": 2.0537571832378605, |
| "learning_rate": 8.464657175706461e-06, |
| "loss": 1.2143, |
| "step": 46560 |
| }, |
| { |
| "epoch": 1780.9433962264152, |
| "grad_norm": 1.9918815720142324, |
| "learning_rate": 8.44634534771845e-06, |
| "loss": 1.2019, |
| "step": 46580 |
| }, |
| { |
| "epoch": 1781.698113207547, |
| "grad_norm": 3.5070134161926214, |
| "learning_rate": 8.428079708938597e-06, |
| "loss": 1.2117, |
| "step": 46600 |
| }, |
| { |
| "epoch": 1782.4528301886792, |
| "grad_norm": 1.9332698868995186, |
| "learning_rate": 8.409860284141776e-06, |
| "loss": 1.2109, |
| "step": 46620 |
| }, |
| { |
| "epoch": 1783.2075471698113, |
| "grad_norm": 1.8649611050997916, |
| "learning_rate": 8.391687098040202e-06, |
| "loss": 1.2127, |
| "step": 46640 |
| }, |
| { |
| "epoch": 1783.9622641509434, |
| "grad_norm": 2.1126115309707276, |
| "learning_rate": 8.373560175283366e-06, |
| "loss": 1.2071, |
| "step": 46660 |
| }, |
| { |
| "epoch": 1784.7169811320755, |
| "grad_norm": 2.1198410570984145, |
| "learning_rate": 8.355479540457997e-06, |
| "loss": 1.2136, |
| "step": 46680 |
| }, |
| { |
| "epoch": 1785.4716981132076, |
| "grad_norm": 1.6900109710024558, |
| "learning_rate": 8.337445218088043e-06, |
| "loss": 1.2524, |
| "step": 46700 |
| }, |
| { |
| "epoch": 1786.2264150943397, |
| "grad_norm": 1.8630113220385771, |
| "learning_rate": 8.31945723263464e-06, |
| "loss": 1.2265, |
| "step": 46720 |
| }, |
| { |
| "epoch": 1786.9811320754718, |
| "grad_norm": 1.8874455281957463, |
| "learning_rate": 8.301515608496088e-06, |
| "loss": 1.2177, |
| "step": 46740 |
| }, |
| { |
| "epoch": 1787.7358490566037, |
| "grad_norm": 2.014600854617101, |
| "learning_rate": 8.283620370007777e-06, |
| "loss": 1.2181, |
| "step": 46760 |
| }, |
| { |
| "epoch": 1788.4905660377358, |
| "grad_norm": 2.0564703961686885, |
| "learning_rate": 8.2657715414422e-06, |
| "loss": 1.234, |
| "step": 46780 |
| }, |
| { |
| "epoch": 1789.245283018868, |
| "grad_norm": 1.7463019171504772, |
| "learning_rate": 8.247969147008883e-06, |
| "loss": 1.2357, |
| "step": 46800 |
| }, |
| { |
| "epoch": 1790.0, |
| "grad_norm": 2.0207773867855345, |
| "learning_rate": 8.230213210854395e-06, |
| "loss": 1.2148, |
| "step": 46820 |
| }, |
| { |
| "epoch": 1790.754716981132, |
| "grad_norm": 2.3337870810525168, |
| "learning_rate": 8.21250375706228e-06, |
| "loss": 1.237, |
| "step": 46840 |
| }, |
| { |
| "epoch": 1791.5094339622642, |
| "grad_norm": 2.1435617881979563, |
| "learning_rate": 8.194840809653027e-06, |
| "loss": 1.2374, |
| "step": 46860 |
| }, |
| { |
| "epoch": 1792.2641509433963, |
| "grad_norm": 1.9102469560838522, |
| "learning_rate": 8.177224392584056e-06, |
| "loss": 1.209, |
| "step": 46880 |
| }, |
| { |
| "epoch": 1793.0188679245282, |
| "grad_norm": 2.1795923550151737, |
| "learning_rate": 8.159654529749662e-06, |
| "loss": 1.2063, |
| "step": 46900 |
| }, |
| { |
| "epoch": 1793.7735849056603, |
| "grad_norm": 1.823175394536622, |
| "learning_rate": 8.142131244981005e-06, |
| "loss": 1.1934, |
| "step": 46920 |
| }, |
| { |
| "epoch": 1794.5283018867924, |
| "grad_norm": 1.8053211353930545, |
| "learning_rate": 8.12465456204608e-06, |
| "loss": 1.2198, |
| "step": 46940 |
| }, |
| { |
| "epoch": 1795.2830188679245, |
| "grad_norm": 2.2947577379489195, |
| "learning_rate": 8.107224504649651e-06, |
| "loss": 1.2309, |
| "step": 46960 |
| }, |
| { |
| "epoch": 1796.0377358490566, |
| "grad_norm": 1.8475992608945049, |
| "learning_rate": 8.089841096433251e-06, |
| "loss": 1.2087, |
| "step": 46980 |
| }, |
| { |
| "epoch": 1796.7924528301887, |
| "grad_norm": 1.8272879309025556, |
| "learning_rate": 8.072504360975127e-06, |
| "loss": 1.2136, |
| "step": 47000 |
| }, |
| { |
| "epoch": 1797.5471698113208, |
| "grad_norm": 1.8165782997861282, |
| "learning_rate": 8.055214321790241e-06, |
| "loss": 1.1889, |
| "step": 47020 |
| }, |
| { |
| "epoch": 1798.301886792453, |
| "grad_norm": 2.3340672269584726, |
| "learning_rate": 8.03797100233022e-06, |
| "loss": 1.221, |
| "step": 47040 |
| }, |
| { |
| "epoch": 1799.0566037735848, |
| "grad_norm": 2.092467100741215, |
| "learning_rate": 8.020774425983296e-06, |
| "loss": 1.2128, |
| "step": 47060 |
| }, |
| { |
| "epoch": 1799.811320754717, |
| "grad_norm": 2.3746119444632, |
| "learning_rate": 8.003624616074315e-06, |
| "loss": 1.2182, |
| "step": 47080 |
| }, |
| { |
| "epoch": 1800.566037735849, |
| "grad_norm": 1.8281656528438364, |
| "learning_rate": 7.9865215958647e-06, |
| "loss": 1.2263, |
| "step": 47100 |
| }, |
| { |
| "epoch": 1801.3207547169811, |
| "grad_norm": 1.7918154594625133, |
| "learning_rate": 7.969465388552383e-06, |
| "loss": 1.2213, |
| "step": 47120 |
| }, |
| { |
| "epoch": 1802.0754716981132, |
| "grad_norm": 1.6967922986825377, |
| "learning_rate": 7.95245601727184e-06, |
| "loss": 1.2138, |
| "step": 47140 |
| }, |
| { |
| "epoch": 1802.8301886792453, |
| "grad_norm": 2.1758444336626437, |
| "learning_rate": 7.935493505093988e-06, |
| "loss": 1.2148, |
| "step": 47160 |
| }, |
| { |
| "epoch": 1803.5849056603774, |
| "grad_norm": 2.065548344188712, |
| "learning_rate": 7.918577875026188e-06, |
| "loss": 1.225, |
| "step": 47180 |
| }, |
| { |
| "epoch": 1804.3396226415093, |
| "grad_norm": 2.285598146488397, |
| "learning_rate": 7.901709150012234e-06, |
| "loss": 1.2029, |
| "step": 47200 |
| }, |
| { |
| "epoch": 1805.0943396226414, |
| "grad_norm": 1.845588983749011, |
| "learning_rate": 7.884887352932272e-06, |
| "loss": 1.2197, |
| "step": 47220 |
| }, |
| { |
| "epoch": 1805.8490566037735, |
| "grad_norm": 2.1058361117020095, |
| "learning_rate": 7.868112506602826e-06, |
| "loss": 1.2153, |
| "step": 47240 |
| }, |
| { |
| "epoch": 1806.6037735849056, |
| "grad_norm": 2.465710936967516, |
| "learning_rate": 7.851384633776713e-06, |
| "loss": 1.228, |
| "step": 47260 |
| }, |
| { |
| "epoch": 1807.3584905660377, |
| "grad_norm": 2.5705709977723905, |
| "learning_rate": 7.834703757143039e-06, |
| "loss": 1.2098, |
| "step": 47280 |
| }, |
| { |
| "epoch": 1808.1132075471698, |
| "grad_norm": 2.2374731183447105, |
| "learning_rate": 7.818069899327187e-06, |
| "loss": 1.2129, |
| "step": 47300 |
| }, |
| { |
| "epoch": 1808.867924528302, |
| "grad_norm": 1.9504362821950096, |
| "learning_rate": 7.801483082890734e-06, |
| "loss": 1.1901, |
| "step": 47320 |
| }, |
| { |
| "epoch": 1809.622641509434, |
| "grad_norm": 2.3420693435077813, |
| "learning_rate": 7.784943330331486e-06, |
| "loss": 1.211, |
| "step": 47340 |
| }, |
| { |
| "epoch": 1810.377358490566, |
| "grad_norm": 2.031062915249881, |
| "learning_rate": 7.768450664083389e-06, |
| "loss": 1.2156, |
| "step": 47360 |
| }, |
| { |
| "epoch": 1811.132075471698, |
| "grad_norm": 1.6147973470014159, |
| "learning_rate": 7.752005106516516e-06, |
| "loss": 1.2246, |
| "step": 47380 |
| }, |
| { |
| "epoch": 1811.8867924528302, |
| "grad_norm": 3.287052917624636, |
| "learning_rate": 7.735606679937075e-06, |
| "loss": 1.2064, |
| "step": 47400 |
| }, |
| { |
| "epoch": 1812.6415094339623, |
| "grad_norm": 2.2761297016427178, |
| "learning_rate": 7.719255406587317e-06, |
| "loss": 1.212, |
| "step": 47420 |
| }, |
| { |
| "epoch": 1813.3962264150944, |
| "grad_norm": 1.7704387481824377, |
| "learning_rate": 7.702951308645558e-06, |
| "loss": 1.2085, |
| "step": 47440 |
| }, |
| { |
| "epoch": 1814.1509433962265, |
| "grad_norm": 1.8350242336586524, |
| "learning_rate": 7.68669440822611e-06, |
| "loss": 1.222, |
| "step": 47460 |
| }, |
| { |
| "epoch": 1814.9056603773586, |
| "grad_norm": 2.2387441733555202, |
| "learning_rate": 7.67048472737927e-06, |
| "loss": 1.2227, |
| "step": 47480 |
| }, |
| { |
| "epoch": 1815.6603773584907, |
| "grad_norm": 2.0331586599518863, |
| "learning_rate": 7.654322288091307e-06, |
| "loss": 1.2105, |
| "step": 47500 |
| }, |
| { |
| "epoch": 1816.4150943396226, |
| "grad_norm": 1.7302486664188137, |
| "learning_rate": 7.638207112284387e-06, |
| "loss": 1.2006, |
| "step": 47520 |
| }, |
| { |
| "epoch": 1817.1698113207547, |
| "grad_norm": 2.102196000276882, |
| "learning_rate": 7.622139221816588e-06, |
| "loss": 1.2129, |
| "step": 47540 |
| }, |
| { |
| "epoch": 1817.9245283018868, |
| "grad_norm": 2.367853919945459, |
| "learning_rate": 7.606118638481834e-06, |
| "loss": 1.2137, |
| "step": 47560 |
| }, |
| { |
| "epoch": 1818.6792452830189, |
| "grad_norm": 1.7313717857059043, |
| "learning_rate": 7.5901453840099084e-06, |
| "loss": 1.1895, |
| "step": 47580 |
| }, |
| { |
| "epoch": 1819.433962264151, |
| "grad_norm": 1.90549898399535, |
| "learning_rate": 7.574219480066374e-06, |
| "loss": 1.2056, |
| "step": 47600 |
| }, |
| { |
| "epoch": 1820.188679245283, |
| "grad_norm": 2.037261933639343, |
| "learning_rate": 7.55834094825259e-06, |
| "loss": 1.2174, |
| "step": 47620 |
| }, |
| { |
| "epoch": 1820.9433962264152, |
| "grad_norm": 2.106635441636325, |
| "learning_rate": 7.542509810105648e-06, |
| "loss": 1.1982, |
| "step": 47640 |
| }, |
| { |
| "epoch": 1821.698113207547, |
| "grad_norm": 2.092038104009338, |
| "learning_rate": 7.526726087098354e-06, |
| "loss": 1.2218, |
| "step": 47660 |
| }, |
| { |
| "epoch": 1822.4528301886792, |
| "grad_norm": 1.997516919579926, |
| "learning_rate": 7.51098980063922e-06, |
| "loss": 1.2219, |
| "step": 47680 |
| }, |
| { |
| "epoch": 1823.2075471698113, |
| "grad_norm": 1.8136562199600643, |
| "learning_rate": 7.49530097207239e-06, |
| "loss": 1.1796, |
| "step": 47700 |
| }, |
| { |
| "epoch": 1823.9622641509434, |
| "grad_norm": 2.5616204147227934, |
| "learning_rate": 7.47965962267767e-06, |
| "loss": 1.1939, |
| "step": 47720 |
| }, |
| { |
| "epoch": 1824.7169811320755, |
| "grad_norm": 2.1387597203680815, |
| "learning_rate": 7.464065773670437e-06, |
| "loss": 1.1602, |
| "step": 47740 |
| }, |
| { |
| "epoch": 1825.4716981132076, |
| "grad_norm": 1.7803739365612326, |
| "learning_rate": 7.448519446201648e-06, |
| "loss": 1.2392, |
| "step": 47760 |
| }, |
| { |
| "epoch": 1826.2264150943397, |
| "grad_norm": 1.8173426913493826, |
| "learning_rate": 7.433020661357822e-06, |
| "loss": 1.1921, |
| "step": 47780 |
| }, |
| { |
| "epoch": 1826.9811320754718, |
| "grad_norm": 1.7763793595591069, |
| "learning_rate": 7.417569440160968e-06, |
| "loss": 1.2139, |
| "step": 47800 |
| }, |
| { |
| "epoch": 1827.7358490566037, |
| "grad_norm": 2.151890225358, |
| "learning_rate": 7.402165803568603e-06, |
| "loss": 1.1918, |
| "step": 47820 |
| }, |
| { |
| "epoch": 1828.4905660377358, |
| "grad_norm": 2.241696527607786, |
| "learning_rate": 7.386809772473682e-06, |
| "loss": 1.199, |
| "step": 47840 |
| }, |
| { |
| "epoch": 1829.245283018868, |
| "grad_norm": 1.904140122730207, |
| "learning_rate": 7.371501367704594e-06, |
| "loss": 1.175, |
| "step": 47860 |
| }, |
| { |
| "epoch": 1830.0, |
| "grad_norm": 2.2057960272933035, |
| "learning_rate": 7.356240610025147e-06, |
| "loss": 1.2026, |
| "step": 47880 |
| }, |
| { |
| "epoch": 1830.754716981132, |
| "grad_norm": 1.992268245473379, |
| "learning_rate": 7.341027520134496e-06, |
| "loss": 1.2226, |
| "step": 47900 |
| }, |
| { |
| "epoch": 1831.5094339622642, |
| "grad_norm": 1.8558557959544568, |
| "learning_rate": 7.325862118667166e-06, |
| "loss": 1.1879, |
| "step": 47920 |
| }, |
| { |
| "epoch": 1832.2641509433963, |
| "grad_norm": 2.649917575243484, |
| "learning_rate": 7.3107444261929805e-06, |
| "loss": 1.2128, |
| "step": 47940 |
| }, |
| { |
| "epoch": 1833.0188679245282, |
| "grad_norm": 1.838198084617481, |
| "learning_rate": 7.295674463217053e-06, |
| "loss": 1.1932, |
| "step": 47960 |
| }, |
| { |
| "epoch": 1833.7735849056603, |
| "grad_norm": 1.6953634210974582, |
| "learning_rate": 7.280652250179774e-06, |
| "loss": 1.1964, |
| "step": 47980 |
| }, |
| { |
| "epoch": 1834.5283018867924, |
| "grad_norm": 1.6312994813012875, |
| "learning_rate": 7.26567780745675e-06, |
| "loss": 1.1941, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1835.2830188679245, |
| "grad_norm": 2.0702401300059528, |
| "learning_rate": 7.250751155358808e-06, |
| "loss": 1.2005, |
| "step": 48020 |
| }, |
| { |
| "epoch": 1836.0377358490566, |
| "grad_norm": 2.2642885375841395, |
| "learning_rate": 7.2358723141319396e-06, |
| "loss": 1.1894, |
| "step": 48040 |
| }, |
| { |
| "epoch": 1836.7924528301887, |
| "grad_norm": 1.9527405946827057, |
| "learning_rate": 7.2210413039573e-06, |
| "loss": 1.182, |
| "step": 48060 |
| }, |
| { |
| "epoch": 1837.5471698113208, |
| "grad_norm": 2.3498816913200984, |
| "learning_rate": 7.206258144951163e-06, |
| "loss": 1.1913, |
| "step": 48080 |
| }, |
| { |
| "epoch": 1838.301886792453, |
| "grad_norm": 2.123796744980879, |
| "learning_rate": 7.1915228571648876e-06, |
| "loss": 1.2076, |
| "step": 48100 |
| }, |
| { |
| "epoch": 1839.0566037735848, |
| "grad_norm": 1.7252206439090503, |
| "learning_rate": 7.176835460584927e-06, |
| "loss": 1.1861, |
| "step": 48120 |
| }, |
| { |
| "epoch": 1839.811320754717, |
| "grad_norm": 1.8734365315429182, |
| "learning_rate": 7.162195975132747e-06, |
| "loss": 1.1826, |
| "step": 48140 |
| }, |
| { |
| "epoch": 1840.566037735849, |
| "grad_norm": 2.316186234026582, |
| "learning_rate": 7.147604420664858e-06, |
| "loss": 1.177, |
| "step": 48160 |
| }, |
| { |
| "epoch": 1841.3207547169811, |
| "grad_norm": 1.788911685930357, |
| "learning_rate": 7.133060816972735e-06, |
| "loss": 1.1844, |
| "step": 48180 |
| }, |
| { |
| "epoch": 1842.0754716981132, |
| "grad_norm": 1.9701957941688446, |
| "learning_rate": 7.118565183782816e-06, |
| "loss": 1.211, |
| "step": 48200 |
| }, |
| { |
| "epoch": 1842.8301886792453, |
| "grad_norm": 1.721205749217039, |
| "learning_rate": 7.104117540756494e-06, |
| "loss": 1.2045, |
| "step": 48220 |
| }, |
| { |
| "epoch": 1843.5849056603774, |
| "grad_norm": 1.73943762799037, |
| "learning_rate": 7.089717907490048e-06, |
| "loss": 1.2005, |
| "step": 48240 |
| }, |
| { |
| "epoch": 1844.3396226415093, |
| "grad_norm": 2.2562973087741587, |
| "learning_rate": 7.07536630351465e-06, |
| "loss": 1.21, |
| "step": 48260 |
| }, |
| { |
| "epoch": 1845.0943396226414, |
| "grad_norm": 2.10020840598067, |
| "learning_rate": 7.061062748296323e-06, |
| "loss": 1.191, |
| "step": 48280 |
| }, |
| { |
| "epoch": 1845.8490566037735, |
| "grad_norm": 1.8070339824697725, |
| "learning_rate": 7.0468072612359105e-06, |
| "loss": 1.193, |
| "step": 48300 |
| }, |
| { |
| "epoch": 1846.6037735849056, |
| "grad_norm": 1.997808139499102, |
| "learning_rate": 7.032599861669077e-06, |
| "loss": 1.2014, |
| "step": 48320 |
| }, |
| { |
| "epoch": 1847.3584905660377, |
| "grad_norm": 2.0691188983956277, |
| "learning_rate": 7.018440568866245e-06, |
| "loss": 1.1966, |
| "step": 48340 |
| }, |
| { |
| "epoch": 1848.1132075471698, |
| "grad_norm": 2.403979236635362, |
| "learning_rate": 7.004329402032594e-06, |
| "loss": 1.1782, |
| "step": 48360 |
| }, |
| { |
| "epoch": 1848.867924528302, |
| "grad_norm": 2.6004080171234385, |
| "learning_rate": 6.9902663803080305e-06, |
| "loss": 1.1804, |
| "step": 48380 |
| }, |
| { |
| "epoch": 1849.622641509434, |
| "grad_norm": 1.9209297506929766, |
| "learning_rate": 6.976251522767146e-06, |
| "loss": 1.1743, |
| "step": 48400 |
| }, |
| { |
| "epoch": 1850.377358490566, |
| "grad_norm": 1.870852128682101, |
| "learning_rate": 6.962284848419221e-06, |
| "loss": 1.1968, |
| "step": 48420 |
| }, |
| { |
| "epoch": 1851.132075471698, |
| "grad_norm": 2.335271083007723, |
| "learning_rate": 6.948366376208161e-06, |
| "loss": 1.1848, |
| "step": 48440 |
| }, |
| { |
| "epoch": 1851.8867924528302, |
| "grad_norm": 1.6640538101181919, |
| "learning_rate": 6.93449612501252e-06, |
| "loss": 1.201, |
| "step": 48460 |
| }, |
| { |
| "epoch": 1852.6415094339623, |
| "grad_norm": 1.8790103798312214, |
| "learning_rate": 6.920674113645418e-06, |
| "loss": 1.167, |
| "step": 48480 |
| }, |
| { |
| "epoch": 1853.3962264150944, |
| "grad_norm": 1.7578289543420753, |
| "learning_rate": 6.906900360854565e-06, |
| "loss": 1.2007, |
| "step": 48500 |
| }, |
| { |
| "epoch": 1854.1509433962265, |
| "grad_norm": 1.8615274702658844, |
| "learning_rate": 6.893174885322198e-06, |
| "loss": 1.2056, |
| "step": 48520 |
| }, |
| { |
| "epoch": 1854.9056603773586, |
| "grad_norm": 2.3236275376684143, |
| "learning_rate": 6.879497705665089e-06, |
| "loss": 1.1716, |
| "step": 48540 |
| }, |
| { |
| "epoch": 1855.6603773584907, |
| "grad_norm": 2.1963583105378213, |
| "learning_rate": 6.865868840434493e-06, |
| "loss": 1.1769, |
| "step": 48560 |
| }, |
| { |
| "epoch": 1856.4150943396226, |
| "grad_norm": 2.129799150022101, |
| "learning_rate": 6.852288308116133e-06, |
| "loss": 1.1861, |
| "step": 48580 |
| }, |
| { |
| "epoch": 1857.1698113207547, |
| "grad_norm": 1.978798574769679, |
| "learning_rate": 6.8387561271301765e-06, |
| "loss": 1.1971, |
| "step": 48600 |
| }, |
| { |
| "epoch": 1857.9245283018868, |
| "grad_norm": 2.2936585531474596, |
| "learning_rate": 6.8252723158312055e-06, |
| "loss": 1.1911, |
| "step": 48620 |
| }, |
| { |
| "epoch": 1858.6792452830189, |
| "grad_norm": 1.860277938482895, |
| "learning_rate": 6.81183689250821e-06, |
| "loss": 1.1566, |
| "step": 48640 |
| }, |
| { |
| "epoch": 1859.433962264151, |
| "grad_norm": 2.017669140863562, |
| "learning_rate": 6.79844987538453e-06, |
| "loss": 1.1728, |
| "step": 48660 |
| }, |
| { |
| "epoch": 1860.188679245283, |
| "grad_norm": 2.098145579199566, |
| "learning_rate": 6.785111282617849e-06, |
| "loss": 1.1934, |
| "step": 48680 |
| }, |
| { |
| "epoch": 1860.9433962264152, |
| "grad_norm": 1.835823989245946, |
| "learning_rate": 6.771821132300191e-06, |
| "loss": 1.1621, |
| "step": 48700 |
| }, |
| { |
| "epoch": 1861.698113207547, |
| "grad_norm": 1.9612631399268534, |
| "learning_rate": 6.7585794424578464e-06, |
| "loss": 1.1911, |
| "step": 48720 |
| }, |
| { |
| "epoch": 1862.4528301886792, |
| "grad_norm": 2.0755400141270464, |
| "learning_rate": 6.745386231051399e-06, |
| "loss": 1.1804, |
| "step": 48740 |
| }, |
| { |
| "epoch": 1863.2075471698113, |
| "grad_norm": 2.2075989618364984, |
| "learning_rate": 6.732241515975663e-06, |
| "loss": 1.1933, |
| "step": 48760 |
| }, |
| { |
| "epoch": 1863.9622641509434, |
| "grad_norm": 1.9214092744343696, |
| "learning_rate": 6.719145315059678e-06, |
| "loss": 1.1913, |
| "step": 48780 |
| }, |
| { |
| "epoch": 1864.7169811320755, |
| "grad_norm": 2.3557809190891703, |
| "learning_rate": 6.7060976460666846e-06, |
| "loss": 1.1905, |
| "step": 48800 |
| }, |
| { |
| "epoch": 1865.4716981132076, |
| "grad_norm": 2.114305919520162, |
| "learning_rate": 6.693098526694083e-06, |
| "loss": 1.2047, |
| "step": 48820 |
| }, |
| { |
| "epoch": 1866.2264150943397, |
| "grad_norm": 1.8242775313878226, |
| "learning_rate": 6.680147974573452e-06, |
| "loss": 1.1933, |
| "step": 48840 |
| }, |
| { |
| "epoch": 1866.9811320754718, |
| "grad_norm": 2.1056639763813956, |
| "learning_rate": 6.66724600727046e-06, |
| "loss": 1.1808, |
| "step": 48860 |
| }, |
| { |
| "epoch": 1867.7358490566037, |
| "grad_norm": 1.7165725449830957, |
| "learning_rate": 6.654392642284892e-06, |
| "loss": 1.1782, |
| "step": 48880 |
| }, |
| { |
| "epoch": 1868.4905660377358, |
| "grad_norm": 1.7341902387718784, |
| "learning_rate": 6.6415878970506175e-06, |
| "loss": 1.179, |
| "step": 48900 |
| }, |
| { |
| "epoch": 1869.245283018868, |
| "grad_norm": 1.7667425869444906, |
| "learning_rate": 6.6288317889355535e-06, |
| "loss": 1.1754, |
| "step": 48920 |
| }, |
| { |
| "epoch": 1870.0, |
| "grad_norm": 2.2994252135110655, |
| "learning_rate": 6.616124335241648e-06, |
| "loss": 1.1992, |
| "step": 48940 |
| }, |
| { |
| "epoch": 1870.754716981132, |
| "grad_norm": 1.7046658246235185, |
| "learning_rate": 6.603465553204852e-06, |
| "loss": 1.1811, |
| "step": 48960 |
| }, |
| { |
| "epoch": 1871.5094339622642, |
| "grad_norm": 2.1898673540015428, |
| "learning_rate": 6.5908554599951e-06, |
| "loss": 1.178, |
| "step": 48980 |
| }, |
| { |
| "epoch": 1872.2641509433963, |
| "grad_norm": 1.9804369829197095, |
| "learning_rate": 6.578294072716292e-06, |
| "loss": 1.1989, |
| "step": 49000 |
| }, |
| { |
| "epoch": 1873.0188679245282, |
| "grad_norm": 2.065726596455928, |
| "learning_rate": 6.565781408406267e-06, |
| "loss": 1.1931, |
| "step": 49020 |
| }, |
| { |
| "epoch": 1873.7735849056603, |
| "grad_norm": 2.2467788121970123, |
| "learning_rate": 6.553317484036772e-06, |
| "loss": 1.2074, |
| "step": 49040 |
| }, |
| { |
| "epoch": 1874.5283018867924, |
| "grad_norm": 1.991691603079823, |
| "learning_rate": 6.5409023165134424e-06, |
| "loss": 1.1983, |
| "step": 49060 |
| }, |
| { |
| "epoch": 1875.2830188679245, |
| "grad_norm": 2.106299625577455, |
| "learning_rate": 6.528535922675781e-06, |
| "loss": 1.1956, |
| "step": 49080 |
| }, |
| { |
| "epoch": 1876.0377358490566, |
| "grad_norm": 2.037693251120139, |
| "learning_rate": 6.516218319297147e-06, |
| "loss": 1.185, |
| "step": 49100 |
| }, |
| { |
| "epoch": 1876.7924528301887, |
| "grad_norm": 2.3718612692091763, |
| "learning_rate": 6.503949523084718e-06, |
| "loss": 1.1859, |
| "step": 49120 |
| }, |
| { |
| "epoch": 1877.5471698113208, |
| "grad_norm": 1.9858435056818156, |
| "learning_rate": 6.491729550679461e-06, |
| "loss": 1.2076, |
| "step": 49140 |
| }, |
| { |
| "epoch": 1878.301886792453, |
| "grad_norm": 2.147814028235424, |
| "learning_rate": 6.479558418656134e-06, |
| "loss": 1.1682, |
| "step": 49160 |
| }, |
| { |
| "epoch": 1879.0566037735848, |
| "grad_norm": 1.7628164718106505, |
| "learning_rate": 6.467436143523228e-06, |
| "loss": 1.1791, |
| "step": 49180 |
| }, |
| { |
| "epoch": 1879.811320754717, |
| "grad_norm": 1.9837896355936764, |
| "learning_rate": 6.455362741722995e-06, |
| "loss": 1.1977, |
| "step": 49200 |
| }, |
| { |
| "epoch": 1880.566037735849, |
| "grad_norm": 2.0613808893064327, |
| "learning_rate": 6.44333822963138e-06, |
| "loss": 1.1738, |
| "step": 49220 |
| }, |
| { |
| "epoch": 1881.3207547169811, |
| "grad_norm": 1.6736931908615154, |
| "learning_rate": 6.431362623558018e-06, |
| "loss": 1.1774, |
| "step": 49240 |
| }, |
| { |
| "epoch": 1882.0754716981132, |
| "grad_norm": 1.691911714014794, |
| "learning_rate": 6.4194359397462055e-06, |
| "loss": 1.1666, |
| "step": 49260 |
| }, |
| { |
| "epoch": 1882.8301886792453, |
| "grad_norm": 2.1345911027894138, |
| "learning_rate": 6.4075581943728944e-06, |
| "loss": 1.1973, |
| "step": 49280 |
| }, |
| { |
| "epoch": 1883.5849056603774, |
| "grad_norm": 1.9512349129787812, |
| "learning_rate": 6.395729403548645e-06, |
| "loss": 1.1672, |
| "step": 49300 |
| }, |
| { |
| "epoch": 1884.3396226415093, |
| "grad_norm": 2.6451924153676125, |
| "learning_rate": 6.383949583317629e-06, |
| "loss": 1.1695, |
| "step": 49320 |
| }, |
| { |
| "epoch": 1885.0943396226414, |
| "grad_norm": 1.783294063259621, |
| "learning_rate": 6.372218749657584e-06, |
| "loss": 1.1648, |
| "step": 49340 |
| }, |
| { |
| "epoch": 1885.8490566037735, |
| "grad_norm": 2.3799777683561967, |
| "learning_rate": 6.360536918479806e-06, |
| "loss": 1.1776, |
| "step": 49360 |
| }, |
| { |
| "epoch": 1886.6037735849056, |
| "grad_norm": 2.0124780882138347, |
| "learning_rate": 6.348904105629139e-06, |
| "loss": 1.1884, |
| "step": 49380 |
| }, |
| { |
| "epoch": 1887.3584905660377, |
| "grad_norm": 1.8426672524927896, |
| "learning_rate": 6.3373203268839345e-06, |
| "loss": 1.1842, |
| "step": 49400 |
| }, |
| { |
| "epoch": 1888.1132075471698, |
| "grad_norm": 2.0963675882931274, |
| "learning_rate": 6.325785597956021e-06, |
| "loss": 1.1807, |
| "step": 49420 |
| }, |
| { |
| "epoch": 1888.867924528302, |
| "grad_norm": 1.9137633109249375, |
| "learning_rate": 6.314299934490717e-06, |
| "loss": 1.1932, |
| "step": 49440 |
| }, |
| { |
| "epoch": 1889.622641509434, |
| "grad_norm": 2.2927620018796033, |
| "learning_rate": 6.3028633520667744e-06, |
| "loss": 1.186, |
| "step": 49460 |
| }, |
| { |
| "epoch": 1890.377358490566, |
| "grad_norm": 2.7433533031518182, |
| "learning_rate": 6.291475866196384e-06, |
| "loss": 1.1363, |
| "step": 49480 |
| }, |
| { |
| "epoch": 1891.132075471698, |
| "grad_norm": 1.563437023715403, |
| "learning_rate": 6.280137492325147e-06, |
| "loss": 1.2093, |
| "step": 49500 |
| }, |
| { |
| "epoch": 1891.8867924528302, |
| "grad_norm": 2.793822111662886, |
| "learning_rate": 6.2688482458320434e-06, |
| "loss": 1.1751, |
| "step": 49520 |
| }, |
| { |
| "epoch": 1892.6415094339623, |
| "grad_norm": 1.8418670948197584, |
| "learning_rate": 6.25760814202941e-06, |
| "loss": 1.1658, |
| "step": 49540 |
| }, |
| { |
| "epoch": 1893.3962264150944, |
| "grad_norm": 1.8332744098429328, |
| "learning_rate": 6.246417196162944e-06, |
| "loss": 1.1654, |
| "step": 49560 |
| }, |
| { |
| "epoch": 1894.1509433962265, |
| "grad_norm": 1.7817661421186255, |
| "learning_rate": 6.235275423411659e-06, |
| "loss": 1.1764, |
| "step": 49580 |
| }, |
| { |
| "epoch": 1894.9056603773586, |
| "grad_norm": 1.9495189221186473, |
| "learning_rate": 6.224182838887876e-06, |
| "loss": 1.1529, |
| "step": 49600 |
| }, |
| { |
| "epoch": 1895.6603773584907, |
| "grad_norm": 2.4039058315851447, |
| "learning_rate": 6.213139457637196e-06, |
| "loss": 1.1747, |
| "step": 49620 |
| }, |
| { |
| "epoch": 1896.4150943396226, |
| "grad_norm": 1.875771973172552, |
| "learning_rate": 6.202145294638478e-06, |
| "loss": 1.1821, |
| "step": 49640 |
| }, |
| { |
| "epoch": 1897.1698113207547, |
| "grad_norm": 2.1999372490425393, |
| "learning_rate": 6.191200364803824e-06, |
| "loss": 1.1813, |
| "step": 49660 |
| }, |
| { |
| "epoch": 1897.9245283018868, |
| "grad_norm": 11.988202176475387, |
| "learning_rate": 6.180304682978568e-06, |
| "loss": 1.1569, |
| "step": 49680 |
| }, |
| { |
| "epoch": 1898.6792452830189, |
| "grad_norm": 1.8662829336756046, |
| "learning_rate": 6.169458263941242e-06, |
| "loss": 1.1816, |
| "step": 49700 |
| }, |
| { |
| "epoch": 1899.433962264151, |
| "grad_norm": 2.3098966440534294, |
| "learning_rate": 6.158661122403553e-06, |
| "loss": 1.1581, |
| "step": 49720 |
| }, |
| { |
| "epoch": 1900.188679245283, |
| "grad_norm": 2.332658522584547, |
| "learning_rate": 6.1479132730103704e-06, |
| "loss": 1.1946, |
| "step": 49740 |
| }, |
| { |
| "epoch": 1900.9433962264152, |
| "grad_norm": 1.7105735490477962, |
| "learning_rate": 6.137214730339707e-06, |
| "loss": 1.1868, |
| "step": 49760 |
| }, |
| { |
| "epoch": 1901.698113207547, |
| "grad_norm": 2.243808666742797, |
| "learning_rate": 6.126565508902698e-06, |
| "loss": 1.1599, |
| "step": 49780 |
| }, |
| { |
| "epoch": 1902.4528301886792, |
| "grad_norm": 1.8783312097697262, |
| "learning_rate": 6.115965623143589e-06, |
| "loss": 1.1621, |
| "step": 49800 |
| }, |
| { |
| "epoch": 1903.2075471698113, |
| "grad_norm": 1.7166955372139616, |
| "learning_rate": 6.105415087439699e-06, |
| "loss": 1.1862, |
| "step": 49820 |
| }, |
| { |
| "epoch": 1903.9622641509434, |
| "grad_norm": 2.0340234917391524, |
| "learning_rate": 6.094913916101413e-06, |
| "loss": 1.1561, |
| "step": 49840 |
| }, |
| { |
| "epoch": 1904.7169811320755, |
| "grad_norm": 1.8220266868042787, |
| "learning_rate": 6.084462123372144e-06, |
| "loss": 1.1749, |
| "step": 49860 |
| }, |
| { |
| "epoch": 1905.4716981132076, |
| "grad_norm": 2.3373227334868973, |
| "learning_rate": 6.07405972342837e-06, |
| "loss": 1.2081, |
| "step": 49880 |
| }, |
| { |
| "epoch": 1906.2264150943397, |
| "grad_norm": 2.31770817514565, |
| "learning_rate": 6.063706730379534e-06, |
| "loss": 1.1705, |
| "step": 49900 |
| }, |
| { |
| "epoch": 1906.9811320754718, |
| "grad_norm": 1.749701769225866, |
| "learning_rate": 6.053403158268086e-06, |
| "loss": 1.1732, |
| "step": 49920 |
| }, |
| { |
| "epoch": 1907.7358490566037, |
| "grad_norm": 2.0136702275524736, |
| "learning_rate": 6.043149021069432e-06, |
| "loss": 1.1789, |
| "step": 49940 |
| }, |
| { |
| "epoch": 1908.4905660377358, |
| "grad_norm": 1.8991267563990468, |
| "learning_rate": 6.032944332691932e-06, |
| "loss": 1.1691, |
| "step": 49960 |
| }, |
| { |
| "epoch": 1909.245283018868, |
| "grad_norm": 2.178284333271757, |
| "learning_rate": 6.02278910697688e-06, |
| "loss": 1.1698, |
| "step": 49980 |
| }, |
| { |
| "epoch": 1910.0, |
| "grad_norm": 2.270879262505861, |
| "learning_rate": 6.012683357698476e-06, |
| "loss": 1.1424, |
| "step": 50000 |
| }, |
| { |
| "epoch": 1910.754716981132, |
| "grad_norm": 1.6800724855002753, |
| "learning_rate": 6.0026270985638094e-06, |
| "loss": 1.1405, |
| "step": 50020 |
| }, |
| { |
| "epoch": 1911.5094339622642, |
| "grad_norm": 1.9982510142589247, |
| "learning_rate": 5.9926203432128405e-06, |
| "loss": 1.1811, |
| "step": 50040 |
| }, |
| { |
| "epoch": 1912.2641509433963, |
| "grad_norm": 2.072677046394058, |
| "learning_rate": 5.98266310521839e-06, |
| "loss": 1.1832, |
| "step": 50060 |
| }, |
| { |
| "epoch": 1913.0188679245282, |
| "grad_norm": 1.8518177038658126, |
| "learning_rate": 5.972755398086119e-06, |
| "loss": 1.1768, |
| "step": 50080 |
| }, |
| { |
| "epoch": 1913.7735849056603, |
| "grad_norm": 1.7233232781661019, |
| "learning_rate": 5.9628972352545016e-06, |
| "loss": 1.1916, |
| "step": 50100 |
| }, |
| { |
| "epoch": 1914.5283018867924, |
| "grad_norm": 2.2438247684764776, |
| "learning_rate": 5.953088630094804e-06, |
| "loss": 1.1965, |
| "step": 50120 |
| }, |
| { |
| "epoch": 1915.2830188679245, |
| "grad_norm": 2.354329582753457, |
| "learning_rate": 5.943329595911085e-06, |
| "loss": 1.1657, |
| "step": 50140 |
| }, |
| { |
| "epoch": 1916.0377358490566, |
| "grad_norm": 2.0821470705714558, |
| "learning_rate": 5.933620145940163e-06, |
| "loss": 1.1733, |
| "step": 50160 |
| }, |
| { |
| "epoch": 1916.7924528301887, |
| "grad_norm": 2.3851614247004513, |
| "learning_rate": 5.92396029335161e-06, |
| "loss": 1.1973, |
| "step": 50180 |
| }, |
| { |
| "epoch": 1917.5471698113208, |
| "grad_norm": 2.165021041548156, |
| "learning_rate": 5.91435005124771e-06, |
| "loss": 1.1605, |
| "step": 50200 |
| }, |
| { |
| "epoch": 1918.301886792453, |
| "grad_norm": 1.8316074131304803, |
| "learning_rate": 5.904789432663471e-06, |
| "loss": 1.175, |
| "step": 50220 |
| }, |
| { |
| "epoch": 1919.0566037735848, |
| "grad_norm": 2.2444762984325517, |
| "learning_rate": 5.8952784505665775e-06, |
| "loss": 1.1546, |
| "step": 50240 |
| }, |
| { |
| "epoch": 1919.811320754717, |
| "grad_norm": 1.990381226210719, |
| "learning_rate": 5.885817117857409e-06, |
| "loss": 1.1734, |
| "step": 50260 |
| }, |
| { |
| "epoch": 1920.566037735849, |
| "grad_norm": 2.1012346230151935, |
| "learning_rate": 5.876405447368989e-06, |
| "loss": 1.1726, |
| "step": 50280 |
| }, |
| { |
| "epoch": 1921.3207547169811, |
| "grad_norm": 2.036740240000707, |
| "learning_rate": 5.867043451866989e-06, |
| "loss": 1.1858, |
| "step": 50300 |
| }, |
| { |
| "epoch": 1922.0754716981132, |
| "grad_norm": 1.9828813541843844, |
| "learning_rate": 5.85773114404969e-06, |
| "loss": 1.1523, |
| "step": 50320 |
| }, |
| { |
| "epoch": 1922.8301886792453, |
| "grad_norm": 2.1278126328460196, |
| "learning_rate": 5.848468536547991e-06, |
| "loss": 1.1886, |
| "step": 50340 |
| }, |
| { |
| "epoch": 1923.5849056603774, |
| "grad_norm": 2.315788726027488, |
| "learning_rate": 5.8392556419253755e-06, |
| "loss": 1.1686, |
| "step": 50360 |
| }, |
| { |
| "epoch": 1924.3396226415093, |
| "grad_norm": 2.3735624423680117, |
| "learning_rate": 5.830092472677899e-06, |
| "loss": 1.1584, |
| "step": 50380 |
| }, |
| { |
| "epoch": 1925.0943396226414, |
| "grad_norm": 2.3872016424634093, |
| "learning_rate": 5.820979041234169e-06, |
| "loss": 1.1859, |
| "step": 50400 |
| }, |
| { |
| "epoch": 1925.8490566037735, |
| "grad_norm": 2.0316120352053115, |
| "learning_rate": 5.811915359955322e-06, |
| "loss": 1.1578, |
| "step": 50420 |
| }, |
| { |
| "epoch": 1926.6037735849056, |
| "grad_norm": 1.9955741026809004, |
| "learning_rate": 5.8029014411350336e-06, |
| "loss": 1.1699, |
| "step": 50440 |
| }, |
| { |
| "epoch": 1927.3584905660377, |
| "grad_norm": 2.671513853147586, |
| "learning_rate": 5.793937296999476e-06, |
| "loss": 1.1613, |
| "step": 50460 |
| }, |
| { |
| "epoch": 1928.1132075471698, |
| "grad_norm": 2.085910842457962, |
| "learning_rate": 5.785022939707302e-06, |
| "loss": 1.1919, |
| "step": 50480 |
| }, |
| { |
| "epoch": 1928.867924528302, |
| "grad_norm": 2.15354309947986, |
| "learning_rate": 5.77615838134964e-06, |
| "loss": 1.1766, |
| "step": 50500 |
| }, |
| { |
| "epoch": 1929.622641509434, |
| "grad_norm": 2.5693650339132463, |
| "learning_rate": 5.76734363395007e-06, |
| "loss": 1.175, |
| "step": 50520 |
| }, |
| { |
| "epoch": 1930.377358490566, |
| "grad_norm": 3.2479427163076533, |
| "learning_rate": 5.7585787094646196e-06, |
| "loss": 1.1703, |
| "step": 50540 |
| }, |
| { |
| "epoch": 1931.132075471698, |
| "grad_norm": 2.1161416369904695, |
| "learning_rate": 5.749863619781723e-06, |
| "loss": 1.1657, |
| "step": 50560 |
| }, |
| { |
| "epoch": 1931.8867924528302, |
| "grad_norm": 1.7714808950845444, |
| "learning_rate": 5.7411983767222415e-06, |
| "loss": 1.1717, |
| "step": 50580 |
| }, |
| { |
| "epoch": 1932.6415094339623, |
| "grad_norm": 2.535706381586084, |
| "learning_rate": 5.732582992039398e-06, |
| "loss": 1.1553, |
| "step": 50600 |
| }, |
| { |
| "epoch": 1933.3962264150944, |
| "grad_norm": 2.1037009756450527, |
| "learning_rate": 5.724017477418814e-06, |
| "loss": 1.1771, |
| "step": 50620 |
| }, |
| { |
| "epoch": 1934.1509433962265, |
| "grad_norm": 1.849746935628885, |
| "learning_rate": 5.7155018444784526e-06, |
| "loss": 1.1422, |
| "step": 50640 |
| }, |
| { |
| "epoch": 1934.9056603773586, |
| "grad_norm": 1.9210522037566025, |
| "learning_rate": 5.707036104768635e-06, |
| "loss": 1.1756, |
| "step": 50660 |
| }, |
| { |
| "epoch": 1935.6603773584907, |
| "grad_norm": 1.819206038769788, |
| "learning_rate": 5.698620269771997e-06, |
| "loss": 1.1916, |
| "step": 50680 |
| }, |
| { |
| "epoch": 1936.4150943396226, |
| "grad_norm": 2.5377205844625417, |
| "learning_rate": 5.690254350903488e-06, |
| "loss": 1.1619, |
| "step": 50700 |
| }, |
| { |
| "epoch": 1937.1698113207547, |
| "grad_norm": 2.2007521096063902, |
| "learning_rate": 5.681938359510347e-06, |
| "loss": 1.1846, |
| "step": 50720 |
| }, |
| { |
| "epoch": 1937.9245283018868, |
| "grad_norm": 2.192606880082283, |
| "learning_rate": 5.673672306872103e-06, |
| "loss": 1.1699, |
| "step": 50740 |
| }, |
| { |
| "epoch": 1938.6792452830189, |
| "grad_norm": 2.1766069540448436, |
| "learning_rate": 5.665456204200552e-06, |
| "loss": 1.1871, |
| "step": 50760 |
| }, |
| { |
| "epoch": 1939.433962264151, |
| "grad_norm": 1.9751649291014899, |
| "learning_rate": 5.657290062639727e-06, |
| "loss": 1.1474, |
| "step": 50780 |
| }, |
| { |
| "epoch": 1940.188679245283, |
| "grad_norm": 1.890603847246591, |
| "learning_rate": 5.6491738932659e-06, |
| "loss": 1.1559, |
| "step": 50800 |
| }, |
| { |
| "epoch": 1940.9433962264152, |
| "grad_norm": 1.975301210235016, |
| "learning_rate": 5.641107707087573e-06, |
| "loss": 1.1521, |
| "step": 50820 |
| }, |
| { |
| "epoch": 1941.698113207547, |
| "grad_norm": 1.8441779800267277, |
| "learning_rate": 5.6330915150454375e-06, |
| "loss": 1.145, |
| "step": 50840 |
| }, |
| { |
| "epoch": 1942.4528301886792, |
| "grad_norm": 2.4451642203033064, |
| "learning_rate": 5.625125328012387e-06, |
| "loss": 1.1791, |
| "step": 50860 |
| }, |
| { |
| "epoch": 1943.2075471698113, |
| "grad_norm": 2.127622782788785, |
| "learning_rate": 5.617209156793476e-06, |
| "loss": 1.1471, |
| "step": 50880 |
| }, |
| { |
| "epoch": 1943.9622641509434, |
| "grad_norm": 2.0779587981444427, |
| "learning_rate": 5.609343012125934e-06, |
| "loss": 1.1537, |
| "step": 50900 |
| }, |
| { |
| "epoch": 1944.7169811320755, |
| "grad_norm": 2.1654459900473872, |
| "learning_rate": 5.601526904679125e-06, |
| "loss": 1.1609, |
| "step": 50920 |
| }, |
| { |
| "epoch": 1945.4716981132076, |
| "grad_norm": 1.8696254811746238, |
| "learning_rate": 5.593760845054552e-06, |
| "loss": 1.1523, |
| "step": 50940 |
| }, |
| { |
| "epoch": 1946.2264150943397, |
| "grad_norm": 1.7856722997496786, |
| "learning_rate": 5.586044843785832e-06, |
| "loss": 1.2012, |
| "step": 50960 |
| }, |
| { |
| "epoch": 1946.9811320754718, |
| "grad_norm": 1.9401895010628936, |
| "learning_rate": 5.578378911338684e-06, |
| "loss": 1.1384, |
| "step": 50980 |
| }, |
| { |
| "epoch": 1947.7358490566037, |
| "grad_norm": 2.1643993698581077, |
| "learning_rate": 5.570763058110911e-06, |
| "loss": 1.1645, |
| "step": 51000 |
| }, |
| { |
| "epoch": 1948.4905660377358, |
| "grad_norm": 2.180473463448981, |
| "learning_rate": 5.563197294432395e-06, |
| "loss": 1.1382, |
| "step": 51020 |
| }, |
| { |
| "epoch": 1949.245283018868, |
| "grad_norm": 1.9596617879790443, |
| "learning_rate": 5.555681630565088e-06, |
| "loss": 1.1539, |
| "step": 51040 |
| }, |
| { |
| "epoch": 1950.0, |
| "grad_norm": 1.8975845056567062, |
| "learning_rate": 5.548216076702974e-06, |
| "loss": 1.144, |
| "step": 51060 |
| }, |
| { |
| "epoch": 1950.754716981132, |
| "grad_norm": 2.188450696803476, |
| "learning_rate": 5.540800642972071e-06, |
| "loss": 1.1532, |
| "step": 51080 |
| }, |
| { |
| "epoch": 1951.5094339622642, |
| "grad_norm": 1.8285975742024299, |
| "learning_rate": 5.533435339430416e-06, |
| "loss": 1.1949, |
| "step": 51100 |
| }, |
| { |
| "epoch": 1952.2641509433963, |
| "grad_norm": 1.812418268110745, |
| "learning_rate": 5.526120176068055e-06, |
| "loss": 1.1613, |
| "step": 51120 |
| }, |
| { |
| "epoch": 1953.0188679245282, |
| "grad_norm": 1.9694834187837782, |
| "learning_rate": 5.518855162807036e-06, |
| "loss": 1.1749, |
| "step": 51140 |
| }, |
| { |
| "epoch": 1953.7735849056603, |
| "grad_norm": 2.014411336027095, |
| "learning_rate": 5.511640309501359e-06, |
| "loss": 1.1364, |
| "step": 51160 |
| }, |
| { |
| "epoch": 1954.5283018867924, |
| "grad_norm": 1.663001146626253, |
| "learning_rate": 5.504475625937011e-06, |
| "loss": 1.1469, |
| "step": 51180 |
| }, |
| { |
| "epoch": 1955.2830188679245, |
| "grad_norm": 3.7966652139269756, |
| "learning_rate": 5.497361121831918e-06, |
| "loss": 1.1634, |
| "step": 51200 |
| }, |
| { |
| "epoch": 1956.0377358490566, |
| "grad_norm": 1.6321676665862368, |
| "learning_rate": 5.490296806835955e-06, |
| "loss": 1.1747, |
| "step": 51220 |
| }, |
| { |
| "epoch": 1956.7924528301887, |
| "grad_norm": 1.8693551118320602, |
| "learning_rate": 5.483282690530914e-06, |
| "loss": 1.1513, |
| "step": 51240 |
| }, |
| { |
| "epoch": 1957.5471698113208, |
| "grad_norm": 1.7254591597688926, |
| "learning_rate": 5.476318782430499e-06, |
| "loss": 1.1384, |
| "step": 51260 |
| }, |
| { |
| "epoch": 1958.301886792453, |
| "grad_norm": 2.1059814541036284, |
| "learning_rate": 5.469405091980319e-06, |
| "loss": 1.145, |
| "step": 51280 |
| }, |
| { |
| "epoch": 1959.0566037735848, |
| "grad_norm": 2.4150150564267956, |
| "learning_rate": 5.462541628557862e-06, |
| "loss": 1.1727, |
| "step": 51300 |
| }, |
| { |
| "epoch": 1959.811320754717, |
| "grad_norm": 2.072986751089322, |
| "learning_rate": 5.4557284014725005e-06, |
| "loss": 1.1632, |
| "step": 51320 |
| }, |
| { |
| "epoch": 1960.566037735849, |
| "grad_norm": 1.7011080715428424, |
| "learning_rate": 5.448965419965458e-06, |
| "loss": 1.1719, |
| "step": 51340 |
| }, |
| { |
| "epoch": 1961.3207547169811, |
| "grad_norm": 2.050321684694806, |
| "learning_rate": 5.442252693209813e-06, |
| "loss": 1.1523, |
| "step": 51360 |
| }, |
| { |
| "epoch": 1962.0754716981132, |
| "grad_norm": 2.3154046947609603, |
| "learning_rate": 5.4355902303104744e-06, |
| "loss": 1.1365, |
| "step": 51380 |
| }, |
| { |
| "epoch": 1962.8301886792453, |
| "grad_norm": 2.292815295745735, |
| "learning_rate": 5.4289780403041805e-06, |
| "loss": 1.1595, |
| "step": 51400 |
| }, |
| { |
| "epoch": 1963.5849056603774, |
| "grad_norm": 2.1444563447901253, |
| "learning_rate": 5.422416132159477e-06, |
| "loss": 1.1609, |
| "step": 51420 |
| }, |
| { |
| "epoch": 1964.3396226415093, |
| "grad_norm": 1.8223405112306774, |
| "learning_rate": 5.415904514776712e-06, |
| "loss": 1.128, |
| "step": 51440 |
| }, |
| { |
| "epoch": 1965.0943396226414, |
| "grad_norm": 1.9601698616488796, |
| "learning_rate": 5.40944319698802e-06, |
| "loss": 1.1785, |
| "step": 51460 |
| }, |
| { |
| "epoch": 1965.8490566037735, |
| "grad_norm": 2.570580210466246, |
| "learning_rate": 5.403032187557308e-06, |
| "loss": 1.147, |
| "step": 51480 |
| }, |
| { |
| "epoch": 1966.6037735849056, |
| "grad_norm": 2.2935471508100553, |
| "learning_rate": 5.396671495180257e-06, |
| "loss": 1.1777, |
| "step": 51500 |
| }, |
| { |
| "epoch": 1967.3584905660377, |
| "grad_norm": 1.987678318177208, |
| "learning_rate": 5.390361128484278e-06, |
| "loss": 1.1283, |
| "step": 51520 |
| }, |
| { |
| "epoch": 1968.1132075471698, |
| "grad_norm": 1.9732671384472393, |
| "learning_rate": 5.38410109602855e-06, |
| "loss": 1.1631, |
| "step": 51540 |
| }, |
| { |
| "epoch": 1968.867924528302, |
| "grad_norm": 2.031999390800106, |
| "learning_rate": 5.37789140630396e-06, |
| "loss": 1.1498, |
| "step": 51560 |
| }, |
| { |
| "epoch": 1969.622641509434, |
| "grad_norm": 3.612362619019224, |
| "learning_rate": 5.3717320677331165e-06, |
| "loss": 1.1449, |
| "step": 51580 |
| }, |
| { |
| "epoch": 1970.377358490566, |
| "grad_norm": 2.7552728484649216, |
| "learning_rate": 5.365623088670337e-06, |
| "loss": 1.1221, |
| "step": 51600 |
| }, |
| { |
| "epoch": 1971.132075471698, |
| "grad_norm": 2.095240315052155, |
| "learning_rate": 5.359564477401625e-06, |
| "loss": 1.1635, |
| "step": 51620 |
| }, |
| { |
| "epoch": 1971.8867924528302, |
| "grad_norm": 2.051375314186468, |
| "learning_rate": 5.353556242144684e-06, |
| "loss": 1.1768, |
| "step": 51640 |
| }, |
| { |
| "epoch": 1972.6415094339623, |
| "grad_norm": 1.7681282740339075, |
| "learning_rate": 5.3475983910488705e-06, |
| "loss": 1.1524, |
| "step": 51660 |
| }, |
| { |
| "epoch": 1973.3962264150944, |
| "grad_norm": 1.9784394422194775, |
| "learning_rate": 5.34169093219521e-06, |
| "loss": 1.1694, |
| "step": 51680 |
| }, |
| { |
| "epoch": 1974.1509433962265, |
| "grad_norm": 2.1190098778323887, |
| "learning_rate": 5.3358338735963825e-06, |
| "loss": 1.1546, |
| "step": 51700 |
| }, |
| { |
| "epoch": 1974.9056603773586, |
| "grad_norm": 1.6461495183245571, |
| "learning_rate": 5.3300272231966895e-06, |
| "loss": 1.1597, |
| "step": 51720 |
| }, |
| { |
| "epoch": 1975.6603773584907, |
| "grad_norm": 2.287937258261333, |
| "learning_rate": 5.3242709888720875e-06, |
| "loss": 1.1565, |
| "step": 51740 |
| }, |
| { |
| "epoch": 1976.4150943396226, |
| "grad_norm": 1.971738312330891, |
| "learning_rate": 5.318565178430121e-06, |
| "loss": 1.1646, |
| "step": 51760 |
| }, |
| { |
| "epoch": 1977.1698113207547, |
| "grad_norm": 1.733242389596805, |
| "learning_rate": 5.312909799609962e-06, |
| "loss": 1.1507, |
| "step": 51780 |
| }, |
| { |
| "epoch": 1977.9245283018868, |
| "grad_norm": 2.2381830913006486, |
| "learning_rate": 5.307304860082375e-06, |
| "loss": 1.161, |
| "step": 51800 |
| }, |
| { |
| "epoch": 1978.6792452830189, |
| "grad_norm": 1.7639744175828544, |
| "learning_rate": 5.3017503674497e-06, |
| "loss": 1.1639, |
| "step": 51820 |
| }, |
| { |
| "epoch": 1979.433962264151, |
| "grad_norm": 2.142096606558369, |
| "learning_rate": 5.296246329245867e-06, |
| "loss": 1.145, |
| "step": 51840 |
| }, |
| { |
| "epoch": 1980.188679245283, |
| "grad_norm": 2.1364940736571905, |
| "learning_rate": 5.29079275293636e-06, |
| "loss": 1.1445, |
| "step": 51860 |
| }, |
| { |
| "epoch": 1980.9433962264152, |
| "grad_norm": 2.0408860512130063, |
| "learning_rate": 5.285389645918224e-06, |
| "loss": 1.1684, |
| "step": 51880 |
| }, |
| { |
| "epoch": 1981.698113207547, |
| "grad_norm": 2.1484279394512984, |
| "learning_rate": 5.280037015520047e-06, |
| "loss": 1.1427, |
| "step": 51900 |
| }, |
| { |
| "epoch": 1982.4528301886792, |
| "grad_norm": 1.8875817727112376, |
| "learning_rate": 5.27473486900196e-06, |
| "loss": 1.127, |
| "step": 51920 |
| }, |
| { |
| "epoch": 1983.2075471698113, |
| "grad_norm": 1.9694696435513541, |
| "learning_rate": 5.269483213555604e-06, |
| "loss": 1.1631, |
| "step": 51940 |
| }, |
| { |
| "epoch": 1983.9622641509434, |
| "grad_norm": 1.8852852930999937, |
| "learning_rate": 5.264282056304144e-06, |
| "loss": 1.1476, |
| "step": 51960 |
| }, |
| { |
| "epoch": 1984.7169811320755, |
| "grad_norm": 2.0442189239889488, |
| "learning_rate": 5.259131404302259e-06, |
| "loss": 1.1772, |
| "step": 51980 |
| }, |
| { |
| "epoch": 1985.4716981132076, |
| "grad_norm": 2.115147564108749, |
| "learning_rate": 5.254031264536109e-06, |
| "loss": 1.1451, |
| "step": 52000 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 54000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2077, |
| "save_steps": 4000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8771778183168000.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|