{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1985.4716981132076,
"eval_steps": 500,
"global_step": 52000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7547169811320755,
"grad_norm": 30.673773492701116,
"learning_rate": 3.2000000000000005e-05,
"loss": 9.2984,
"step": 20
},
{
"epoch": 1.509433962264151,
"grad_norm": 14.373295694588162,
"learning_rate": 6.400000000000001e-05,
"loss": 7.8844,
"step": 40
},
{
"epoch": 2.2641509433962264,
"grad_norm": 19.800138134391982,
"learning_rate": 7.99999818716091e-05,
"loss": 6.0394,
"step": 60
},
{
"epoch": 3.018867924528302,
"grad_norm": 2.662186456025038,
"learning_rate": 7.9999836844587e-05,
"loss": 4.373,
"step": 80
},
{
"epoch": 3.7735849056603774,
"grad_norm": 2.9944135324779984,
"learning_rate": 7.999954679110365e-05,
"loss": 3.93,
"step": 100
},
{
"epoch": 4.528301886792453,
"grad_norm": 6.144462097933044,
"learning_rate": 7.999911171228081e-05,
"loss": 3.8796,
"step": 120
},
{
"epoch": 5.283018867924528,
"grad_norm": 1.885170861907592,
"learning_rate": 7.999853160980113e-05,
"loss": 3.7437,
"step": 140
},
{
"epoch": 6.037735849056604,
"grad_norm": 1.794246585785847,
"learning_rate": 7.999780648590806e-05,
"loss": 3.6892,
"step": 160
},
{
"epoch": 6.7924528301886795,
"grad_norm": 12.766182271547313,
"learning_rate": 7.999693634340594e-05,
"loss": 3.6124,
"step": 180
},
{
"epoch": 7.547169811320755,
"grad_norm": 3.014805105995273,
"learning_rate": 7.999592118565996e-05,
"loss": 3.4224,
"step": 200
},
{
"epoch": 8.30188679245283,
"grad_norm": 1.6809726247876995,
"learning_rate": 7.999476101659613e-05,
"loss": 3.2811,
"step": 220
},
{
"epoch": 9.056603773584905,
"grad_norm": 1.7074366581291722,
"learning_rate": 7.999345584070125e-05,
"loss": 3.2012,
"step": 240
},
{
"epoch": 9.81132075471698,
"grad_norm": 1.6971425854714368,
"learning_rate": 7.999200566302298e-05,
"loss": 3.1281,
"step": 260
},
{
"epoch": 10.566037735849056,
"grad_norm": 2.2456748845663155,
"learning_rate": 7.999041048916969e-05,
"loss": 3.1133,
"step": 280
},
{
"epoch": 11.320754716981131,
"grad_norm": 2.301731796932263,
"learning_rate": 7.998867032531055e-05,
"loss": 3.0835,
"step": 300
},
{
"epoch": 12.075471698113208,
"grad_norm": 1.9076791695070954,
"learning_rate": 7.998678517817546e-05,
"loss": 3.0189,
"step": 320
},
{
"epoch": 12.830188679245284,
"grad_norm": 1.8142406149549974,
"learning_rate": 7.9984755055055e-05,
"loss": 3.0019,
"step": 340
},
{
"epoch": 13.584905660377359,
"grad_norm": 1.747017241682793,
"learning_rate": 7.998257996380048e-05,
"loss": 2.9866,
"step": 360
},
{
"epoch": 14.339622641509434,
"grad_norm": 1.430663205424301,
"learning_rate": 7.998025991282377e-05,
"loss": 3.0026,
"step": 380
},
{
"epoch": 15.09433962264151,
"grad_norm": 2.0885624099422566,
"learning_rate": 7.997779491109745e-05,
"loss": 2.946,
"step": 400
},
{
"epoch": 15.849056603773585,
"grad_norm": 1.858513295415329,
"learning_rate": 7.997518496815465e-05,
"loss": 2.9293,
"step": 420
},
{
"epoch": 16.60377358490566,
"grad_norm": 4.071990410305379,
"learning_rate": 7.9972430094089e-05,
"loss": 2.8812,
"step": 440
},
{
"epoch": 17.358490566037737,
"grad_norm": 1.7862953717408494,
"learning_rate": 7.996953029955468e-05,
"loss": 2.9285,
"step": 460
},
{
"epoch": 18.11320754716981,
"grad_norm": 1.6791350922351642,
"learning_rate": 7.996648559576633e-05,
"loss": 2.9062,
"step": 480
},
{
"epoch": 18.867924528301888,
"grad_norm": 2.14320095701687,
"learning_rate": 7.996329599449902e-05,
"loss": 2.8974,
"step": 500
},
{
"epoch": 19.62264150943396,
"grad_norm": 1.645724273362961,
"learning_rate": 7.995996150808815e-05,
"loss": 2.842,
"step": 520
},
{
"epoch": 20.37735849056604,
"grad_norm": 1.6833734047290758,
"learning_rate": 7.99564821494295e-05,
"loss": 2.8603,
"step": 540
},
{
"epoch": 21.132075471698112,
"grad_norm": 1.680341673033382,
"learning_rate": 7.995285793197909e-05,
"loss": 2.8275,
"step": 560
},
{
"epoch": 21.88679245283019,
"grad_norm": 1.5669198141799856,
"learning_rate": 7.994908886975317e-05,
"loss": 2.8495,
"step": 580
},
{
"epoch": 22.641509433962263,
"grad_norm": 1.572130529899407,
"learning_rate": 7.99451749773282e-05,
"loss": 2.7967,
"step": 600
},
{
"epoch": 23.39622641509434,
"grad_norm": 2.0314221097665413,
"learning_rate": 7.994111626984069e-05,
"loss": 2.8023,
"step": 620
},
{
"epoch": 24.150943396226417,
"grad_norm": 2.524880591484311,
"learning_rate": 7.993691276298728e-05,
"loss": 2.7928,
"step": 640
},
{
"epoch": 24.90566037735849,
"grad_norm": 3.1215250835282884,
"learning_rate": 7.993256447302454e-05,
"loss": 2.7998,
"step": 660
},
{
"epoch": 25.660377358490567,
"grad_norm": 1.2555800634223058,
"learning_rate": 7.9928071416769e-05,
"loss": 2.7862,
"step": 680
},
{
"epoch": 26.41509433962264,
"grad_norm": 1.5918196349327507,
"learning_rate": 7.992343361159705e-05,
"loss": 2.7825,
"step": 700
},
{
"epoch": 27.169811320754718,
"grad_norm": 1.4446288187541776,
"learning_rate": 7.991865107544492e-05,
"loss": 2.7566,
"step": 720
},
{
"epoch": 27.92452830188679,
"grad_norm": 1.5498155400201465,
"learning_rate": 7.991372382680851e-05,
"loss": 2.7341,
"step": 740
},
{
"epoch": 28.67924528301887,
"grad_norm": 1.9707678993278974,
"learning_rate": 7.99086518847434e-05,
"loss": 2.7315,
"step": 760
},
{
"epoch": 29.433962264150942,
"grad_norm": 1.663827192167774,
"learning_rate": 7.99034352688648e-05,
"loss": 2.733,
"step": 780
},
{
"epoch": 30.18867924528302,
"grad_norm": 1.9883280378959602,
"learning_rate": 7.989807399934738e-05,
"loss": 2.7323,
"step": 800
},
{
"epoch": 30.943396226415093,
"grad_norm": 1.535721464256961,
"learning_rate": 7.989256809692524e-05,
"loss": 2.7081,
"step": 820
},
{
"epoch": 31.69811320754717,
"grad_norm": 1.4966431407465013,
"learning_rate": 7.988691758289184e-05,
"loss": 2.694,
"step": 840
},
{
"epoch": 32.45283018867924,
"grad_norm": 1.9684497666022975,
"learning_rate": 7.988112247909996e-05,
"loss": 2.6947,
"step": 860
},
{
"epoch": 33.20754716981132,
"grad_norm": 1.2512316865338262,
"learning_rate": 7.987518280796148e-05,
"loss": 2.7216,
"step": 880
},
{
"epoch": 33.9622641509434,
"grad_norm": 1.2566119692549285,
"learning_rate": 7.986909859244743e-05,
"loss": 2.6675,
"step": 900
},
{
"epoch": 34.716981132075475,
"grad_norm": 1.8282134080395354,
"learning_rate": 7.986286985608782e-05,
"loss": 2.6712,
"step": 920
},
{
"epoch": 35.471698113207545,
"grad_norm": 1.3075929724419728,
"learning_rate": 7.985649662297164e-05,
"loss": 2.668,
"step": 940
},
{
"epoch": 36.22641509433962,
"grad_norm": 1.4411328906456615,
"learning_rate": 7.984997891774664e-05,
"loss": 2.6937,
"step": 960
},
{
"epoch": 36.9811320754717,
"grad_norm": 1.8430700250981429,
"learning_rate": 7.984331676561932e-05,
"loss": 2.6798,
"step": 980
},
{
"epoch": 37.735849056603776,
"grad_norm": 1.4511454692578831,
"learning_rate": 7.983651019235483e-05,
"loss": 2.6637,
"step": 1000
},
{
"epoch": 38.490566037735846,
"grad_norm": 1.502704294446435,
"learning_rate": 7.982955922427681e-05,
"loss": 2.6688,
"step": 1020
},
{
"epoch": 39.24528301886792,
"grad_norm": 1.4028762887194124,
"learning_rate": 7.982246388826741e-05,
"loss": 2.6086,
"step": 1040
},
{
"epoch": 40.0,
"grad_norm": 1.539721758237447,
"learning_rate": 7.981522421176697e-05,
"loss": 2.6084,
"step": 1060
},
{
"epoch": 40.75471698113208,
"grad_norm": 1.77576635302677,
"learning_rate": 7.980784022277421e-05,
"loss": 2.6216,
"step": 1080
},
{
"epoch": 41.509433962264154,
"grad_norm": 1.4874495954369062,
"learning_rate": 7.980031194984588e-05,
"loss": 2.6328,
"step": 1100
},
{
"epoch": 42.264150943396224,
"grad_norm": 1.8846242927156294,
"learning_rate": 7.979263942209669e-05,
"loss": 2.6427,
"step": 1120
},
{
"epoch": 43.0188679245283,
"grad_norm": 1.6981877996408483,
"learning_rate": 7.978482266919936e-05,
"loss": 2.6224,
"step": 1140
},
{
"epoch": 43.77358490566038,
"grad_norm": 1.3746555989630926,
"learning_rate": 7.977686172138426e-05,
"loss": 2.6011,
"step": 1160
},
{
"epoch": 44.528301886792455,
"grad_norm": 1.377657678804025,
"learning_rate": 7.97687566094395e-05,
"loss": 2.6086,
"step": 1180
},
{
"epoch": 45.283018867924525,
"grad_norm": 1.2094636718352942,
"learning_rate": 7.976050736471069e-05,
"loss": 2.582,
"step": 1200
},
{
"epoch": 46.0377358490566,
"grad_norm": 1.4433837187551148,
"learning_rate": 7.975211401910087e-05,
"loss": 2.6294,
"step": 1220
},
{
"epoch": 46.79245283018868,
"grad_norm": 1.5026382784404573,
"learning_rate": 7.97435766050704e-05,
"loss": 2.5993,
"step": 1240
},
{
"epoch": 47.54716981132076,
"grad_norm": 1.2094136471599368,
"learning_rate": 7.973489515563676e-05,
"loss": 2.6164,
"step": 1260
},
{
"epoch": 48.301886792452834,
"grad_norm": 1.394688364908413,
"learning_rate": 7.972606970437446e-05,
"loss": 2.6056,
"step": 1280
},
{
"epoch": 49.056603773584904,
"grad_norm": 1.271568801692499,
"learning_rate": 7.971710028541502e-05,
"loss": 2.5755,
"step": 1300
},
{
"epoch": 49.81132075471698,
"grad_norm": 1.4259670316825253,
"learning_rate": 7.970798693344663e-05,
"loss": 2.5759,
"step": 1320
},
{
"epoch": 50.56603773584906,
"grad_norm": 1.3071538390073274,
"learning_rate": 7.969872968371418e-05,
"loss": 2.6031,
"step": 1340
},
{
"epoch": 51.320754716981135,
"grad_norm": 1.2595773412735998,
"learning_rate": 7.968932857201907e-05,
"loss": 2.5711,
"step": 1360
},
{
"epoch": 52.075471698113205,
"grad_norm": 2.0429570729259714,
"learning_rate": 7.967978363471901e-05,
"loss": 2.5662,
"step": 1380
},
{
"epoch": 52.83018867924528,
"grad_norm": 1.6700659590709221,
"learning_rate": 7.967009490872805e-05,
"loss": 2.5618,
"step": 1400
},
{
"epoch": 53.58490566037736,
"grad_norm": 1.33856858087749,
"learning_rate": 7.966026243151624e-05,
"loss": 2.5351,
"step": 1420
},
{
"epoch": 54.339622641509436,
"grad_norm": 1.514257636366029,
"learning_rate": 7.965028624110956e-05,
"loss": 2.5686,
"step": 1440
},
{
"epoch": 55.094339622641506,
"grad_norm": 1.5655072381428023,
"learning_rate": 7.964016637608987e-05,
"loss": 2.5329,
"step": 1460
},
{
"epoch": 55.84905660377358,
"grad_norm": 1.3215439672221574,
"learning_rate": 7.96299028755946e-05,
"loss": 2.5701,
"step": 1480
},
{
"epoch": 56.60377358490566,
"grad_norm": 1.4294366995579832,
"learning_rate": 7.961949577931671e-05,
"loss": 2.5143,
"step": 1500
},
{
"epoch": 57.35849056603774,
"grad_norm": 1.1676370114885968,
"learning_rate": 7.960894512750449e-05,
"loss": 2.5653,
"step": 1520
},
{
"epoch": 58.113207547169814,
"grad_norm": 1.2026735621707902,
"learning_rate": 7.95982509609614e-05,
"loss": 2.5161,
"step": 1540
},
{
"epoch": 58.867924528301884,
"grad_norm": 1.2479764772455937,
"learning_rate": 7.958741332104596e-05,
"loss": 2.508,
"step": 1560
},
{
"epoch": 59.62264150943396,
"grad_norm": 1.1961953679380617,
"learning_rate": 7.957643224967155e-05,
"loss": 2.5009,
"step": 1580
},
{
"epoch": 60.37735849056604,
"grad_norm": 1.2497242021619674,
"learning_rate": 7.956530778930622e-05,
"loss": 2.5059,
"step": 1600
},
{
"epoch": 61.132075471698116,
"grad_norm": 1.3171737588939698,
"learning_rate": 7.955403998297261e-05,
"loss": 2.4988,
"step": 1620
},
{
"epoch": 61.886792452830186,
"grad_norm": 1.4834839050766762,
"learning_rate": 7.95426288742477e-05,
"loss": 2.4981,
"step": 1640
},
{
"epoch": 62.64150943396226,
"grad_norm": 1.5715059944491987,
"learning_rate": 7.953107450726267e-05,
"loss": 2.5151,
"step": 1660
},
{
"epoch": 63.39622641509434,
"grad_norm": 1.3272413313721245,
"learning_rate": 7.95193769267028e-05,
"loss": 2.4963,
"step": 1680
},
{
"epoch": 64.15094339622641,
"grad_norm": 1.0349543461606097,
"learning_rate": 7.950753617780715e-05,
"loss": 2.4829,
"step": 1700
},
{
"epoch": 64.90566037735849,
"grad_norm": 1.9240255477140202,
"learning_rate": 7.949555230636851e-05,
"loss": 2.4943,
"step": 1720
},
{
"epoch": 65.66037735849056,
"grad_norm": 1.3252737004710828,
"learning_rate": 7.948342535873318e-05,
"loss": 2.4642,
"step": 1740
},
{
"epoch": 66.41509433962264,
"grad_norm": 1.4539890356994254,
"learning_rate": 7.947115538180077e-05,
"loss": 2.4609,
"step": 1760
},
{
"epoch": 67.16981132075472,
"grad_norm": 1.4560503030341407,
"learning_rate": 7.945874242302408e-05,
"loss": 2.5209,
"step": 1780
},
{
"epoch": 67.9245283018868,
"grad_norm": 1.344513424004851,
"learning_rate": 7.944618653040883e-05,
"loss": 2.4993,
"step": 1800
},
{
"epoch": 68.67924528301887,
"grad_norm": 1.8832922124286062,
"learning_rate": 7.943348775251356e-05,
"loss": 2.4646,
"step": 1820
},
{
"epoch": 69.43396226415095,
"grad_norm": 1.2399208576659413,
"learning_rate": 7.942064613844938e-05,
"loss": 2.4849,
"step": 1840
},
{
"epoch": 70.18867924528301,
"grad_norm": 1.3243651575141913,
"learning_rate": 7.940766173787979e-05,
"loss": 2.4599,
"step": 1860
},
{
"epoch": 70.94339622641509,
"grad_norm": 1.556289335581103,
"learning_rate": 7.939453460102055e-05,
"loss": 2.4888,
"step": 1880
},
{
"epoch": 71.69811320754717,
"grad_norm": 1.2063108835158236,
"learning_rate": 7.93812647786394e-05,
"loss": 2.4403,
"step": 1900
},
{
"epoch": 72.45283018867924,
"grad_norm": 1.381601728211994,
"learning_rate": 7.936785232205587e-05,
"loss": 2.4616,
"step": 1920
},
{
"epoch": 73.20754716981132,
"grad_norm": 1.1962254019464496,
"learning_rate": 7.935429728314119e-05,
"loss": 2.4594,
"step": 1940
},
{
"epoch": 73.9622641509434,
"grad_norm": 1.5037943758052086,
"learning_rate": 7.934059971431796e-05,
"loss": 2.4767,
"step": 1960
},
{
"epoch": 74.71698113207547,
"grad_norm": 1.3520028552568202,
"learning_rate": 7.932675966856001e-05,
"loss": 2.4627,
"step": 1980
},
{
"epoch": 75.47169811320755,
"grad_norm": 1.1447719169505226,
"learning_rate": 7.931277719939217e-05,
"loss": 2.4434,
"step": 2000
},
{
"epoch": 76.22641509433963,
"grad_norm": 1.264362441844072,
"learning_rate": 7.92986523608901e-05,
"loss": 2.4532,
"step": 2020
},
{
"epoch": 76.98113207547169,
"grad_norm": 1.3287557477036405,
"learning_rate": 7.928438520768005e-05,
"loss": 2.4227,
"step": 2040
},
{
"epoch": 77.73584905660377,
"grad_norm": 1.3638216226843092,
"learning_rate": 7.926997579493864e-05,
"loss": 2.4124,
"step": 2060
},
{
"epoch": 78.49056603773585,
"grad_norm": 1.8271009676844974,
"learning_rate": 7.925542417839267e-05,
"loss": 2.4564,
"step": 2080
},
{
"epoch": 79.24528301886792,
"grad_norm": 3.54259081233157,
"learning_rate": 7.924073041431895e-05,
"loss": 2.4369,
"step": 2100
},
{
"epoch": 80.0,
"grad_norm": 1.4017324354742142,
"learning_rate": 7.922589455954394e-05,
"loss": 2.4464,
"step": 2120
},
{
"epoch": 80.75471698113208,
"grad_norm": 1.2911982587049995,
"learning_rate": 7.921091667144366e-05,
"loss": 2.4513,
"step": 2140
},
{
"epoch": 81.50943396226415,
"grad_norm": 1.6022339076405718,
"learning_rate": 7.919579680794347e-05,
"loss": 2.4203,
"step": 2160
},
{
"epoch": 82.26415094339623,
"grad_norm": 1.192345448314673,
"learning_rate": 7.918053502751772e-05,
"loss": 2.4254,
"step": 2180
},
{
"epoch": 83.01886792452831,
"grad_norm": 1.6383611170040047,
"learning_rate": 7.916513138918968e-05,
"loss": 2.4271,
"step": 2200
},
{
"epoch": 83.77358490566037,
"grad_norm": 1.2342470221196802,
"learning_rate": 7.91495859525312e-05,
"loss": 2.4079,
"step": 2220
},
{
"epoch": 84.52830188679245,
"grad_norm": 1.0846993450602334,
"learning_rate": 7.913389877766257e-05,
"loss": 2.4383,
"step": 2240
},
{
"epoch": 85.28301886792453,
"grad_norm": 1.6823249556637492,
"learning_rate": 7.911806992525215e-05,
"loss": 2.4146,
"step": 2260
},
{
"epoch": 86.0377358490566,
"grad_norm": 1.1641636008270617,
"learning_rate": 7.91020994565163e-05,
"loss": 2.4208,
"step": 2280
},
{
"epoch": 86.79245283018868,
"grad_norm": 1.2267450186018727,
"learning_rate": 7.9085987433219e-05,
"loss": 2.4123,
"step": 2300
},
{
"epoch": 87.54716981132076,
"grad_norm": 1.3570826999644423,
"learning_rate": 7.906973391767178e-05,
"loss": 2.3968,
"step": 2320
},
{
"epoch": 88.30188679245283,
"grad_norm": 1.4751948166402733,
"learning_rate": 7.905333897273327e-05,
"loss": 2.4266,
"step": 2340
},
{
"epoch": 89.05660377358491,
"grad_norm": 1.6442713319159463,
"learning_rate": 7.903680266180908e-05,
"loss": 2.4226,
"step": 2360
},
{
"epoch": 89.81132075471699,
"grad_norm": 1.3132724406404779,
"learning_rate": 7.90201250488516e-05,
"loss": 2.419,
"step": 2380
},
{
"epoch": 90.56603773584905,
"grad_norm": 1.4073019579145547,
"learning_rate": 7.900330619835963e-05,
"loss": 2.3689,
"step": 2400
},
{
"epoch": 91.32075471698113,
"grad_norm": 1.2366514839120522,
"learning_rate": 7.89863461753782e-05,
"loss": 2.4054,
"step": 2420
},
{
"epoch": 92.0754716981132,
"grad_norm": 1.2825349652701765,
"learning_rate": 7.896924504549836e-05,
"loss": 2.4019,
"step": 2440
},
{
"epoch": 92.83018867924528,
"grad_norm": 1.836162542809911,
"learning_rate": 7.895200287485676e-05,
"loss": 2.4177,
"step": 2460
},
{
"epoch": 93.58490566037736,
"grad_norm": 1.1862449779023223,
"learning_rate": 7.893461973013567e-05,
"loss": 2.417,
"step": 2480
},
{
"epoch": 94.33962264150944,
"grad_norm": 1.4267902121087415,
"learning_rate": 7.891709567856242e-05,
"loss": 2.3877,
"step": 2500
},
{
"epoch": 95.09433962264151,
"grad_norm": 1.2628527153576017,
"learning_rate": 7.889943078790934e-05,
"loss": 2.3893,
"step": 2520
},
{
"epoch": 95.84905660377359,
"grad_norm": 1.2789710243072507,
"learning_rate": 7.888162512649344e-05,
"loss": 2.3747,
"step": 2540
},
{
"epoch": 96.60377358490567,
"grad_norm": 1.2286761119774143,
"learning_rate": 7.886367876317615e-05,
"loss": 2.3835,
"step": 2560
},
{
"epoch": 97.35849056603773,
"grad_norm": 1.1142509789518844,
"learning_rate": 7.884559176736305e-05,
"loss": 2.3751,
"step": 2580
},
{
"epoch": 98.11320754716981,
"grad_norm": 1.4479112681435136,
"learning_rate": 7.882736420900357e-05,
"loss": 2.3885,
"step": 2600
},
{
"epoch": 98.86792452830188,
"grad_norm": 1.363147415477506,
"learning_rate": 7.880899615859078e-05,
"loss": 2.3738,
"step": 2620
},
{
"epoch": 99.62264150943396,
"grad_norm": 1.1387365076919822,
"learning_rate": 7.879048768716105e-05,
"loss": 2.3476,
"step": 2640
},
{
"epoch": 100.37735849056604,
"grad_norm": 1.1944352338174065,
"learning_rate": 7.87718388662939e-05,
"loss": 2.3729,
"step": 2660
},
{
"epoch": 101.13207547169812,
"grad_norm": 1.1017143695500988,
"learning_rate": 7.875304976811153e-05,
"loss": 2.3846,
"step": 2680
},
{
"epoch": 101.88679245283019,
"grad_norm": 1.250014546065029,
"learning_rate": 7.873412046527873e-05,
"loss": 2.3928,
"step": 2700
},
{
"epoch": 102.64150943396227,
"grad_norm": 1.4448571670529484,
"learning_rate": 7.871505103100243e-05,
"loss": 2.3464,
"step": 2720
},
{
"epoch": 103.39622641509433,
"grad_norm": 1.1242909760207218,
"learning_rate": 7.869584153903159e-05,
"loss": 2.3739,
"step": 2740
},
{
"epoch": 104.15094339622641,
"grad_norm": 2.2842982833142176,
"learning_rate": 7.86764920636568e-05,
"loss": 2.348,
"step": 2760
},
{
"epoch": 104.90566037735849,
"grad_norm": 1.377894286349549,
"learning_rate": 7.865700267970997e-05,
"loss": 2.3888,
"step": 2780
},
{
"epoch": 105.66037735849056,
"grad_norm": 1.889252338819464,
"learning_rate": 7.863737346256416e-05,
"loss": 2.339,
"step": 2800
},
{
"epoch": 106.41509433962264,
"grad_norm": 1.2007024366101338,
"learning_rate": 7.861760448813318e-05,
"loss": 2.3518,
"step": 2820
},
{
"epoch": 107.16981132075472,
"grad_norm": 1.3150471864332571,
"learning_rate": 7.859769583287136e-05,
"loss": 2.3755,
"step": 2840
},
{
"epoch": 107.9245283018868,
"grad_norm": 1.3488307619297817,
"learning_rate": 7.857764757377321e-05,
"loss": 2.3613,
"step": 2860
},
{
"epoch": 108.67924528301887,
"grad_norm": 1.1271224750447038,
"learning_rate": 7.855745978837316e-05,
"loss": 2.3434,
"step": 2880
},
{
"epoch": 109.43396226415095,
"grad_norm": 1.2792788627087681,
"learning_rate": 7.85371325547452e-05,
"loss": 2.3475,
"step": 2900
},
{
"epoch": 110.18867924528301,
"grad_norm": 1.1278269502097389,
"learning_rate": 7.851666595150267e-05,
"loss": 2.3561,
"step": 2920
},
{
"epoch": 110.94339622641509,
"grad_norm": 1.2221588824212564,
"learning_rate": 7.849606005779789e-05,
"loss": 2.345,
"step": 2940
},
{
"epoch": 111.69811320754717,
"grad_norm": 1.2272636691471697,
"learning_rate": 7.84753149533219e-05,
"loss": 2.3491,
"step": 2960
},
{
"epoch": 112.45283018867924,
"grad_norm": 1.4379769660358386,
"learning_rate": 7.845443071830403e-05,
"loss": 2.3703,
"step": 2980
},
{
"epoch": 113.20754716981132,
"grad_norm": 1.1938598523408401,
"learning_rate": 7.843340743351179e-05,
"loss": 2.3514,
"step": 3000
},
{
"epoch": 113.9622641509434,
"grad_norm": 1.1633264713108291,
"learning_rate": 7.841224518025038e-05,
"loss": 2.3396,
"step": 3020
},
{
"epoch": 114.71698113207547,
"grad_norm": 1.1889386134705129,
"learning_rate": 7.839094404036246e-05,
"loss": 2.3654,
"step": 3040
},
{
"epoch": 115.47169811320755,
"grad_norm": 1.2210304404269434,
"learning_rate": 7.836950409622788e-05,
"loss": 2.3827,
"step": 3060
},
{
"epoch": 116.22641509433963,
"grad_norm": 1.2063342612399106,
"learning_rate": 7.834792543076318e-05,
"loss": 2.3316,
"step": 3080
},
{
"epoch": 116.98113207547169,
"grad_norm": 1.1263568091149723,
"learning_rate": 7.832620812742149e-05,
"loss": 2.3483,
"step": 3100
},
{
"epoch": 117.73584905660377,
"grad_norm": 1.1259514670897872,
"learning_rate": 7.830435227019208e-05,
"loss": 2.3125,
"step": 3120
},
{
"epoch": 118.49056603773585,
"grad_norm": 1.4031978763279247,
"learning_rate": 7.828235794360003e-05,
"loss": 2.3509,
"step": 3140
},
{
"epoch": 119.24528301886792,
"grad_norm": 1.1004874238643756,
"learning_rate": 7.826022523270598e-05,
"loss": 2.2975,
"step": 3160
},
{
"epoch": 120.0,
"grad_norm": 1.1440875702771847,
"learning_rate": 7.823795422310573e-05,
"loss": 2.3048,
"step": 3180
},
{
"epoch": 120.75471698113208,
"grad_norm": 1.254578833443374,
"learning_rate": 7.821554500092995e-05,
"loss": 2.3253,
"step": 3200
},
{
"epoch": 121.50943396226415,
"grad_norm": 1.3020705320626609,
"learning_rate": 7.819299765284377e-05,
"loss": 2.32,
"step": 3220
},
{
"epoch": 122.26415094339623,
"grad_norm": 1.144219025307704,
"learning_rate": 7.817031226604663e-05,
"loss": 2.3338,
"step": 3240
},
{
"epoch": 123.01886792452831,
"grad_norm": 1.4392091454771268,
"learning_rate": 7.814748892827171e-05,
"loss": 2.3081,
"step": 3260
},
{
"epoch": 123.77358490566037,
"grad_norm": 1.57104334995189,
"learning_rate": 7.812452772778576e-05,
"loss": 2.3044,
"step": 3280
},
{
"epoch": 124.52830188679245,
"grad_norm": 1.2140782445458616,
"learning_rate": 7.810142875338864e-05,
"loss": 2.3162,
"step": 3300
},
{
"epoch": 125.28301886792453,
"grad_norm": 1.1430293000699974,
"learning_rate": 7.807819209441311e-05,
"loss": 2.3349,
"step": 3320
},
{
"epoch": 126.0377358490566,
"grad_norm": 1.3717461598648188,
"learning_rate": 7.805481784072435e-05,
"loss": 2.3048,
"step": 3340
},
{
"epoch": 126.79245283018868,
"grad_norm": 1.413324243222593,
"learning_rate": 7.803130608271972e-05,
"loss": 2.2987,
"step": 3360
},
{
"epoch": 127.54716981132076,
"grad_norm": 1.4726228615781376,
"learning_rate": 7.80076569113283e-05,
"loss": 2.3164,
"step": 3380
},
{
"epoch": 128.30188679245282,
"grad_norm": 1.4760960932985028,
"learning_rate": 7.798387041801066e-05,
"loss": 2.3314,
"step": 3400
},
{
"epoch": 129.0566037735849,
"grad_norm": 1.429175780411594,
"learning_rate": 7.795994669475842e-05,
"loss": 2.2752,
"step": 3420
},
{
"epoch": 129.81132075471697,
"grad_norm": 1.1413240245586067,
"learning_rate": 7.793588583409394e-05,
"loss": 2.333,
"step": 3440
},
{
"epoch": 130.56603773584905,
"grad_norm": 1.298767089765165,
"learning_rate": 7.791168792906992e-05,
"loss": 2.3227,
"step": 3460
},
{
"epoch": 131.32075471698113,
"grad_norm": 1.2359046339523858,
"learning_rate": 7.788735307326908e-05,
"loss": 2.3108,
"step": 3480
},
{
"epoch": 132.0754716981132,
"grad_norm": 1.1866095738297588,
"learning_rate": 7.786288136080376e-05,
"loss": 2.274,
"step": 3500
},
{
"epoch": 132.83018867924528,
"grad_norm": 1.1104279322428132,
"learning_rate": 7.78382728863156e-05,
"loss": 2.2888,
"step": 3520
},
{
"epoch": 133.58490566037736,
"grad_norm": 2.2680957450657537,
"learning_rate": 7.781352774497518e-05,
"loss": 2.2938,
"step": 3540
},
{
"epoch": 134.33962264150944,
"grad_norm": 1.6397138146409036,
"learning_rate": 7.778864603248155e-05,
"loss": 2.3068,
"step": 3560
},
{
"epoch": 135.0943396226415,
"grad_norm": 1.3552905083817133,
"learning_rate": 7.7763627845062e-05,
"loss": 2.3155,
"step": 3580
},
{
"epoch": 135.8490566037736,
"grad_norm": 1.1820209128101842,
"learning_rate": 7.773847327947157e-05,
"loss": 2.2937,
"step": 3600
},
{
"epoch": 136.60377358490567,
"grad_norm": 1.2309654111909685,
"learning_rate": 7.771318243299278e-05,
"loss": 2.2887,
"step": 3620
},
{
"epoch": 137.35849056603774,
"grad_norm": 1.3795089340342572,
"learning_rate": 7.768775540343515e-05,
"loss": 2.2961,
"step": 3640
},
{
"epoch": 138.11320754716982,
"grad_norm": 1.3662606951792517,
"learning_rate": 7.766219228913492e-05,
"loss": 2.288,
"step": 3660
},
{
"epoch": 138.8679245283019,
"grad_norm": 1.1081358756463113,
"learning_rate": 7.763649318895459e-05,
"loss": 2.3193,
"step": 3680
},
{
"epoch": 139.62264150943398,
"grad_norm": 1.3054965758516237,
"learning_rate": 7.761065820228258e-05,
"loss": 2.2904,
"step": 3700
},
{
"epoch": 140.37735849056602,
"grad_norm": 1.4052953203319152,
"learning_rate": 7.758468742903284e-05,
"loss": 2.2803,
"step": 3720
},
{
"epoch": 141.1320754716981,
"grad_norm": 1.310015833541638,
"learning_rate": 7.755858096964445e-05,
"loss": 2.2891,
"step": 3740
},
{
"epoch": 141.88679245283018,
"grad_norm": 1.0645192580358254,
"learning_rate": 7.753233892508125e-05,
"loss": 2.2982,
"step": 3760
},
{
"epoch": 142.64150943396226,
"grad_norm": 1.0898474528650213,
"learning_rate": 7.750596139683145e-05,
"loss": 2.2711,
"step": 3780
},
{
"epoch": 143.39622641509433,
"grad_norm": 1.2074165473918712,
"learning_rate": 7.747944848690719e-05,
"loss": 2.2592,
"step": 3800
},
{
"epoch": 144.1509433962264,
"grad_norm": 1.0959283058664937,
"learning_rate": 7.745280029784423e-05,
"loss": 2.2813,
"step": 3820
},
{
"epoch": 144.9056603773585,
"grad_norm": 1.2139556681199035,
"learning_rate": 7.742601693270148e-05,
"loss": 2.2564,
"step": 3840
},
{
"epoch": 145.66037735849056,
"grad_norm": 1.2374163658098694,
"learning_rate": 7.739909849506064e-05,
"loss": 2.2972,
"step": 3860
},
{
"epoch": 146.41509433962264,
"grad_norm": 1.212582172960113,
"learning_rate": 7.737204508902578e-05,
"loss": 2.2683,
"step": 3880
},
{
"epoch": 147.16981132075472,
"grad_norm": 1.064638273683967,
"learning_rate": 7.734485681922295e-05,
"loss": 2.2643,
"step": 3900
},
{
"epoch": 147.9245283018868,
"grad_norm": 1.5185500205423423,
"learning_rate": 7.731753379079976e-05,
"loss": 2.2825,
"step": 3920
},
{
"epoch": 148.67924528301887,
"grad_norm": 1.2571175031602655,
"learning_rate": 7.7290076109425e-05,
"loss": 2.2838,
"step": 3940
},
{
"epoch": 149.43396226415095,
"grad_norm": 1.1185096216789012,
"learning_rate": 7.726248388128821e-05,
"loss": 2.2713,
"step": 3960
},
{
"epoch": 150.18867924528303,
"grad_norm": 1.283741452573828,
"learning_rate": 7.723475721309926e-05,
"loss": 2.2578,
"step": 3980
},
{
"epoch": 150.9433962264151,
"grad_norm": 1.1735101055664479,
"learning_rate": 7.720689621208799e-05,
"loss": 2.2584,
"step": 4000
},
{
"epoch": 151.69811320754718,
"grad_norm": 1.1931741706657397,
"learning_rate": 7.717890098600371e-05,
"loss": 2.2439,
"step": 4020
},
{
"epoch": 152.45283018867926,
"grad_norm": 1.0510206287412838,
"learning_rate": 7.715077164311486e-05,
"loss": 2.2646,
"step": 4040
},
{
"epoch": 153.20754716981133,
"grad_norm": 1.2236996476850626,
"learning_rate": 7.712250829220856e-05,
"loss": 2.2518,
"step": 4060
},
{
"epoch": 153.96226415094338,
"grad_norm": 1.4295022161938338,
"learning_rate": 7.70941110425902e-05,
"loss": 2.2445,
"step": 4080
},
{
"epoch": 154.71698113207546,
"grad_norm": 1.2608108045607223,
"learning_rate": 7.706558000408294e-05,
"loss": 2.2504,
"step": 4100
},
{
"epoch": 155.47169811320754,
"grad_norm": 1.4378816608236173,
"learning_rate": 7.703691528702747e-05,
"loss": 2.2433,
"step": 4120
},
{
"epoch": 156.22641509433961,
"grad_norm": 1.3122607821127985,
"learning_rate": 7.700811700228138e-05,
"loss": 2.2593,
"step": 4140
},
{
"epoch": 156.9811320754717,
"grad_norm": 1.1677763203213758,
"learning_rate": 7.697918526121882e-05,
"loss": 2.2521,
"step": 4160
},
{
"epoch": 157.73584905660377,
"grad_norm": 1.1304212534843256,
"learning_rate": 7.695012017573013e-05,
"loss": 2.2743,
"step": 4180
},
{
"epoch": 158.49056603773585,
"grad_norm": 1.2157344056650818,
"learning_rate": 7.692092185822129e-05,
"loss": 2.2405,
"step": 4200
},
{
"epoch": 159.24528301886792,
"grad_norm": 1.2521062422528308,
"learning_rate": 7.689159042161356e-05,
"loss": 2.258,
"step": 4220
},
{
"epoch": 160.0,
"grad_norm": 1.417021221810849,
"learning_rate": 7.686212597934299e-05,
"loss": 2.2187,
"step": 4240
},
{
"epoch": 160.75471698113208,
"grad_norm": 1.0987738687082824,
"learning_rate": 7.68325286453601e-05,
"loss": 2.2155,
"step": 4260
},
{
"epoch": 161.50943396226415,
"grad_norm": 1.4771801969035276,
"learning_rate": 7.680279853412924e-05,
"loss": 2.27,
"step": 4280
},
{
"epoch": 162.26415094339623,
"grad_norm": 1.1956274528883593,
"learning_rate": 7.677293576062836e-05,
"loss": 2.2717,
"step": 4300
},
{
"epoch": 163.0188679245283,
"grad_norm": 1.1219859338242828,
"learning_rate": 7.674294044034839e-05,
"loss": 2.2487,
"step": 4320
},
{
"epoch": 163.77358490566039,
"grad_norm": 1.255744824066408,
"learning_rate": 7.671281268929293e-05,
"loss": 2.2366,
"step": 4340
},
{
"epoch": 164.52830188679246,
"grad_norm": 1.112451658029252,
"learning_rate": 7.668255262397772e-05,
"loss": 2.2377,
"step": 4360
},
{
"epoch": 165.28301886792454,
"grad_norm": 1.1131032086265853,
"learning_rate": 7.66521603614302e-05,
"loss": 2.2483,
"step": 4380
},
{
"epoch": 166.03773584905662,
"grad_norm": 1.2568117014241036,
"learning_rate": 7.662163601918907e-05,
"loss": 2.2637,
"step": 4400
},
{
"epoch": 166.79245283018867,
"grad_norm": 1.0763275712599132,
"learning_rate": 7.659097971530385e-05,
"loss": 2.2275,
"step": 4420
},
{
"epoch": 167.54716981132074,
"grad_norm": 1.0880356132513982,
"learning_rate": 7.656019156833438e-05,
"loss": 2.227,
"step": 4440
},
{
"epoch": 168.30188679245282,
"grad_norm": 1.0805504953865772,
"learning_rate": 7.652927169735042e-05,
"loss": 2.2205,
"step": 4460
},
{
"epoch": 169.0566037735849,
"grad_norm": 1.0979536600508317,
"learning_rate": 7.649822022193114e-05,
"loss": 2.2008,
"step": 4480
},
{
"epoch": 169.81132075471697,
"grad_norm": 1.0424485855679975,
"learning_rate": 7.646703726216467e-05,
"loss": 2.235,
"step": 4500
},
{
"epoch": 170.56603773584905,
"grad_norm": 1.1541609361962377,
"learning_rate": 7.643572293864766e-05,
"loss": 2.2297,
"step": 4520
},
{
"epoch": 171.32075471698113,
"grad_norm": 1.1630212513509717,
"learning_rate": 7.640427737248479e-05,
"loss": 2.2295,
"step": 4540
},
{
"epoch": 172.0754716981132,
"grad_norm": 1.5088805287099432,
"learning_rate": 7.637270068528828e-05,
"loss": 2.2445,
"step": 4560
},
{
"epoch": 172.83018867924528,
"grad_norm": 1.66773080303759,
"learning_rate": 7.634099299917748e-05,
"loss": 2.2336,
"step": 4580
},
{
"epoch": 173.58490566037736,
"grad_norm": 1.4239223646642891,
"learning_rate": 7.630915443677834e-05,
"loss": 2.2128,
"step": 4600
},
{
"epoch": 174.33962264150944,
"grad_norm": 1.2623270496447048,
"learning_rate": 7.627718512122297e-05,
"loss": 2.2253,
"step": 4620
},
{
"epoch": 175.0943396226415,
"grad_norm": 1.2406324767245749,
"learning_rate": 7.624508517614919e-05,
"loss": 2.2131,
"step": 4640
},
{
"epoch": 175.8490566037736,
"grad_norm": 1.3130455463591448,
"learning_rate": 7.621285472569993e-05,
"loss": 2.1944,
"step": 4660
},
{
"epoch": 176.60377358490567,
"grad_norm": 1.1413419622441512,
"learning_rate": 7.61804938945229e-05,
"loss": 2.2243,
"step": 4680
},
{
"epoch": 177.35849056603774,
"grad_norm": 1.2146654711035267,
"learning_rate": 7.614800280777005e-05,
"loss": 2.2172,
"step": 4700
},
{
"epoch": 178.11320754716982,
"grad_norm": 1.3634898063511693,
"learning_rate": 7.611538159109703e-05,
"loss": 2.205,
"step": 4720
},
{
"epoch": 178.8679245283019,
"grad_norm": 1.2311721419826,
"learning_rate": 7.608263037066277e-05,
"loss": 2.2252,
"step": 4740
},
{
"epoch": 179.62264150943398,
"grad_norm": 1.0908914570592438,
"learning_rate": 7.6049749273129e-05,
"loss": 2.2138,
"step": 4760
},
{
"epoch": 180.37735849056602,
"grad_norm": 1.1038829505990149,
"learning_rate": 7.601673842565972e-05,
"loss": 2.1939,
"step": 4780
},
{
"epoch": 181.1320754716981,
"grad_norm": 1.1236372724431538,
"learning_rate": 7.598359795592073e-05,
"loss": 2.2382,
"step": 4800
},
{
"epoch": 181.88679245283018,
"grad_norm": 1.3232451908070362,
"learning_rate": 7.59503279920791e-05,
"loss": 2.201,
"step": 4820
},
{
"epoch": 182.64150943396226,
"grad_norm": 1.3292125597941664,
"learning_rate": 7.591692866280274e-05,
"loss": 2.2058,
"step": 4840
},
{
"epoch": 183.39622641509433,
"grad_norm": 1.1970310296785942,
"learning_rate": 7.588340009725985e-05,
"loss": 2.206,
"step": 4860
},
{
"epoch": 184.1509433962264,
"grad_norm": 1.055682897860096,
"learning_rate": 7.584974242511845e-05,
"loss": 2.2148,
"step": 4880
},
{
"epoch": 184.9056603773585,
"grad_norm": 1.1655929048666676,
"learning_rate": 7.581595577654584e-05,
"loss": 2.2146,
"step": 4900
},
{
"epoch": 185.66037735849056,
"grad_norm": 1.2197862783964168,
"learning_rate": 7.578204028220814e-05,
"loss": 2.2023,
"step": 4920
},
{
"epoch": 186.41509433962264,
"grad_norm": 1.1536947546834515,
"learning_rate": 7.574799607326977e-05,
"loss": 2.2074,
"step": 4940
},
{
"epoch": 187.16981132075472,
"grad_norm": 1.1570044860516948,
"learning_rate": 7.571382328139293e-05,
"loss": 2.2057,
"step": 4960
},
{
"epoch": 187.9245283018868,
"grad_norm": 2.2251854969672165,
"learning_rate": 7.56795220387371e-05,
"loss": 2.1975,
"step": 4980
},
{
"epoch": 188.67924528301887,
"grad_norm": 1.16489093753128,
"learning_rate": 7.564509247795854e-05,
"loss": 2.1947,
"step": 5000
},
{
"epoch": 189.43396226415095,
"grad_norm": 1.1610456984999162,
"learning_rate": 7.561053473220977e-05,
"loss": 2.1861,
"step": 5020
},
{
"epoch": 190.18867924528303,
"grad_norm": 1.173342232590181,
"learning_rate": 7.557584893513902e-05,
"loss": 2.1997,
"step": 5040
},
{
"epoch": 190.9433962264151,
"grad_norm": 1.1873135989990635,
"learning_rate": 7.554103522088976e-05,
"loss": 2.1841,
"step": 5060
},
{
"epoch": 191.69811320754718,
"grad_norm": 1.4263143797188473,
"learning_rate": 7.550609372410018e-05,
"loss": 2.1823,
"step": 5080
},
{
"epoch": 192.45283018867926,
"grad_norm": 1.0849530063111787,
"learning_rate": 7.547102457990266e-05,
"loss": 2.1842,
"step": 5100
},
{
"epoch": 193.20754716981133,
"grad_norm": 1.1497288768060088,
"learning_rate": 7.54358279239232e-05,
"loss": 2.2258,
"step": 5120
},
{
"epoch": 193.96226415094338,
"grad_norm": 1.2795496420829302,
"learning_rate": 7.540050389228099e-05,
"loss": 2.192,
"step": 5140
},
{
"epoch": 194.71698113207546,
"grad_norm": 1.0700549445449614,
"learning_rate": 7.536505262158779e-05,
"loss": 2.1913,
"step": 5160
},
{
"epoch": 195.47169811320754,
"grad_norm": 1.3697359389801924,
"learning_rate": 7.532947424894744e-05,
"loss": 2.2044,
"step": 5180
},
{
"epoch": 196.22641509433961,
"grad_norm": 1.0721264053082575,
"learning_rate": 7.52937689119554e-05,
"loss": 2.1916,
"step": 5200
},
{
"epoch": 196.9811320754717,
"grad_norm": 1.2325173290768243,
"learning_rate": 7.525793674869805e-05,
"loss": 2.1738,
"step": 5220
},
{
"epoch": 197.73584905660377,
"grad_norm": 1.078471360885739,
"learning_rate": 7.522197789775235e-05,
"loss": 2.2043,
"step": 5240
},
{
"epoch": 198.49056603773585,
"grad_norm": 1.508079711738152,
"learning_rate": 7.518589249818516e-05,
"loss": 2.2159,
"step": 5260
},
{
"epoch": 199.24528301886792,
"grad_norm": 1.0511550659614401,
"learning_rate": 7.514968068955273e-05,
"loss": 2.168,
"step": 5280
},
{
"epoch": 200.0,
"grad_norm": 1.0585993296644824,
"learning_rate": 7.511334261190026e-05,
"loss": 2.1847,
"step": 5300
},
{
"epoch": 200.75471698113208,
"grad_norm": 1.19584254290663,
"learning_rate": 7.507687840576123e-05,
"loss": 2.1953,
"step": 5320
},
{
"epoch": 201.50943396226415,
"grad_norm": 1.360707266271236,
"learning_rate": 7.504028821215686e-05,
"loss": 2.1866,
"step": 5340
},
{
"epoch": 202.26415094339623,
"grad_norm": 1.0326833677791634,
"learning_rate": 7.500357217259573e-05,
"loss": 2.1889,
"step": 5360
},
{
"epoch": 203.0188679245283,
"grad_norm": 1.1966698046584427,
"learning_rate": 7.496673042907302e-05,
"loss": 2.204,
"step": 5380
},
{
"epoch": 203.77358490566039,
"grad_norm": 1.1792590946885393,
"learning_rate": 7.492976312407011e-05,
"loss": 2.1679,
"step": 5400
},
{
"epoch": 204.52830188679246,
"grad_norm": 1.0821551276306904,
"learning_rate": 7.489267040055393e-05,
"loss": 2.172,
"step": 5420
},
{
"epoch": 205.28301886792454,
"grad_norm": 1.1206896992927644,
"learning_rate": 7.48554524019765e-05,
"loss": 2.1558,
"step": 5440
},
{
"epoch": 206.03773584905662,
"grad_norm": 1.164481519584628,
"learning_rate": 7.481810927227427e-05,
"loss": 2.1707,
"step": 5460
},
{
"epoch": 206.79245283018867,
"grad_norm": 1.045173494578065,
"learning_rate": 7.47806411558677e-05,
"loss": 2.1454,
"step": 5480
},
{
"epoch": 207.54716981132074,
"grad_norm": 1.3037299893846073,
"learning_rate": 7.474304819766053e-05,
"loss": 2.1735,
"step": 5500
},
{
"epoch": 208.30188679245282,
"grad_norm": 1.1799164756908072,
"learning_rate": 7.470533054303937e-05,
"loss": 2.1678,
"step": 5520
},
{
"epoch": 209.0566037735849,
"grad_norm": 1.23204534029245,
"learning_rate": 7.46674883378731e-05,
"loss": 2.18,
"step": 5540
},
{
"epoch": 209.81132075471697,
"grad_norm": 1.1705040244332197,
"learning_rate": 7.462952172851219e-05,
"loss": 2.1638,
"step": 5560
},
{
"epoch": 210.56603773584905,
"grad_norm": 1.115647376955501,
"learning_rate": 7.459143086178838e-05,
"loss": 2.1517,
"step": 5580
},
{
"epoch": 211.32075471698113,
"grad_norm": 1.095644914375309,
"learning_rate": 7.455321588501378e-05,
"loss": 2.1624,
"step": 5600
},
{
"epoch": 212.0754716981132,
"grad_norm": 1.2461377018123299,
"learning_rate": 7.451487694598063e-05,
"loss": 2.1795,
"step": 5620
},
{
"epoch": 212.83018867924528,
"grad_norm": 1.0808714278402736,
"learning_rate": 7.447641419296051e-05,
"loss": 2.1857,
"step": 5640
},
{
"epoch": 213.58490566037736,
"grad_norm": 1.175783749152713,
"learning_rate": 7.443782777470388e-05,
"loss": 2.1489,
"step": 5660
},
{
"epoch": 214.33962264150944,
"grad_norm": 1.0323602107911023,
"learning_rate": 7.43991178404394e-05,
"loss": 2.1814,
"step": 5680
},
{
"epoch": 215.0943396226415,
"grad_norm": 1.4371901693782694,
"learning_rate": 7.436028453987343e-05,
"loss": 2.1607,
"step": 5700
},
{
"epoch": 215.8490566037736,
"grad_norm": 1.2749189929859621,
"learning_rate": 7.432132802318953e-05,
"loss": 2.1344,
"step": 5720
},
{
"epoch": 216.60377358490567,
"grad_norm": 1.7991005001893379,
"learning_rate": 7.428224844104763e-05,
"loss": 2.1705,
"step": 5740
},
{
"epoch": 217.35849056603774,
"grad_norm": 1.0763947355182082,
"learning_rate": 7.424304594458374e-05,
"loss": 2.1681,
"step": 5760
},
{
"epoch": 218.11320754716982,
"grad_norm": 1.147647175883896,
"learning_rate": 7.420372068540913e-05,
"loss": 2.1792,
"step": 5780
},
{
"epoch": 218.8679245283019,
"grad_norm": 1.232169418468151,
"learning_rate": 7.41642728156099e-05,
"loss": 2.1143,
"step": 5800
},
{
"epoch": 219.62264150943398,
"grad_norm": 1.3992234444810514,
"learning_rate": 7.41247024877463e-05,
"loss": 2.1612,
"step": 5820
},
{
"epoch": 220.37735849056602,
"grad_norm": 1.2478978185410232,
"learning_rate": 7.40850098548522e-05,
"loss": 2.1749,
"step": 5840
},
{
"epoch": 221.1320754716981,
"grad_norm": 1.0796153228438745,
"learning_rate": 7.404519507043443e-05,
"loss": 2.1345,
"step": 5860
},
{
"epoch": 221.88679245283018,
"grad_norm": 1.164330548160425,
"learning_rate": 7.40052582884723e-05,
"loss": 2.1573,
"step": 5880
},
{
"epoch": 222.64150943396226,
"grad_norm": 1.2041551436276394,
"learning_rate": 7.396519966341684e-05,
"loss": 2.162,
"step": 5900
},
{
"epoch": 223.39622641509433,
"grad_norm": 1.2780053810145304,
"learning_rate": 7.392501935019036e-05,
"loss": 2.1524,
"step": 5920
},
{
"epoch": 224.1509433962264,
"grad_norm": 1.069497717017709,
"learning_rate": 7.388471750418576e-05,
"loss": 2.1427,
"step": 5940
},
{
"epoch": 224.9056603773585,
"grad_norm": 1.1790523262171884,
"learning_rate": 7.384429428126599e-05,
"loss": 2.1693,
"step": 5960
},
{
"epoch": 225.66037735849056,
"grad_norm": 1.0727940077044007,
"learning_rate": 7.380374983776333e-05,
"loss": 2.1146,
"step": 5980
},
{
"epoch": 226.41509433962264,
"grad_norm": 1.0481198314836597,
"learning_rate": 7.376308433047898e-05,
"loss": 2.1563,
"step": 6000
},
{
"epoch": 227.16981132075472,
"grad_norm": 1.3874056107583248,
"learning_rate": 7.372229791668223e-05,
"loss": 2.1456,
"step": 6020
},
{
"epoch": 227.9245283018868,
"grad_norm": 1.3153838535909976,
"learning_rate": 7.368139075411003e-05,
"loss": 2.1575,
"step": 6040
},
{
"epoch": 228.67924528301887,
"grad_norm": 1.1788160013410025,
"learning_rate": 7.364036300096631e-05,
"loss": 2.1437,
"step": 6060
},
{
"epoch": 229.43396226415095,
"grad_norm": 1.1169312984810649,
"learning_rate": 7.359921481592136e-05,
"loss": 2.1568,
"step": 6080
},
{
"epoch": 230.18867924528303,
"grad_norm": 1.1435068374715258,
"learning_rate": 7.355794635811118e-05,
"loss": 2.1503,
"step": 6100
},
{
"epoch": 230.9433962264151,
"grad_norm": 1.7552469517638039,
"learning_rate": 7.3516557787137e-05,
"loss": 2.128,
"step": 6120
},
{
"epoch": 231.69811320754718,
"grad_norm": 1.0779669989000775,
"learning_rate": 7.347504926306452e-05,
"loss": 2.1485,
"step": 6140
},
{
"epoch": 232.45283018867926,
"grad_norm": 1.186788194688993,
"learning_rate": 7.343342094642333e-05,
"loss": 2.1576,
"step": 6160
},
{
"epoch": 233.20754716981133,
"grad_norm": 1.0594086679490557,
"learning_rate": 7.339167299820636e-05,
"loss": 2.1492,
"step": 6180
},
{
"epoch": 233.96226415094338,
"grad_norm": 1.1917321731840318,
"learning_rate": 7.334980557986916e-05,
"loss": 2.1482,
"step": 6200
},
{
"epoch": 234.71698113207546,
"grad_norm": 1.083198692826801,
"learning_rate": 7.330781885332932e-05,
"loss": 2.1461,
"step": 6220
},
{
"epoch": 235.47169811320754,
"grad_norm": 1.3139182121317998,
"learning_rate": 7.326571298096586e-05,
"loss": 2.156,
"step": 6240
},
{
"epoch": 236.22641509433961,
"grad_norm": 1.0854207170845476,
"learning_rate": 7.322348812561857e-05,
"loss": 2.1258,
"step": 6260
},
{
"epoch": 236.9811320754717,
"grad_norm": 1.5015227061373095,
"learning_rate": 7.318114445058739e-05,
"loss": 2.1439,
"step": 6280
},
{
"epoch": 237.73584905660377,
"grad_norm": 1.2347794021289429,
"learning_rate": 7.313868211963179e-05,
"loss": 2.1317,
"step": 6300
},
{
"epoch": 238.49056603773585,
"grad_norm": 1.442835177639965,
"learning_rate": 7.309610129697015e-05,
"loss": 2.113,
"step": 6320
},
{
"epoch": 239.24528301886792,
"grad_norm": 1.236255276661992,
"learning_rate": 7.305340214727905e-05,
"loss": 2.1378,
"step": 6340
},
{
"epoch": 240.0,
"grad_norm": 1.205183440308278,
"learning_rate": 7.301058483569271e-05,
"loss": 2.1336,
"step": 6360
},
{
"epoch": 240.75471698113208,
"grad_norm": 1.3246584618487252,
"learning_rate": 7.296764952780239e-05,
"loss": 2.1221,
"step": 6380
},
{
"epoch": 241.50943396226415,
"grad_norm": 1.0168670687272512,
"learning_rate": 7.292459638965558e-05,
"loss": 2.1188,
"step": 6400
},
{
"epoch": 242.26415094339623,
"grad_norm": 1.3467491151924502,
"learning_rate": 7.288142558775552e-05,
"loss": 2.101,
"step": 6420
},
{
"epoch": 243.0188679245283,
"grad_norm": 1.261074686560294,
"learning_rate": 7.283813728906054e-05,
"loss": 2.1411,
"step": 6440
},
{
"epoch": 243.77358490566039,
"grad_norm": 1.2485690805022434,
"learning_rate": 7.27947316609833e-05,
"loss": 2.1277,
"step": 6460
},
{
"epoch": 244.52830188679246,
"grad_norm": 1.3241322758759912,
"learning_rate": 7.275120887139026e-05,
"loss": 2.1363,
"step": 6480
},
{
"epoch": 245.28301886792454,
"grad_norm": 1.5599386219671891,
"learning_rate": 7.270756908860098e-05,
"loss": 2.1089,
"step": 6500
},
{
"epoch": 246.03773584905662,
"grad_norm": 1.2114819177389966,
"learning_rate": 7.266381248138751e-05,
"loss": 2.1089,
"step": 6520
},
{
"epoch": 246.79245283018867,
"grad_norm": 1.1306582742382014,
"learning_rate": 7.261993921897364e-05,
"loss": 2.1079,
"step": 6540
},
{
"epoch": 247.54716981132074,
"grad_norm": 1.2673326383282852,
"learning_rate": 7.257594947103438e-05,
"loss": 2.1266,
"step": 6560
},
{
"epoch": 248.30188679245282,
"grad_norm": 1.4019469970816203,
"learning_rate": 7.253184340769518e-05,
"loss": 2.1481,
"step": 6580
},
{
"epoch": 249.0566037735849,
"grad_norm": 1.0449709444069573,
"learning_rate": 7.248762119953135e-05,
"loss": 2.1158,
"step": 6600
},
{
"epoch": 249.81132075471697,
"grad_norm": 1.1593445705123036,
"learning_rate": 7.244328301756737e-05,
"loss": 2.13,
"step": 6620
},
{
"epoch": 250.56603773584905,
"grad_norm": 1.2635129121192081,
"learning_rate": 7.23988290332763e-05,
"loss": 2.1167,
"step": 6640
},
{
"epoch": 251.32075471698113,
"grad_norm": 1.0527620092255492,
"learning_rate": 7.235425941857891e-05,
"loss": 2.114,
"step": 6660
},
{
"epoch": 252.0754716981132,
"grad_norm": 1.3035661165388843,
"learning_rate": 7.230957434584331e-05,
"loss": 2.0928,
"step": 6680
},
{
"epoch": 252.83018867924528,
"grad_norm": 1.0136550616355096,
"learning_rate": 7.226477398788402e-05,
"loss": 2.0987,
"step": 6700
},
{
"epoch": 253.58490566037736,
"grad_norm": 1.258957796854538,
"learning_rate": 7.22198585179615e-05,
"loss": 2.1032,
"step": 6720
},
{
"epoch": 254.33962264150944,
"grad_norm": 1.2937771749668925,
"learning_rate": 7.21748281097813e-05,
"loss": 2.1003,
"step": 6740
},
{
"epoch": 255.0943396226415,
"grad_norm": 1.0533802729958242,
"learning_rate": 7.212968293749357e-05,
"loss": 2.1201,
"step": 6760
},
{
"epoch": 255.8490566037736,
"grad_norm": 1.0065996122655994,
"learning_rate": 7.208442317569225e-05,
"loss": 2.1119,
"step": 6780
},
{
"epoch": 256.60377358490564,
"grad_norm": 1.1726423865130644,
"learning_rate": 7.203904899941444e-05,
"loss": 2.0967,
"step": 6800
},
{
"epoch": 257.35849056603774,
"grad_norm": 1.4137580376820904,
"learning_rate": 7.199356058413975e-05,
"loss": 2.1297,
"step": 6820
},
{
"epoch": 258.1132075471698,
"grad_norm": 1.2534212871623691,
"learning_rate": 7.194795810578956e-05,
"loss": 2.1142,
"step": 6840
},
{
"epoch": 258.8679245283019,
"grad_norm": 1.1760816154209972,
"learning_rate": 7.190224174072643e-05,
"loss": 2.1524,
"step": 6860
},
{
"epoch": 259.62264150943395,
"grad_norm": 1.1576937144547554,
"learning_rate": 7.185641166575331e-05,
"loss": 2.0873,
"step": 6880
},
{
"epoch": 260.37735849056605,
"grad_norm": 1.18595129264392,
"learning_rate": 7.181046805811294e-05,
"loss": 2.1118,
"step": 6900
},
{
"epoch": 261.1320754716981,
"grad_norm": 1.1205604370668647,
"learning_rate": 7.176441109548715e-05,
"loss": 2.0986,
"step": 6920
},
{
"epoch": 261.8867924528302,
"grad_norm": 1.1884901674285933,
"learning_rate": 7.171824095599609e-05,
"loss": 2.1109,
"step": 6940
},
{
"epoch": 262.64150943396226,
"grad_norm": 1.123509221114028,
"learning_rate": 7.167195781819768e-05,
"loss": 2.1047,
"step": 6960
},
{
"epoch": 263.39622641509436,
"grad_norm": 1.1260075560640628,
"learning_rate": 7.162556186108684e-05,
"loss": 2.0972,
"step": 6980
},
{
"epoch": 264.1509433962264,
"grad_norm": 1.191570785218505,
"learning_rate": 7.157905326409477e-05,
"loss": 2.0938,
"step": 7000
},
{
"epoch": 264.9056603773585,
"grad_norm": 1.1009948219165815,
"learning_rate": 7.153243220708831e-05,
"loss": 2.1084,
"step": 7020
},
{
"epoch": 265.66037735849056,
"grad_norm": 1.0984593800759155,
"learning_rate": 7.148569887036923e-05,
"loss": 2.0989,
"step": 7040
},
{
"epoch": 266.41509433962267,
"grad_norm": 1.2379578619669414,
"learning_rate": 7.143885343467355e-05,
"loss": 2.1166,
"step": 7060
},
{
"epoch": 267.1698113207547,
"grad_norm": 1.1064036960932773,
"learning_rate": 7.139189608117077e-05,
"loss": 2.1104,
"step": 7080
},
{
"epoch": 267.92452830188677,
"grad_norm": 1.0772108392111555,
"learning_rate": 7.134482699146328e-05,
"loss": 2.0897,
"step": 7100
},
{
"epoch": 268.6792452830189,
"grad_norm": 1.1292302199915438,
"learning_rate": 7.129764634758554e-05,
"loss": 2.1157,
"step": 7120
},
{
"epoch": 269.4339622641509,
"grad_norm": 1.1278390668879588,
"learning_rate": 7.125035433200346e-05,
"loss": 2.0932,
"step": 7140
},
{
"epoch": 270.188679245283,
"grad_norm": 1.0414531139729244,
"learning_rate": 7.120295112761368e-05,
"loss": 2.1151,
"step": 7160
},
{
"epoch": 270.9433962264151,
"grad_norm": 1.1545750967690267,
"learning_rate": 7.115543691774282e-05,
"loss": 2.1131,
"step": 7180
},
{
"epoch": 271.6981132075472,
"grad_norm": 1.204421852849513,
"learning_rate": 7.110781188614684e-05,
"loss": 2.0802,
"step": 7200
},
{
"epoch": 272.45283018867923,
"grad_norm": 1.7831981359411682,
"learning_rate": 7.106007621701024e-05,
"loss": 2.0798,
"step": 7220
},
{
"epoch": 273.20754716981133,
"grad_norm": 1.3197751132016162,
"learning_rate": 7.101223009494545e-05,
"loss": 2.0992,
"step": 7240
},
{
"epoch": 273.9622641509434,
"grad_norm": 1.2400242729400996,
"learning_rate": 7.096427370499204e-05,
"loss": 2.0864,
"step": 7260
},
{
"epoch": 274.7169811320755,
"grad_norm": 1.0912978575620245,
"learning_rate": 7.091620723261605e-05,
"loss": 2.0923,
"step": 7280
},
{
"epoch": 275.47169811320754,
"grad_norm": 1.231133204650358,
"learning_rate": 7.086803086370918e-05,
"loss": 2.0795,
"step": 7300
},
{
"epoch": 276.22641509433964,
"grad_norm": 1.2282758399906704,
"learning_rate": 7.081974478458825e-05,
"loss": 2.0761,
"step": 7320
},
{
"epoch": 276.9811320754717,
"grad_norm": 1.3460196947110317,
"learning_rate": 7.077134918199428e-05,
"loss": 2.0752,
"step": 7340
},
{
"epoch": 277.7358490566038,
"grad_norm": 1.080960097565614,
"learning_rate": 7.072284424309193e-05,
"loss": 2.0889,
"step": 7360
},
{
"epoch": 278.49056603773585,
"grad_norm": 1.3111047591517453,
"learning_rate": 7.067423015546863e-05,
"loss": 2.0839,
"step": 7380
},
{
"epoch": 279.24528301886795,
"grad_norm": 1.5401314919739673,
"learning_rate": 7.0625507107134e-05,
"loss": 2.0927,
"step": 7400
},
{
"epoch": 280.0,
"grad_norm": 1.1041636245431063,
"learning_rate": 7.057667528651904e-05,
"loss": 2.0803,
"step": 7420
},
{
"epoch": 280.75471698113205,
"grad_norm": 1.4056508604045173,
"learning_rate": 7.052773488247539e-05,
"loss": 2.0668,
"step": 7440
},
{
"epoch": 281.50943396226415,
"grad_norm": 1.046923519873644,
"learning_rate": 7.047868608427462e-05,
"loss": 2.082,
"step": 7460
},
{
"epoch": 282.2641509433962,
"grad_norm": 1.3029328700653047,
"learning_rate": 7.042952908160754e-05,
"loss": 2.0556,
"step": 7480
},
{
"epoch": 283.0188679245283,
"grad_norm": 1.227982067650406,
"learning_rate": 7.03802640645834e-05,
"loss": 2.0478,
"step": 7500
},
{
"epoch": 283.77358490566036,
"grad_norm": 1.1015625311453152,
"learning_rate": 7.033089122372919e-05,
"loss": 2.0773,
"step": 7520
},
{
"epoch": 284.52830188679246,
"grad_norm": 1.316103623119528,
"learning_rate": 7.028141074998891e-05,
"loss": 2.0756,
"step": 7540
},
{
"epoch": 285.2830188679245,
"grad_norm": 1.255257016262856,
"learning_rate": 7.023182283472277e-05,
"loss": 2.0866,
"step": 7560
},
{
"epoch": 286.0377358490566,
"grad_norm": 1.030736862070767,
"learning_rate": 7.018212766970658e-05,
"loss": 2.0723,
"step": 7580
},
{
"epoch": 286.79245283018867,
"grad_norm": 1.293105711429154,
"learning_rate": 7.013232544713086e-05,
"loss": 2.0759,
"step": 7600
},
{
"epoch": 287.54716981132077,
"grad_norm": 1.538845550854816,
"learning_rate": 7.008241635960018e-05,
"loss": 2.0238,
"step": 7620
},
{
"epoch": 288.3018867924528,
"grad_norm": 1.1376778839977162,
"learning_rate": 7.003240060013241e-05,
"loss": 2.0895,
"step": 7640
},
{
"epoch": 289.0566037735849,
"grad_norm": 1.1524198390846205,
"learning_rate": 6.998227836215794e-05,
"loss": 2.0712,
"step": 7660
},
{
"epoch": 289.811320754717,
"grad_norm": 1.1269455096899952,
"learning_rate": 6.9932049839519e-05,
"loss": 2.0785,
"step": 7680
},
{
"epoch": 290.5660377358491,
"grad_norm": 1.1947929898571277,
"learning_rate": 6.98817152264688e-05,
"loss": 2.0789,
"step": 7700
},
{
"epoch": 291.3207547169811,
"grad_norm": 1.05216691350341,
"learning_rate": 6.983127471767088e-05,
"loss": 2.0721,
"step": 7720
},
{
"epoch": 292.07547169811323,
"grad_norm": 1.0659553585546824,
"learning_rate": 6.978072850819832e-05,
"loss": 2.0897,
"step": 7740
},
{
"epoch": 292.8301886792453,
"grad_norm": 1.4999207869643305,
"learning_rate": 6.9730076793533e-05,
"loss": 2.0875,
"step": 7760
},
{
"epoch": 293.58490566037733,
"grad_norm": 1.051015475094539,
"learning_rate": 6.967931976956479e-05,
"loss": 2.0572,
"step": 7780
},
{
"epoch": 294.33962264150944,
"grad_norm": 1.4062884543226315,
"learning_rate": 6.962845763259084e-05,
"loss": 2.0783,
"step": 7800
},
{
"epoch": 295.0943396226415,
"grad_norm": 1.3344933104485628,
"learning_rate": 6.957749057931486e-05,
"loss": 2.0491,
"step": 7820
},
{
"epoch": 295.8490566037736,
"grad_norm": 1.2853456909301206,
"learning_rate": 6.952641880684623e-05,
"loss": 2.0589,
"step": 7840
},
{
"epoch": 296.60377358490564,
"grad_norm": 0.9567533052896401,
"learning_rate": 6.947524251269942e-05,
"loss": 2.0638,
"step": 7860
},
{
"epoch": 297.35849056603774,
"grad_norm": 1.0295871659614384,
"learning_rate": 6.942396189479305e-05,
"loss": 2.0452,
"step": 7880
},
{
"epoch": 298.1132075471698,
"grad_norm": 1.0385361931014787,
"learning_rate": 6.937257715144922e-05,
"loss": 2.0693,
"step": 7900
},
{
"epoch": 298.8679245283019,
"grad_norm": 1.0809377854877955,
"learning_rate": 6.932108848139274e-05,
"loss": 2.0657,
"step": 7920
},
{
"epoch": 299.62264150943395,
"grad_norm": 1.2262962690135735,
"learning_rate": 6.926949608375031e-05,
"loss": 2.0333,
"step": 7940
},
{
"epoch": 300.37735849056605,
"grad_norm": 1.2494078282001366,
"learning_rate": 6.921780015804983e-05,
"loss": 2.0611,
"step": 7960
},
{
"epoch": 301.1320754716981,
"grad_norm": 1.2191785732688871,
"learning_rate": 6.916600090421955e-05,
"loss": 2.0414,
"step": 7980
},
{
"epoch": 301.8867924528302,
"grad_norm": 1.3922552836876412,
"learning_rate": 6.911409852258734e-05,
"loss": 2.0344,
"step": 8000
},
{
"epoch": 302.64150943396226,
"grad_norm": 1.001774801425353,
"learning_rate": 6.906209321387992e-05,
"loss": 2.0745,
"step": 8020
},
{
"epoch": 303.39622641509436,
"grad_norm": 1.1104337640037032,
"learning_rate": 6.900998517922203e-05,
"loss": 2.0593,
"step": 8040
},
{
"epoch": 304.1509433962264,
"grad_norm": 1.825335695980726,
"learning_rate": 6.895777462013575e-05,
"loss": 2.0459,
"step": 8060
},
{
"epoch": 304.9056603773585,
"grad_norm": 1.157162721000543,
"learning_rate": 6.89054617385396e-05,
"loss": 2.0565,
"step": 8080
},
{
"epoch": 305.66037735849056,
"grad_norm": 1.0813317731646406,
"learning_rate": 6.885304673674785e-05,
"loss": 2.0647,
"step": 8100
},
{
"epoch": 306.41509433962267,
"grad_norm": 1.1711257270339308,
"learning_rate": 6.880052981746973e-05,
"loss": 2.0779,
"step": 8120
},
{
"epoch": 307.1698113207547,
"grad_norm": 1.063366248076951,
"learning_rate": 6.874791118380859e-05,
"loss": 2.0299,
"step": 8140
},
{
"epoch": 307.92452830188677,
"grad_norm": 1.4839153895893722,
"learning_rate": 6.869519103926117e-05,
"loss": 2.0689,
"step": 8160
},
{
"epoch": 308.6792452830189,
"grad_norm": 1.0599730190677705,
"learning_rate": 6.864236958771677e-05,
"loss": 2.0559,
"step": 8180
},
{
"epoch": 309.4339622641509,
"grad_norm": 1.1000743640073944,
"learning_rate": 6.85894470334565e-05,
"loss": 2.0814,
"step": 8200
},
{
"epoch": 310.188679245283,
"grad_norm": 1.1134888630426287,
"learning_rate": 6.853642358115248e-05,
"loss": 2.0619,
"step": 8220
},
{
"epoch": 310.9433962264151,
"grad_norm": 1.4109893718513755,
"learning_rate": 6.848329943586703e-05,
"loss": 2.0478,
"step": 8240
},
{
"epoch": 311.6981132075472,
"grad_norm": 1.4005508549478216,
"learning_rate": 6.843007480305188e-05,
"loss": 2.0451,
"step": 8260
},
{
"epoch": 312.45283018867923,
"grad_norm": 1.1506459796822934,
"learning_rate": 6.83767498885474e-05,
"loss": 2.0496,
"step": 8280
},
{
"epoch": 313.20754716981133,
"grad_norm": 1.1846013546521996,
"learning_rate": 6.832332489858181e-05,
"loss": 2.0503,
"step": 8300
},
{
"epoch": 313.9622641509434,
"grad_norm": 1.083146150872066,
"learning_rate": 6.826980003977029e-05,
"loss": 2.0411,
"step": 8320
},
{
"epoch": 314.7169811320755,
"grad_norm": 1.1083923007981826,
"learning_rate": 6.821617551911432e-05,
"loss": 2.059,
"step": 8340
},
{
"epoch": 315.47169811320754,
"grad_norm": 1.7089827022606041,
"learning_rate": 6.816245154400081e-05,
"loss": 2.0316,
"step": 8360
},
{
"epoch": 316.22641509433964,
"grad_norm": 1.2036464145657677,
"learning_rate": 6.810862832220125e-05,
"loss": 2.0383,
"step": 8380
},
{
"epoch": 316.9811320754717,
"grad_norm": 1.0678492328292477,
"learning_rate": 6.8054706061871e-05,
"loss": 2.0357,
"step": 8400
},
{
"epoch": 317.7358490566038,
"grad_norm": 1.1510123829327024,
"learning_rate": 6.800068497154838e-05,
"loss": 2.0509,
"step": 8420
},
{
"epoch": 318.49056603773585,
"grad_norm": 1.1744519756591179,
"learning_rate": 6.794656526015402e-05,
"loss": 2.0362,
"step": 8440
},
{
"epoch": 319.24528301886795,
"grad_norm": 1.0951767070535987,
"learning_rate": 6.78923471369899e-05,
"loss": 2.0261,
"step": 8460
},
{
"epoch": 320.0,
"grad_norm": 1.40278574496307,
"learning_rate": 6.783803081173856e-05,
"loss": 2.0041,
"step": 8480
},
{
"epoch": 320.75471698113205,
"grad_norm": 1.2731462205629138,
"learning_rate": 6.778361649446238e-05,
"loss": 2.0455,
"step": 8500
},
{
"epoch": 321.50943396226415,
"grad_norm": 1.1686588861352702,
"learning_rate": 6.772910439560273e-05,
"loss": 2.0328,
"step": 8520
},
{
"epoch": 322.2641509433962,
"grad_norm": 1.0989551544372271,
"learning_rate": 6.767449472597907e-05,
"loss": 2.0495,
"step": 8540
},
{
"epoch": 323.0188679245283,
"grad_norm": 1.3624805761549945,
"learning_rate": 6.761978769678828e-05,
"loss": 2.0447,
"step": 8560
},
{
"epoch": 323.77358490566036,
"grad_norm": 1.1444357160826135,
"learning_rate": 6.75649835196037e-05,
"loss": 2.0663,
"step": 8580
},
{
"epoch": 324.52830188679246,
"grad_norm": 1.1946574491976927,
"learning_rate": 6.75100824063744e-05,
"loss": 2.0483,
"step": 8600
},
{
"epoch": 325.2830188679245,
"grad_norm": 1.0504702569050626,
"learning_rate": 6.745508456942438e-05,
"loss": 1.9978,
"step": 8620
},
{
"epoch": 326.0377358490566,
"grad_norm": 0.9767612100068984,
"learning_rate": 6.739999022145167e-05,
"loss": 2.0382,
"step": 8640
},
{
"epoch": 326.79245283018867,
"grad_norm": 1.0291078738332238,
"learning_rate": 6.734479957552753e-05,
"loss": 2.0298,
"step": 8660
},
{
"epoch": 327.54716981132077,
"grad_norm": 1.244368475618607,
"learning_rate": 6.72895128450957e-05,
"loss": 2.005,
"step": 8680
},
{
"epoch": 328.3018867924528,
"grad_norm": 1.1290954094741668,
"learning_rate": 6.723413024397144e-05,
"loss": 2.0569,
"step": 8700
},
{
"epoch": 329.0566037735849,
"grad_norm": 1.0915375487825718,
"learning_rate": 6.717865198634082e-05,
"loss": 2.0447,
"step": 8720
},
{
"epoch": 329.811320754717,
"grad_norm": 1.1768398401350053,
"learning_rate": 6.71230782867599e-05,
"loss": 2.0217,
"step": 8740
},
{
"epoch": 330.5660377358491,
"grad_norm": 1.104835402612007,
"learning_rate": 6.706740936015375e-05,
"loss": 2.0386,
"step": 8760
},
{
"epoch": 331.3207547169811,
"grad_norm": 1.1248015036534322,
"learning_rate": 6.70116454218158e-05,
"loss": 2.0103,
"step": 8780
},
{
"epoch": 332.07547169811323,
"grad_norm": 1.2169922349555569,
"learning_rate": 6.69557866874069e-05,
"loss": 2.0241,
"step": 8800
},
{
"epoch": 332.8301886792453,
"grad_norm": 1.214613807170357,
"learning_rate": 6.689983337295448e-05,
"loss": 2.0188,
"step": 8820
},
{
"epoch": 333.58490566037733,
"grad_norm": 1.790201434963867,
"learning_rate": 6.684378569485181e-05,
"loss": 1.9779,
"step": 8840
},
{
"epoch": 334.33962264150944,
"grad_norm": 1.1294537346241684,
"learning_rate": 6.678764386985706e-05,
"loss": 2.0288,
"step": 8860
},
{
"epoch": 335.0943396226415,
"grad_norm": 1.023220014865738,
"learning_rate": 6.673140811509254e-05,
"loss": 2.0388,
"step": 8880
},
{
"epoch": 335.8490566037736,
"grad_norm": 1.184810167823652,
"learning_rate": 6.667507864804373e-05,
"loss": 2.0527,
"step": 8900
},
{
"epoch": 336.60377358490564,
"grad_norm": 1.4108637345041166,
"learning_rate": 6.661865568655867e-05,
"loss": 2.0521,
"step": 8920
},
{
"epoch": 337.35849056603774,
"grad_norm": 1.1099661578909887,
"learning_rate": 6.656213944884687e-05,
"loss": 2.0142,
"step": 8940
},
{
"epoch": 338.1132075471698,
"grad_norm": 1.2550646623744501,
"learning_rate": 6.650553015347861e-05,
"loss": 2.0234,
"step": 8960
},
{
"epoch": 338.8679245283019,
"grad_norm": 1.3226209109800835,
"learning_rate": 6.64488280193841e-05,
"loss": 2.0026,
"step": 8980
},
{
"epoch": 339.62264150943395,
"grad_norm": 1.2466974723773605,
"learning_rate": 6.639203326585253e-05,
"loss": 2.0505,
"step": 9000
},
{
"epoch": 340.37735849056605,
"grad_norm": 1.2259925868881607,
"learning_rate": 6.633514611253129e-05,
"loss": 1.989,
"step": 9020
},
{
"epoch": 341.1320754716981,
"grad_norm": 1.2616555953485367,
"learning_rate": 6.627816677942518e-05,
"loss": 2.0172,
"step": 9040
},
{
"epoch": 341.8867924528302,
"grad_norm": 1.0660372107925478,
"learning_rate": 6.622109548689542e-05,
"loss": 2.0235,
"step": 9060
},
{
"epoch": 342.64150943396226,
"grad_norm": 1.2995047263783295,
"learning_rate": 6.616393245565893e-05,
"loss": 2.0116,
"step": 9080
},
{
"epoch": 343.39622641509436,
"grad_norm": 1.2947984731633606,
"learning_rate": 6.610667790678738e-05,
"loss": 2.0241,
"step": 9100
},
{
"epoch": 344.1509433962264,
"grad_norm": 1.008247863003288,
"learning_rate": 6.60493320617064e-05,
"loss": 1.984,
"step": 9120
},
{
"epoch": 344.9056603773585,
"grad_norm": 1.5922178618355085,
"learning_rate": 6.599189514219469e-05,
"loss": 1.989,
"step": 9140
},
{
"epoch": 345.66037735849056,
"grad_norm": 1.0785537649724395,
"learning_rate": 6.593436737038316e-05,
"loss": 2.0135,
"step": 9160
},
{
"epoch": 346.41509433962267,
"grad_norm": 1.1766322003509095,
"learning_rate": 6.58767489687541e-05,
"loss": 2.0021,
"step": 9180
},
{
"epoch": 347.1698113207547,
"grad_norm": 1.3777596137615202,
"learning_rate": 6.581904016014026e-05,
"loss": 1.9988,
"step": 9200
},
{
"epoch": 347.92452830188677,
"grad_norm": 1.5517987219865874,
"learning_rate": 6.57612411677241e-05,
"loss": 2.0309,
"step": 9220
},
{
"epoch": 348.6792452830189,
"grad_norm": 1.0560275221648643,
"learning_rate": 6.570335221503679e-05,
"loss": 1.9923,
"step": 9240
},
{
"epoch": 349.4339622641509,
"grad_norm": 1.3504991405267055,
"learning_rate": 6.564537352595744e-05,
"loss": 1.9739,
"step": 9260
},
{
"epoch": 350.188679245283,
"grad_norm": 1.153039781830911,
"learning_rate": 6.558730532471219e-05,
"loss": 1.9803,
"step": 9280
},
{
"epoch": 350.9433962264151,
"grad_norm": 0.9434571532030971,
"learning_rate": 6.55291478358734e-05,
"loss": 1.9677,
"step": 9300
},
{
"epoch": 351.6981132075472,
"grad_norm": 1.8784899020425583,
"learning_rate": 6.547090128435869e-05,
"loss": 1.9988,
"step": 9320
},
{
"epoch": 352.45283018867923,
"grad_norm": 1.449139419473746,
"learning_rate": 6.541256589543013e-05,
"loss": 1.9974,
"step": 9340
},
{
"epoch": 353.20754716981133,
"grad_norm": 1.3936924715065266,
"learning_rate": 6.53541418946934e-05,
"loss": 2.016,
"step": 9360
},
{
"epoch": 353.9622641509434,
"grad_norm": 1.6114753123601104,
"learning_rate": 6.529562950809679e-05,
"loss": 2.0021,
"step": 9380
},
{
"epoch": 354.7169811320755,
"grad_norm": 1.109612991369577,
"learning_rate": 6.523702896193052e-05,
"loss": 1.9928,
"step": 9400
},
{
"epoch": 355.47169811320754,
"grad_norm": 1.2128714849575388,
"learning_rate": 6.517834048282572e-05,
"loss": 1.9908,
"step": 9420
},
{
"epoch": 356.22641509433964,
"grad_norm": 1.6910853942561526,
"learning_rate": 6.511956429775353e-05,
"loss": 1.998,
"step": 9440
},
{
"epoch": 356.9811320754717,
"grad_norm": 1.1430018991497974,
"learning_rate": 6.506070063402434e-05,
"loss": 1.9726,
"step": 9460
},
{
"epoch": 357.7358490566038,
"grad_norm": 1.1413458296675405,
"learning_rate": 6.500174971928684e-05,
"loss": 1.9972,
"step": 9480
},
{
"epoch": 358.49056603773585,
"grad_norm": 1.1439825840759497,
"learning_rate": 6.494271178152717e-05,
"loss": 1.9965,
"step": 9500
},
{
"epoch": 359.24528301886795,
"grad_norm": 1.1294761259383999,
"learning_rate": 6.488358704906799e-05,
"loss": 1.9651,
"step": 9520
},
{
"epoch": 360.0,
"grad_norm": 1.0691230669285636,
"learning_rate": 6.482437575056767e-05,
"loss": 1.9559,
"step": 9540
},
{
"epoch": 360.75471698113205,
"grad_norm": 1.1830173398938235,
"learning_rate": 6.476507811501933e-05,
"loss": 2.0035,
"step": 9560
},
{
"epoch": 361.50943396226415,
"grad_norm": 1.0463609194131098,
"learning_rate": 6.470569437175001e-05,
"loss": 2.0062,
"step": 9580
},
{
"epoch": 362.2641509433962,
"grad_norm": 1.0991234719266971,
"learning_rate": 6.464622475041972e-05,
"loss": 1.9775,
"step": 9600
},
{
"epoch": 363.0188679245283,
"grad_norm": 1.198408129328553,
"learning_rate": 6.458666948102068e-05,
"loss": 1.9684,
"step": 9620
},
{
"epoch": 363.77358490566036,
"grad_norm": 1.2518491777745682,
"learning_rate": 6.452702879387625e-05,
"loss": 2.0052,
"step": 9640
},
{
"epoch": 364.52830188679246,
"grad_norm": 1.1372899117638453,
"learning_rate": 6.44673029196402e-05,
"loss": 2.005,
"step": 9660
},
{
"epoch": 365.2830188679245,
"grad_norm": 1.1412736472546972,
"learning_rate": 6.44074920892957e-05,
"loss": 1.9545,
"step": 9680
},
{
"epoch": 366.0377358490566,
"grad_norm": 1.295474241313066,
"learning_rate": 6.434759653415454e-05,
"loss": 1.9943,
"step": 9700
},
{
"epoch": 366.79245283018867,
"grad_norm": 1.6454682745260736,
"learning_rate": 6.42876164858561e-05,
"loss": 1.9831,
"step": 9720
},
{
"epoch": 367.54716981132077,
"grad_norm": 1.3152256044573012,
"learning_rate": 6.42275521763666e-05,
"loss": 1.9898,
"step": 9740
},
{
"epoch": 368.3018867924528,
"grad_norm": 1.1398547695205932,
"learning_rate": 6.416740383797806e-05,
"loss": 2.0018,
"step": 9760
},
{
"epoch": 369.0566037735849,
"grad_norm": 1.560025618616493,
"learning_rate": 6.410717170330754e-05,
"loss": 1.9774,
"step": 9780
},
{
"epoch": 369.811320754717,
"grad_norm": 1.1084036628854508,
"learning_rate": 6.404685600529614e-05,
"loss": 1.9898,
"step": 9800
},
{
"epoch": 370.5660377358491,
"grad_norm": 1.064043317797065,
"learning_rate": 6.398645697720813e-05,
"loss": 1.9683,
"step": 9820
},
{
"epoch": 371.3207547169811,
"grad_norm": 1.0561133254372814,
"learning_rate": 6.392597485263005e-05,
"loss": 1.9892,
"step": 9840
},
{
"epoch": 372.07547169811323,
"grad_norm": 1.8498750890966262,
"learning_rate": 6.386540986546981e-05,
"loss": 2.0028,
"step": 9860
},
{
"epoch": 372.8301886792453,
"grad_norm": 1.1839290834136853,
"learning_rate": 6.38047622499558e-05,
"loss": 2.0067,
"step": 9880
},
{
"epoch": 373.58490566037733,
"grad_norm": 1.0860526304824587,
"learning_rate": 6.374403224063593e-05,
"loss": 1.9771,
"step": 9900
},
{
"epoch": 374.33962264150944,
"grad_norm": 1.1746289875773082,
"learning_rate": 6.368322007237679e-05,
"loss": 1.9693,
"step": 9920
},
{
"epoch": 375.0943396226415,
"grad_norm": 1.3082714258157306,
"learning_rate": 6.36223259803627e-05,
"loss": 1.9737,
"step": 9940
},
{
"epoch": 375.8490566037736,
"grad_norm": 1.1647865395498773,
"learning_rate": 6.356135020009478e-05,
"loss": 1.9619,
"step": 9960
},
{
"epoch": 376.60377358490564,
"grad_norm": 0.947059541183795,
"learning_rate": 6.350029296739012e-05,
"loss": 1.975,
"step": 9980
},
{
"epoch": 377.35849056603774,
"grad_norm": 1.2248902808010191,
"learning_rate": 6.343915451838081e-05,
"loss": 1.9628,
"step": 10000
},
{
"epoch": 378.1132075471698,
"grad_norm": 1.097611241891744,
"learning_rate": 6.337793508951301e-05,
"loss": 1.9775,
"step": 10020
},
{
"epoch": 378.8679245283019,
"grad_norm": 1.2529669087878597,
"learning_rate": 6.331663491754607e-05,
"loss": 1.9468,
"step": 10040
},
{
"epoch": 379.62264150943395,
"grad_norm": 1.1767271144174725,
"learning_rate": 6.325525423955162e-05,
"loss": 1.9413,
"step": 10060
},
{
"epoch": 380.37735849056605,
"grad_norm": 1.282222785156654,
"learning_rate": 6.319379329291262e-05,
"loss": 1.9655,
"step": 10080
},
{
"epoch": 381.1320754716981,
"grad_norm": 0.9819686841799513,
"learning_rate": 6.313225231532246e-05,
"loss": 1.9537,
"step": 10100
},
{
"epoch": 381.8867924528302,
"grad_norm": 1.206003307363446,
"learning_rate": 6.307063154478407e-05,
"loss": 1.9387,
"step": 10120
},
{
"epoch": 382.64150943396226,
"grad_norm": 1.236739142400694,
"learning_rate": 6.300893121960891e-05,
"loss": 1.9478,
"step": 10140
},
{
"epoch": 383.39622641509436,
"grad_norm": 1.0017771325975895,
"learning_rate": 6.294715157841618e-05,
"loss": 1.9714,
"step": 10160
},
{
"epoch": 384.1509433962264,
"grad_norm": 1.0637999951499557,
"learning_rate": 6.28852928601318e-05,
"loss": 1.9905,
"step": 10180
},
{
"epoch": 384.9056603773585,
"grad_norm": 1.0944082795368726,
"learning_rate": 6.282335530398746e-05,
"loss": 1.9586,
"step": 10200
},
{
"epoch": 385.66037735849056,
"grad_norm": 1.0420947581782276,
"learning_rate": 6.276133914951982e-05,
"loss": 2.0008,
"step": 10220
},
{
"epoch": 386.41509433962267,
"grad_norm": 1.2531335945397626,
"learning_rate": 6.26992446365695e-05,
"loss": 1.9718,
"step": 10240
},
{
"epoch": 387.1698113207547,
"grad_norm": 1.0272789455614961,
"learning_rate": 6.26370720052801e-05,
"loss": 1.9741,
"step": 10260
},
{
"epoch": 387.92452830188677,
"grad_norm": 1.1543574176007045,
"learning_rate": 6.25748214960974e-05,
"loss": 1.9508,
"step": 10280
},
{
"epoch": 388.6792452830189,
"grad_norm": 1.123008926585049,
"learning_rate": 6.251249334976835e-05,
"loss": 1.9238,
"step": 10300
},
{
"epoch": 389.4339622641509,
"grad_norm": 1.1351605673087415,
"learning_rate": 6.245008780734015e-05,
"loss": 1.9379,
"step": 10320
},
{
"epoch": 390.188679245283,
"grad_norm": 1.139914151072252,
"learning_rate": 6.238760511015928e-05,
"loss": 1.9863,
"step": 10340
},
{
"epoch": 390.9433962264151,
"grad_norm": 1.3069434137417522,
"learning_rate": 6.232504549987069e-05,
"loss": 1.9569,
"step": 10360
},
{
"epoch": 391.6981132075472,
"grad_norm": 1.7598014364780348,
"learning_rate": 6.22624092184167e-05,
"loss": 1.9389,
"step": 10380
},
{
"epoch": 392.45283018867923,
"grad_norm": 1.0862334208555093,
"learning_rate": 6.21996965080362e-05,
"loss": 1.9744,
"step": 10400
},
{
"epoch": 393.20754716981133,
"grad_norm": 1.1400427784758083,
"learning_rate": 6.213690761126365e-05,
"loss": 1.9563,
"step": 10420
},
{
"epoch": 393.9622641509434,
"grad_norm": 1.171092319320692,
"learning_rate": 6.207404277092816e-05,
"loss": 1.9268,
"step": 10440
},
{
"epoch": 394.7169811320755,
"grad_norm": 1.2187674621534166,
"learning_rate": 6.201110223015247e-05,
"loss": 1.9141,
"step": 10460
},
{
"epoch": 395.47169811320754,
"grad_norm": 1.1182747577783947,
"learning_rate": 6.19480862323522e-05,
"loss": 1.9498,
"step": 10480
},
{
"epoch": 396.22641509433964,
"grad_norm": 1.2189637302318261,
"learning_rate": 6.188499502123471e-05,
"loss": 1.9563,
"step": 10500
},
{
"epoch": 396.9811320754717,
"grad_norm": 1.0928304287739772,
"learning_rate": 6.18218288407983e-05,
"loss": 1.976,
"step": 10520
},
{
"epoch": 397.7358490566038,
"grad_norm": 1.0366879822409767,
"learning_rate": 6.17585879353311e-05,
"loss": 1.9804,
"step": 10540
},
{
"epoch": 398.49056603773585,
"grad_norm": 1.153371078643115,
"learning_rate": 6.169527254941035e-05,
"loss": 1.987,
"step": 10560
},
{
"epoch": 399.24528301886795,
"grad_norm": 1.2467206603942558,
"learning_rate": 6.163188292790129e-05,
"loss": 1.958,
"step": 10580
},
{
"epoch": 400.0,
"grad_norm": 1.222097823579558,
"learning_rate": 6.156841931595623e-05,
"loss": 1.9838,
"step": 10600
},
{
"epoch": 400.75471698113205,
"grad_norm": 1.1722193895632427,
"learning_rate": 6.150488195901367e-05,
"loss": 1.9496,
"step": 10620
},
{
"epoch": 401.50943396226415,
"grad_norm": 1.7976530024431303,
"learning_rate": 6.144127110279726e-05,
"loss": 1.9869,
"step": 10640
},
{
"epoch": 402.2641509433962,
"grad_norm": 1.1341428853515279,
"learning_rate": 6.137758699331498e-05,
"loss": 1.9235,
"step": 10660
},
{
"epoch": 403.0188679245283,
"grad_norm": 1.107959509965475,
"learning_rate": 6.131382987685803e-05,
"loss": 1.927,
"step": 10680
},
{
"epoch": 403.77358490566036,
"grad_norm": 1.8525780404729881,
"learning_rate": 6.125000000000001e-05,
"loss": 1.9487,
"step": 10700
},
{
"epoch": 404.52830188679246,
"grad_norm": 1.1448557605131082,
"learning_rate": 6.118609760959587e-05,
"loss": 1.9547,
"step": 10720
},
{
"epoch": 405.2830188679245,
"grad_norm": 1.1150883211805585,
"learning_rate": 6.112212295278103e-05,
"loss": 1.9487,
"step": 10740
},
{
"epoch": 406.0377358490566,
"grad_norm": 1.149956522288425,
"learning_rate": 6.105807627697039e-05,
"loss": 1.9614,
"step": 10760
},
{
"epoch": 406.79245283018867,
"grad_norm": 1.1217970508214505,
"learning_rate": 6.099395782985736e-05,
"loss": 1.9555,
"step": 10780
},
{
"epoch": 407.54716981132077,
"grad_norm": 1.261444831314206,
"learning_rate": 6.0929767859412914e-05,
"loss": 1.9527,
"step": 10800
},
{
"epoch": 408.3018867924528,
"grad_norm": 1.2610523391632782,
"learning_rate": 6.086550661388466e-05,
"loss": 1.9321,
"step": 10820
},
{
"epoch": 409.0566037735849,
"grad_norm": 1.1090357115444625,
"learning_rate": 6.080117434179586e-05,
"loss": 1.9211,
"step": 10840
},
{
"epoch": 409.811320754717,
"grad_norm": 0.9790706595618122,
"learning_rate": 6.0736771291944384e-05,
"loss": 1.9203,
"step": 10860
},
{
"epoch": 410.5660377358491,
"grad_norm": 1.2166651155014474,
"learning_rate": 6.067229771340195e-05,
"loss": 1.9323,
"step": 10880
},
{
"epoch": 411.3207547169811,
"grad_norm": 1.433284831152631,
"learning_rate": 6.0607753855512944e-05,
"loss": 1.9623,
"step": 10900
},
{
"epoch": 412.07547169811323,
"grad_norm": 1.5219003618009672,
"learning_rate": 6.054313996789358e-05,
"loss": 1.9198,
"step": 10920
},
{
"epoch": 412.8301886792453,
"grad_norm": 1.1498060228780786,
"learning_rate": 6.047845630043091e-05,
"loss": 1.9487,
"step": 10940
},
{
"epoch": 413.58490566037733,
"grad_norm": 1.1202482322074203,
"learning_rate": 6.041370310328184e-05,
"loss": 1.9067,
"step": 10960
},
{
"epoch": 414.33962264150944,
"grad_norm": 1.2430430079147141,
"learning_rate": 6.0348880626872184e-05,
"loss": 1.9382,
"step": 10980
},
{
"epoch": 415.0943396226415,
"grad_norm": 1.0869359699622836,
"learning_rate": 6.028398912189569e-05,
"loss": 1.9611,
"step": 11000
},
{
"epoch": 415.8490566037736,
"grad_norm": 1.2526066034095944,
"learning_rate": 6.0219028839313045e-05,
"loss": 1.9644,
"step": 11020
},
{
"epoch": 416.60377358490564,
"grad_norm": 1.3228258582837578,
"learning_rate": 6.015400003035096e-05,
"loss": 1.9401,
"step": 11040
},
{
"epoch": 417.35849056603774,
"grad_norm": 1.9762624742364299,
"learning_rate": 6.008890294650111e-05,
"loss": 1.9465,
"step": 11060
},
{
"epoch": 418.1132075471698,
"grad_norm": 1.1424506198639062,
"learning_rate": 6.0023737839519284e-05,
"loss": 1.9439,
"step": 11080
},
{
"epoch": 418.8679245283019,
"grad_norm": 1.2107658482065982,
"learning_rate": 5.995850496142429e-05,
"loss": 1.9342,
"step": 11100
},
{
"epoch": 419.62264150943395,
"grad_norm": 1.1945042871517195,
"learning_rate": 5.989320456449705e-05,
"loss": 1.9463,
"step": 11120
},
{
"epoch": 420.37735849056605,
"grad_norm": 1.4521278706471037,
"learning_rate": 5.9827836901279616e-05,
"loss": 1.9097,
"step": 11140
},
{
"epoch": 421.1320754716981,
"grad_norm": 1.3941560968372226,
"learning_rate": 5.97624022245742e-05,
"loss": 1.9332,
"step": 11160
},
{
"epoch": 421.8867924528302,
"grad_norm": 1.39870585850457,
"learning_rate": 5.969690078744211e-05,
"loss": 1.9458,
"step": 11180
},
{
"epoch": 422.64150943396226,
"grad_norm": 1.0274165784628992,
"learning_rate": 5.963133284320292e-05,
"loss": 1.9365,
"step": 11200
},
{
"epoch": 423.39622641509436,
"grad_norm": 1.1694905737535597,
"learning_rate": 5.956569864543338e-05,
"loss": 1.8966,
"step": 11220
},
{
"epoch": 424.1509433962264,
"grad_norm": 2.28522960541773,
"learning_rate": 5.9499998447966484e-05,
"loss": 1.9332,
"step": 11240
},
{
"epoch": 424.9056603773585,
"grad_norm": 1.1252246816652476,
"learning_rate": 5.943423250489044e-05,
"loss": 1.9308,
"step": 11260
},
{
"epoch": 425.66037735849056,
"grad_norm": 1.132211561056973,
"learning_rate": 5.9368401070547756e-05,
"loss": 1.9221,
"step": 11280
},
{
"epoch": 426.41509433962267,
"grad_norm": 1.0848974124812198,
"learning_rate": 5.93025043995342e-05,
"loss": 1.9374,
"step": 11300
},
{
"epoch": 427.1698113207547,
"grad_norm": 1.0363649309093041,
"learning_rate": 5.9236542746697845e-05,
"loss": 1.9461,
"step": 11320
},
{
"epoch": 427.92452830188677,
"grad_norm": 1.1928905874896651,
"learning_rate": 5.9170516367138065e-05,
"loss": 1.9378,
"step": 11340
},
{
"epoch": 428.6792452830189,
"grad_norm": 1.1106230737067035,
"learning_rate": 5.910442551620457e-05,
"loss": 1.942,
"step": 11360
},
{
"epoch": 429.4339622641509,
"grad_norm": 1.1576265708604865,
"learning_rate": 5.903827044949638e-05,
"loss": 1.9471,
"step": 11380
},
{
"epoch": 430.188679245283,
"grad_norm": 1.1174079944741127,
"learning_rate": 5.897205142286091e-05,
"loss": 1.8922,
"step": 11400
},
{
"epoch": 430.9433962264151,
"grad_norm": 1.3085632343404145,
"learning_rate": 5.890576869239289e-05,
"loss": 1.9458,
"step": 11420
},
{
"epoch": 431.6981132075472,
"grad_norm": 1.2863747903344196,
"learning_rate": 5.883942251443342e-05,
"loss": 1.9099,
"step": 11440
},
{
"epoch": 432.45283018867923,
"grad_norm": 1.0551943593908828,
"learning_rate": 5.877301314556899e-05,
"loss": 1.9141,
"step": 11460
},
{
"epoch": 433.20754716981133,
"grad_norm": 1.086077987508793,
"learning_rate": 5.870654084263047e-05,
"loss": 1.96,
"step": 11480
},
{
"epoch": 433.9622641509434,
"grad_norm": 1.3656558008500363,
"learning_rate": 5.864000586269215e-05,
"loss": 1.904,
"step": 11500
},
{
"epoch": 434.7169811320755,
"grad_norm": 1.4439265227942644,
"learning_rate": 5.8573408463070655e-05,
"loss": 1.9273,
"step": 11520
},
{
"epoch": 435.47169811320754,
"grad_norm": 1.3611886653187657,
"learning_rate": 5.850674890132405e-05,
"loss": 1.9034,
"step": 11540
},
{
"epoch": 436.22641509433964,
"grad_norm": 1.3616986059829845,
"learning_rate": 5.844002743525081e-05,
"loss": 1.9143,
"step": 11560
},
{
"epoch": 436.9811320754717,
"grad_norm": 1.1127209994732485,
"learning_rate": 5.8373244322888796e-05,
"loss": 1.9467,
"step": 11580
},
{
"epoch": 437.7358490566038,
"grad_norm": 1.2452581872873123,
"learning_rate": 5.83063998225143e-05,
"loss": 1.946,
"step": 11600
},
{
"epoch": 438.49056603773585,
"grad_norm": 1.169219637417814,
"learning_rate": 5.823949419264102e-05,
"loss": 1.9057,
"step": 11620
},
{
"epoch": 439.24528301886795,
"grad_norm": 1.3149994286787028,
"learning_rate": 5.817252769201905e-05,
"loss": 1.8922,
"step": 11640
},
{
"epoch": 440.0,
"grad_norm": 1.3111574851574335,
"learning_rate": 5.81055005796339e-05,
"loss": 1.9222,
"step": 11660
},
{
"epoch": 440.75471698113205,
"grad_norm": 1.187457856172297,
"learning_rate": 5.803841311470551e-05,
"loss": 1.9188,
"step": 11680
},
{
"epoch": 441.50943396226415,
"grad_norm": 1.057264779435906,
"learning_rate": 5.7971265556687206e-05,
"loss": 1.9185,
"step": 11700
},
{
"epoch": 442.2641509433962,
"grad_norm": 1.1403855029477634,
"learning_rate": 5.790405816526473e-05,
"loss": 1.9328,
"step": 11720
},
{
"epoch": 443.0188679245283,
"grad_norm": 1.2270148994812622,
"learning_rate": 5.78367912003552e-05,
"loss": 1.8952,
"step": 11740
},
{
"epoch": 443.77358490566036,
"grad_norm": 1.0385291337101263,
"learning_rate": 5.776946492210618e-05,
"loss": 1.9042,
"step": 11760
},
{
"epoch": 444.52830188679246,
"grad_norm": 1.2996045459665522,
"learning_rate": 5.770207959089455e-05,
"loss": 1.9373,
"step": 11780
},
{
"epoch": 445.2830188679245,
"grad_norm": 1.1405006769622614,
"learning_rate": 5.763463546732563e-05,
"loss": 1.9035,
"step": 11800
},
{
"epoch": 446.0377358490566,
"grad_norm": 1.2182586988416257,
"learning_rate": 5.756713281223206e-05,
"loss": 1.936,
"step": 11820
},
{
"epoch": 446.79245283018867,
"grad_norm": 1.097561953783009,
"learning_rate": 5.74995718866729e-05,
"loss": 1.9057,
"step": 11840
},
{
"epoch": 447.54716981132077,
"grad_norm": 1.0690078287157365,
"learning_rate": 5.743195295193255e-05,
"loss": 1.9074,
"step": 11860
},
{
"epoch": 448.3018867924528,
"grad_norm": 1.1127790113128593,
"learning_rate": 5.736427626951971e-05,
"loss": 1.9269,
"step": 11880
},
{
"epoch": 449.0566037735849,
"grad_norm": 1.0521548682001445,
"learning_rate": 5.729654210116646e-05,
"loss": 1.897,
"step": 11900
},
{
"epoch": 449.811320754717,
"grad_norm": 1.0831322365716964,
"learning_rate": 5.7228750708827196e-05,
"loss": 1.9019,
"step": 11920
},
{
"epoch": 450.5660377358491,
"grad_norm": 1.113425539515294,
"learning_rate": 5.71609023546776e-05,
"loss": 1.8995,
"step": 11940
},
{
"epoch": 451.3207547169811,
"grad_norm": 1.1378527380008467,
"learning_rate": 5.709299730111367e-05,
"loss": 1.9112,
"step": 11960
},
{
"epoch": 452.07547169811323,
"grad_norm": 1.2308344759482057,
"learning_rate": 5.702503581075065e-05,
"loss": 1.8869,
"step": 11980
},
{
"epoch": 452.8301886792453,
"grad_norm": 1.3869181367868268,
"learning_rate": 5.6957018146422106e-05,
"loss": 1.9092,
"step": 12000
},
{
"epoch": 453.58490566037733,
"grad_norm": 1.1702979518774306,
"learning_rate": 5.688894457117877e-05,
"loss": 1.8944,
"step": 12020
},
{
"epoch": 454.33962264150944,
"grad_norm": 1.2974690219427283,
"learning_rate": 5.6820815348287674e-05,
"loss": 1.8794,
"step": 12040
},
{
"epoch": 455.0943396226415,
"grad_norm": 1.3757370848375583,
"learning_rate": 5.675263074123103e-05,
"loss": 1.9208,
"step": 12060
},
{
"epoch": 455.8490566037736,
"grad_norm": 1.3314963474728592,
"learning_rate": 5.668439101370524e-05,
"loss": 1.8823,
"step": 12080
},
{
"epoch": 456.60377358490564,
"grad_norm": 1.1525239716029143,
"learning_rate": 5.6616096429619885e-05,
"loss": 1.8778,
"step": 12100
},
{
"epoch": 457.35849056603774,
"grad_norm": 1.1391429331630094,
"learning_rate": 5.6547747253096713e-05,
"loss": 1.8973,
"step": 12120
},
{
"epoch": 458.1132075471698,
"grad_norm": 1.2813875070982645,
"learning_rate": 5.647934374846856e-05,
"loss": 1.9037,
"step": 12140
},
{
"epoch": 458.8679245283019,
"grad_norm": 1.130130379386682,
"learning_rate": 5.641088618027841e-05,
"loss": 1.8946,
"step": 12160
},
{
"epoch": 459.62264150943395,
"grad_norm": 1.189098976296786,
"learning_rate": 5.6342374813278305e-05,
"loss": 1.9122,
"step": 12180
},
{
"epoch": 460.37735849056605,
"grad_norm": 1.18982288351709,
"learning_rate": 5.627380991242839e-05,
"loss": 1.8893,
"step": 12200
},
{
"epoch": 461.1320754716981,
"grad_norm": 1.3440462024222728,
"learning_rate": 5.6205191742895787e-05,
"loss": 1.8879,
"step": 12220
},
{
"epoch": 461.8867924528302,
"grad_norm": 1.0998628162432096,
"learning_rate": 5.613652057005367e-05,
"loss": 1.8911,
"step": 12240
},
{
"epoch": 462.64150943396226,
"grad_norm": 1.0660994627393063,
"learning_rate": 5.6067796659480196e-05,
"loss": 1.9055,
"step": 12260
},
{
"epoch": 463.39622641509436,
"grad_norm": 1.7426680752228556,
"learning_rate": 5.599902027695745e-05,
"loss": 1.897,
"step": 12280
},
{
"epoch": 464.1509433962264,
"grad_norm": 1.388841332022157,
"learning_rate": 5.593019168847049e-05,
"loss": 1.8812,
"step": 12300
},
{
"epoch": 464.9056603773585,
"grad_norm": 1.2274558384609464,
"learning_rate": 5.586131116020621e-05,
"loss": 1.8496,
"step": 12320
},
{
"epoch": 465.66037735849056,
"grad_norm": 1.1945002690405846,
"learning_rate": 5.5792378958552456e-05,
"loss": 1.9146,
"step": 12340
},
{
"epoch": 466.41509433962267,
"grad_norm": 1.1629769495886029,
"learning_rate": 5.5723395350096866e-05,
"loss": 1.8734,
"step": 12360
},
{
"epoch": 467.1698113207547,
"grad_norm": 1.1703423211235366,
"learning_rate": 5.565436060162589e-05,
"loss": 1.8882,
"step": 12380
},
{
"epoch": 467.92452830188677,
"grad_norm": 1.3904930914694782,
"learning_rate": 5.5585274980123765e-05,
"loss": 1.8794,
"step": 12400
},
{
"epoch": 468.6792452830189,
"grad_norm": 1.1043102032574945,
"learning_rate": 5.551613875277148e-05,
"loss": 1.888,
"step": 12420
},
{
"epoch": 469.4339622641509,
"grad_norm": 1.019172984960956,
"learning_rate": 5.5446952186945716e-05,
"loss": 1.8887,
"step": 12440
},
{
"epoch": 470.188679245283,
"grad_norm": 1.2815784609995193,
"learning_rate": 5.537771555021785e-05,
"loss": 1.9026,
"step": 12460
},
{
"epoch": 470.9433962264151,
"grad_norm": 1.0452909524777938,
"learning_rate": 5.53084291103529e-05,
"loss": 1.8688,
"step": 12480
},
{
"epoch": 471.6981132075472,
"grad_norm": 1.2824929707840547,
"learning_rate": 5.5239093135308484e-05,
"loss": 1.8568,
"step": 12500
},
{
"epoch": 472.45283018867923,
"grad_norm": 1.0473918662270072,
"learning_rate": 5.516970789323382e-05,
"loss": 1.8962,
"step": 12520
},
{
"epoch": 473.20754716981133,
"grad_norm": 1.1551860073406197,
"learning_rate": 5.5100273652468596e-05,
"loss": 1.9053,
"step": 12540
},
{
"epoch": 473.9622641509434,
"grad_norm": 1.2315884678620779,
"learning_rate": 5.50307906815421e-05,
"loss": 1.8802,
"step": 12560
},
{
"epoch": 474.7169811320755,
"grad_norm": 1.2036057101238689,
"learning_rate": 5.496125924917195e-05,
"loss": 1.8848,
"step": 12580
},
{
"epoch": 475.47169811320754,
"grad_norm": 1.1443042979106692,
"learning_rate": 5.4891679624263313e-05,
"loss": 1.8993,
"step": 12600
},
{
"epoch": 476.22641509433964,
"grad_norm": 1.1112985130684456,
"learning_rate": 5.482205207590763e-05,
"loss": 1.8997,
"step": 12620
},
{
"epoch": 476.9811320754717,
"grad_norm": 1.1198907315048803,
"learning_rate": 5.475237687338175e-05,
"loss": 1.9204,
"step": 12640
},
{
"epoch": 477.7358490566038,
"grad_norm": 1.0505243476691362,
"learning_rate": 5.468265428614679e-05,
"loss": 1.8824,
"step": 12660
},
{
"epoch": 478.49056603773585,
"grad_norm": 1.1618158349057395,
"learning_rate": 5.461288458384711e-05,
"loss": 1.8675,
"step": 12680
},
{
"epoch": 479.24528301886795,
"grad_norm": 1.310696647632245,
"learning_rate": 5.454306803630931e-05,
"loss": 1.8617,
"step": 12700
},
{
"epoch": 480.0,
"grad_norm": 1.2853008412145361,
"learning_rate": 5.447320491354114e-05,
"loss": 1.8798,
"step": 12720
},
{
"epoch": 480.75471698113205,
"grad_norm": 1.2035604713641803,
"learning_rate": 5.440329548573049e-05,
"loss": 1.8505,
"step": 12740
},
{
"epoch": 481.50943396226415,
"grad_norm": 1.301768301418178,
"learning_rate": 5.433334002324431e-05,
"loss": 1.8849,
"step": 12760
},
{
"epoch": 482.2641509433962,
"grad_norm": 1.0741158531319273,
"learning_rate": 5.426333879662761e-05,
"loss": 1.8362,
"step": 12780
},
{
"epoch": 483.0188679245283,
"grad_norm": 1.2118720683926874,
"learning_rate": 5.419329207660237e-05,
"loss": 1.8811,
"step": 12800
},
{
"epoch": 483.77358490566036,
"grad_norm": 1.295829194970654,
"learning_rate": 5.412320013406651e-05,
"loss": 1.8473,
"step": 12820
},
{
"epoch": 484.52830188679246,
"grad_norm": 1.2658203604478202,
"learning_rate": 5.405306324009282e-05,
"loss": 1.8728,
"step": 12840
},
{
"epoch": 485.2830188679245,
"grad_norm": 1.2195390339098875,
"learning_rate": 5.3982881665928015e-05,
"loss": 1.8704,
"step": 12860
},
{
"epoch": 486.0377358490566,
"grad_norm": 1.067227068131729,
"learning_rate": 5.391265568299149e-05,
"loss": 1.8619,
"step": 12880
},
{
"epoch": 486.79245283018867,
"grad_norm": 1.3306442274846357,
"learning_rate": 5.384238556287451e-05,
"loss": 1.8638,
"step": 12900
},
{
"epoch": 487.54716981132077,
"grad_norm": 1.2531810114251472,
"learning_rate": 5.377207157733893e-05,
"loss": 1.8839,
"step": 12920
},
{
"epoch": 488.3018867924528,
"grad_norm": 1.0879029191456078,
"learning_rate": 5.370171399831631e-05,
"loss": 1.866,
"step": 12940
},
{
"epoch": 489.0566037735849,
"grad_norm": 1.1769881515511749,
"learning_rate": 5.363131309790678e-05,
"loss": 1.8253,
"step": 12960
},
{
"epoch": 489.811320754717,
"grad_norm": 1.3614975573612427,
"learning_rate": 5.356086914837802e-05,
"loss": 1.8487,
"step": 12980
},
{
"epoch": 490.5660377358491,
"grad_norm": 1.5342718531352588,
"learning_rate": 5.349038242216419e-05,
"loss": 1.847,
"step": 13000
},
{
"epoch": 491.3207547169811,
"grad_norm": 1.1571547119310825,
"learning_rate": 5.341985319186489e-05,
"loss": 1.8822,
"step": 13020
},
{
"epoch": 492.07547169811323,
"grad_norm": 1.1739881074209173,
"learning_rate": 5.33492817302441e-05,
"loss": 1.8531,
"step": 13040
},
{
"epoch": 492.8301886792453,
"grad_norm": 1.1934573145337144,
"learning_rate": 5.3278668310229125e-05,
"loss": 1.8986,
"step": 13060
},
{
"epoch": 493.58490566037733,
"grad_norm": 2.343948986647593,
"learning_rate": 5.320801320490955e-05,
"loss": 1.8935,
"step": 13080
},
{
"epoch": 494.33962264150944,
"grad_norm": 1.0764970314512263,
"learning_rate": 5.3137316687536136e-05,
"loss": 1.854,
"step": 13100
},
{
"epoch": 495.0943396226415,
"grad_norm": 1.0129335749841757,
"learning_rate": 5.3066579031519824e-05,
"loss": 1.874,
"step": 13120
},
{
"epoch": 495.8490566037736,
"grad_norm": 1.3577114498479963,
"learning_rate": 5.299580051043069e-05,
"loss": 1.8534,
"step": 13140
},
{
"epoch": 496.60377358490564,
"grad_norm": 1.1849380554040083,
"learning_rate": 5.292498139799678e-05,
"loss": 1.8705,
"step": 13160
},
{
"epoch": 497.35849056603774,
"grad_norm": 1.1290155132472126,
"learning_rate": 5.2854121968103185e-05,
"loss": 1.8659,
"step": 13180
},
{
"epoch": 498.1132075471698,
"grad_norm": 1.3627686926052522,
"learning_rate": 5.278322249479088e-05,
"loss": 1.8686,
"step": 13200
},
{
"epoch": 498.8679245283019,
"grad_norm": 1.147585604555274,
"learning_rate": 5.271228325225573e-05,
"loss": 1.8301,
"step": 13220
},
{
"epoch": 499.62264150943395,
"grad_norm": 1.124456551859716,
"learning_rate": 5.264130451484736e-05,
"loss": 1.846,
"step": 13240
},
{
"epoch": 500.37735849056605,
"grad_norm": 1.1328557419125425,
"learning_rate": 5.257028655706819e-05,
"loss": 1.8489,
"step": 13260
},
{
"epoch": 501.1320754716981,
"grad_norm": 1.3248366741093285,
"learning_rate": 5.249922965357231e-05,
"loss": 1.847,
"step": 13280
},
{
"epoch": 501.8867924528302,
"grad_norm": 1.2987470821979115,
"learning_rate": 5.24281340791644e-05,
"loss": 1.8598,
"step": 13300
},
{
"epoch": 502.64150943396226,
"grad_norm": 1.2954015666799925,
"learning_rate": 5.235700010879869e-05,
"loss": 1.8144,
"step": 13320
},
{
"epoch": 503.39622641509436,
"grad_norm": 1.14457084913068,
"learning_rate": 5.228582801757796e-05,
"loss": 1.8666,
"step": 13340
},
{
"epoch": 504.1509433962264,
"grad_norm": 1.1877213110343792,
"learning_rate": 5.221461808075237e-05,
"loss": 1.8637,
"step": 13360
},
{
"epoch": 504.9056603773585,
"grad_norm": 1.0687096720310838,
"learning_rate": 5.214337057371846e-05,
"loss": 1.8639,
"step": 13380
},
{
"epoch": 505.66037735849056,
"grad_norm": 1.1296264305151373,
"learning_rate": 5.207208577201805e-05,
"loss": 1.8508,
"step": 13400
},
{
"epoch": 506.41509433962267,
"grad_norm": 1.4245030753661052,
"learning_rate": 5.200076395133721e-05,
"loss": 1.8328,
"step": 13420
},
{
"epoch": 507.1698113207547,
"grad_norm": 1.3736962730435212,
"learning_rate": 5.1929405387505185e-05,
"loss": 1.8402,
"step": 13440
},
{
"epoch": 507.92452830188677,
"grad_norm": 1.3874806329791736,
"learning_rate": 5.185801035649329e-05,
"loss": 1.8392,
"step": 13460
},
{
"epoch": 508.6792452830189,
"grad_norm": 1.2993168124302985,
"learning_rate": 5.1786579134413916e-05,
"loss": 1.8357,
"step": 13480
},
{
"epoch": 509.4339622641509,
"grad_norm": 1.1615849238599296,
"learning_rate": 5.171511199751936e-05,
"loss": 1.8602,
"step": 13500
},
{
"epoch": 510.188679245283,
"grad_norm": 1.313961870036688,
"learning_rate": 5.164360922220089e-05,
"loss": 1.8276,
"step": 13520
},
{
"epoch": 510.9433962264151,
"grad_norm": 1.240911570140835,
"learning_rate": 5.157207108498754e-05,
"loss": 1.83,
"step": 13540
},
{
"epoch": 511.6981132075472,
"grad_norm": 1.2739381058558579,
"learning_rate": 5.1500497862545134e-05,
"loss": 1.864,
"step": 13560
},
{
"epoch": 512.4528301886793,
"grad_norm": 1.3641387795362538,
"learning_rate": 5.142888983167516e-05,
"loss": 1.9016,
"step": 13580
},
{
"epoch": 513.2075471698113,
"grad_norm": 1.233949050539118,
"learning_rate": 5.135724726931374e-05,
"loss": 1.8224,
"step": 13600
},
{
"epoch": 513.9622641509434,
"grad_norm": 1.2764553522178392,
"learning_rate": 5.128557045253056e-05,
"loss": 1.8489,
"step": 13620
},
{
"epoch": 514.7169811320755,
"grad_norm": 1.0847794881407822,
"learning_rate": 5.121385965852773e-05,
"loss": 1.8433,
"step": 13640
},
{
"epoch": 515.4716981132076,
"grad_norm": 1.211639546404476,
"learning_rate": 5.114211516463883e-05,
"loss": 1.8592,
"step": 13660
},
{
"epoch": 516.2264150943396,
"grad_norm": 1.6499382505803508,
"learning_rate": 5.1070337248327704e-05,
"loss": 1.8491,
"step": 13680
},
{
"epoch": 516.9811320754717,
"grad_norm": 1.1415154218905448,
"learning_rate": 5.0998526187187506e-05,
"loss": 1.8263,
"step": 13700
},
{
"epoch": 517.7358490566038,
"grad_norm": 1.2931592721596668,
"learning_rate": 5.092668225893955e-05,
"loss": 1.8341,
"step": 13720
},
{
"epoch": 518.4905660377359,
"grad_norm": 1.1289936456910783,
"learning_rate": 5.0854805741432266e-05,
"loss": 1.8256,
"step": 13740
},
{
"epoch": 519.2452830188679,
"grad_norm": 1.1568681178648177,
"learning_rate": 5.078289691264009e-05,
"loss": 1.8268,
"step": 13760
},
{
"epoch": 520.0,
"grad_norm": 1.2075151796344337,
"learning_rate": 5.071095605066247e-05,
"loss": 1.8342,
"step": 13780
},
{
"epoch": 520.7547169811321,
"grad_norm": 1.41061431054736,
"learning_rate": 5.063898343372271e-05,
"loss": 1.8569,
"step": 13800
},
{
"epoch": 521.5094339622641,
"grad_norm": 1.7141184097601845,
"learning_rate": 5.0566979340166915e-05,
"loss": 1.8447,
"step": 13820
},
{
"epoch": 522.2641509433962,
"grad_norm": 1.1912730571129804,
"learning_rate": 5.0494944048462946e-05,
"loss": 1.8632,
"step": 13840
},
{
"epoch": 523.0188679245283,
"grad_norm": 1.2784159482259496,
"learning_rate": 5.042287783719931e-05,
"loss": 1.8293,
"step": 13860
},
{
"epoch": 523.7735849056604,
"grad_norm": 1.1444265949319492,
"learning_rate": 5.0350780985084076e-05,
"loss": 1.8423,
"step": 13880
},
{
"epoch": 524.5283018867924,
"grad_norm": 1.1366776283872817,
"learning_rate": 5.027865377094383e-05,
"loss": 1.8284,
"step": 13900
},
{
"epoch": 525.2830188679245,
"grad_norm": 1.2870871198292675,
"learning_rate": 5.020649647372258e-05,
"loss": 1.8313,
"step": 13920
},
{
"epoch": 526.0377358490566,
"grad_norm": 1.8138208437079086,
"learning_rate": 5.013430937248066e-05,
"loss": 1.8382,
"step": 13940
},
{
"epoch": 526.7924528301887,
"grad_norm": 1.319578877513452,
"learning_rate": 5.00620927463937e-05,
"loss": 1.8343,
"step": 13960
},
{
"epoch": 527.5471698113207,
"grad_norm": 1.2254503656584737,
"learning_rate": 4.998984687475148e-05,
"loss": 1.8439,
"step": 13980
},
{
"epoch": 528.3018867924528,
"grad_norm": 1.0900572753736815,
"learning_rate": 4.9917572036956896e-05,
"loss": 1.8339,
"step": 14000
},
{
"epoch": 529.0566037735849,
"grad_norm": 1.3672093328811397,
"learning_rate": 4.984526851252489e-05,
"loss": 1.8269,
"step": 14020
},
{
"epoch": 529.811320754717,
"grad_norm": 1.0474703180578433,
"learning_rate": 4.97729365810813e-05,
"loss": 1.8278,
"step": 14040
},
{
"epoch": 530.566037735849,
"grad_norm": 1.302303693187968,
"learning_rate": 4.9700576522361875e-05,
"loss": 1.8406,
"step": 14060
},
{
"epoch": 531.3207547169811,
"grad_norm": 1.3351319231004828,
"learning_rate": 4.96281886162111e-05,
"loss": 1.805,
"step": 14080
},
{
"epoch": 532.0754716981132,
"grad_norm": 1.7423062973900807,
"learning_rate": 4.955577314258118e-05,
"loss": 1.8021,
"step": 14100
},
{
"epoch": 532.8301886792453,
"grad_norm": 1.0851524592672839,
"learning_rate": 4.9483330381530944e-05,
"loss": 1.8376,
"step": 14120
},
{
"epoch": 533.5849056603773,
"grad_norm": 1.110982412101906,
"learning_rate": 4.941086061322473e-05,
"loss": 1.8468,
"step": 14140
},
{
"epoch": 534.3396226415094,
"grad_norm": 1.1481042439437046,
"learning_rate": 4.933836411793133e-05,
"loss": 1.8131,
"step": 14160
},
{
"epoch": 535.0943396226415,
"grad_norm": 1.1504371756112235,
"learning_rate": 4.926584117602288e-05,
"loss": 1.8081,
"step": 14180
},
{
"epoch": 535.8490566037735,
"grad_norm": 1.1403864383961178,
"learning_rate": 4.919329206797387e-05,
"loss": 1.823,
"step": 14200
},
{
"epoch": 536.6037735849056,
"grad_norm": 1.2962348904995422,
"learning_rate": 4.912071707435988e-05,
"loss": 1.8187,
"step": 14220
},
{
"epoch": 537.3584905660377,
"grad_norm": 1.1885752096952027,
"learning_rate": 4.904811647585668e-05,
"loss": 1.8256,
"step": 14240
},
{
"epoch": 538.1132075471698,
"grad_norm": 1.064497747677543,
"learning_rate": 4.897549055323902e-05,
"loss": 1.8,
"step": 14260
},
{
"epoch": 538.8679245283018,
"grad_norm": 1.4469124816185257,
"learning_rate": 4.8902839587379614e-05,
"loss": 1.8365,
"step": 14280
},
{
"epoch": 539.622641509434,
"grad_norm": 1.0326597719869466,
"learning_rate": 4.8830163859248014e-05,
"loss": 1.812,
"step": 14300
},
{
"epoch": 540.377358490566,
"grad_norm": 1.261127094091647,
"learning_rate": 4.875746364990955e-05,
"loss": 1.7936,
"step": 14320
},
{
"epoch": 541.1320754716982,
"grad_norm": 1.6850662159573848,
"learning_rate": 4.8684739240524185e-05,
"loss": 1.8039,
"step": 14340
},
{
"epoch": 541.8867924528302,
"grad_norm": 1.1719859164333604,
"learning_rate": 4.861199091234556e-05,
"loss": 1.7995,
"step": 14360
},
{
"epoch": 542.6415094339623,
"grad_norm": 1.1168812884827573,
"learning_rate": 4.853921894671973e-05,
"loss": 1.804,
"step": 14380
},
{
"epoch": 543.3962264150944,
"grad_norm": 1.5041434010962127,
"learning_rate": 4.846642362508422e-05,
"loss": 1.8042,
"step": 14400
},
{
"epoch": 544.1509433962265,
"grad_norm": 1.2922119772360392,
"learning_rate": 4.8393605228966854e-05,
"loss": 1.8176,
"step": 14420
},
{
"epoch": 544.9056603773585,
"grad_norm": 1.316092813395267,
"learning_rate": 4.832076403998472e-05,
"loss": 1.8324,
"step": 14440
},
{
"epoch": 545.6603773584906,
"grad_norm": 1.148925533679318,
"learning_rate": 4.8247900339843045e-05,
"loss": 1.8249,
"step": 14460
},
{
"epoch": 546.4150943396227,
"grad_norm": 1.3351586320323485,
"learning_rate": 4.817501441033409e-05,
"loss": 1.8023,
"step": 14480
},
{
"epoch": 547.1698113207547,
"grad_norm": 1.4554583529380825,
"learning_rate": 4.810210653333613e-05,
"loss": 1.782,
"step": 14500
},
{
"epoch": 547.9245283018868,
"grad_norm": 1.2418737812043639,
"learning_rate": 4.802917699081225e-05,
"loss": 1.7981,
"step": 14520
},
{
"epoch": 548.6792452830189,
"grad_norm": 1.1837142285238051,
"learning_rate": 4.795622606480942e-05,
"loss": 1.7982,
"step": 14540
},
{
"epoch": 549.433962264151,
"grad_norm": 1.2674115880751322,
"learning_rate": 4.788325403745724e-05,
"loss": 1.8055,
"step": 14560
},
{
"epoch": 550.188679245283,
"grad_norm": 2.02523705877845,
"learning_rate": 4.7810261190966944e-05,
"loss": 1.7905,
"step": 14580
},
{
"epoch": 550.9433962264151,
"grad_norm": 1.3660297273644537,
"learning_rate": 4.773724780763023e-05,
"loss": 1.8267,
"step": 14600
},
{
"epoch": 551.6981132075472,
"grad_norm": 1.1728070148137189,
"learning_rate": 4.766421416981833e-05,
"loss": 1.7862,
"step": 14620
},
{
"epoch": 552.4528301886793,
"grad_norm": 1.148521109395332,
"learning_rate": 4.759116055998069e-05,
"loss": 1.7842,
"step": 14640
},
{
"epoch": 553.2075471698113,
"grad_norm": 1.2578627421373816,
"learning_rate": 4.7518087260644065e-05,
"loss": 1.8105,
"step": 14660
},
{
"epoch": 553.9622641509434,
"grad_norm": 1.2736902452272465,
"learning_rate": 4.744499455441133e-05,
"loss": 1.7931,
"step": 14680
},
{
"epoch": 554.7169811320755,
"grad_norm": 1.0794014181765008,
"learning_rate": 4.737188272396044e-05,
"loss": 1.8043,
"step": 14700
},
{
"epoch": 555.4716981132076,
"grad_norm": 1.3894129104855453,
"learning_rate": 4.729875205204327e-05,
"loss": 1.8301,
"step": 14720
},
{
"epoch": 556.2264150943396,
"grad_norm": 1.147340224849857,
"learning_rate": 4.722560282148459e-05,
"loss": 1.8178,
"step": 14740
},
{
"epoch": 556.9811320754717,
"grad_norm": 1.3948879461559769,
"learning_rate": 4.7152435315180975e-05,
"loss": 1.7648,
"step": 14760
},
{
"epoch": 557.7358490566038,
"grad_norm": 1.3694680696221502,
"learning_rate": 4.7079249816099584e-05,
"loss": 1.8104,
"step": 14780
},
{
"epoch": 558.4905660377359,
"grad_norm": 1.4147919843537753,
"learning_rate": 4.700604660727726e-05,
"loss": 1.7721,
"step": 14800
},
{
"epoch": 559.2452830188679,
"grad_norm": 1.2297666792262925,
"learning_rate": 4.6932825971819285e-05,
"loss": 1.7923,
"step": 14820
},
{
"epoch": 560.0,
"grad_norm": 1.1416590332464547,
"learning_rate": 4.6859588192898365e-05,
"loss": 1.7709,
"step": 14840
},
{
"epoch": 560.7547169811321,
"grad_norm": 1.2633394473980435,
"learning_rate": 4.6786333553753454e-05,
"loss": 1.8265,
"step": 14860
},
{
"epoch": 561.5094339622641,
"grad_norm": 1.73410063706433,
"learning_rate": 4.671306233768877e-05,
"loss": 1.7935,
"step": 14880
},
{
"epoch": 562.2641509433962,
"grad_norm": 1.909552398589606,
"learning_rate": 4.663977482807263e-05,
"loss": 1.7928,
"step": 14900
},
{
"epoch": 563.0188679245283,
"grad_norm": 2.3340344731557505,
"learning_rate": 4.656647130833632e-05,
"loss": 1.8083,
"step": 14920
},
{
"epoch": 563.7735849056604,
"grad_norm": 1.5856106264075287,
"learning_rate": 4.64931520619731e-05,
"loss": 1.8345,
"step": 14940
},
{
"epoch": 564.5283018867924,
"grad_norm": 1.4125116448786768,
"learning_rate": 4.6419817372537015e-05,
"loss": 1.7764,
"step": 14960
},
{
"epoch": 565.2830188679245,
"grad_norm": 1.1720058705654566,
"learning_rate": 4.634646752364185e-05,
"loss": 1.7917,
"step": 14980
},
{
"epoch": 566.0377358490566,
"grad_norm": 1.1615325214837866,
"learning_rate": 4.627310279896001e-05,
"loss": 1.7916,
"step": 15000
},
{
"epoch": 566.7924528301887,
"grad_norm": 1.3392839325444756,
"learning_rate": 4.619972348222143e-05,
"loss": 1.7803,
"step": 15020
},
{
"epoch": 567.5471698113207,
"grad_norm": 1.3665016393198224,
"learning_rate": 4.6126329857212486e-05,
"loss": 1.7822,
"step": 15040
},
{
"epoch": 568.3018867924528,
"grad_norm": 1.6085820618369988,
"learning_rate": 4.605292220777489e-05,
"loss": 1.7889,
"step": 15060
},
{
"epoch": 569.0566037735849,
"grad_norm": 1.4230286645037085,
"learning_rate": 4.5979500817804594e-05,
"loss": 1.805,
"step": 15080
},
{
"epoch": 569.811320754717,
"grad_norm": 1.1596468566263143,
"learning_rate": 4.590606597125065e-05,
"loss": 1.7892,
"step": 15100
},
{
"epoch": 570.566037735849,
"grad_norm": 1.4539519726534167,
"learning_rate": 4.583261795211423e-05,
"loss": 1.7831,
"step": 15120
},
{
"epoch": 571.3207547169811,
"grad_norm": 1.2521318013943803,
"learning_rate": 4.575915704444736e-05,
"loss": 1.8018,
"step": 15140
},
{
"epoch": 572.0754716981132,
"grad_norm": 1.1819685518944387,
"learning_rate": 4.5685683532352e-05,
"loss": 1.7866,
"step": 15160
},
{
"epoch": 572.8301886792453,
"grad_norm": 1.1876204585927221,
"learning_rate": 4.5612197699978766e-05,
"loss": 1.7833,
"step": 15180
},
{
"epoch": 573.5849056603773,
"grad_norm": 1.1556357684763976,
"learning_rate": 4.5538699831526006e-05,
"loss": 1.8024,
"step": 15200
},
{
"epoch": 574.3396226415094,
"grad_norm": 1.3326980140111142,
"learning_rate": 4.5465190211238544e-05,
"loss": 1.7829,
"step": 15220
},
{
"epoch": 575.0943396226415,
"grad_norm": 1.308268798679134,
"learning_rate": 4.539166912340671e-05,
"loss": 1.7766,
"step": 15240
},
{
"epoch": 575.8490566037735,
"grad_norm": 1.1564791044184874,
"learning_rate": 4.531813685236516e-05,
"loss": 1.8021,
"step": 15260
},
{
"epoch": 576.6037735849056,
"grad_norm": 1.4187942127459952,
"learning_rate": 4.524459368249179e-05,
"loss": 1.7523,
"step": 15280
},
{
"epoch": 577.3584905660377,
"grad_norm": 1.1994628151621998,
"learning_rate": 4.5171039898206644e-05,
"loss": 1.7845,
"step": 15300
},
{
"epoch": 578.1132075471698,
"grad_norm": 1.172216325696233,
"learning_rate": 4.509747578397086e-05,
"loss": 1.7591,
"step": 15320
},
{
"epoch": 578.8679245283018,
"grad_norm": 1.1667988074546227,
"learning_rate": 4.5023901624285465e-05,
"loss": 1.7955,
"step": 15340
},
{
"epoch": 579.622641509434,
"grad_norm": 1.31427332849911,
"learning_rate": 4.495031770369038e-05,
"loss": 1.7605,
"step": 15360
},
{
"epoch": 580.377358490566,
"grad_norm": 1.2050607756000014,
"learning_rate": 4.487672430676325e-05,
"loss": 1.7673,
"step": 15380
},
{
"epoch": 581.1320754716982,
"grad_norm": 1.2087614153318165,
"learning_rate": 4.480312171811838e-05,
"loss": 1.7876,
"step": 15400
},
{
"epoch": 581.8867924528302,
"grad_norm": 1.3000620466205515,
"learning_rate": 4.472951022240562e-05,
"loss": 1.7611,
"step": 15420
},
{
"epoch": 582.6415094339623,
"grad_norm": 1.7966112906689369,
"learning_rate": 4.4655890104309254e-05,
"loss": 1.7702,
"step": 15440
},
{
"epoch": 583.3962264150944,
"grad_norm": 1.198242649687164,
"learning_rate": 4.458226164854697e-05,
"loss": 1.7942,
"step": 15460
},
{
"epoch": 584.1509433962265,
"grad_norm": 1.6859720478683236,
"learning_rate": 4.450862513986861e-05,
"loss": 1.758,
"step": 15480
},
{
"epoch": 584.9056603773585,
"grad_norm": 1.1440767805984655,
"learning_rate": 4.443498086305525e-05,
"loss": 1.7647,
"step": 15500
},
{
"epoch": 585.6603773584906,
"grad_norm": 1.2426581026511485,
"learning_rate": 4.436132910291792e-05,
"loss": 1.7468,
"step": 15520
},
{
"epoch": 586.4150943396227,
"grad_norm": 1.5652742956982049,
"learning_rate": 4.4287670144296675e-05,
"loss": 1.7733,
"step": 15540
},
{
"epoch": 587.1698113207547,
"grad_norm": 1.150105537080449,
"learning_rate": 4.421400427205934e-05,
"loss": 1.7878,
"step": 15560
},
{
"epoch": 587.9245283018868,
"grad_norm": 1.297179193085273,
"learning_rate": 4.4140331771100516e-05,
"loss": 1.7558,
"step": 15580
},
{
"epoch": 588.6792452830189,
"grad_norm": 1.4354989531166704,
"learning_rate": 4.406665292634046e-05,
"loss": 1.7652,
"step": 15600
},
{
"epoch": 589.433962264151,
"grad_norm": 1.3544454831633896,
"learning_rate": 4.399296802272388e-05,
"loss": 1.7695,
"step": 15620
},
{
"epoch": 590.188679245283,
"grad_norm": 1.356710977975809,
"learning_rate": 4.3919277345219033e-05,
"loss": 1.7317,
"step": 15640
},
{
"epoch": 590.9433962264151,
"grad_norm": 1.3504644293745585,
"learning_rate": 4.3845581178816394e-05,
"loss": 1.7784,
"step": 15660
},
{
"epoch": 591.6981132075472,
"grad_norm": 1.2934057468915228,
"learning_rate": 4.377187980852775e-05,
"loss": 1.7655,
"step": 15680
},
{
"epoch": 592.4528301886793,
"grad_norm": 2.7284471186236976,
"learning_rate": 4.369817351938495e-05,
"loss": 1.7617,
"step": 15700
},
{
"epoch": 593.2075471698113,
"grad_norm": 1.4587946653999224,
"learning_rate": 4.3624462596438926e-05,
"loss": 1.7675,
"step": 15720
},
{
"epoch": 593.9622641509434,
"grad_norm": 1.824543804524391,
"learning_rate": 4.3550747324758475e-05,
"loss": 1.7835,
"step": 15740
},
{
"epoch": 594.7169811320755,
"grad_norm": 1.1558960324762337,
"learning_rate": 4.3477027989429267e-05,
"loss": 1.7848,
"step": 15760
},
{
"epoch": 595.4716981132076,
"grad_norm": 1.3618125278208344,
"learning_rate": 4.340330487555261e-05,
"loss": 1.7717,
"step": 15780
},
{
"epoch": 596.2264150943396,
"grad_norm": 1.8336334887122832,
"learning_rate": 4.332957826824451e-05,
"loss": 1.7753,
"step": 15800
},
{
"epoch": 596.9811320754717,
"grad_norm": 1.6035556059617442,
"learning_rate": 4.325584845263445e-05,
"loss": 1.7507,
"step": 15820
},
{
"epoch": 597.7358490566038,
"grad_norm": 1.1021262642715972,
"learning_rate": 4.318211571386428e-05,
"loss": 1.7683,
"step": 15840
},
{
"epoch": 598.4905660377359,
"grad_norm": 1.3112589542500708,
"learning_rate": 4.310838033708722e-05,
"loss": 1.753,
"step": 15860
},
{
"epoch": 599.2452830188679,
"grad_norm": 1.5257318148219035,
"learning_rate": 4.303464260746667e-05,
"loss": 1.7446,
"step": 15880
},
{
"epoch": 600.0,
"grad_norm": 1.2648959489882874,
"learning_rate": 4.296090281017511e-05,
"loss": 1.7513,
"step": 15900
},
{
"epoch": 600.7547169811321,
"grad_norm": 1.4414622706601208,
"learning_rate": 4.2887161230393034e-05,
"loss": 1.7421,
"step": 15920
},
{
"epoch": 601.5094339622641,
"grad_norm": 1.60216933395765,
"learning_rate": 4.281341815330784e-05,
"loss": 1.7335,
"step": 15940
},
{
"epoch": 602.2641509433962,
"grad_norm": 1.438261210769706,
"learning_rate": 4.273967386411267e-05,
"loss": 1.7676,
"step": 15960
},
{
"epoch": 603.0188679245283,
"grad_norm": 1.61121062509495,
"learning_rate": 4.26659286480054e-05,
"loss": 1.7767,
"step": 15980
},
{
"epoch": 603.7735849056604,
"grad_norm": 1.3368219249794455,
"learning_rate": 4.2592182790187495e-05,
"loss": 1.7615,
"step": 16000
},
{
"epoch": 604.5283018867924,
"grad_norm": 1.1964899050496502,
"learning_rate": 4.251843657586285e-05,
"loss": 1.7909,
"step": 16020
},
{
"epoch": 605.2830188679245,
"grad_norm": 1.1409598499641234,
"learning_rate": 4.244469029023682e-05,
"loss": 1.7806,
"step": 16040
},
{
"epoch": 606.0377358490566,
"grad_norm": 1.0775618341358217,
"learning_rate": 4.237094421851494e-05,
"loss": 1.7696,
"step": 16060
},
{
"epoch": 606.7924528301887,
"grad_norm": 1.201425866436519,
"learning_rate": 4.2297198645901986e-05,
"loss": 1.7424,
"step": 16080
},
{
"epoch": 607.5471698113207,
"grad_norm": 1.29163631265219,
"learning_rate": 4.222345385760079e-05,
"loss": 1.749,
"step": 16100
},
{
"epoch": 608.3018867924528,
"grad_norm": 1.4158324908813191,
"learning_rate": 4.214971013881114e-05,
"loss": 1.7594,
"step": 16120
},
{
"epoch": 609.0566037735849,
"grad_norm": 1.2390733211978042,
"learning_rate": 4.2075967774728675e-05,
"loss": 1.7707,
"step": 16140
},
{
"epoch": 609.811320754717,
"grad_norm": 1.0960663109570459,
"learning_rate": 4.200222705054385e-05,
"loss": 1.7633,
"step": 16160
},
{
"epoch": 610.566037735849,
"grad_norm": 1.167381366879647,
"learning_rate": 4.1928488251440704e-05,
"loss": 1.7735,
"step": 16180
},
{
"epoch": 611.3207547169811,
"grad_norm": 1.468960912277373,
"learning_rate": 4.185475166259588e-05,
"loss": 1.7222,
"step": 16200
},
{
"epoch": 612.0754716981132,
"grad_norm": 1.2572603668608606,
"learning_rate": 4.178101756917746e-05,
"loss": 1.7477,
"step": 16220
},
{
"epoch": 612.8301886792453,
"grad_norm": 1.2661070355556836,
"learning_rate": 4.170728625634387e-05,
"loss": 1.7437,
"step": 16240
},
{
"epoch": 613.5849056603773,
"grad_norm": 1.6793862205908143,
"learning_rate": 4.16335580092428e-05,
"loss": 1.7518,
"step": 16260
},
{
"epoch": 614.3396226415094,
"grad_norm": 1.3347192318840417,
"learning_rate": 4.155983311301006e-05,
"loss": 1.7275,
"step": 16280
},
{
"epoch": 615.0943396226415,
"grad_norm": 1.146186653201129,
"learning_rate": 4.148611185276852e-05,
"loss": 1.7505,
"step": 16300
},
{
"epoch": 615.8490566037735,
"grad_norm": 1.2853858560898548,
"learning_rate": 4.1412394513626976e-05,
"loss": 1.7345,
"step": 16320
},
{
"epoch": 616.6037735849056,
"grad_norm": 1.3689931241044506,
"learning_rate": 4.1338681380679055e-05,
"loss": 1.7372,
"step": 16340
},
{
"epoch": 617.3584905660377,
"grad_norm": 1.2520152979412003,
"learning_rate": 4.126497273900214e-05,
"loss": 1.7749,
"step": 16360
},
{
"epoch": 618.1132075471698,
"grad_norm": 4.4664529214017685,
"learning_rate": 4.119126887365623e-05,
"loss": 1.7291,
"step": 16380
},
{
"epoch": 618.8679245283018,
"grad_norm": 1.4871942552231863,
"learning_rate": 4.111757006968283e-05,
"loss": 1.729,
"step": 16400
},
{
"epoch": 619.622641509434,
"grad_norm": 1.7327021169643824,
"learning_rate": 4.104387661210391e-05,
"loss": 1.7906,
"step": 16420
},
{
"epoch": 620.377358490566,
"grad_norm": 1.3011566548368803,
"learning_rate": 4.0970188785920764e-05,
"loss": 1.7498,
"step": 16440
},
{
"epoch": 621.1320754716982,
"grad_norm": 1.503913785893422,
"learning_rate": 4.0896506876112856e-05,
"loss": 1.7333,
"step": 16460
},
{
"epoch": 621.8867924528302,
"grad_norm": 1.2396874135815048,
"learning_rate": 4.082283116763683e-05,
"loss": 1.7474,
"step": 16480
},
{
"epoch": 622.6415094339623,
"grad_norm": 1.3186465498196096,
"learning_rate": 4.07491619454253e-05,
"loss": 1.7641,
"step": 16500
},
{
"epoch": 623.3962264150944,
"grad_norm": 1.2224446651472063,
"learning_rate": 4.067549949438583e-05,
"loss": 1.7596,
"step": 16520
},
{
"epoch": 624.1509433962265,
"grad_norm": 1.299102298479128,
"learning_rate": 4.060184409939977e-05,
"loss": 1.7399,
"step": 16540
},
{
"epoch": 624.9056603773585,
"grad_norm": 1.2080109960062584,
"learning_rate": 4.052819604532121e-05,
"loss": 1.7545,
"step": 16560
},
{
"epoch": 625.6603773584906,
"grad_norm": 1.1330156099339754,
"learning_rate": 4.04545556169758e-05,
"loss": 1.7514,
"step": 16580
},
{
"epoch": 626.4150943396227,
"grad_norm": 1.877556318395021,
"learning_rate": 4.038092309915976e-05,
"loss": 1.7495,
"step": 16600
},
{
"epoch": 627.1698113207547,
"grad_norm": 1.3430468095941768,
"learning_rate": 4.0307298776638696e-05,
"loss": 1.7387,
"step": 16620
},
{
"epoch": 627.9245283018868,
"grad_norm": 1.3456306138048115,
"learning_rate": 4.023368293414651e-05,
"loss": 1.7586,
"step": 16640
},
{
"epoch": 628.6792452830189,
"grad_norm": 1.2925035537026515,
"learning_rate": 4.016007585638428e-05,
"loss": 1.7222,
"step": 16660
},
{
"epoch": 629.433962264151,
"grad_norm": 1.5060755357936446,
"learning_rate": 4.0086477828019247e-05,
"loss": 1.734,
"step": 16680
},
{
"epoch": 630.188679245283,
"grad_norm": 1.2358138916528858,
"learning_rate": 4.001288913368361e-05,
"loss": 1.7585,
"step": 16700
},
{
"epoch": 630.9433962264151,
"grad_norm": 1.0536359575721053,
"learning_rate": 3.9939310057973496e-05,
"loss": 1.699,
"step": 16720
},
{
"epoch": 631.6981132075472,
"grad_norm": 1.3396521000709494,
"learning_rate": 3.986574088544782e-05,
"loss": 1.745,
"step": 16740
},
{
"epoch": 632.4528301886793,
"grad_norm": 1.1966711285530698,
"learning_rate": 3.979218190062718e-05,
"loss": 1.7049,
"step": 16760
},
{
"epoch": 633.2075471698113,
"grad_norm": 1.3511753835544016,
"learning_rate": 3.971863338799283e-05,
"loss": 1.7319,
"step": 16780
},
{
"epoch": 633.9622641509434,
"grad_norm": 1.2759632464750865,
"learning_rate": 3.964509563198547e-05,
"loss": 1.7431,
"step": 16800
},
{
"epoch": 634.7169811320755,
"grad_norm": 1.5118791481962728,
"learning_rate": 3.957156891700422e-05,
"loss": 1.7549,
"step": 16820
},
{
"epoch": 635.4716981132076,
"grad_norm": 1.358305138616916,
"learning_rate": 3.949805352740549e-05,
"loss": 1.7146,
"step": 16840
},
{
"epoch": 636.2264150943396,
"grad_norm": 1.2468444876323985,
"learning_rate": 3.9424549747501916e-05,
"loss": 1.6839,
"step": 16860
},
{
"epoch": 636.9811320754717,
"grad_norm": 1.6815476229074826,
"learning_rate": 3.9351057861561194e-05,
"loss": 1.7381,
"step": 16880
},
{
"epoch": 637.7358490566038,
"grad_norm": 1.3183944234813532,
"learning_rate": 3.927757815380507e-05,
"loss": 1.737,
"step": 16900
},
{
"epoch": 638.4905660377359,
"grad_norm": 1.2664716657296504,
"learning_rate": 3.920411090840813e-05,
"loss": 1.7552,
"step": 16920
},
{
"epoch": 639.2452830188679,
"grad_norm": 1.4316626122660758,
"learning_rate": 3.9130656409496826e-05,
"loss": 1.7035,
"step": 16940
},
{
"epoch": 640.0,
"grad_norm": 1.624465349724497,
"learning_rate": 3.90572149411483e-05,
"loss": 1.7349,
"step": 16960
},
{
"epoch": 640.7547169811321,
"grad_norm": 1.3525138710560463,
"learning_rate": 3.8983786787389264e-05,
"loss": 1.7196,
"step": 16980
},
{
"epoch": 641.5094339622641,
"grad_norm": 1.1968289253916946,
"learning_rate": 3.891037223219497e-05,
"loss": 1.7288,
"step": 17000
},
{
"epoch": 642.2641509433962,
"grad_norm": 1.3150467173282183,
"learning_rate": 3.883697155948808e-05,
"loss": 1.7478,
"step": 17020
},
{
"epoch": 643.0188679245283,
"grad_norm": 1.3494514082635618,
"learning_rate": 3.876358505313754e-05,
"loss": 1.7208,
"step": 17040
},
{
"epoch": 643.7735849056604,
"grad_norm": 1.5328078930199742,
"learning_rate": 3.869021299695754e-05,
"loss": 1.747,
"step": 17060
},
{
"epoch": 644.5283018867924,
"grad_norm": 1.2945392233470665,
"learning_rate": 3.8616855674706354e-05,
"loss": 1.7225,
"step": 17080
},
{
"epoch": 645.2830188679245,
"grad_norm": 1.2582163265054458,
"learning_rate": 3.854351337008532e-05,
"loss": 1.7428,
"step": 17100
},
{
"epoch": 646.0377358490566,
"grad_norm": 1.1370724946903576,
"learning_rate": 3.847018636673765e-05,
"loss": 1.704,
"step": 17120
},
{
"epoch": 646.7924528301887,
"grad_norm": 1.2161474947016768,
"learning_rate": 3.839687494824741e-05,
"loss": 1.7129,
"step": 17140
},
{
"epoch": 647.5471698113207,
"grad_norm": 1.1033819366614397,
"learning_rate": 3.832357939813837e-05,
"loss": 1.695,
"step": 17160
},
{
"epoch": 648.3018867924528,
"grad_norm": 1.2993665260901381,
"learning_rate": 3.825029999987296e-05,
"loss": 1.7022,
"step": 17180
},
{
"epoch": 649.0566037735849,
"grad_norm": 1.2577276608492982,
"learning_rate": 3.8177037036851115e-05,
"loss": 1.7029,
"step": 17200
},
{
"epoch": 649.811320754717,
"grad_norm": 1.2459092691964395,
"learning_rate": 3.810379079240922e-05,
"loss": 1.7139,
"step": 17220
},
{
"epoch": 650.566037735849,
"grad_norm": 1.3152629296897698,
"learning_rate": 3.8030561549819015e-05,
"loss": 1.7088,
"step": 17240
},
{
"epoch": 651.3207547169811,
"grad_norm": 1.2367123181404969,
"learning_rate": 3.795734959228645e-05,
"loss": 1.6936,
"step": 17260
},
{
"epoch": 652.0754716981132,
"grad_norm": 1.1338754969305556,
"learning_rate": 3.7884155202950696e-05,
"loss": 1.7151,
"step": 17280
},
{
"epoch": 652.8301886792453,
"grad_norm": 1.2942728726977033,
"learning_rate": 3.781097866488291e-05,
"loss": 1.712,
"step": 17300
},
{
"epoch": 653.5849056603773,
"grad_norm": 1.39400718208209,
"learning_rate": 3.773782026108526e-05,
"loss": 1.7181,
"step": 17320
},
{
"epoch": 654.3396226415094,
"grad_norm": 1.3198924641523746,
"learning_rate": 3.766468027448973e-05,
"loss": 1.6913,
"step": 17340
},
{
"epoch": 655.0943396226415,
"grad_norm": 1.1991934682117795,
"learning_rate": 3.759155898795714e-05,
"loss": 1.7093,
"step": 17360
},
{
"epoch": 655.8490566037735,
"grad_norm": 1.239259370659102,
"learning_rate": 3.751845668427593e-05,
"loss": 1.7009,
"step": 17380
},
{
"epoch": 656.6037735849056,
"grad_norm": 1.2833857218204128,
"learning_rate": 3.7445373646161176e-05,
"loss": 1.7005,
"step": 17400
},
{
"epoch": 657.3584905660377,
"grad_norm": 1.454767822481044,
"learning_rate": 3.737231015625341e-05,
"loss": 1.6906,
"step": 17420
},
{
"epoch": 658.1132075471698,
"grad_norm": 1.4542141511941185,
"learning_rate": 3.729926649711759e-05,
"loss": 1.7058,
"step": 17440
},
{
"epoch": 658.8679245283018,
"grad_norm": 1.3091035418860133,
"learning_rate": 3.722624295124197e-05,
"loss": 1.6885,
"step": 17460
},
{
"epoch": 659.622641509434,
"grad_norm": 1.2943161972236163,
"learning_rate": 3.7153239801037014e-05,
"loss": 1.714,
"step": 17480
},
{
"epoch": 660.377358490566,
"grad_norm": 1.3377320776810098,
"learning_rate": 3.708025732883431e-05,
"loss": 1.684,
"step": 17500
},
{
"epoch": 661.1320754716982,
"grad_norm": 1.2629223675934866,
"learning_rate": 3.700729581688547e-05,
"loss": 1.699,
"step": 17520
},
{
"epoch": 661.8867924528302,
"grad_norm": 1.2451499003174673,
"learning_rate": 3.693435554736107e-05,
"loss": 1.6818,
"step": 17540
},
{
"epoch": 662.6415094339623,
"grad_norm": 1.5331175213775703,
"learning_rate": 3.6861436802349504e-05,
"loss": 1.7177,
"step": 17560
},
{
"epoch": 663.3962264150944,
"grad_norm": 1.4360430543768725,
"learning_rate": 3.6788539863855925e-05,
"loss": 1.7119,
"step": 17580
},
{
"epoch": 664.1509433962265,
"grad_norm": 1.3816247903457854,
"learning_rate": 3.671566501380116e-05,
"loss": 1.7148,
"step": 17600
},
{
"epoch": 664.9056603773585,
"grad_norm": 1.2778334686031196,
"learning_rate": 3.6642812534020636e-05,
"loss": 1.6935,
"step": 17620
},
{
"epoch": 665.6603773584906,
"grad_norm": 1.552301737650962,
"learning_rate": 3.656998270626322e-05,
"loss": 1.6917,
"step": 17640
},
{
"epoch": 666.4150943396227,
"grad_norm": 1.1626344688263202,
"learning_rate": 3.649717581219022e-05,
"loss": 1.6869,
"step": 17660
},
{
"epoch": 667.1698113207547,
"grad_norm": 1.2478591651994395,
"learning_rate": 3.642439213337418e-05,
"loss": 1.6964,
"step": 17680
},
{
"epoch": 667.9245283018868,
"grad_norm": 1.1665269494870496,
"learning_rate": 3.635163195129796e-05,
"loss": 1.706,
"step": 17700
},
{
"epoch": 668.6792452830189,
"grad_norm": 1.2417440240279074,
"learning_rate": 3.627889554735346e-05,
"loss": 1.6607,
"step": 17720
},
{
"epoch": 669.433962264151,
"grad_norm": 1.4243990985436537,
"learning_rate": 3.620618320284067e-05,
"loss": 1.6874,
"step": 17740
},
{
"epoch": 670.188679245283,
"grad_norm": 1.4914544739718891,
"learning_rate": 3.613349519896652e-05,
"loss": 1.6908,
"step": 17760
},
{
"epoch": 670.9433962264151,
"grad_norm": 1.3300772606283862,
"learning_rate": 3.606083181684381e-05,
"loss": 1.688,
"step": 17780
},
{
"epoch": 671.6981132075472,
"grad_norm": 1.2461357748180606,
"learning_rate": 3.5988193337490116e-05,
"loss": 1.6547,
"step": 17800
},
{
"epoch": 672.4528301886793,
"grad_norm": 1.370151145210619,
"learning_rate": 3.5915580041826694e-05,
"loss": 1.7193,
"step": 17820
},
{
"epoch": 673.2075471698113,
"grad_norm": 1.2763659906881193,
"learning_rate": 3.5842992210677416e-05,
"loss": 1.6808,
"step": 17840
},
{
"epoch": 673.9622641509434,
"grad_norm": 1.2944519984940064,
"learning_rate": 3.577043012476768e-05,
"loss": 1.7,
"step": 17860
},
{
"epoch": 674.7169811320755,
"grad_norm": 1.3186599824633134,
"learning_rate": 3.56978940647233e-05,
"loss": 1.6954,
"step": 17880
},
{
"epoch": 675.4716981132076,
"grad_norm": 1.252700498164797,
"learning_rate": 3.5625384311069444e-05,
"loss": 1.6686,
"step": 17900
},
{
"epoch": 676.2264150943396,
"grad_norm": 1.5231032873107,
"learning_rate": 3.555290114422955e-05,
"loss": 1.6747,
"step": 17920
},
{
"epoch": 676.9811320754717,
"grad_norm": 1.2910659178037445,
"learning_rate": 3.548044484452421e-05,
"loss": 1.6778,
"step": 17940
},
{
"epoch": 677.7358490566038,
"grad_norm": 1.398570166804289,
"learning_rate": 3.540801569217016e-05,
"loss": 1.6949,
"step": 17960
},
{
"epoch": 678.4905660377359,
"grad_norm": 1.4283155036503146,
"learning_rate": 3.53356139672791e-05,
"loss": 1.682,
"step": 17980
},
{
"epoch": 679.2452830188679,
"grad_norm": 1.3275162110816598,
"learning_rate": 3.526323994985669e-05,
"loss": 1.695,
"step": 18000
},
{
"epoch": 680.0,
"grad_norm": 1.2754138886413842,
"learning_rate": 3.519089391980139e-05,
"loss": 1.6977,
"step": 18020
},
{
"epoch": 680.7547169811321,
"grad_norm": 1.3077633836764546,
"learning_rate": 3.511857615690347e-05,
"loss": 1.6811,
"step": 18040
},
{
"epoch": 681.5094339622641,
"grad_norm": 1.3473268942249876,
"learning_rate": 3.504628694084385e-05,
"loss": 1.6984,
"step": 18060
},
{
"epoch": 682.2641509433962,
"grad_norm": 1.3350261204503644,
"learning_rate": 3.497402655119306e-05,
"loss": 1.6567,
"step": 18080
},
{
"epoch": 683.0188679245283,
"grad_norm": 1.243885167646148,
"learning_rate": 3.490179526741014e-05,
"loss": 1.6837,
"step": 18100
},
{
"epoch": 683.7735849056604,
"grad_norm": 1.4293023473168278,
"learning_rate": 3.48295933688416e-05,
"loss": 1.7039,
"step": 18120
},
{
"epoch": 684.5283018867924,
"grad_norm": 1.3686594771374196,
"learning_rate": 3.4757421134720236e-05,
"loss": 1.7067,
"step": 18140
},
{
"epoch": 685.2830188679245,
"grad_norm": 1.6243192735337049,
"learning_rate": 3.46852788441642e-05,
"loss": 1.6661,
"step": 18160
},
{
"epoch": 686.0377358490566,
"grad_norm": 1.2075045336020302,
"learning_rate": 3.461316677617577e-05,
"loss": 1.6779,
"step": 18180
},
{
"epoch": 686.7924528301887,
"grad_norm": 1.348462905709941,
"learning_rate": 3.4541085209640396e-05,
"loss": 1.6962,
"step": 18200
},
{
"epoch": 687.5471698113207,
"grad_norm": 1.370184561468331,
"learning_rate": 3.446903442332552e-05,
"loss": 1.6819,
"step": 18220
},
{
"epoch": 688.3018867924528,
"grad_norm": 2.4058560541467537,
"learning_rate": 3.439701469587961e-05,
"loss": 1.6562,
"step": 18240
},
{
"epoch": 689.0566037735849,
"grad_norm": 1.2548392090130422,
"learning_rate": 3.4325026305830914e-05,
"loss": 1.662,
"step": 18260
},
{
"epoch": 689.811320754717,
"grad_norm": 1.2311253301629015,
"learning_rate": 3.4253069531586616e-05,
"loss": 1.6629,
"step": 18280
},
{
"epoch": 690.566037735849,
"grad_norm": 1.9966791662877068,
"learning_rate": 3.418114465143153e-05,
"loss": 1.6592,
"step": 18300
},
{
"epoch": 691.3207547169811,
"grad_norm": 1.2370362395857986,
"learning_rate": 3.410925194352715e-05,
"loss": 1.6806,
"step": 18320
},
{
"epoch": 692.0754716981132,
"grad_norm": 1.463146145452869,
"learning_rate": 3.4037391685910566e-05,
"loss": 1.6937,
"step": 18340
},
{
"epoch": 692.8301886792453,
"grad_norm": 1.2590469253316379,
"learning_rate": 3.396556415649336e-05,
"loss": 1.6746,
"step": 18360
},
{
"epoch": 693.5849056603773,
"grad_norm": 1.3472170619382864,
"learning_rate": 3.389376963306052e-05,
"loss": 1.681,
"step": 18380
},
{
"epoch": 694.3396226415094,
"grad_norm": 1.4907805923383493,
"learning_rate": 3.382200839326942e-05,
"loss": 1.6822,
"step": 18400
},
{
"epoch": 695.0943396226415,
"grad_norm": 1.3754366409172392,
"learning_rate": 3.375028071464869e-05,
"loss": 1.6819,
"step": 18420
},
{
"epoch": 695.8490566037735,
"grad_norm": 1.2854564980336112,
"learning_rate": 3.3678586874597176e-05,
"loss": 1.6712,
"step": 18440
},
{
"epoch": 696.6037735849056,
"grad_norm": 1.4614311570416143,
"learning_rate": 3.3606927150382865e-05,
"loss": 1.649,
"step": 18460
},
{
"epoch": 697.3584905660377,
"grad_norm": 1.3139946901519874,
"learning_rate": 3.353530181914178e-05,
"loss": 1.7062,
"step": 18480
},
{
"epoch": 698.1132075471698,
"grad_norm": 1.4895975475886944,
"learning_rate": 3.3463711157876966e-05,
"loss": 1.6841,
"step": 18500
},
{
"epoch": 698.8679245283018,
"grad_norm": 1.2111074764483576,
"learning_rate": 3.339215544345735e-05,
"loss": 1.6799,
"step": 18520
},
{
"epoch": 699.622641509434,
"grad_norm": 1.254964544152517,
"learning_rate": 3.3320634952616736e-05,
"loss": 1.6554,
"step": 18540
},
{
"epoch": 700.377358490566,
"grad_norm": 1.4098934710763775,
"learning_rate": 3.3249149961952686e-05,
"loss": 1.6821,
"step": 18560
},
{
"epoch": 701.1320754716982,
"grad_norm": 1.449098110180846,
"learning_rate": 3.3177700747925484e-05,
"loss": 1.6775,
"step": 18580
},
{
"epoch": 701.8867924528302,
"grad_norm": 1.4166300599178772,
"learning_rate": 3.310628758685702e-05,
"loss": 1.6647,
"step": 18600
},
{
"epoch": 702.6415094339623,
"grad_norm": 1.3321739096846923,
"learning_rate": 3.30349107549298e-05,
"loss": 1.6606,
"step": 18620
},
{
"epoch": 703.3962264150944,
"grad_norm": 1.3195021828180338,
"learning_rate": 3.2963570528185814e-05,
"loss": 1.6414,
"step": 18640
},
{
"epoch": 704.1509433962265,
"grad_norm": 1.2954808039261523,
"learning_rate": 3.2892267182525456e-05,
"loss": 1.6691,
"step": 18660
},
{
"epoch": 704.9056603773585,
"grad_norm": 1.3215765511079391,
"learning_rate": 3.2821000993706524e-05,
"loss": 1.6774,
"step": 18680
},
{
"epoch": 705.6603773584906,
"grad_norm": 1.3256079186058618,
"learning_rate": 3.2749772237343104e-05,
"loss": 1.6675,
"step": 18700
},
{
"epoch": 706.4150943396227,
"grad_norm": 1.3105427183809564,
"learning_rate": 3.26785811889045e-05,
"loss": 1.669,
"step": 18720
},
{
"epoch": 707.1698113207547,
"grad_norm": 1.1406031822674032,
"learning_rate": 3.26074281237142e-05,
"loss": 1.6528,
"step": 18740
},
{
"epoch": 707.9245283018868,
"grad_norm": 1.1721675684528943,
"learning_rate": 3.253631331694882e-05,
"loss": 1.6243,
"step": 18760
},
{
"epoch": 708.6792452830189,
"grad_norm": 1.262858428237141,
"learning_rate": 3.2465237043636945e-05,
"loss": 1.6811,
"step": 18780
},
{
"epoch": 709.433962264151,
"grad_norm": 1.3398257997775693,
"learning_rate": 3.239419957865822e-05,
"loss": 1.6531,
"step": 18800
},
{
"epoch": 710.188679245283,
"grad_norm": 1.3245763474105379,
"learning_rate": 3.2323201196742164e-05,
"loss": 1.6796,
"step": 18820
},
{
"epoch": 710.9433962264151,
"grad_norm": 1.3633874472219405,
"learning_rate": 3.225224217246712e-05,
"loss": 1.6544,
"step": 18840
},
{
"epoch": 711.6981132075472,
"grad_norm": 1.7407734601052158,
"learning_rate": 3.218132278025927e-05,
"loss": 1.6765,
"step": 18860
},
{
"epoch": 712.4528301886793,
"grad_norm": 1.4569167040451834,
"learning_rate": 3.2110443294391486e-05,
"loss": 1.6411,
"step": 18880
},
{
"epoch": 713.2075471698113,
"grad_norm": 1.3711197707215454,
"learning_rate": 3.203960398898234e-05,
"loss": 1.6385,
"step": 18900
},
{
"epoch": 713.9622641509434,
"grad_norm": 1.2731560765553942,
"learning_rate": 3.196880513799497e-05,
"loss": 1.6605,
"step": 18920
},
{
"epoch": 714.7169811320755,
"grad_norm": 1.3127125434194904,
"learning_rate": 3.189804701523608e-05,
"loss": 1.6774,
"step": 18940
},
{
"epoch": 715.4716981132076,
"grad_norm": 1.3249230445075728,
"learning_rate": 3.1827329894354874e-05,
"loss": 1.6753,
"step": 18960
},
{
"epoch": 716.2264150943396,
"grad_norm": 1.4612490587732805,
"learning_rate": 3.1756654048842e-05,
"loss": 1.655,
"step": 18980
},
{
"epoch": 716.9811320754717,
"grad_norm": 1.278645383417836,
"learning_rate": 3.1686019752028424e-05,
"loss": 1.6692,
"step": 19000
},
{
"epoch": 717.7358490566038,
"grad_norm": 1.3408714115191198,
"learning_rate": 3.161542727708446e-05,
"loss": 1.6448,
"step": 19020
},
{
"epoch": 718.4905660377359,
"grad_norm": 1.8695203026536409,
"learning_rate": 3.154487689701869e-05,
"loss": 1.6786,
"step": 19040
},
{
"epoch": 719.2452830188679,
"grad_norm": 1.3167685135936378,
"learning_rate": 3.147436888467689e-05,
"loss": 1.6625,
"step": 19060
},
{
"epoch": 720.0,
"grad_norm": 1.2539486625475944,
"learning_rate": 3.140390351274096e-05,
"loss": 1.6533,
"step": 19080
},
{
"epoch": 720.7547169811321,
"grad_norm": 1.3710221082026877,
"learning_rate": 3.133348105372793e-05,
"loss": 1.677,
"step": 19100
},
{
"epoch": 721.5094339622641,
"grad_norm": 1.528521350034396,
"learning_rate": 3.126310177998883e-05,
"loss": 1.6593,
"step": 19120
},
{
"epoch": 722.2641509433962,
"grad_norm": 1.2092386328287839,
"learning_rate": 3.1192765963707726e-05,
"loss": 1.669,
"step": 19140
},
{
"epoch": 723.0188679245283,
"grad_norm": 1.605845379972632,
"learning_rate": 3.1122473876900574e-05,
"loss": 1.6372,
"step": 19160
},
{
"epoch": 723.7735849056604,
"grad_norm": 1.5877097735994508,
"learning_rate": 3.105222579141423e-05,
"loss": 1.6557,
"step": 19180
},
{
"epoch": 724.5283018867924,
"grad_norm": 1.2516228941598748,
"learning_rate": 3.098202197892538e-05,
"loss": 1.6513,
"step": 19200
},
{
"epoch": 725.2830188679245,
"grad_norm": 1.2391402579938813,
"learning_rate": 3.091186271093947e-05,
"loss": 1.6526,
"step": 19220
},
{
"epoch": 726.0377358490566,
"grad_norm": 1.2782890497326889,
"learning_rate": 3.084174825878972e-05,
"loss": 1.6591,
"step": 19240
},
{
"epoch": 726.7924528301887,
"grad_norm": 1.2506962493164657,
"learning_rate": 3.0771678893635963e-05,
"loss": 1.65,
"step": 19260
},
{
"epoch": 727.5471698113207,
"grad_norm": 1.768116692306316,
"learning_rate": 3.070165488646371e-05,
"loss": 1.6516,
"step": 19280
},
{
"epoch": 728.3018867924528,
"grad_norm": 1.559057461009202,
"learning_rate": 3.063167650808307e-05,
"loss": 1.6616,
"step": 19300
},
{
"epoch": 729.0566037735849,
"grad_norm": 1.2888728962143756,
"learning_rate": 3.0561744029127636e-05,
"loss": 1.6574,
"step": 19320
},
{
"epoch": 729.811320754717,
"grad_norm": 1.2688734788741953,
"learning_rate": 3.049185772005353e-05,
"loss": 1.618,
"step": 19340
},
{
"epoch": 730.566037735849,
"grad_norm": 1.155730285013269,
"learning_rate": 3.0422017851138287e-05,
"loss": 1.6515,
"step": 19360
},
{
"epoch": 731.3207547169811,
"grad_norm": 1.7451043683696195,
"learning_rate": 3.0352224692479883e-05,
"loss": 1.6371,
"step": 19380
},
{
"epoch": 732.0754716981132,
"grad_norm": 1.526187340694129,
"learning_rate": 3.0282478513995598e-05,
"loss": 1.6523,
"step": 19400
},
{
"epoch": 732.8301886792453,
"grad_norm": 1.4075608712323138,
"learning_rate": 3.0212779585421064e-05,
"loss": 1.6335,
"step": 19420
},
{
"epoch": 733.5849056603773,
"grad_norm": 1.345293550699471,
"learning_rate": 3.0143128176309125e-05,
"loss": 1.6505,
"step": 19440
},
{
"epoch": 734.3396226415094,
"grad_norm": 1.3467855791600631,
"learning_rate": 3.007352455602892e-05,
"loss": 1.6591,
"step": 19460
},
{
"epoch": 735.0943396226415,
"grad_norm": 1.3667404544607202,
"learning_rate": 3.000396899376472e-05,
"loss": 1.6244,
"step": 19480
},
{
"epoch": 735.8490566037735,
"grad_norm": 1.2844014927173513,
"learning_rate": 2.9934461758514944e-05,
"loss": 1.6154,
"step": 19500
},
{
"epoch": 736.6037735849056,
"grad_norm": 1.46598947181564,
"learning_rate": 2.986500311909114e-05,
"loss": 1.6443,
"step": 19520
},
{
"epoch": 737.3584905660377,
"grad_norm": 1.2682755408237392,
"learning_rate": 2.9795593344116856e-05,
"loss": 1.6492,
"step": 19540
},
{
"epoch": 738.1132075471698,
"grad_norm": 1.4017683975117536,
"learning_rate": 2.972623270202674e-05,
"loss": 1.6614,
"step": 19560
},
{
"epoch": 738.8679245283018,
"grad_norm": 1.5142927604100354,
"learning_rate": 2.9656921461065357e-05,
"loss": 1.6357,
"step": 19580
},
{
"epoch": 739.622641509434,
"grad_norm": 1.2492564466728204,
"learning_rate": 2.958765988928627e-05,
"loss": 1.6468,
"step": 19600
},
{
"epoch": 740.377358490566,
"grad_norm": 1.4008655564779207,
"learning_rate": 2.951844825455089e-05,
"loss": 1.64,
"step": 19620
},
{
"epoch": 741.1320754716982,
"grad_norm": 1.2731601803567079,
"learning_rate": 2.944928682452759e-05,
"loss": 1.6324,
"step": 19640
},
{
"epoch": 741.8867924528302,
"grad_norm": 1.5569572939387173,
"learning_rate": 2.9380175866690493e-05,
"loss": 1.6368,
"step": 19660
},
{
"epoch": 742.6415094339623,
"grad_norm": 1.3215892057968033,
"learning_rate": 2.9311115648318603e-05,
"loss": 1.5918,
"step": 19680
},
{
"epoch": 743.3962264150944,
"grad_norm": 1.301974969557669,
"learning_rate": 2.924210643649462e-05,
"loss": 1.625,
"step": 19700
},
{
"epoch": 744.1509433962265,
"grad_norm": 1.245601615853851,
"learning_rate": 2.917314849810405e-05,
"loss": 1.6436,
"step": 19720
},
{
"epoch": 744.9056603773585,
"grad_norm": 1.4218013050424188,
"learning_rate": 2.9104242099834047e-05,
"loss": 1.633,
"step": 19740
},
{
"epoch": 745.6603773584906,
"grad_norm": 1.584425316406802,
"learning_rate": 2.9035387508172488e-05,
"loss": 1.654,
"step": 19760
},
{
"epoch": 746.4150943396227,
"grad_norm": 1.237326080185327,
"learning_rate": 2.896658498940685e-05,
"loss": 1.6417,
"step": 19780
},
{
"epoch": 747.1698113207547,
"grad_norm": 1.336327325511772,
"learning_rate": 2.8897834809623266e-05,
"loss": 1.6278,
"step": 19800
},
{
"epoch": 747.9245283018868,
"grad_norm": 1.3731531069367304,
"learning_rate": 2.8829137234705436e-05,
"loss": 1.6339,
"step": 19820
},
{
"epoch": 748.6792452830189,
"grad_norm": 1.4396961322439583,
"learning_rate": 2.8760492530333595e-05,
"loss": 1.6132,
"step": 19840
},
{
"epoch": 749.433962264151,
"grad_norm": 1.4566587475130242,
"learning_rate": 2.869190096198354e-05,
"loss": 1.6236,
"step": 19860
},
{
"epoch": 750.188679245283,
"grad_norm": 1.3378675846922892,
"learning_rate": 2.8623362794925554e-05,
"loss": 1.6407,
"step": 19880
},
{
"epoch": 750.9433962264151,
"grad_norm": 1.1248642736382553,
"learning_rate": 2.85548782942234e-05,
"loss": 1.6328,
"step": 19900
},
{
"epoch": 751.6981132075472,
"grad_norm": 1.281060533625914,
"learning_rate": 2.8486447724733283e-05,
"loss": 1.6288,
"step": 19920
},
{
"epoch": 752.4528301886793,
"grad_norm": 1.2477580789710936,
"learning_rate": 2.841807135110286e-05,
"loss": 1.6129,
"step": 19940
},
{
"epoch": 753.2075471698113,
"grad_norm": 1.3050801379092132,
"learning_rate": 2.8349749437770146e-05,
"loss": 1.6259,
"step": 19960
},
{
"epoch": 753.9622641509434,
"grad_norm": 1.6556396088385372,
"learning_rate": 2.8281482248962588e-05,
"loss": 1.6264,
"step": 19980
},
{
"epoch": 754.7169811320755,
"grad_norm": 1.442836668716919,
"learning_rate": 2.8213270048695976e-05,
"loss": 1.6286,
"step": 20000
},
{
"epoch": 755.4716981132076,
"grad_norm": 1.3276233208619523,
"learning_rate": 2.814511310077342e-05,
"loss": 1.6485,
"step": 20020
},
{
"epoch": 756.2264150943396,
"grad_norm": 1.2751456415696178,
"learning_rate": 2.807701166878436e-05,
"loss": 1.622,
"step": 20040
},
{
"epoch": 756.9811320754717,
"grad_norm": 1.2003976158870355,
"learning_rate": 2.8008966016103532e-05,
"loss": 1.6002,
"step": 20060
},
{
"epoch": 757.7358490566038,
"grad_norm": 1.3873947326300384,
"learning_rate": 2.7940976405889962e-05,
"loss": 1.5892,
"step": 20080
},
{
"epoch": 758.4905660377359,
"grad_norm": 1.6648131685984493,
"learning_rate": 2.787304310108591e-05,
"loss": 1.6496,
"step": 20100
},
{
"epoch": 759.2452830188679,
"grad_norm": 1.4092462550250433,
"learning_rate": 2.780516636441591e-05,
"loss": 1.6222,
"step": 20120
},
{
"epoch": 760.0,
"grad_norm": 1.3221797397344044,
"learning_rate": 2.7737346458385732e-05,
"loss": 1.6276,
"step": 20140
},
{
"epoch": 760.7547169811321,
"grad_norm": 1.2328101453363856,
"learning_rate": 2.766958364528132e-05,
"loss": 1.6199,
"step": 20160
},
{
"epoch": 761.5094339622641,
"grad_norm": 1.198723191362267,
"learning_rate": 2.7601878187167865e-05,
"loss": 1.6028,
"step": 20180
},
{
"epoch": 762.2641509433962,
"grad_norm": 1.9424131363478752,
"learning_rate": 2.7534230345888686e-05,
"loss": 1.6155,
"step": 20200
},
{
"epoch": 763.0188679245283,
"grad_norm": 1.3568601924624037,
"learning_rate": 2.7466640383064343e-05,
"loss": 1.615,
"step": 20220
},
{
"epoch": 763.7735849056604,
"grad_norm": 1.6734295204532768,
"learning_rate": 2.7399108560091492e-05,
"loss": 1.6127,
"step": 20240
},
{
"epoch": 764.5283018867924,
"grad_norm": 1.3054727154474908,
"learning_rate": 2.7331635138141997e-05,
"loss": 1.6121,
"step": 20260
},
{
"epoch": 765.2830188679245,
"grad_norm": 1.4085434131191898,
"learning_rate": 2.7264220378161817e-05,
"loss": 1.5995,
"step": 20280
},
{
"epoch": 766.0377358490566,
"grad_norm": 1.2882798163186127,
"learning_rate": 2.719686454087006e-05,
"loss": 1.6209,
"step": 20300
},
{
"epoch": 766.7924528301887,
"grad_norm": 1.3843343328010425,
"learning_rate": 2.712956788675799e-05,
"loss": 1.6253,
"step": 20320
},
{
"epoch": 767.5471698113207,
"grad_norm": 1.2235858453276647,
"learning_rate": 2.7062330676087928e-05,
"loss": 1.5965,
"step": 20340
},
{
"epoch": 768.3018867924528,
"grad_norm": 3.572459256976869,
"learning_rate": 2.6995153168892342e-05,
"loss": 1.6146,
"step": 20360
},
{
"epoch": 769.0566037735849,
"grad_norm": 1.6994398915504043,
"learning_rate": 2.692803562497278e-05,
"loss": 1.6034,
"step": 20380
},
{
"epoch": 769.811320754717,
"grad_norm": 1.2122097844602269,
"learning_rate": 2.6860978303898913e-05,
"loss": 1.6133,
"step": 20400
},
{
"epoch": 770.566037735849,
"grad_norm": 1.5836773539567761,
"learning_rate": 2.6793981465007477e-05,
"loss": 1.6149,
"step": 20420
},
{
"epoch": 771.3207547169811,
"grad_norm": 1.9577284294586506,
"learning_rate": 2.6727045367401357e-05,
"loss": 1.6038,
"step": 20440
},
{
"epoch": 772.0754716981132,
"grad_norm": 1.453554282623515,
"learning_rate": 2.6660170269948445e-05,
"loss": 1.6425,
"step": 20460
},
{
"epoch": 772.8301886792453,
"grad_norm": 1.3031139346537821,
"learning_rate": 2.65933564312808e-05,
"loss": 1.5996,
"step": 20480
},
{
"epoch": 773.5849056603773,
"grad_norm": 1.2921513380534098,
"learning_rate": 2.6526604109793517e-05,
"loss": 1.6097,
"step": 20500
},
{
"epoch": 774.3396226415094,
"grad_norm": 1.2706024142950736,
"learning_rate": 2.6459913563643797e-05,
"loss": 1.6151,
"step": 20520
},
{
"epoch": 775.0943396226415,
"grad_norm": 1.3170228555500274,
"learning_rate": 2.6393285050749948e-05,
"loss": 1.6117,
"step": 20540
},
{
"epoch": 775.8490566037735,
"grad_norm": 1.4811497809397014,
"learning_rate": 2.6326718828790347e-05,
"loss": 1.6065,
"step": 20560
},
{
"epoch": 776.6037735849056,
"grad_norm": 1.3171774089155976,
"learning_rate": 2.6260215155202478e-05,
"loss": 1.5846,
"step": 20580
},
{
"epoch": 777.3584905660377,
"grad_norm": 1.4867958271178354,
"learning_rate": 2.6193774287181905e-05,
"loss": 1.6182,
"step": 20600
},
{
"epoch": 778.1132075471698,
"grad_norm": 1.485845846341643,
"learning_rate": 2.612739648168134e-05,
"loss": 1.618,
"step": 20620
},
{
"epoch": 778.8679245283018,
"grad_norm": 1.3411546152150449,
"learning_rate": 2.6061081995409594e-05,
"loss": 1.5979,
"step": 20640
},
{
"epoch": 779.622641509434,
"grad_norm": 1.352180099861608,
"learning_rate": 2.5994831084830585e-05,
"loss": 1.607,
"step": 20660
},
{
"epoch": 780.377358490566,
"grad_norm": 1.4317453454675355,
"learning_rate": 2.5928644006162356e-05,
"loss": 1.63,
"step": 20680
},
{
"epoch": 781.1320754716982,
"grad_norm": 1.3910753254665948,
"learning_rate": 2.5862521015376083e-05,
"loss": 1.6066,
"step": 20700
},
{
"epoch": 781.8867924528302,
"grad_norm": 1.3073180519851255,
"learning_rate": 2.579646236819513e-05,
"loss": 1.6064,
"step": 20720
},
{
"epoch": 782.6415094339623,
"grad_norm": 1.2595549167905473,
"learning_rate": 2.5730468320093977e-05,
"loss": 1.5911,
"step": 20740
},
{
"epoch": 783.3962264150944,
"grad_norm": 1.2678103789921547,
"learning_rate": 2.566453912629729e-05,
"loss": 1.5817,
"step": 20760
},
{
"epoch": 784.1509433962265,
"grad_norm": 1.5236492215060178,
"learning_rate": 2.5598675041778895e-05,
"loss": 1.6007,
"step": 20780
},
{
"epoch": 784.9056603773585,
"grad_norm": 1.4661421776894412,
"learning_rate": 2.553287632126086e-05,
"loss": 1.5504,
"step": 20800
},
{
"epoch": 785.6603773584906,
"grad_norm": 2.1927891520328635,
"learning_rate": 2.5467143219212452e-05,
"loss": 1.5841,
"step": 20820
},
{
"epoch": 786.4150943396227,
"grad_norm": 1.3795117819084444,
"learning_rate": 2.5401475989849135e-05,
"loss": 1.6066,
"step": 20840
},
{
"epoch": 787.1698113207547,
"grad_norm": 1.4556438165329462,
"learning_rate": 2.5335874887131648e-05,
"loss": 1.5968,
"step": 20860
},
{
"epoch": 787.9245283018868,
"grad_norm": 1.4073316916031215,
"learning_rate": 2.5270340164764954e-05,
"loss": 1.5903,
"step": 20880
},
{
"epoch": 788.6792452830189,
"grad_norm": 1.519045194155026,
"learning_rate": 2.5204872076197373e-05,
"loss": 1.6143,
"step": 20900
},
{
"epoch": 789.433962264151,
"grad_norm": 1.371854180935982,
"learning_rate": 2.513947087461945e-05,
"loss": 1.5956,
"step": 20920
},
{
"epoch": 790.188679245283,
"grad_norm": 1.3445443793198255,
"learning_rate": 2.5074136812963086e-05,
"loss": 1.6161,
"step": 20940
},
{
"epoch": 790.9433962264151,
"grad_norm": 1.3427364962397694,
"learning_rate": 2.5008870143900505e-05,
"loss": 1.5568,
"step": 20960
},
{
"epoch": 791.6981132075472,
"grad_norm": 1.2656549996025988,
"learning_rate": 2.4943671119843328e-05,
"loss": 1.5955,
"step": 20980
},
{
"epoch": 792.4528301886793,
"grad_norm": 1.4205258402430134,
"learning_rate": 2.4878539992941564e-05,
"loss": 1.5806,
"step": 21000
},
{
"epoch": 793.2075471698113,
"grad_norm": 1.6035321030423435,
"learning_rate": 2.4813477015082614e-05,
"loss": 1.6141,
"step": 21020
},
{
"epoch": 793.9622641509434,
"grad_norm": 1.411461969155631,
"learning_rate": 2.4748482437890327e-05,
"loss": 1.613,
"step": 21040
},
{
"epoch": 794.7169811320755,
"grad_norm": 1.5232357865305386,
"learning_rate": 2.4683556512724013e-05,
"loss": 1.5999,
"step": 21060
},
{
"epoch": 795.4716981132076,
"grad_norm": 1.423060839013135,
"learning_rate": 2.4618699490677522e-05,
"loss": 1.6014,
"step": 21080
},
{
"epoch": 796.2264150943396,
"grad_norm": 1.3310370009240546,
"learning_rate": 2.4553911622578173e-05,
"loss": 1.5633,
"step": 21100
},
{
"epoch": 796.9811320754717,
"grad_norm": 1.5449295211536895,
"learning_rate": 2.4489193158985862e-05,
"loss": 1.5948,
"step": 21120
},
{
"epoch": 797.7358490566038,
"grad_norm": 1.4953862144554202,
"learning_rate": 2.4424544350192054e-05,
"loss": 1.5576,
"step": 21140
},
{
"epoch": 798.4905660377359,
"grad_norm": 1.4322654299272977,
"learning_rate": 2.4359965446218893e-05,
"loss": 1.6043,
"step": 21160
},
{
"epoch": 799.2452830188679,
"grad_norm": 1.230403444648656,
"learning_rate": 2.4295456696818116e-05,
"loss": 1.5875,
"step": 21180
},
{
"epoch": 800.0,
"grad_norm": 1.2803680639521113,
"learning_rate": 2.423101835147014e-05,
"loss": 1.5929,
"step": 21200
},
{
"epoch": 800.7547169811321,
"grad_norm": 1.3170298641719804,
"learning_rate": 2.4166650659383118e-05,
"loss": 1.5807,
"step": 21220
},
{
"epoch": 801.5094339622641,
"grad_norm": 1.6269742919477346,
"learning_rate": 2.410235386949199e-05,
"loss": 1.6065,
"step": 21240
},
{
"epoch": 802.2641509433962,
"grad_norm": 1.5458337442207868,
"learning_rate": 2.4038128230457458e-05,
"loss": 1.5717,
"step": 21260
},
{
"epoch": 803.0188679245283,
"grad_norm": 2.161638931230412,
"learning_rate": 2.3973973990665043e-05,
"loss": 1.5762,
"step": 21280
},
{
"epoch": 803.7735849056604,
"grad_norm": 1.4046972399313973,
"learning_rate": 2.3909891398224146e-05,
"loss": 1.5661,
"step": 21300
},
{
"epoch": 804.5283018867924,
"grad_norm": 1.35050834441664,
"learning_rate": 2.3845880700967103e-05,
"loss": 1.5706,
"step": 21320
},
{
"epoch": 805.2830188679245,
"grad_norm": 1.5896148693041472,
"learning_rate": 2.3781942146448204e-05,
"loss": 1.5729,
"step": 21340
},
{
"epoch": 806.0377358490566,
"grad_norm": 1.5191801749378997,
"learning_rate": 2.3718075981942708e-05,
"loss": 1.5602,
"step": 21360
},
{
"epoch": 806.7924528301887,
"grad_norm": 1.1849456023631704,
"learning_rate": 2.3654282454445914e-05,
"loss": 1.5577,
"step": 21380
},
{
"epoch": 807.5471698113207,
"grad_norm": 1.6435607024595327,
"learning_rate": 2.3590561810672222e-05,
"loss": 1.5806,
"step": 21400
},
{
"epoch": 808.3018867924528,
"grad_norm": 2.2764964715817153,
"learning_rate": 2.3526914297054165e-05,
"loss": 1.5465,
"step": 21420
},
{
"epoch": 809.0566037735849,
"grad_norm": 1.143095634553467,
"learning_rate": 2.3463340159741438e-05,
"loss": 1.5608,
"step": 21440
},
{
"epoch": 809.811320754717,
"grad_norm": 1.786405965035776,
"learning_rate": 2.3399839644599966e-05,
"loss": 1.5685,
"step": 21460
},
{
"epoch": 810.566037735849,
"grad_norm": 1.7826688318895536,
"learning_rate": 2.3336412997210945e-05,
"loss": 1.5673,
"step": 21480
},
{
"epoch": 811.3207547169811,
"grad_norm": 1.9973557900265262,
"learning_rate": 2.3273060462869915e-05,
"loss": 1.58,
"step": 21500
},
{
"epoch": 812.0754716981132,
"grad_norm": 2.725826480276118,
"learning_rate": 2.320978228658578e-05,
"loss": 1.5798,
"step": 21520
},
{
"epoch": 812.8301886792453,
"grad_norm": 1.3785885365626125,
"learning_rate": 2.3146578713079873e-05,
"loss": 1.584,
"step": 21540
},
{
"epoch": 813.5849056603773,
"grad_norm": 1.3942585361528321,
"learning_rate": 2.308344998678499e-05,
"loss": 1.5801,
"step": 21560
},
{
"epoch": 814.3396226415094,
"grad_norm": 1.3778634108496939,
"learning_rate": 2.3020396351844476e-05,
"loss": 1.587,
"step": 21580
},
{
"epoch": 815.0943396226415,
"grad_norm": 1.3226870132637325,
"learning_rate": 2.2957418052111304e-05,
"loss": 1.5666,
"step": 21600
},
{
"epoch": 815.8490566037735,
"grad_norm": 1.4483636324260303,
"learning_rate": 2.2894515331147043e-05,
"loss": 1.5721,
"step": 21620
},
{
"epoch": 816.6037735849056,
"grad_norm": 1.2510733452498213,
"learning_rate": 2.2831688432220988e-05,
"loss": 1.5909,
"step": 21640
},
{
"epoch": 817.3584905660377,
"grad_norm": 1.4064726551185514,
"learning_rate": 2.2768937598309226e-05,
"loss": 1.5581,
"step": 21660
},
{
"epoch": 818.1132075471698,
"grad_norm": 1.3598025710319712,
"learning_rate": 2.2706263072093622e-05,
"loss": 1.5798,
"step": 21680
},
{
"epoch": 818.8679245283018,
"grad_norm": 1.4055798705480538,
"learning_rate": 2.2643665095960992e-05,
"loss": 1.5376,
"step": 21700
},
{
"epoch": 819.622641509434,
"grad_norm": 1.3886244820387288,
"learning_rate": 2.258114391200204e-05,
"loss": 1.588,
"step": 21720
},
{
"epoch": 820.377358490566,
"grad_norm": 1.672353853467523,
"learning_rate": 2.2518699762010527e-05,
"loss": 1.5771,
"step": 21740
},
{
"epoch": 821.1320754716982,
"grad_norm": 1.6122695109482281,
"learning_rate": 2.245633288748226e-05,
"loss": 1.5744,
"step": 21760
},
{
"epoch": 821.8867924528302,
"grad_norm": 1.2184243938930763,
"learning_rate": 2.239404352961424e-05,
"loss": 1.5579,
"step": 21780
},
{
"epoch": 822.6415094339623,
"grad_norm": 2.6739030707563383,
"learning_rate": 2.233183192930362e-05,
"loss": 1.5742,
"step": 21800
},
{
"epoch": 823.3962264150944,
"grad_norm": 1.513583565471533,
"learning_rate": 2.22696983271469e-05,
"loss": 1.5543,
"step": 21820
},
{
"epoch": 824.1509433962265,
"grad_norm": 1.5062076381870015,
"learning_rate": 2.2207642963438875e-05,
"loss": 1.5578,
"step": 21840
},
{
"epoch": 824.9056603773585,
"grad_norm": 1.4515191009181103,
"learning_rate": 2.2145666078171794e-05,
"loss": 1.5599,
"step": 21860
},
{
"epoch": 825.6603773584906,
"grad_norm": 1.7800885670540134,
"learning_rate": 2.2083767911034394e-05,
"loss": 1.5724,
"step": 21880
},
{
"epoch": 826.4150943396227,
"grad_norm": 1.655570469233021,
"learning_rate": 2.2021948701410956e-05,
"loss": 1.5722,
"step": 21900
},
{
"epoch": 827.1698113207547,
"grad_norm": 1.682338091450034,
"learning_rate": 2.1960208688380426e-05,
"loss": 1.5289,
"step": 21920
},
{
"epoch": 827.9245283018868,
"grad_norm": 1.3769805944636337,
"learning_rate": 2.189854811071546e-05,
"loss": 1.5523,
"step": 21940
},
{
"epoch": 828.6792452830189,
"grad_norm": 1.2988448014856364,
"learning_rate": 2.183696720688152e-05,
"loss": 1.5493,
"step": 21960
},
{
"epoch": 829.433962264151,
"grad_norm": 1.352528590030774,
"learning_rate": 2.1775466215035887e-05,
"loss": 1.5505,
"step": 21980
},
{
"epoch": 830.188679245283,
"grad_norm": 1.9587571716355492,
"learning_rate": 2.1714045373026878e-05,
"loss": 1.5611,
"step": 22000
},
{
"epoch": 830.9433962264151,
"grad_norm": 1.4092678213797292,
"learning_rate": 2.165270491839274e-05,
"loss": 1.5799,
"step": 22020
},
{
"epoch": 831.6981132075472,
"grad_norm": 1.2980309483736483,
"learning_rate": 2.159144508836092e-05,
"loss": 1.5409,
"step": 22040
},
{
"epoch": 832.4528301886793,
"grad_norm": 2.367411086569801,
"learning_rate": 2.1530266119847e-05,
"loss": 1.5565,
"step": 22060
},
{
"epoch": 833.2075471698113,
"grad_norm": 1.4677294354247894,
"learning_rate": 2.146916824945386e-05,
"loss": 1.567,
"step": 22080
},
{
"epoch": 833.9622641509434,
"grad_norm": 1.2034171336508228,
"learning_rate": 2.1408151713470727e-05,
"loss": 1.5324,
"step": 22100
},
{
"epoch": 834.7169811320755,
"grad_norm": 1.6112142759671855,
"learning_rate": 2.1347216747872316e-05,
"loss": 1.5728,
"step": 22120
},
{
"epoch": 835.4716981132076,
"grad_norm": 2.3612009460762025,
"learning_rate": 2.1286363588317815e-05,
"loss": 1.5777,
"step": 22140
},
{
"epoch": 836.2264150943396,
"grad_norm": 1.3794177780422423,
"learning_rate": 2.122559247015011e-05,
"loss": 1.5337,
"step": 22160
},
{
"epoch": 836.9811320754717,
"grad_norm": 1.4913217058342938,
"learning_rate": 2.116490362839475e-05,
"loss": 1.5712,
"step": 22180
},
{
"epoch": 837.7358490566038,
"grad_norm": 1.393269094002593,
"learning_rate": 2.1104297297759077e-05,
"loss": 1.56,
"step": 22200
},
{
"epoch": 838.4905660377359,
"grad_norm": 1.5277254368751014,
"learning_rate": 2.104377371263138e-05,
"loss": 1.564,
"step": 22220
},
{
"epoch": 839.2452830188679,
"grad_norm": 1.8220574387124733,
"learning_rate": 2.0983333107079923e-05,
"loss": 1.593,
"step": 22240
},
{
"epoch": 840.0,
"grad_norm": 1.4636327213867844,
"learning_rate": 2.0922975714852024e-05,
"loss": 1.5482,
"step": 22260
},
{
"epoch": 840.7547169811321,
"grad_norm": 1.374724993121681,
"learning_rate": 2.0862701769373194e-05,
"loss": 1.5386,
"step": 22280
},
{
"epoch": 841.5094339622641,
"grad_norm": 1.3056844963466483,
"learning_rate": 2.0802511503746282e-05,
"loss": 1.5499,
"step": 22300
},
{
"epoch": 842.2641509433962,
"grad_norm": 1.8941001751457995,
"learning_rate": 2.074240515075041e-05,
"loss": 1.5556,
"step": 22320
},
{
"epoch": 843.0188679245283,
"grad_norm": 1.5811456544096827,
"learning_rate": 2.0682382942840276e-05,
"loss": 1.5301,
"step": 22340
},
{
"epoch": 843.7735849056604,
"grad_norm": 1.6509929914813097,
"learning_rate": 2.062244511214511e-05,
"loss": 1.5114,
"step": 22360
},
{
"epoch": 844.5283018867924,
"grad_norm": 1.7262725135545645,
"learning_rate": 2.0562591890467795e-05,
"loss": 1.5771,
"step": 22380
},
{
"epoch": 845.2830188679245,
"grad_norm": 2.3494461416325176,
"learning_rate": 2.050282350928407e-05,
"loss": 1.5355,
"step": 22400
},
{
"epoch": 846.0377358490566,
"grad_norm": 1.5449531783263548,
"learning_rate": 2.0443140199741506e-05,
"loss": 1.5322,
"step": 22420
},
{
"epoch": 846.7924528301887,
"grad_norm": 1.6993440968380624,
"learning_rate": 2.0383542192658678e-05,
"loss": 1.5595,
"step": 22440
},
{
"epoch": 847.5471698113207,
"grad_norm": 1.4219970620295765,
"learning_rate": 2.0324029718524266e-05,
"loss": 1.544,
"step": 22460
},
{
"epoch": 848.3018867924528,
"grad_norm": 1.4581628071481192,
"learning_rate": 2.0264603007496174e-05,
"loss": 1.5504,
"step": 22480
},
{
"epoch": 849.0566037735849,
"grad_norm": 1.7218288706081564,
"learning_rate": 2.0205262289400635e-05,
"loss": 1.5329,
"step": 22500
},
{
"epoch": 849.811320754717,
"grad_norm": 1.557573117936356,
"learning_rate": 2.0146007793731277e-05,
"loss": 1.5413,
"step": 22520
},
{
"epoch": 850.566037735849,
"grad_norm": 1.556424340318002,
"learning_rate": 2.0086839749648294e-05,
"loss": 1.585,
"step": 22540
},
{
"epoch": 851.3207547169811,
"grad_norm": 1.5130697235799593,
"learning_rate": 2.002775838597753e-05,
"loss": 1.5365,
"step": 22560
},
{
"epoch": 852.0754716981132,
"grad_norm": 1.8393652727073544,
"learning_rate": 1.9968763931209628e-05,
"loss": 1.5459,
"step": 22580
},
{
"epoch": 852.8301886792453,
"grad_norm": 1.5587158507011118,
"learning_rate": 1.9909856613499096e-05,
"loss": 1.5429,
"step": 22600
},
{
"epoch": 853.5849056603773,
"grad_norm": 1.5786253886757977,
"learning_rate": 1.9851036660663427e-05,
"loss": 1.5293,
"step": 22620
},
{
"epoch": 854.3396226415094,
"grad_norm": 1.6955187366248636,
"learning_rate": 1.9792304300182305e-05,
"loss": 1.5488,
"step": 22640
},
{
"epoch": 855.0943396226415,
"grad_norm": 1.429545844614554,
"learning_rate": 1.9733659759196588e-05,
"loss": 1.533,
"step": 22660
},
{
"epoch": 855.8490566037735,
"grad_norm": 1.3624588099774164,
"learning_rate": 1.967510326450757e-05,
"loss": 1.5257,
"step": 22680
},
{
"epoch": 856.6037735849056,
"grad_norm": 1.4701659884745055,
"learning_rate": 1.9616635042575986e-05,
"loss": 1.5579,
"step": 22700
},
{
"epoch": 857.3584905660377,
"grad_norm": 1.258458227155755,
"learning_rate": 1.9558255319521186e-05,
"loss": 1.5174,
"step": 22720
},
{
"epoch": 858.1132075471698,
"grad_norm": 1.147380018733113,
"learning_rate": 1.9499964321120298e-05,
"loss": 1.5483,
"step": 22740
},
{
"epoch": 858.8679245283018,
"grad_norm": 1.427160544906616,
"learning_rate": 1.9441762272807296e-05,
"loss": 1.53,
"step": 22760
},
{
"epoch": 859.622641509434,
"grad_norm": 1.480555621655005,
"learning_rate": 1.9383649399672136e-05,
"loss": 1.5431,
"step": 22780
},
{
"epoch": 860.377358490566,
"grad_norm": 1.6140763796883943,
"learning_rate": 1.9325625926459906e-05,
"loss": 1.5372,
"step": 22800
},
{
"epoch": 861.1320754716982,
"grad_norm": 1.3659868727706357,
"learning_rate": 1.9267692077569966e-05,
"loss": 1.5693,
"step": 22820
},
{
"epoch": 861.8867924528302,
"grad_norm": 1.668704322839176,
"learning_rate": 1.9209848077055063e-05,
"loss": 1.5491,
"step": 22840
},
{
"epoch": 862.6415094339623,
"grad_norm": 1.6416845244091214,
"learning_rate": 1.915209414862045e-05,
"loss": 1.5449,
"step": 22860
},
{
"epoch": 863.3962264150944,
"grad_norm": 1.5619688603918687,
"learning_rate": 1.9094430515623036e-05,
"loss": 1.5109,
"step": 22880
},
{
"epoch": 864.1509433962265,
"grad_norm": 1.5251429637162535,
"learning_rate": 1.9036857401070517e-05,
"loss": 1.5358,
"step": 22900
},
{
"epoch": 864.9056603773585,
"grad_norm": 1.6195136008209567,
"learning_rate": 1.8979375027620553e-05,
"loss": 1.5167,
"step": 22920
},
{
"epoch": 865.6603773584906,
"grad_norm": 1.4453402703839808,
"learning_rate": 1.8921983617579843e-05,
"loss": 1.5345,
"step": 22940
},
{
"epoch": 866.4150943396227,
"grad_norm": 1.6142287693511135,
"learning_rate": 1.8864683392903296e-05,
"loss": 1.5427,
"step": 22960
},
{
"epoch": 867.1698113207547,
"grad_norm": 1.4589091367603184,
"learning_rate": 1.880747457519317e-05,
"loss": 1.4945,
"step": 22980
},
{
"epoch": 867.9245283018868,
"grad_norm": 1.485668957375296,
"learning_rate": 1.8750357385698233e-05,
"loss": 1.5278,
"step": 23000
},
{
"epoch": 868.6792452830189,
"grad_norm": 1.4865684774055008,
"learning_rate": 1.8693332045312905e-05,
"loss": 1.5178,
"step": 23020
},
{
"epoch": 869.433962264151,
"grad_norm": 1.6955473002125137,
"learning_rate": 1.8636398774576337e-05,
"loss": 1.5485,
"step": 23040
},
{
"epoch": 870.188679245283,
"grad_norm": 1.5715186186512253,
"learning_rate": 1.857955779367166e-05,
"loss": 1.5192,
"step": 23060
},
{
"epoch": 870.9433962264151,
"grad_norm": 1.5717069835325073,
"learning_rate": 1.8522809322425036e-05,
"loss": 1.5106,
"step": 23080
},
{
"epoch": 871.6981132075472,
"grad_norm": 1.3775027498551788,
"learning_rate": 1.8466153580304923e-05,
"loss": 1.5255,
"step": 23100
},
{
"epoch": 872.4528301886793,
"grad_norm": 1.7060704667189681,
"learning_rate": 1.8409590786421106e-05,
"loss": 1.5152,
"step": 23120
},
{
"epoch": 873.2075471698113,
"grad_norm": 1.3772746674273528,
"learning_rate": 1.8353121159523913e-05,
"loss": 1.4952,
"step": 23140
},
{
"epoch": 873.9622641509434,
"grad_norm": 1.6021480905291907,
"learning_rate": 1.8296744918003365e-05,
"loss": 1.5548,
"step": 23160
},
{
"epoch": 874.7169811320755,
"grad_norm": 1.6510954563611369,
"learning_rate": 1.8240462279888328e-05,
"loss": 1.5341,
"step": 23180
},
{
"epoch": 875.4716981132076,
"grad_norm": 1.5525128595509998,
"learning_rate": 1.8184273462845678e-05,
"loss": 1.5399,
"step": 23200
},
{
"epoch": 876.2264150943396,
"grad_norm": 1.3584051699815205,
"learning_rate": 1.812817868417943e-05,
"loss": 1.5245,
"step": 23220
},
{
"epoch": 876.9811320754717,
"grad_norm": 1.909931733744526,
"learning_rate": 1.8072178160829906e-05,
"loss": 1.5333,
"step": 23240
},
{
"epoch": 877.7358490566038,
"grad_norm": 1.7102569423853409,
"learning_rate": 1.8016272109372925e-05,
"loss": 1.5131,
"step": 23260
},
{
"epoch": 878.4905660377359,
"grad_norm": 2.4326218341752384,
"learning_rate": 1.7960460746018958e-05,
"loss": 1.4983,
"step": 23280
},
{
"epoch": 879.2452830188679,
"grad_norm": 1.6888708257619338,
"learning_rate": 1.790474428661225e-05,
"loss": 1.5268,
"step": 23300
},
{
"epoch": 880.0,
"grad_norm": 1.4793278776392822,
"learning_rate": 1.784912294663003e-05,
"loss": 1.5144,
"step": 23320
},
{
"epoch": 880.7547169811321,
"grad_norm": 1.3797110952325906,
"learning_rate": 1.7793596941181667e-05,
"loss": 1.5224,
"step": 23340
},
{
"epoch": 881.5094339622641,
"grad_norm": 1.5055338530715117,
"learning_rate": 1.7738166485007843e-05,
"loss": 1.5276,
"step": 23360
},
{
"epoch": 882.2641509433962,
"grad_norm": 1.3850071229139178,
"learning_rate": 1.768283179247969e-05,
"loss": 1.5216,
"step": 23380
},
{
"epoch": 883.0188679245283,
"grad_norm": 1.4681066166997387,
"learning_rate": 1.7627593077597997e-05,
"loss": 1.534,
"step": 23400
},
{
"epoch": 883.7735849056604,
"grad_norm": 1.5242995737679692,
"learning_rate": 1.7572450553992356e-05,
"loss": 1.4992,
"step": 23420
},
{
"epoch": 884.5283018867924,
"grad_norm": 1.642787390621851,
"learning_rate": 1.751740443492039e-05,
"loss": 1.5002,
"step": 23440
},
{
"epoch": 885.2830188679245,
"grad_norm": 1.490074296578881,
"learning_rate": 1.7462454933266846e-05,
"loss": 1.5211,
"step": 23460
},
{
"epoch": 886.0377358490566,
"grad_norm": 1.5694629977285655,
"learning_rate": 1.740760226154283e-05,
"loss": 1.5335,
"step": 23480
},
{
"epoch": 886.7924528301887,
"grad_norm": 1.6846894322403163,
"learning_rate": 1.7352846631884956e-05,
"loss": 1.4995,
"step": 23500
},
{
"epoch": 887.5471698113207,
"grad_norm": 1.4525398790667088,
"learning_rate": 1.7298188256054564e-05,
"loss": 1.4957,
"step": 23520
},
{
"epoch": 888.3018867924528,
"grad_norm": 1.635106498771857,
"learning_rate": 1.7243627345436874e-05,
"loss": 1.5271,
"step": 23540
},
{
"epoch": 889.0566037735849,
"grad_norm": 1.4587656230559394,
"learning_rate": 1.7189164111040147e-05,
"loss": 1.501,
"step": 23560
},
{
"epoch": 889.811320754717,
"grad_norm": 1.5410070982779924,
"learning_rate": 1.71347987634949e-05,
"loss": 1.4982,
"step": 23580
},
{
"epoch": 890.566037735849,
"grad_norm": 1.5645035336411055,
"learning_rate": 1.708053151305308e-05,
"loss": 1.5002,
"step": 23600
},
{
"epoch": 891.3207547169811,
"grad_norm": 1.3307742805961782,
"learning_rate": 1.702636256958728e-05,
"loss": 1.5184,
"step": 23620
},
{
"epoch": 892.0754716981132,
"grad_norm": 1.6962843737118656,
"learning_rate": 1.6972292142589877e-05,
"loss": 1.5107,
"step": 23640
},
{
"epoch": 892.8301886792453,
"grad_norm": 1.8950680189724871,
"learning_rate": 1.6918320441172233e-05,
"loss": 1.517,
"step": 23660
},
{
"epoch": 893.5849056603773,
"grad_norm": 1.7479434721374532,
"learning_rate": 1.686444767406395e-05,
"loss": 1.5051,
"step": 23680
},
{
"epoch": 894.3396226415094,
"grad_norm": 1.8611101959164753,
"learning_rate": 1.6810674049611953e-05,
"loss": 1.5063,
"step": 23700
},
{
"epoch": 895.0943396226415,
"grad_norm": 1.5841028344361991,
"learning_rate": 1.67569997757798e-05,
"loss": 1.481,
"step": 23720
},
{
"epoch": 895.8490566037735,
"grad_norm": 1.5025051335412982,
"learning_rate": 1.6703425060146778e-05,
"loss": 1.5253,
"step": 23740
},
{
"epoch": 896.6037735849056,
"grad_norm": 2.8439948944917757,
"learning_rate": 1.6649950109907165e-05,
"loss": 1.5216,
"step": 23760
},
{
"epoch": 897.3584905660377,
"grad_norm": 1.6268608502019901,
"learning_rate": 1.6596575131869387e-05,
"loss": 1.5334,
"step": 23780
},
{
"epoch": 898.1132075471698,
"grad_norm": 1.4759450457116179,
"learning_rate": 1.6543300332455273e-05,
"loss": 1.5007,
"step": 23800
},
{
"epoch": 898.8679245283018,
"grad_norm": 1.4818248018036755,
"learning_rate": 1.6490125917699203e-05,
"loss": 1.4973,
"step": 23820
},
{
"epoch": 899.622641509434,
"grad_norm": 1.548616527993675,
"learning_rate": 1.6437052093247303e-05,
"loss": 1.517,
"step": 23840
},
{
"epoch": 900.377358490566,
"grad_norm": 1.5445734121981956,
"learning_rate": 1.6384079064356744e-05,
"loss": 1.521,
"step": 23860
},
{
"epoch": 901.1320754716982,
"grad_norm": 1.5970555623190617,
"learning_rate": 1.6331207035894806e-05,
"loss": 1.5172,
"step": 23880
},
{
"epoch": 901.8867924528302,
"grad_norm": 1.389904429038452,
"learning_rate": 1.6278436212338226e-05,
"loss": 1.4987,
"step": 23900
},
{
"epoch": 902.6415094339623,
"grad_norm": 1.3455191149235926,
"learning_rate": 1.62257667977723e-05,
"loss": 1.5047,
"step": 23920
},
{
"epoch": 903.3962264150944,
"grad_norm": 1.4729168638466097,
"learning_rate": 1.6173198995890152e-05,
"loss": 1.5032,
"step": 23940
},
{
"epoch": 904.1509433962265,
"grad_norm": 1.5230989764955487,
"learning_rate": 1.612073300999191e-05,
"loss": 1.5244,
"step": 23960
},
{
"epoch": 904.9056603773585,
"grad_norm": 1.4504907356107584,
"learning_rate": 1.6068369042983987e-05,
"loss": 1.5072,
"step": 23980
},
{
"epoch": 905.6603773584906,
"grad_norm": 1.3570035581449431,
"learning_rate": 1.601610729737819e-05,
"loss": 1.5002,
"step": 24000
},
{
"epoch": 906.4150943396227,
"grad_norm": 1.408532335123701,
"learning_rate": 1.5963947975291056e-05,
"loss": 1.4974,
"step": 24020
},
{
"epoch": 907.1698113207547,
"grad_norm": 1.6703383627319723,
"learning_rate": 1.591189127844295e-05,
"loss": 1.5056,
"step": 24040
},
{
"epoch": 907.9245283018868,
"grad_norm": 1.4548307957349456,
"learning_rate": 1.5859937408157403e-05,
"loss": 1.4836,
"step": 24060
},
{
"epoch": 908.6792452830189,
"grad_norm": 1.622725332424491,
"learning_rate": 1.5808086565360235e-05,
"loss": 1.4652,
"step": 24080
},
{
"epoch": 909.433962264151,
"grad_norm": 1.9382762093036214,
"learning_rate": 1.575633895057883e-05,
"loss": 1.507,
"step": 24100
},
{
"epoch": 910.188679245283,
"grad_norm": 5.171486198720905,
"learning_rate": 1.5704694763941345e-05,
"loss": 1.4918,
"step": 24120
},
{
"epoch": 910.9433962264151,
"grad_norm": 1.318697524518072,
"learning_rate": 1.5653154205175963e-05,
"loss": 1.485,
"step": 24140
},
{
"epoch": 911.6981132075472,
"grad_norm": 1.640456368314345,
"learning_rate": 1.5601717473610066e-05,
"loss": 1.493,
"step": 24160
},
{
"epoch": 912.4528301886793,
"grad_norm": 1.7783411819352481,
"learning_rate": 1.555038476816951e-05,
"loss": 1.5233,
"step": 24180
},
{
"epoch": 913.2075471698113,
"grad_norm": 1.8560943552673308,
"learning_rate": 1.5499156287377857e-05,
"loss": 1.4845,
"step": 24200
},
{
"epoch": 913.9622641509434,
"grad_norm": 1.3922157561757162,
"learning_rate": 1.544803222935555e-05,
"loss": 1.513,
"step": 24220
},
{
"epoch": 914.7169811320755,
"grad_norm": 1.5964166307266414,
"learning_rate": 1.5397012791819248e-05,
"loss": 1.5029,
"step": 24240
},
{
"epoch": 915.4716981132076,
"grad_norm": 1.581271765982569,
"learning_rate": 1.5346098172080947e-05,
"loss": 1.5139,
"step": 24260
},
{
"epoch": 916.2264150943396,
"grad_norm": 1.3829789961056094,
"learning_rate": 1.5295288567047304e-05,
"loss": 1.4727,
"step": 24280
},
{
"epoch": 916.9811320754717,
"grad_norm": 1.595484488791353,
"learning_rate": 1.5244584173218816e-05,
"loss": 1.4764,
"step": 24300
},
{
"epoch": 917.7358490566038,
"grad_norm": 1.9817110984943331,
"learning_rate": 1.5193985186689126e-05,
"loss": 1.488,
"step": 24320
},
{
"epoch": 918.4905660377359,
"grad_norm": 1.5041365073617188,
"learning_rate": 1.5143491803144183e-05,
"loss": 1.4823,
"step": 24340
},
{
"epoch": 919.2452830188679,
"grad_norm": 1.623717820636255,
"learning_rate": 1.5093104217861574e-05,
"loss": 1.4711,
"step": 24360
},
{
"epoch": 920.0,
"grad_norm": 1.4153896302283269,
"learning_rate": 1.5042822625709687e-05,
"loss": 1.4729,
"step": 24380
},
{
"epoch": 920.7547169811321,
"grad_norm": 1.8914526627670851,
"learning_rate": 1.499264722114699e-05,
"loss": 1.4744,
"step": 24400
},
{
"epoch": 921.5094339622641,
"grad_norm": 1.3579367015171855,
"learning_rate": 1.494257819822132e-05,
"loss": 1.5068,
"step": 24420
},
{
"epoch": 922.2641509433962,
"grad_norm": 1.7241565511209502,
"learning_rate": 1.4892615750569062e-05,
"loss": 1.4629,
"step": 24440
},
{
"epoch": 923.0188679245283,
"grad_norm": 1.6169769566812962,
"learning_rate": 1.4842760071414446e-05,
"loss": 1.4987,
"step": 24460
},
{
"epoch": 923.7735849056604,
"grad_norm": 1.9954016377464863,
"learning_rate": 1.4793011353568764e-05,
"loss": 1.5263,
"step": 24480
},
{
"epoch": 924.5283018867924,
"grad_norm": 1.4779174235189176,
"learning_rate": 1.4743369789429686e-05,
"loss": 1.4769,
"step": 24500
},
{
"epoch": 925.2830188679245,
"grad_norm": 1.7019641943900714,
"learning_rate": 1.4693835570980468e-05,
"loss": 1.4749,
"step": 24520
},
{
"epoch": 926.0377358490566,
"grad_norm": 1.5323014302848716,
"learning_rate": 1.4644408889789189e-05,
"loss": 1.4984,
"step": 24540
},
{
"epoch": 926.7924528301887,
"grad_norm": 1.446942162217049,
"learning_rate": 1.4595089937008062e-05,
"loss": 1.4998,
"step": 24560
},
{
"epoch": 927.5471698113207,
"grad_norm": 1.3609927181175356,
"learning_rate": 1.4545878903372663e-05,
"loss": 1.4765,
"step": 24580
},
{
"epoch": 928.3018867924528,
"grad_norm": 1.4584582755904496,
"learning_rate": 1.4496775979201224e-05,
"loss": 1.4828,
"step": 24600
},
{
"epoch": 929.0566037735849,
"grad_norm": 1.4254389674669559,
"learning_rate": 1.444778135439385e-05,
"loss": 1.5041,
"step": 24620
},
{
"epoch": 929.811320754717,
"grad_norm": 1.5655038573484212,
"learning_rate": 1.4398895218431825e-05,
"loss": 1.4995,
"step": 24640
},
{
"epoch": 930.566037735849,
"grad_norm": 1.623569066402965,
"learning_rate": 1.4350117760376843e-05,
"loss": 1.4966,
"step": 24660
},
{
"epoch": 931.3207547169811,
"grad_norm": 1.594778698950599,
"learning_rate": 1.4301449168870325e-05,
"loss": 1.4899,
"step": 24680
},
{
"epoch": 932.0754716981132,
"grad_norm": 1.7627482209727463,
"learning_rate": 1.4252889632132667e-05,
"loss": 1.4784,
"step": 24700
},
{
"epoch": 932.8301886792453,
"grad_norm": 1.5595702425460922,
"learning_rate": 1.4204439337962486e-05,
"loss": 1.4962,
"step": 24720
},
{
"epoch": 933.5849056603773,
"grad_norm": 1.6175712268221147,
"learning_rate": 1.4156098473735903e-05,
"loss": 1.4858,
"step": 24740
},
{
"epoch": 934.3396226415094,
"grad_norm": 1.5528087670883148,
"learning_rate": 1.4107867226405882e-05,
"loss": 1.4959,
"step": 24760
},
{
"epoch": 935.0943396226415,
"grad_norm": 1.5105693139489524,
"learning_rate": 1.4059745782501403e-05,
"loss": 1.4694,
"step": 24780
},
{
"epoch": 935.8490566037735,
"grad_norm": 1.424625384350829,
"learning_rate": 1.4011734328126825e-05,
"loss": 1.4531,
"step": 24800
},
{
"epoch": 936.6037735849056,
"grad_norm": 1.921412092336305,
"learning_rate": 1.3963833048961103e-05,
"loss": 1.5003,
"step": 24820
},
{
"epoch": 937.3584905660377,
"grad_norm": 1.5289456190701718,
"learning_rate": 1.3916042130257145e-05,
"loss": 1.5177,
"step": 24840
},
{
"epoch": 938.1132075471698,
"grad_norm": 1.410017115369323,
"learning_rate": 1.3868361756841036e-05,
"loss": 1.4957,
"step": 24860
},
{
"epoch": 938.8679245283018,
"grad_norm": 1.3741594118478162,
"learning_rate": 1.3820792113111323e-05,
"loss": 1.4876,
"step": 24880
},
{
"epoch": 939.622641509434,
"grad_norm": 1.5111524219290895,
"learning_rate": 1.377333338303833e-05,
"loss": 1.4789,
"step": 24900
},
{
"epoch": 940.377358490566,
"grad_norm": 1.2690279082779223,
"learning_rate": 1.3725985750163418e-05,
"loss": 1.4851,
"step": 24920
},
{
"epoch": 941.1320754716982,
"grad_norm": 1.5760629816984877,
"learning_rate": 1.3678749397598337e-05,
"loss": 1.4993,
"step": 24940
},
{
"epoch": 941.8867924528302,
"grad_norm": 1.5719387109025893,
"learning_rate": 1.363162450802443e-05,
"loss": 1.4654,
"step": 24960
},
{
"epoch": 942.6415094339623,
"grad_norm": 1.51578687737706,
"learning_rate": 1.3584611263691974e-05,
"loss": 1.4985,
"step": 24980
},
{
"epoch": 943.3962264150944,
"grad_norm": 1.5864417766142165,
"learning_rate": 1.353770984641948e-05,
"loss": 1.4891,
"step": 25000
},
{
"epoch": 944.1509433962265,
"grad_norm": 1.5330683898736195,
"learning_rate": 1.3490920437592985e-05,
"loss": 1.4928,
"step": 25020
},
{
"epoch": 944.9056603773585,
"grad_norm": 1.8666313722767156,
"learning_rate": 1.344424321816535e-05,
"loss": 1.4558,
"step": 25040
},
{
"epoch": 945.6603773584906,
"grad_norm": 1.4103376741909914,
"learning_rate": 1.3397678368655534e-05,
"loss": 1.467,
"step": 25060
},
{
"epoch": 946.4150943396227,
"grad_norm": 1.6978974580611665,
"learning_rate": 1.3351226069147934e-05,
"loss": 1.4586,
"step": 25080
},
{
"epoch": 947.1698113207547,
"grad_norm": 1.3043741098462962,
"learning_rate": 1.3304886499291653e-05,
"loss": 1.4651,
"step": 25100
},
{
"epoch": 947.9245283018868,
"grad_norm": 1.5721530761043376,
"learning_rate": 1.3258659838299863e-05,
"loss": 1.4851,
"step": 25120
},
{
"epoch": 948.6792452830189,
"grad_norm": 2.445174125656233,
"learning_rate": 1.3212546264949038e-05,
"loss": 1.4861,
"step": 25140
},
{
"epoch": 949.433962264151,
"grad_norm": 3.0455557993861584,
"learning_rate": 1.3166545957578312e-05,
"loss": 1.4956,
"step": 25160
},
{
"epoch": 950.188679245283,
"grad_norm": 1.481231036001675,
"learning_rate": 1.3120659094088763e-05,
"loss": 1.4786,
"step": 25180
},
{
"epoch": 950.9433962264151,
"grad_norm": 1.6177001101633584,
"learning_rate": 1.3074885851942757e-05,
"loss": 1.4691,
"step": 25200
},
{
"epoch": 951.6981132075472,
"grad_norm": 1.7370265253795278,
"learning_rate": 1.3029226408163237e-05,
"loss": 1.456,
"step": 25220
},
{
"epoch": 952.4528301886793,
"grad_norm": 1.476098649785593,
"learning_rate": 1.2983680939333043e-05,
"loss": 1.457,
"step": 25240
},
{
"epoch": 953.2075471698113,
"grad_norm": 1.9700691780666086,
"learning_rate": 1.2938249621594219e-05,
"loss": 1.4916,
"step": 25260
},
{
"epoch": 953.9622641509434,
"grad_norm": 1.4124078828516038,
"learning_rate": 1.289293263064734e-05,
"loss": 1.4442,
"step": 25280
},
{
"epoch": 954.7169811320755,
"grad_norm": 1.609015057343637,
"learning_rate": 1.284773014175086e-05,
"loss": 1.4808,
"step": 25300
},
{
"epoch": 955.4716981132076,
"grad_norm": 1.545457288749583,
"learning_rate": 1.2802642329720385e-05,
"loss": 1.4388,
"step": 25320
},
{
"epoch": 956.2264150943396,
"grad_norm": 1.4137648487617847,
"learning_rate": 1.275766936892803e-05,
"loss": 1.4558,
"step": 25340
},
{
"epoch": 956.9811320754717,
"grad_norm": 1.7375121010804517,
"learning_rate": 1.2712811433301723e-05,
"loss": 1.4864,
"step": 25360
},
{
"epoch": 957.7358490566038,
"grad_norm": 2.170614678870875,
"learning_rate": 1.2668068696324572e-05,
"loss": 1.4668,
"step": 25380
},
{
"epoch": 958.4905660377359,
"grad_norm": 1.3921099231821001,
"learning_rate": 1.2623441331034153e-05,
"loss": 1.466,
"step": 25400
},
{
"epoch": 959.2452830188679,
"grad_norm": 1.763881906266782,
"learning_rate": 1.2578929510021851e-05,
"loss": 1.4556,
"step": 25420
},
{
"epoch": 960.0,
"grad_norm": 1.6251732366885816,
"learning_rate": 1.2534533405432192e-05,
"loss": 1.4831,
"step": 25440
},
{
"epoch": 960.7547169811321,
"grad_norm": 1.35568804382613,
"learning_rate": 1.2490253188962184e-05,
"loss": 1.4637,
"step": 25460
},
{
"epoch": 961.5094339622641,
"grad_norm": 1.5192686857357145,
"learning_rate": 1.2446089031860666e-05,
"loss": 1.5039,
"step": 25480
},
{
"epoch": 962.2641509433962,
"grad_norm": 1.645823339942095,
"learning_rate": 1.2402041104927622e-05,
"loss": 1.4643,
"step": 25500
},
{
"epoch": 963.0188679245283,
"grad_norm": 1.5266645922223165,
"learning_rate": 1.2358109578513502e-05,
"loss": 1.4609,
"step": 25520
},
{
"epoch": 963.7735849056604,
"grad_norm": 2.012096934939658,
"learning_rate": 1.2314294622518637e-05,
"loss": 1.4707,
"step": 25540
},
{
"epoch": 964.5283018867924,
"grad_norm": 1.6019652732905527,
"learning_rate": 1.227059640639251e-05,
"loss": 1.4624,
"step": 25560
},
{
"epoch": 965.2830188679245,
"grad_norm": 1.5459039987734797,
"learning_rate": 1.2227015099133119e-05,
"loss": 1.4462,
"step": 25580
},
{
"epoch": 966.0377358490566,
"grad_norm": 1.4581354369376407,
"learning_rate": 1.2183550869286346e-05,
"loss": 1.4602,
"step": 25600
},
{
"epoch": 966.7924528301887,
"grad_norm": 1.5627139982974774,
"learning_rate": 1.2140203884945257e-05,
"loss": 1.4558,
"step": 25620
},
{
"epoch": 967.5471698113207,
"grad_norm": 1.6163383081813927,
"learning_rate": 1.2096974313749544e-05,
"loss": 1.442,
"step": 25640
},
{
"epoch": 968.3018867924528,
"grad_norm": 1.4708485221948149,
"learning_rate": 1.2053862322884756e-05,
"loss": 1.4449,
"step": 25660
},
{
"epoch": 969.0566037735849,
"grad_norm": 1.457232110275896,
"learning_rate": 1.2010868079081735e-05,
"loss": 1.4714,
"step": 25680
},
{
"epoch": 969.811320754717,
"grad_norm": 2.130030633684405,
"learning_rate": 1.1967991748615972e-05,
"loss": 1.4672,
"step": 25700
},
{
"epoch": 970.566037735849,
"grad_norm": 1.6585416945015101,
"learning_rate": 1.1925233497306898e-05,
"loss": 1.4582,
"step": 25720
},
{
"epoch": 971.3207547169811,
"grad_norm": 1.696646559562477,
"learning_rate": 1.1882593490517333e-05,
"loss": 1.4616,
"step": 25740
},
{
"epoch": 972.0754716981132,
"grad_norm": 1.8347228047889477,
"learning_rate": 1.1840071893152767e-05,
"loss": 1.4412,
"step": 25760
},
{
"epoch": 972.8301886792453,
"grad_norm": 1.5105738469091443,
"learning_rate": 1.1797668869660753e-05,
"loss": 1.4476,
"step": 25780
},
{
"epoch": 973.5849056603773,
"grad_norm": 1.6402649798470197,
"learning_rate": 1.1755384584030287e-05,
"loss": 1.4458,
"step": 25800
},
{
"epoch": 974.3396226415094,
"grad_norm": 1.4580507747280478,
"learning_rate": 1.171321919979116e-05,
"loss": 1.4414,
"step": 25820
},
{
"epoch": 975.0943396226415,
"grad_norm": 1.8999226743757298,
"learning_rate": 1.1671172880013328e-05,
"loss": 1.4501,
"step": 25840
},
{
"epoch": 975.8490566037735,
"grad_norm": 1.3767670402035495,
"learning_rate": 1.1629245787306247e-05,
"loss": 1.4422,
"step": 25860
},
{
"epoch": 976.6037735849056,
"grad_norm": 1.3303378991562944,
"learning_rate": 1.158743808381832e-05,
"loss": 1.437,
"step": 25880
},
{
"epoch": 977.3584905660377,
"grad_norm": 1.5011235086965091,
"learning_rate": 1.1545749931236199e-05,
"loss": 1.4225,
"step": 25900
},
{
"epoch": 978.1132075471698,
"grad_norm": 1.7853875208460404,
"learning_rate": 1.1504181490784197e-05,
"loss": 1.4405,
"step": 25920
},
{
"epoch": 978.8679245283018,
"grad_norm": 1.4852022947554018,
"learning_rate": 1.1462732923223643e-05,
"loss": 1.4197,
"step": 25940
},
{
"epoch": 979.622641509434,
"grad_norm": 1.492057926353613,
"learning_rate": 1.1421404388852275e-05,
"loss": 1.4516,
"step": 25960
},
{
"epoch": 980.377358490566,
"grad_norm": 1.8767944270145316,
"learning_rate": 1.1380196047503614e-05,
"loss": 1.4613,
"step": 25980
},
{
"epoch": 981.1320754716982,
"grad_norm": 1.5723288438267475,
"learning_rate": 1.1339108058546365e-05,
"loss": 1.4636,
"step": 26000
},
{
"epoch": 981.8867924528302,
"grad_norm": 1.4572390965943247,
"learning_rate": 1.1298140580883752e-05,
"loss": 1.4291,
"step": 26020
},
{
"epoch": 982.6415094339623,
"grad_norm": 2.0340602707703566,
"learning_rate": 1.1257293772952971e-05,
"loss": 1.4342,
"step": 26040
},
{
"epoch": 983.3962264150944,
"grad_norm": 1.7563358001308935,
"learning_rate": 1.1216567792724513e-05,
"loss": 1.44,
"step": 26060
},
{
"epoch": 984.1509433962265,
"grad_norm": 1.7195863256249895,
"learning_rate": 1.1175962797701585e-05,
"loss": 1.473,
"step": 26080
},
{
"epoch": 984.9056603773585,
"grad_norm": 1.5325109929141458,
"learning_rate": 1.1135478944919515e-05,
"loss": 1.4537,
"step": 26100
},
{
"epoch": 985.6603773584906,
"grad_norm": 1.4246338183010563,
"learning_rate": 1.1095116390945116e-05,
"loss": 1.4576,
"step": 26120
},
{
"epoch": 986.4150943396227,
"grad_norm": 1.5264334254918077,
"learning_rate": 1.1054875291876081e-05,
"loss": 1.4355,
"step": 26140
},
{
"epoch": 987.1698113207547,
"grad_norm": 1.7871427472844674,
"learning_rate": 1.101475580334039e-05,
"loss": 1.4285,
"step": 26160
},
{
"epoch": 987.9245283018868,
"grad_norm": 1.628111810825388,
"learning_rate": 1.0974758080495742e-05,
"loss": 1.432,
"step": 26180
},
{
"epoch": 988.6792452830189,
"grad_norm": 1.6079918141380485,
"learning_rate": 1.0934882278028875e-05,
"loss": 1.473,
"step": 26200
},
{
"epoch": 989.433962264151,
"grad_norm": 1.9227955059143975,
"learning_rate": 1.0895128550155048e-05,
"loss": 1.4319,
"step": 26220
},
{
"epoch": 990.188679245283,
"grad_norm": 1.4777834491856459,
"learning_rate": 1.0855497050617383e-05,
"loss": 1.4715,
"step": 26240
},
{
"epoch": 990.9433962264151,
"grad_norm": 1.752347342407413,
"learning_rate": 1.0815987932686322e-05,
"loss": 1.4483,
"step": 26260
},
{
"epoch": 991.6981132075472,
"grad_norm": 1.7965242738400287,
"learning_rate": 1.0776601349158992e-05,
"loss": 1.445,
"step": 26280
},
{
"epoch": 992.4528301886793,
"grad_norm": 1.6880482866877031,
"learning_rate": 1.0737337452358643e-05,
"loss": 1.4289,
"step": 26300
},
{
"epoch": 993.2075471698113,
"grad_norm": 1.3587051959850933,
"learning_rate": 1.0698196394134027e-05,
"loss": 1.4248,
"step": 26320
},
{
"epoch": 993.9622641509434,
"grad_norm": 1.6893835419836905,
"learning_rate": 1.0659178325858868e-05,
"loss": 1.4593,
"step": 26340
},
{
"epoch": 994.7169811320755,
"grad_norm": 1.6372424305822535,
"learning_rate": 1.0620283398431196e-05,
"loss": 1.4248,
"step": 26360
},
{
"epoch": 995.4716981132076,
"grad_norm": 1.628959331603337,
"learning_rate": 1.0581511762272856e-05,
"loss": 1.459,
"step": 26380
},
{
"epoch": 996.2264150943396,
"grad_norm": 1.9899303146490552,
"learning_rate": 1.0542863567328837e-05,
"loss": 1.4608,
"step": 26400
},
{
"epoch": 996.9811320754717,
"grad_norm": 1.6980987241375505,
"learning_rate": 1.0504338963066745e-05,
"loss": 1.4489,
"step": 26420
},
{
"epoch": 997.7358490566038,
"grad_norm": 1.791483449843248,
"learning_rate": 1.0465938098476226e-05,
"loss": 1.4647,
"step": 26440
},
{
"epoch": 998.4905660377359,
"grad_norm": 1.3823874629634854,
"learning_rate": 1.0427661122068363e-05,
"loss": 1.431,
"step": 26460
},
{
"epoch": 999.2452830188679,
"grad_norm": 1.7547951381187532,
"learning_rate": 1.0389508181875114e-05,
"loss": 1.4374,
"step": 26480
},
{
"epoch": 1000.0,
"grad_norm": 1.6329317283212297,
"learning_rate": 1.035147942544874e-05,
"loss": 1.4436,
"step": 26500
},
{
"epoch": 1000.7547169811321,
"grad_norm": 1.482848334089,
"learning_rate": 1.0313574999861255e-05,
"loss": 1.4263,
"step": 26520
},
{
"epoch": 1001.5094339622641,
"grad_norm": 1.4085297987389735,
"learning_rate": 1.027579505170381e-05,
"loss": 1.4423,
"step": 26540
},
{
"epoch": 1002.2641509433962,
"grad_norm": 1.586157768854042,
"learning_rate": 1.0238139727086178e-05,
"loss": 1.4289,
"step": 26560
},
{
"epoch": 1003.0188679245283,
"grad_norm": 1.4910507620311724,
"learning_rate": 1.020060917163614e-05,
"loss": 1.4555,
"step": 26580
},
{
"epoch": 1003.7735849056604,
"grad_norm": 1.7298473240434828,
"learning_rate": 1.0163203530498955e-05,
"loss": 1.4176,
"step": 26600
},
{
"epoch": 1004.5283018867924,
"grad_norm": 1.9395741512745615,
"learning_rate": 1.0125922948336813e-05,
"loss": 1.4297,
"step": 26620
},
{
"epoch": 1005.2830188679245,
"grad_norm": 1.3752095871887702,
"learning_rate": 1.0088767569328215e-05,
"loss": 1.4224,
"step": 26640
},
{
"epoch": 1006.0377358490566,
"grad_norm": 1.6566420053219757,
"learning_rate": 1.0051737537167479e-05,
"loss": 1.4416,
"step": 26660
},
{
"epoch": 1006.7924528301887,
"grad_norm": 1.8401842062612699,
"learning_rate": 1.001483299506413e-05,
"loss": 1.4406,
"step": 26680
},
{
"epoch": 1007.5471698113207,
"grad_norm": 1.5895021822365676,
"learning_rate": 9.978054085742407e-06,
"loss": 1.4104,
"step": 26700
},
{
"epoch": 1008.3018867924528,
"grad_norm": 1.5495688189805843,
"learning_rate": 9.941400951440674e-06,
"loss": 1.4446,
"step": 26720
},
{
"epoch": 1009.0566037735849,
"grad_norm": 1.6376917222270109,
"learning_rate": 9.904873733910852e-06,
"loss": 1.4023,
"step": 26740
},
{
"epoch": 1009.811320754717,
"grad_norm": 1.7729521919831477,
"learning_rate": 9.868472574417906e-06,
"loss": 1.4409,
"step": 26760
},
{
"epoch": 1010.566037735849,
"grad_norm": 1.5909106157325896,
"learning_rate": 9.832197613739278e-06,
"loss": 1.4284,
"step": 26780
},
{
"epoch": 1011.3207547169811,
"grad_norm": 1.5416992698357255,
"learning_rate": 9.79604899216437e-06,
"loss": 1.4165,
"step": 26800
},
{
"epoch": 1012.0754716981132,
"grad_norm": 1.7245150906399498,
"learning_rate": 9.760026849493962e-06,
"loss": 1.4281,
"step": 26820
},
{
"epoch": 1012.8301886792453,
"grad_norm": 1.8518007110272525,
"learning_rate": 9.7241313250397e-06,
"loss": 1.4223,
"step": 26840
},
{
"epoch": 1013.5849056603773,
"grad_norm": 1.593106128312966,
"learning_rate": 9.688362557623527e-06,
"loss": 1.4377,
"step": 26860
},
{
"epoch": 1014.3396226415094,
"grad_norm": 1.6557177655883284,
"learning_rate": 9.6527206855772e-06,
"loss": 1.4394,
"step": 26880
},
{
"epoch": 1015.0943396226415,
"grad_norm": 1.5950355314495743,
"learning_rate": 9.617205846741719e-06,
"loss": 1.4506,
"step": 26900
},
{
"epoch": 1015.8490566037735,
"grad_norm": 1.7685274450403552,
"learning_rate": 9.58181817846677e-06,
"loss": 1.4484,
"step": 26920
},
{
"epoch": 1016.6037735849056,
"grad_norm": 1.4639040403309866,
"learning_rate": 9.54655781761023e-06,
"loss": 1.4043,
"step": 26940
},
{
"epoch": 1017.3584905660377,
"grad_norm": 1.6074583945207908,
"learning_rate": 9.511424900537656e-06,
"loss": 1.4197,
"step": 26960
},
{
"epoch": 1018.1132075471698,
"grad_norm": 1.5459146912367183,
"learning_rate": 9.476419563121698e-06,
"loss": 1.4232,
"step": 26980
},
{
"epoch": 1018.8679245283018,
"grad_norm": 1.6166722954994783,
"learning_rate": 9.441541940741613e-06,
"loss": 1.4407,
"step": 27000
},
{
"epoch": 1019.622641509434,
"grad_norm": 1.6533674302686083,
"learning_rate": 9.406792168282739e-06,
"loss": 1.4393,
"step": 27020
},
{
"epoch": 1020.377358490566,
"grad_norm": 2.1409264555789123,
"learning_rate": 9.37217038013597e-06,
"loss": 1.4507,
"step": 27040
},
{
"epoch": 1021.1320754716982,
"grad_norm": 1.9876202106584275,
"learning_rate": 9.337676710197243e-06,
"loss": 1.4486,
"step": 27060
},
{
"epoch": 1021.8867924528302,
"grad_norm": 1.6321392819191982,
"learning_rate": 9.303311291866996e-06,
"loss": 1.4337,
"step": 27080
},
{
"epoch": 1022.6415094339623,
"grad_norm": 1.5614664744291826,
"learning_rate": 9.269074258049671e-06,
"loss": 1.4245,
"step": 27100
},
{
"epoch": 1023.3962264150944,
"grad_norm": 1.775529049395487,
"learning_rate": 9.234965741153195e-06,
"loss": 1.4284,
"step": 27120
},
{
"epoch": 1024.1509433962265,
"grad_norm": 1.4430739083306536,
"learning_rate": 9.200985873088487e-06,
"loss": 1.4235,
"step": 27140
},
{
"epoch": 1024.9056603773586,
"grad_norm": 2.0811882500763255,
"learning_rate": 9.167134785268918e-06,
"loss": 1.402,
"step": 27160
},
{
"epoch": 1025.6603773584907,
"grad_norm": 1.5403915703954525,
"learning_rate": 9.133412608609811e-06,
"loss": 1.4302,
"step": 27180
},
{
"epoch": 1026.4150943396226,
"grad_norm": 1.9685065156678565,
"learning_rate": 9.099819473527936e-06,
"loss": 1.3969,
"step": 27200
},
{
"epoch": 1027.1698113207547,
"grad_norm": 1.5336587010545035,
"learning_rate": 9.066355509941036e-06,
"loss": 1.428,
"step": 27220
},
{
"epoch": 1027.9245283018868,
"grad_norm": 1.9045363331404057,
"learning_rate": 9.033020847267277e-06,
"loss": 1.4521,
"step": 27240
},
{
"epoch": 1028.6792452830189,
"grad_norm": 1.7010720746106325,
"learning_rate": 8.999815614424768e-06,
"loss": 1.4408,
"step": 27260
},
{
"epoch": 1029.433962264151,
"grad_norm": 1.6652770284797922,
"learning_rate": 8.966739939831065e-06,
"loss": 1.4275,
"step": 27280
},
{
"epoch": 1030.188679245283,
"grad_norm": 1.438920885601344,
"learning_rate": 8.933793951402666e-06,
"loss": 1.4363,
"step": 27300
},
{
"epoch": 1030.9433962264152,
"grad_norm": 1.523374273868093,
"learning_rate": 8.900977776554543e-06,
"loss": 1.4178,
"step": 27320
},
{
"epoch": 1031.698113207547,
"grad_norm": 1.9388166404138083,
"learning_rate": 8.868291542199601e-06,
"loss": 1.4339,
"step": 27340
},
{
"epoch": 1032.4528301886792,
"grad_norm": 1.910046684059762,
"learning_rate": 8.835735374748235e-06,
"loss": 1.407,
"step": 27360
},
{
"epoch": 1033.2075471698113,
"grad_norm": 1.5548634820286755,
"learning_rate": 8.803309400107802e-06,
"loss": 1.4183,
"step": 27380
},
{
"epoch": 1033.9622641509434,
"grad_norm": 1.5932417218331991,
"learning_rate": 8.771013743682171e-06,
"loss": 1.4447,
"step": 27400
},
{
"epoch": 1034.7169811320755,
"grad_norm": 1.4796581852592556,
"learning_rate": 8.738848530371221e-06,
"loss": 1.3946,
"step": 27420
},
{
"epoch": 1035.4716981132076,
"grad_norm": 1.6106803868616077,
"learning_rate": 8.706813884570337e-06,
"loss": 1.4152,
"step": 27440
},
{
"epoch": 1036.2264150943397,
"grad_norm": 1.5383725584269896,
"learning_rate": 8.674909930169968e-06,
"loss": 1.4344,
"step": 27460
},
{
"epoch": 1036.9811320754718,
"grad_norm": 1.6971458233324348,
"learning_rate": 8.643136790555101e-06,
"loss": 1.42,
"step": 27480
},
{
"epoch": 1037.7358490566037,
"grad_norm": 1.7975384013574476,
"learning_rate": 8.61149458860486e-06,
"loss": 1.4456,
"step": 27500
},
{
"epoch": 1038.4905660377358,
"grad_norm": 1.5540181334521903,
"learning_rate": 8.579983446691931e-06,
"loss": 1.3976,
"step": 27520
},
{
"epoch": 1039.245283018868,
"grad_norm": 1.7107813027346386,
"learning_rate": 8.548603486682165e-06,
"loss": 1.4119,
"step": 27540
},
{
"epoch": 1040.0,
"grad_norm": 1.7225563012589893,
"learning_rate": 8.517354829934086e-06,
"loss": 1.4347,
"step": 27560
},
{
"epoch": 1040.754716981132,
"grad_norm": 1.6396983385388997,
"learning_rate": 8.486237597298396e-06,
"loss": 1.4076,
"step": 27580
},
{
"epoch": 1041.5094339622642,
"grad_norm": 1.59607993020723,
"learning_rate": 8.455251909117562e-06,
"loss": 1.391,
"step": 27600
},
{
"epoch": 1042.2641509433963,
"grad_norm": 1.6787714792885464,
"learning_rate": 8.424397885225284e-06,
"loss": 1.4319,
"step": 27620
},
{
"epoch": 1043.0188679245282,
"grad_norm": 1.514103336557697,
"learning_rate": 8.39367564494608e-06,
"loss": 1.4282,
"step": 27640
},
{
"epoch": 1043.7735849056603,
"grad_norm": 1.6827281624065857,
"learning_rate": 8.3630853070948e-06,
"loss": 1.4268,
"step": 27660
},
{
"epoch": 1044.5283018867924,
"grad_norm": 1.5242384493420091,
"learning_rate": 8.332626989976201e-06,
"loss": 1.394,
"step": 27680
},
{
"epoch": 1045.2830188679245,
"grad_norm": 1.5477899241579378,
"learning_rate": 8.302300811384443e-06,
"loss": 1.4188,
"step": 27700
},
{
"epoch": 1046.0377358490566,
"grad_norm": 1.7533265453937938,
"learning_rate": 8.272106888602644e-06,
"loss": 1.4147,
"step": 27720
},
{
"epoch": 1046.7924528301887,
"grad_norm": 1.7810905836721207,
"learning_rate": 8.242045338402464e-06,
"loss": 1.4249,
"step": 27740
},
{
"epoch": 1047.5471698113208,
"grad_norm": 1.6994451629715164,
"learning_rate": 8.212116277043624e-06,
"loss": 1.4087,
"step": 27760
},
{
"epoch": 1048.301886792453,
"grad_norm": 1.5273771258038336,
"learning_rate": 8.18231982027344e-06,
"loss": 1.4105,
"step": 27780
},
{
"epoch": 1049.0566037735848,
"grad_norm": 1.7986470388936215,
"learning_rate": 8.15265608332641e-06,
"loss": 1.417,
"step": 27800
},
{
"epoch": 1049.811320754717,
"grad_norm": 3.7362962798847605,
"learning_rate": 8.123125180923732e-06,
"loss": 1.4428,
"step": 27820
},
{
"epoch": 1050.566037735849,
"grad_norm": 1.4871345729412693,
"learning_rate": 8.093727227272918e-06,
"loss": 1.3913,
"step": 27840
},
{
"epoch": 1051.3207547169811,
"grad_norm": 1.6862935331038202,
"learning_rate": 8.064462336067288e-06,
"loss": 1.4099,
"step": 27860
},
{
"epoch": 1052.0754716981132,
"grad_norm": 1.5729155867984972,
"learning_rate": 8.03533062048555e-06,
"loss": 1.3896,
"step": 27880
},
{
"epoch": 1052.8301886792453,
"grad_norm": 1.7312033654611378,
"learning_rate": 8.006332193191406e-06,
"loss": 1.4183,
"step": 27900
},
{
"epoch": 1053.5849056603774,
"grad_norm": 1.737310060702965,
"learning_rate": 7.977467166333041e-06,
"loss": 1.4098,
"step": 27920
},
{
"epoch": 1054.3396226415093,
"grad_norm": 1.787345801838152,
"learning_rate": 7.948735651542762e-06,
"loss": 1.4472,
"step": 27940
},
{
"epoch": 1055.0943396226414,
"grad_norm": 1.6643759736424013,
"learning_rate": 7.920137759936503e-06,
"loss": 1.4248,
"step": 27960
},
{
"epoch": 1055.8490566037735,
"grad_norm": 1.665184448890738,
"learning_rate": 7.891673602113444e-06,
"loss": 1.4184,
"step": 27980
},
{
"epoch": 1056.6037735849056,
"grad_norm": 1.4651905410431068,
"learning_rate": 7.863343288155553e-06,
"loss": 1.4117,
"step": 28000
},
{
"epoch": 1057.3584905660377,
"grad_norm": 1.761583496091816,
"learning_rate": 7.835146927627195e-06,
"loss": 1.4173,
"step": 28020
},
{
"epoch": 1058.1132075471698,
"grad_norm": 1.4468036902445778,
"learning_rate": 7.807084629574648e-06,
"loss": 1.3899,
"step": 28040
},
{
"epoch": 1058.867924528302,
"grad_norm": 1.9317915574764288,
"learning_rate": 7.779156502525752e-06,
"loss": 1.4283,
"step": 28060
},
{
"epoch": 1059.622641509434,
"grad_norm": 1.6586645034969292,
"learning_rate": 7.751362654489442e-06,
"loss": 1.3729,
"step": 28080
},
{
"epoch": 1060.377358490566,
"grad_norm": 1.54736903517111,
"learning_rate": 7.72370319295533e-06,
"loss": 1.4323,
"step": 28100
},
{
"epoch": 1061.132075471698,
"grad_norm": 1.7410908156190221,
"learning_rate": 7.696178224893333e-06,
"loss": 1.4446,
"step": 28120
},
{
"epoch": 1061.8867924528302,
"grad_norm": 1.5846972848377703,
"learning_rate": 7.668787856753206e-06,
"loss": 1.4069,
"step": 28140
},
{
"epoch": 1062.6415094339623,
"grad_norm": 2.0032825052950005,
"learning_rate": 7.641532194464159e-06,
"loss": 1.4091,
"step": 28160
},
{
"epoch": 1063.3962264150944,
"grad_norm": 1.5526416600245057,
"learning_rate": 7.6144113434344445e-06,
"loss": 1.3988,
"step": 28180
},
{
"epoch": 1064.1509433962265,
"grad_norm": 1.6399869572854062,
"learning_rate": 7.587425408550953e-06,
"loss": 1.4317,
"step": 28200
},
{
"epoch": 1064.9056603773586,
"grad_norm": 2.218545819761043,
"learning_rate": 7.560574494178785e-06,
"loss": 1.4166,
"step": 28220
},
{
"epoch": 1065.6603773584907,
"grad_norm": 1.610893838079929,
"learning_rate": 7.5338587041608855e-06,
"loss": 1.4034,
"step": 28240
},
{
"epoch": 1066.4150943396226,
"grad_norm": 1.901849515787354,
"learning_rate": 7.507278141817603e-06,
"loss": 1.4082,
"step": 28260
},
{
"epoch": 1067.1698113207547,
"grad_norm": 1.9915752693535391,
"learning_rate": 7.4808329099463165e-06,
"loss": 1.4202,
"step": 28280
},
{
"epoch": 1067.9245283018868,
"grad_norm": 2.337231756702343,
"learning_rate": 7.454523110821034e-06,
"loss": 1.4033,
"step": 28300
},
{
"epoch": 1068.6792452830189,
"grad_norm": 1.4499700621594815,
"learning_rate": 7.428348846191982e-06,
"loss": 1.4106,
"step": 28320
},
{
"epoch": 1069.433962264151,
"grad_norm": 1.7981102056016145,
"learning_rate": 7.402310217285226e-06,
"loss": 1.4061,
"step": 28340
},
{
"epoch": 1070.188679245283,
"grad_norm": 1.7129433355903898,
"learning_rate": 7.376407324802275e-06,
"loss": 1.4019,
"step": 28360
},
{
"epoch": 1070.9433962264152,
"grad_norm": 1.5382026111028457,
"learning_rate": 7.350640268919691e-06,
"loss": 1.4197,
"step": 28380
},
{
"epoch": 1071.698113207547,
"grad_norm": 1.7225324354326523,
"learning_rate": 7.325009149288721e-06,
"loss": 1.4061,
"step": 28400
},
{
"epoch": 1072.4528301886792,
"grad_norm": 1.9701222408661871,
"learning_rate": 7.299514065034864e-06,
"loss": 1.399,
"step": 28420
},
{
"epoch": 1073.2075471698113,
"grad_norm": 2.560013262107365,
"learning_rate": 7.2741551147575365e-06,
"loss": 1.4011,
"step": 28440
},
{
"epoch": 1073.9622641509434,
"grad_norm": 1.7468598350718882,
"learning_rate": 7.248932396529666e-06,
"loss": 1.3906,
"step": 28460
},
{
"epoch": 1074.7169811320755,
"grad_norm": 1.5217037013529344,
"learning_rate": 7.223846007897321e-06,
"loss": 1.3824,
"step": 28480
},
{
"epoch": 1075.4716981132076,
"grad_norm": 1.9246360758156291,
"learning_rate": 7.198896045879323e-06,
"loss": 1.401,
"step": 28500
},
{
"epoch": 1076.2264150943397,
"grad_norm": 1.6887933139540061,
"learning_rate": 7.174082606966883e-06,
"loss": 1.4025,
"step": 28520
},
{
"epoch": 1076.9811320754718,
"grad_norm": 1.6294766788073725,
"learning_rate": 7.149405787123236e-06,
"loss": 1.3986,
"step": 28540
},
{
"epoch": 1077.7358490566037,
"grad_norm": 1.5618807274404587,
"learning_rate": 7.124865681783234e-06,
"loss": 1.4005,
"step": 28560
},
{
"epoch": 1078.4905660377358,
"grad_norm": 1.6678211596916697,
"learning_rate": 7.100462385853021e-06,
"loss": 1.4071,
"step": 28580
},
{
"epoch": 1079.245283018868,
"grad_norm": 1.9223978868928677,
"learning_rate": 7.07619599370964e-06,
"loss": 1.4135,
"step": 28600
},
{
"epoch": 1080.0,
"grad_norm": 1.6632265815235145,
"learning_rate": 7.052066599200659e-06,
"loss": 1.3882,
"step": 28620
},
{
"epoch": 1080.754716981132,
"grad_norm": 1.6022030717394165,
"learning_rate": 7.028074295643851e-06,
"loss": 1.3972,
"step": 28640
},
{
"epoch": 1081.5094339622642,
"grad_norm": 1.4991746539828543,
"learning_rate": 7.004219175826785e-06,
"loss": 1.382,
"step": 28660
},
{
"epoch": 1082.2641509433963,
"grad_norm": 1.6838520383575963,
"learning_rate": 6.9805013320064956e-06,
"loss": 1.4146,
"step": 28680
},
{
"epoch": 1083.0188679245282,
"grad_norm": 1.8350778781710608,
"learning_rate": 6.9569208559091e-06,
"loss": 1.4138,
"step": 28700
},
{
"epoch": 1083.7735849056603,
"grad_norm": 1.5249940477637465,
"learning_rate": 6.9334778387294835e-06,
"loss": 1.403,
"step": 28720
},
{
"epoch": 1084.5283018867924,
"grad_norm": 1.4543697117371763,
"learning_rate": 6.910172371130925e-06,
"loss": 1.4115,
"step": 28740
},
{
"epoch": 1085.2830188679245,
"grad_norm": 1.8878771205671918,
"learning_rate": 6.8870045432447285e-06,
"loss": 1.3783,
"step": 28760
},
{
"epoch": 1086.0377358490566,
"grad_norm": 1.6650946199070653,
"learning_rate": 6.8639744446698945e-06,
"loss": 1.4065,
"step": 28780
},
{
"epoch": 1086.7924528301887,
"grad_norm": 1.9063799347508024,
"learning_rate": 6.84108216447278e-06,
"loss": 1.3896,
"step": 28800
},
{
"epoch": 1087.5471698113208,
"grad_norm": 1.7745103676453513,
"learning_rate": 6.818327791186747e-06,
"loss": 1.4068,
"step": 28820
},
{
"epoch": 1088.301886792453,
"grad_norm": 1.6208415487366228,
"learning_rate": 6.795711412811805e-06,
"loss": 1.3827,
"step": 28840
},
{
"epoch": 1089.0566037735848,
"grad_norm": 1.4568669649899233,
"learning_rate": 6.773233116814289e-06,
"loss": 1.3918,
"step": 28860
},
{
"epoch": 1089.811320754717,
"grad_norm": 1.861515176168054,
"learning_rate": 6.750892990126514e-06,
"loss": 1.3901,
"step": 28880
},
{
"epoch": 1090.566037735849,
"grad_norm": 1.7283660067362911,
"learning_rate": 6.728691119146446e-06,
"loss": 1.4157,
"step": 28900
},
{
"epoch": 1091.3207547169811,
"grad_norm": 1.679598340558233,
"learning_rate": 6.706627589737369e-06,
"loss": 1.3938,
"step": 28920
},
{
"epoch": 1092.0754716981132,
"grad_norm": 1.5691857730547452,
"learning_rate": 6.6847024872275215e-06,
"loss": 1.4176,
"step": 28940
},
{
"epoch": 1092.8301886792453,
"grad_norm": 1.5537251935711112,
"learning_rate": 6.66291589640982e-06,
"loss": 1.3967,
"step": 28960
},
{
"epoch": 1093.5849056603774,
"grad_norm": 1.8881979410475171,
"learning_rate": 6.641267901541472e-06,
"loss": 1.418,
"step": 28980
},
{
"epoch": 1094.3396226415093,
"grad_norm": 1.613241830342873,
"learning_rate": 6.619758586343714e-06,
"loss": 1.3901,
"step": 29000
},
{
"epoch": 1095.0943396226414,
"grad_norm": 1.5946632443607534,
"learning_rate": 6.598388034001433e-06,
"loss": 1.3634,
"step": 29020
},
{
"epoch": 1095.8490566037735,
"grad_norm": 1.8962995366661943,
"learning_rate": 6.577156327162867e-06,
"loss": 1.392,
"step": 29040
},
{
"epoch": 1096.6037735849056,
"grad_norm": 1.629681556076702,
"learning_rate": 6.55606354793928e-06,
"loss": 1.4078,
"step": 29060
},
{
"epoch": 1097.3584905660377,
"grad_norm": 1.6952819453222434,
"learning_rate": 6.535109777904677e-06,
"loss": 1.4017,
"step": 29080
},
{
"epoch": 1098.1132075471698,
"grad_norm": 2.5813616029432267,
"learning_rate": 6.514295098095432e-06,
"loss": 1.3986,
"step": 29100
},
{
"epoch": 1098.867924528302,
"grad_norm": 1.5192224713062508,
"learning_rate": 6.493619589010008e-06,
"loss": 1.3995,
"step": 29120
},
{
"epoch": 1099.622641509434,
"grad_norm": 1.5723195273483208,
"learning_rate": 6.4730833306086425e-06,
"loss": 1.3804,
"step": 29140
},
{
"epoch": 1100.377358490566,
"grad_norm": 1.6397338659549336,
"learning_rate": 6.452686402313042e-06,
"loss": 1.386,
"step": 29160
},
{
"epoch": 1101.132075471698,
"grad_norm": 1.5791257173150743,
"learning_rate": 6.43242888300607e-06,
"loss": 1.3847,
"step": 29180
},
{
"epoch": 1101.8867924528302,
"grad_norm": 1.5559887095506482,
"learning_rate": 6.412310851031428e-06,
"loss": 1.393,
"step": 29200
},
{
"epoch": 1102.6415094339623,
"grad_norm": 1.6663466000474887,
"learning_rate": 6.392332384193371e-06,
"loss": 1.3896,
"step": 29220
},
{
"epoch": 1103.3962264150944,
"grad_norm": 1.9956674599720932,
"learning_rate": 6.372493559756415e-06,
"loss": 1.378,
"step": 29240
},
{
"epoch": 1104.1509433962265,
"grad_norm": 1.787105155690102,
"learning_rate": 6.352794454445007e-06,
"loss": 1.3879,
"step": 29260
},
{
"epoch": 1104.9056603773586,
"grad_norm": 1.561482889041861,
"learning_rate": 6.333235144443262e-06,
"loss": 1.402,
"step": 29280
},
{
"epoch": 1105.6603773584907,
"grad_norm": 1.8736117457797759,
"learning_rate": 6.31381570539463e-06,
"loss": 1.3879,
"step": 29300
},
{
"epoch": 1106.4150943396226,
"grad_norm": 1.4692581652153442,
"learning_rate": 6.294536212401641e-06,
"loss": 1.3914,
"step": 29320
},
{
"epoch": 1107.1698113207547,
"grad_norm": 1.4908544439114542,
"learning_rate": 6.275396740025605e-06,
"loss": 1.4028,
"step": 29340
},
{
"epoch": 1107.9245283018868,
"grad_norm": 1.488666750171173,
"learning_rate": 6.256397362286306e-06,
"loss": 1.3799,
"step": 29360
},
{
"epoch": 1108.6792452830189,
"grad_norm": 1.517431762228245,
"learning_rate": 6.237538152661723e-06,
"loss": 1.3765,
"step": 29380
},
{
"epoch": 1109.433962264151,
"grad_norm": 2.2381909450089803,
"learning_rate": 6.218819184087767e-06,
"loss": 1.4079,
"step": 29400
},
{
"epoch": 1110.188679245283,
"grad_norm": 1.7858504458920295,
"learning_rate": 6.200240528957965e-06,
"loss": 1.3554,
"step": 29420
},
{
"epoch": 1110.9433962264152,
"grad_norm": 1.7350524849254911,
"learning_rate": 6.181802259123219e-06,
"loss": 1.3967,
"step": 29440
},
{
"epoch": 1111.698113207547,
"grad_norm": 1.702971597589678,
"learning_rate": 6.163504445891484e-06,
"loss": 1.3671,
"step": 29460
},
{
"epoch": 1112.4528301886792,
"grad_norm": 1.7712134929173684,
"learning_rate": 6.145347160027524e-06,
"loss": 1.3829,
"step": 29480
},
{
"epoch": 1113.2075471698113,
"grad_norm": 1.4073555395505457,
"learning_rate": 6.1273304717526284e-06,
"loss": 1.4108,
"step": 29500
},
{
"epoch": 1113.9622641509434,
"grad_norm": 1.6527537265171588,
"learning_rate": 6.10945445074435e-06,
"loss": 1.4068,
"step": 29520
},
{
"epoch": 1114.7169811320755,
"grad_norm": 1.6866987009556351,
"learning_rate": 6.091719166136209e-06,
"loss": 1.3793,
"step": 29540
},
{
"epoch": 1115.4716981132076,
"grad_norm": 1.7073159356044332,
"learning_rate": 6.074124686517448e-06,
"loss": 1.3826,
"step": 29560
},
{
"epoch": 1116.2264150943397,
"grad_norm": 1.8230785653176147,
"learning_rate": 6.056671079932781e-06,
"loss": 1.4153,
"step": 29580
},
{
"epoch": 1116.9811320754718,
"grad_norm": 1.6857598634250675,
"learning_rate": 6.0393584138820814e-06,
"loss": 1.3887,
"step": 29600
},
{
"epoch": 1117.7358490566037,
"grad_norm": 1.5568678463492682,
"learning_rate": 6.022186755320181e-06,
"loss": 1.3901,
"step": 29620
},
{
"epoch": 1118.4905660377358,
"grad_norm": 1.8571545157336313,
"learning_rate": 6.0051561706565545e-06,
"loss": 1.4013,
"step": 29640
},
{
"epoch": 1119.245283018868,
"grad_norm": 2.413996452708785,
"learning_rate": 5.988266725755103e-06,
"loss": 1.3613,
"step": 29660
},
{
"epoch": 1120.0,
"grad_norm": 1.687989711452293,
"learning_rate": 5.9715184859338745e-06,
"loss": 1.4031,
"step": 29680
},
{
"epoch": 1120.754716981132,
"grad_norm": 1.7351377623187432,
"learning_rate": 5.9549115159648416e-06,
"loss": 1.3949,
"step": 29700
},
{
"epoch": 1121.5094339622642,
"grad_norm": 1.6317556572084198,
"learning_rate": 5.9384458800736175e-06,
"loss": 1.3769,
"step": 29720
},
{
"epoch": 1122.2641509433963,
"grad_norm": 1.5268456230996348,
"learning_rate": 5.922121641939213e-06,
"loss": 1.3816,
"step": 29740
},
{
"epoch": 1123.0188679245282,
"grad_norm": 1.712558259908726,
"learning_rate": 5.905938864693819e-06,
"loss": 1.3798,
"step": 29760
},
{
"epoch": 1123.7735849056603,
"grad_norm": 2.381990895927805,
"learning_rate": 5.889897610922528e-06,
"loss": 1.3607,
"step": 29780
},
{
"epoch": 1124.5283018867924,
"grad_norm": 1.880675021280631,
"learning_rate": 5.873997942663118e-06,
"loss": 1.3886,
"step": 29800
},
{
"epoch": 1125.2830188679245,
"grad_norm": 1.7160648060328811,
"learning_rate": 5.858239921405781e-06,
"loss": 1.4049,
"step": 29820
},
{
"epoch": 1126.0377358490566,
"grad_norm": 2.0253315053102656,
"learning_rate": 5.842623608092928e-06,
"loss": 1.393,
"step": 29840
},
{
"epoch": 1126.7924528301887,
"grad_norm": 1.7870648066969081,
"learning_rate": 5.8271490631189085e-06,
"loss": 1.3654,
"step": 29860
},
{
"epoch": 1127.5471698113208,
"grad_norm": 2.0620223544323393,
"learning_rate": 5.811816346329819e-06,
"loss": 1.3776,
"step": 29880
},
{
"epoch": 1128.301886792453,
"grad_norm": 1.892915815700359,
"learning_rate": 5.796625517023236e-06,
"loss": 1.377,
"step": 29900
},
{
"epoch": 1129.0566037735848,
"grad_norm": 1.6134589423454577,
"learning_rate": 5.781576633948012e-06,
"loss": 1.3958,
"step": 29920
},
{
"epoch": 1129.811320754717,
"grad_norm": 1.8880173462636753,
"learning_rate": 5.766669755304027e-06,
"loss": 1.3707,
"step": 29940
},
{
"epoch": 1130.566037735849,
"grad_norm": 1.899687605902805,
"learning_rate": 5.75190493874199e-06,
"loss": 1.3648,
"step": 29960
},
{
"epoch": 1131.3207547169811,
"grad_norm": 2.0484945041635143,
"learning_rate": 5.737282241363189e-06,
"loss": 1.3689,
"step": 29980
},
{
"epoch": 1132.0754716981132,
"grad_norm": 1.676321084433534,
"learning_rate": 5.72280171971928e-06,
"loss": 1.4161,
"step": 30000
},
{
"epoch": 1132.8301886792453,
"grad_norm": 1.7718376566707665,
"learning_rate": 5.708463429812077e-06,
"loss": 1.3427,
"step": 30020
},
{
"epoch": 1133.5849056603774,
"grad_norm": 1.9751240318001524,
"learning_rate": 5.694267427093333e-06,
"loss": 1.3674,
"step": 30040
},
{
"epoch": 1134.3396226415093,
"grad_norm": 2.3259508666245754,
"learning_rate": 5.680213766464505e-06,
"loss": 1.3815,
"step": 30060
},
{
"epoch": 1135.0943396226414,
"grad_norm": 1.7499567507331477,
"learning_rate": 5.6663025022765734e-06,
"loss": 1.3898,
"step": 30080
},
{
"epoch": 1135.8490566037735,
"grad_norm": 1.7700410283415744,
"learning_rate": 5.652533688329809e-06,
"loss": 1.3801,
"step": 30100
},
{
"epoch": 1136.6037735849056,
"grad_norm": 2.4028113618062843,
"learning_rate": 5.638907377873572e-06,
"loss": 1.4025,
"step": 30120
},
{
"epoch": 1137.3584905660377,
"grad_norm": 1.7119758682153656,
"learning_rate": 5.625423623606109e-06,
"loss": 1.3933,
"step": 30140
},
{
"epoch": 1138.1132075471698,
"grad_norm": 1.6434771622606816,
"learning_rate": 5.612082477674341e-06,
"loss": 1.3723,
"step": 30160
},
{
"epoch": 1138.867924528302,
"grad_norm": 1.6260264586830788,
"learning_rate": 5.598883991673678e-06,
"loss": 1.4009,
"step": 30180
},
{
"epoch": 1139.622641509434,
"grad_norm": 3.7353731641696166,
"learning_rate": 5.58582821664779e-06,
"loss": 1.3621,
"step": 30200
},
{
"epoch": 1140.377358490566,
"grad_norm": 1.567966811159742,
"learning_rate": 5.572915203088453e-06,
"loss": 1.3679,
"step": 30220
},
{
"epoch": 1141.132075471698,
"grad_norm": 1.7536276327044822,
"learning_rate": 5.560145000935302e-06,
"loss": 1.3899,
"step": 30240
},
{
"epoch": 1141.8867924528302,
"grad_norm": 1.6246811713037859,
"learning_rate": 5.547517659575683e-06,
"loss": 1.3754,
"step": 30260
},
{
"epoch": 1142.6415094339623,
"grad_norm": 1.5935354859602073,
"learning_rate": 5.535033227844446e-06,
"loss": 1.3783,
"step": 30280
},
{
"epoch": 1143.3962264150944,
"grad_norm": 1.5837499746804282,
"learning_rate": 5.522691754023736e-06,
"loss": 1.3664,
"step": 30300
},
{
"epoch": 1144.1509433962265,
"grad_norm": 1.5561292753074283,
"learning_rate": 5.5104932858428386e-06,
"loss": 1.3934,
"step": 30320
},
{
"epoch": 1144.9056603773586,
"grad_norm": 1.5051486824601223,
"learning_rate": 5.498437870477979e-06,
"loss": 1.3569,
"step": 30340
},
{
"epoch": 1145.6603773584907,
"grad_norm": 1.5724530317281036,
"learning_rate": 5.48652555455214e-06,
"loss": 1.384,
"step": 30360
},
{
"epoch": 1146.4150943396226,
"grad_norm": 1.7499070562961392,
"learning_rate": 5.474756384134872e-06,
"loss": 1.3661,
"step": 30380
},
{
"epoch": 1147.1698113207547,
"grad_norm": 1.682172454392295,
"learning_rate": 5.46313040474215e-06,
"loss": 1.3668,
"step": 30400
},
{
"epoch": 1147.9245283018868,
"grad_norm": 1.6400451026874565,
"learning_rate": 5.4516476613361565e-06,
"loss": 1.3605,
"step": 30420
},
{
"epoch": 1148.6792452830189,
"grad_norm": 1.952384343786011,
"learning_rate": 5.440308198325125e-06,
"loss": 1.388,
"step": 30440
},
{
"epoch": 1149.433962264151,
"grad_norm": 2.052044266530817,
"learning_rate": 5.4291120595631796e-06,
"loss": 1.3699,
"step": 30460
},
{
"epoch": 1150.188679245283,
"grad_norm": 1.868354121694302,
"learning_rate": 5.4180592883501325e-06,
"loss": 1.4099,
"step": 30480
},
{
"epoch": 1150.9433962264152,
"grad_norm": 1.650613691746538,
"learning_rate": 5.40714992743136e-06,
"loss": 1.3788,
"step": 30500
},
{
"epoch": 1151.698113207547,
"grad_norm": 1.48074352750423,
"learning_rate": 5.3963840189976066e-06,
"loss": 1.3587,
"step": 30520
},
{
"epoch": 1152.4528301886792,
"grad_norm": 1.914894176993607,
"learning_rate": 5.385761604684826e-06,
"loss": 1.3622,
"step": 30540
},
{
"epoch": 1153.2075471698113,
"grad_norm": 1.736154691724524,
"learning_rate": 5.375282725574028e-06,
"loss": 1.3451,
"step": 30560
},
{
"epoch": 1153.9622641509434,
"grad_norm": 1.7175923216328703,
"learning_rate": 5.364947422191111e-06,
"loss": 1.385,
"step": 30580
},
{
"epoch": 1154.7169811320755,
"grad_norm": 1.8230347081955776,
"learning_rate": 5.3547557345067295e-06,
"loss": 1.3797,
"step": 30600
},
{
"epoch": 1155.4716981132076,
"grad_norm": 1.4897355923840079,
"learning_rate": 5.344707701936093e-06,
"loss": 1.3812,
"step": 30620
},
{
"epoch": 1156.2264150943397,
"grad_norm": 1.7795720356372806,
"learning_rate": 5.334803363338855e-06,
"loss": 1.3508,
"step": 30640
},
{
"epoch": 1156.9811320754718,
"grad_norm": 2.461699887903762,
"learning_rate": 5.325042757018952e-06,
"loss": 1.3904,
"step": 30660
},
{
"epoch": 1157.7358490566037,
"grad_norm": 1.7684288169829847,
"learning_rate": 5.315425920724443e-06,
"loss": 1.362,
"step": 30680
},
{
"epoch": 1158.4905660377358,
"grad_norm": 1.9326301215722892,
"learning_rate": 5.3059528916473754e-06,
"loss": 1.3764,
"step": 30700
},
{
"epoch": 1159.245283018868,
"grad_norm": 1.7547993585411785,
"learning_rate": 5.296623706423637e-06,
"loss": 1.3624,
"step": 30720
},
{
"epoch": 1160.0,
"grad_norm": 2.2647989876543897,
"learning_rate": 5.2874384011328235e-06,
"loss": 1.3804,
"step": 30740
},
{
"epoch": 1160.754716981132,
"grad_norm": 1.897412746168143,
"learning_rate": 5.278397011298081e-06,
"loss": 1.3882,
"step": 30760
},
{
"epoch": 1161.5094339622642,
"grad_norm": 1.5286725772277845,
"learning_rate": 5.269499571885985e-06,
"loss": 1.381,
"step": 30780
},
{
"epoch": 1162.2641509433963,
"grad_norm": 1.6848292059915215,
"learning_rate": 5.260746117306394e-06,
"loss": 1.361,
"step": 30800
},
{
"epoch": 1163.0188679245282,
"grad_norm": 1.4576957104143031,
"learning_rate": 5.25213668141232e-06,
"loss": 1.3773,
"step": 30820
},
{
"epoch": 1163.7735849056603,
"grad_norm": 1.6655981961615232,
"learning_rate": 5.243671297499806e-06,
"loss": 1.3403,
"step": 30840
},
{
"epoch": 1164.5283018867924,
"grad_norm": 6.016182274377044,
"learning_rate": 5.235349998307786e-06,
"loss": 1.3994,
"step": 30860
},
{
"epoch": 1165.2830188679245,
"grad_norm": 1.7659588641922745,
"learning_rate": 5.227172816017956e-06,
"loss": 1.3507,
"step": 30880
},
{
"epoch": 1166.0377358490566,
"grad_norm": 2.0037468459561962,
"learning_rate": 5.219139782254665e-06,
"loss": 1.3703,
"step": 30900
},
{
"epoch": 1166.7924528301887,
"grad_norm": 2.15024644673786,
"learning_rate": 5.211250928084786e-06,
"loss": 1.3473,
"step": 30920
},
{
"epoch": 1167.5471698113208,
"grad_norm": 2.5013172573697466,
"learning_rate": 5.203506284017583e-06,
"loss": 1.3814,
"step": 30940
},
{
"epoch": 1168.301886792453,
"grad_norm": 1.5816513523971083,
"learning_rate": 5.195905880004609e-06,
"loss": 1.3668,
"step": 30960
},
{
"epoch": 1169.0566037735848,
"grad_norm": 1.512996764161357,
"learning_rate": 5.188449745439581e-06,
"loss": 1.3581,
"step": 30980
},
{
"epoch": 1169.811320754717,
"grad_norm": 1.536263448282502,
"learning_rate": 5.181137909158276e-06,
"loss": 1.3277,
"step": 31000
},
{
"epoch": 1170.566037735849,
"grad_norm": 1.6755767673451942,
"learning_rate": 5.1739703994384105e-06,
"loss": 1.3923,
"step": 31020
},
{
"epoch": 1171.3207547169811,
"grad_norm": 1.7976047665675525,
"learning_rate": 5.166947243999532e-06,
"loss": 1.3671,
"step": 31040
},
{
"epoch": 1172.0754716981132,
"grad_norm": 1.5604607884699584,
"learning_rate": 5.1600684700029165e-06,
"loss": 1.3613,
"step": 31060
},
{
"epoch": 1172.8301886792453,
"grad_norm": 1.5133379987405895,
"learning_rate": 5.1533341040514576e-06,
"loss": 1.3696,
"step": 31080
},
{
"epoch": 1173.5849056603774,
"grad_norm": 1.8992042289915705,
"learning_rate": 5.146744172189571e-06,
"loss": 1.3464,
"step": 31100
},
{
"epoch": 1174.3396226415093,
"grad_norm": 1.8549085471784923,
"learning_rate": 5.140298699903085e-06,
"loss": 1.3478,
"step": 31120
},
{
"epoch": 1175.0943396226414,
"grad_norm": 1.6926406458235648,
"learning_rate": 5.133997712119152e-06,
"loss": 1.3526,
"step": 31140
},
{
"epoch": 1175.8490566037735,
"grad_norm": 1.9538672940442745,
"learning_rate": 5.127841233206144e-06,
"loss": 1.3686,
"step": 31160
},
{
"epoch": 1176.6037735849056,
"grad_norm": 1.850655603319905,
"learning_rate": 5.1218292869735606e-06,
"loss": 1.3906,
"step": 31180
},
{
"epoch": 1177.3584905660377,
"grad_norm": 1.7127479688627378,
"learning_rate": 5.115961896671935e-06,
"loss": 1.3703,
"step": 31200
},
{
"epoch": 1178.1132075471698,
"grad_norm": 1.556614260381109,
"learning_rate": 5.110239084992749e-06,
"loss": 1.3532,
"step": 31220
},
{
"epoch": 1178.867924528302,
"grad_norm": 2.001126139034296,
"learning_rate": 5.1046608740683435e-06,
"loss": 1.3929,
"step": 31240
},
{
"epoch": 1179.622641509434,
"grad_norm": 2.127747604876417,
"learning_rate": 5.09922728547183e-06,
"loss": 1.3657,
"step": 31260
},
{
"epoch": 1180.377358490566,
"grad_norm": 1.8364327564945553,
"learning_rate": 5.093938340217008e-06,
"loss": 1.3426,
"step": 31280
},
{
"epoch": 1181.132075471698,
"grad_norm": 1.9292610849222944,
"learning_rate": 5.088794058758295e-06,
"loss": 1.368,
"step": 31300
},
{
"epoch": 1181.8867924528302,
"grad_norm": 2.0114024877177505,
"learning_rate": 5.083794460990618e-06,
"loss": 1.39,
"step": 31320
},
{
"epoch": 1182.6415094339623,
"grad_norm": 1.5735214803674382,
"learning_rate": 5.078939566249372e-06,
"loss": 1.3632,
"step": 31340
},
{
"epoch": 1183.3962264150944,
"grad_norm": 1.8428642902345547,
"learning_rate": 5.074229393310324e-06,
"loss": 1.3757,
"step": 31360
},
{
"epoch": 1184.1509433962265,
"grad_norm": 1.697897177712772,
"learning_rate": 5.06966396038955e-06,
"loss": 1.354,
"step": 31380
},
{
"epoch": 1184.9056603773586,
"grad_norm": 1.807086734591878,
"learning_rate": 5.065243285143349e-06,
"loss": 1.3757,
"step": 31400
},
{
"epoch": 1185.6603773584907,
"grad_norm": 1.743179055242126,
"learning_rate": 5.0609673846681936e-06,
"loss": 1.3819,
"step": 31420
},
{
"epoch": 1186.4150943396226,
"grad_norm": 1.8735264452983302,
"learning_rate": 5.056836275500658e-06,
"loss": 1.3579,
"step": 31440
},
{
"epoch": 1187.1698113207547,
"grad_norm": 1.5862970321945447,
"learning_rate": 5.052849973617347e-06,
"loss": 1.3445,
"step": 31460
},
{
"epoch": 1187.9245283018868,
"grad_norm": 1.692517823714256,
"learning_rate": 5.049008494434844e-06,
"loss": 1.3694,
"step": 31480
},
{
"epoch": 1188.6792452830189,
"grad_norm": 1.6212477472255649,
"learning_rate": 5.045311852809638e-06,
"loss": 1.3929,
"step": 31500
},
{
"epoch": 1189.433962264151,
"grad_norm": 1.52306373987035,
"learning_rate": 5.041760063038081e-06,
"loss": 1.3579,
"step": 31520
},
{
"epoch": 1190.188679245283,
"grad_norm": 1.7830544839573095,
"learning_rate": 5.038353138856331e-06,
"loss": 1.348,
"step": 31540
},
{
"epoch": 1190.9433962264152,
"grad_norm": 1.7203728735463606,
"learning_rate": 5.035091093440292e-06,
"loss": 1.37,
"step": 31560
},
{
"epoch": 1191.698113207547,
"grad_norm": 1.9298089743408848,
"learning_rate": 5.0319739394055525e-06,
"loss": 1.3627,
"step": 31580
},
{
"epoch": 1192.4528301886792,
"grad_norm": 1.9488940650586162,
"learning_rate": 5.029001688807368e-06,
"loss": 1.3537,
"step": 31600
},
{
"epoch": 1193.2075471698113,
"grad_norm": 2.0609178957358667,
"learning_rate": 5.026174353140584e-06,
"loss": 1.3521,
"step": 31620
},
{
"epoch": 1193.9622641509434,
"grad_norm": 1.710559073613117,
"learning_rate": 5.0234919433396115e-06,
"loss": 1.3768,
"step": 31640
},
{
"epoch": 1194.7169811320755,
"grad_norm": 1.5082465689013147,
"learning_rate": 5.02095446977837e-06,
"loss": 1.3893,
"step": 31660
},
{
"epoch": 1195.4716981132076,
"grad_norm": 2.4105153089947526,
"learning_rate": 5.018561942270259e-06,
"loss": 1.3532,
"step": 31680
},
{
"epoch": 1196.2264150943397,
"grad_norm": 1.5148689250273666,
"learning_rate": 5.016314370068112e-06,
"loss": 1.3429,
"step": 31700
},
{
"epoch": 1196.9811320754718,
"grad_norm": 1.7305388649029056,
"learning_rate": 5.014211761864169e-06,
"loss": 1.3559,
"step": 31720
},
{
"epoch": 1197.7358490566037,
"grad_norm": 3.661229816284544,
"learning_rate": 5.012254125790028e-06,
"loss": 1.37,
"step": 31740
},
{
"epoch": 1198.4905660377358,
"grad_norm": 1.9493540072501139,
"learning_rate": 5.010441469416635e-06,
"loss": 1.3808,
"step": 31760
},
{
"epoch": 1199.245283018868,
"grad_norm": 1.6896444872077154,
"learning_rate": 5.008773799754234e-06,
"loss": 1.3631,
"step": 31780
},
{
"epoch": 1200.0,
"grad_norm": 1.884439542410789,
"learning_rate": 5.007251123252356e-06,
"loss": 1.3638,
"step": 31800
},
{
"epoch": 1200.754716981132,
"grad_norm": 1.98761366434412,
"learning_rate": 5.005873445799779e-06,
"loss": 1.35,
"step": 31820
},
{
"epoch": 1201.5094339622642,
"grad_norm": 1.8352779283455332,
"learning_rate": 5.004640772724519e-06,
"loss": 1.3369,
"step": 31840
},
{
"epoch": 1202.2641509433963,
"grad_norm": 1.712020294826759,
"learning_rate": 5.003553108793802e-06,
"loss": 1.3511,
"step": 31860
},
{
"epoch": 1203.0188679245282,
"grad_norm": 1.6743616923339946,
"learning_rate": 5.002610458214054e-06,
"loss": 1.3259,
"step": 31880
},
{
"epoch": 1203.7735849056603,
"grad_norm": 1.8393462234102256,
"learning_rate": 5.001812824630864e-06,
"loss": 1.3646,
"step": 31900
},
{
"epoch": 1204.5283018867924,
"grad_norm": 1.7631293985305598,
"learning_rate": 5.001160211128995e-06,
"loss": 1.3384,
"step": 31920
},
{
"epoch": 1205.2830188679245,
"grad_norm": 1.6536424071703635,
"learning_rate": 5.0006526202323554e-06,
"loss": 1.3605,
"step": 31940
},
{
"epoch": 1206.0377358490566,
"grad_norm": 1.5387931434470863,
"learning_rate": 5.000290053904e-06,
"loss": 1.3892,
"step": 31960
},
{
"epoch": 1206.7924528301887,
"grad_norm": 1.948827205429464,
"learning_rate": 5.0000725135461104e-06,
"loss": 1.3541,
"step": 31980
},
{
"epoch": 1207.5471698113208,
"grad_norm": 1.595259284912312,
"learning_rate": 5e-06,
"loss": 1.3478,
"step": 32000
},
{
"epoch": 1231.5094339622642,
"grad_norm": 1.9859843003442184,
"learning_rate": 3.1745653570607866e-05,
"loss": 1.4161,
"step": 32020
},
{
"epoch": 1232.2641509433963,
"grad_norm": 2.399291840461689,
"learning_rate": 3.170382168563073e-05,
"loss": 1.4292,
"step": 32040
},
{
"epoch": 1233.0188679245282,
"grad_norm": 2.2207067356830597,
"learning_rate": 3.166200444421923e-05,
"loss": 1.4248,
"step": 32060
},
{
"epoch": 1233.7735849056603,
"grad_norm": 1.7538222025729717,
"learning_rate": 3.1620201903092876e-05,
"loss": 1.4549,
"step": 32080
},
{
"epoch": 1234.5283018867924,
"grad_norm": 1.7296401624898199,
"learning_rate": 3.157841411895116e-05,
"loss": 1.4544,
"step": 32100
},
{
"epoch": 1235.2830188679245,
"grad_norm": 1.6657757057870137,
"learning_rate": 3.153664114847362e-05,
"loss": 1.4734,
"step": 32120
},
{
"epoch": 1236.0377358490566,
"grad_norm": 1.7240277610891936,
"learning_rate": 3.149488304831967e-05,
"loss": 1.451,
"step": 32140
},
{
"epoch": 1236.7924528301887,
"grad_norm": 1.6885797820089437,
"learning_rate": 3.145313987512854e-05,
"loss": 1.4366,
"step": 32160
},
{
"epoch": 1237.5471698113208,
"grad_norm": 1.4963776794399322,
"learning_rate": 3.141141168551928e-05,
"loss": 1.4652,
"step": 32180
},
{
"epoch": 1238.301886792453,
"grad_norm": 1.4609983523815115,
"learning_rate": 3.1369698536090554e-05,
"loss": 1.4648,
"step": 32200
},
{
"epoch": 1239.0566037735848,
"grad_norm": 1.9029419687473905,
"learning_rate": 3.132800048342065e-05,
"loss": 1.4664,
"step": 32220
},
{
"epoch": 1239.811320754717,
"grad_norm": 1.7932066669770592,
"learning_rate": 3.128631758406736e-05,
"loss": 1.4585,
"step": 32240
},
{
"epoch": 1240.566037735849,
"grad_norm": 1.6253328044167166,
"learning_rate": 3.1244649894567945e-05,
"loss": 1.4492,
"step": 32260
},
{
"epoch": 1241.3207547169811,
"grad_norm": 1.507775786714413,
"learning_rate": 3.120299747143905e-05,
"loss": 1.4934,
"step": 32280
},
{
"epoch": 1242.0754716981132,
"grad_norm": 1.7801850010709415,
"learning_rate": 3.1161360371176566e-05,
"loss": 1.4486,
"step": 32300
},
{
"epoch": 1242.8301886792453,
"grad_norm": 1.6106209389195743,
"learning_rate": 3.111973865025564e-05,
"loss": 1.4468,
"step": 32320
},
{
"epoch": 1243.5849056603774,
"grad_norm": 1.8027839874458171,
"learning_rate": 3.107813236513054e-05,
"loss": 1.477,
"step": 32340
},
{
"epoch": 1244.3396226415093,
"grad_norm": 1.883131295400716,
"learning_rate": 3.1036541572234594e-05,
"loss": 1.4555,
"step": 32360
},
{
"epoch": 1245.0943396226414,
"grad_norm": 1.591157945654413,
"learning_rate": 3.099496632798014e-05,
"loss": 1.4708,
"step": 32380
},
{
"epoch": 1245.8490566037735,
"grad_norm": 1.6694778342522842,
"learning_rate": 3.095340668875842e-05,
"loss": 1.4639,
"step": 32400
},
{
"epoch": 1246.6037735849056,
"grad_norm": 1.6841562206011031,
"learning_rate": 3.091186271093947e-05,
"loss": 1.5116,
"step": 32420
},
{
"epoch": 1247.3584905660377,
"grad_norm": 2.3369379900409943,
"learning_rate": 3.0870334450872156e-05,
"loss": 1.4754,
"step": 32440
},
{
"epoch": 1248.1132075471698,
"grad_norm": 1.720534890104194,
"learning_rate": 3.0828821964883944e-05,
"loss": 1.4941,
"step": 32460
},
{
"epoch": 1248.867924528302,
"grad_norm": 1.7549772489735695,
"learning_rate": 3.0787325309280966e-05,
"loss": 1.4799,
"step": 32480
},
{
"epoch": 1249.622641509434,
"grad_norm": 1.8182084066575632,
"learning_rate": 3.074584454034788e-05,
"loss": 1.4715,
"step": 32500
},
{
"epoch": 1250.377358490566,
"grad_norm": 1.5605662428278646,
"learning_rate": 3.0704379714347736e-05,
"loss": 1.4783,
"step": 32520
},
{
"epoch": 1251.132075471698,
"grad_norm": 1.569853865239183,
"learning_rate": 3.066293088752203e-05,
"loss": 1.4638,
"step": 32540
},
{
"epoch": 1251.8867924528302,
"grad_norm": 1.704579985134968,
"learning_rate": 3.062149811609051e-05,
"loss": 1.492,
"step": 32560
},
{
"epoch": 1252.6415094339623,
"grad_norm": 1.7794864973864697,
"learning_rate": 3.058008145625118e-05,
"loss": 1.4705,
"step": 32580
},
{
"epoch": 1253.3962264150944,
"grad_norm": 1.8222736973302784,
"learning_rate": 3.053868096418017e-05,
"loss": 1.4893,
"step": 32600
},
{
"epoch": 1254.1509433962265,
"grad_norm": 1.5789611538013155,
"learning_rate": 3.0497296696031678e-05,
"loss": 1.4665,
"step": 32620
},
{
"epoch": 1254.9056603773586,
"grad_norm": 1.657785958532039,
"learning_rate": 3.0455928707937924e-05,
"loss": 1.491,
"step": 32640
},
{
"epoch": 1255.6603773584907,
"grad_norm": 1.3254023383839637,
"learning_rate": 3.0414577056008995e-05,
"loss": 1.4823,
"step": 32660
},
{
"epoch": 1256.4150943396226,
"grad_norm": 1.5602437010509045,
"learning_rate": 3.0373241796332887e-05,
"loss": 1.4704,
"step": 32680
},
{
"epoch": 1257.1698113207547,
"grad_norm": 2.029474586920305,
"learning_rate": 3.0331922984975316e-05,
"loss": 1.4765,
"step": 32700
},
{
"epoch": 1257.9245283018868,
"grad_norm": 1.8553896972815955,
"learning_rate": 3.0290620677979688e-05,
"loss": 1.5096,
"step": 32720
},
{
"epoch": 1258.6792452830189,
"grad_norm": 1.4989759048156965,
"learning_rate": 3.0249334931367046e-05,
"loss": 1.5122,
"step": 32740
},
{
"epoch": 1259.433962264151,
"grad_norm": 1.6763111597334728,
"learning_rate": 3.0208065801135942e-05,
"loss": 1.4787,
"step": 32760
},
{
"epoch": 1260.188679245283,
"grad_norm": 1.469251133196546,
"learning_rate": 3.016681334326244e-05,
"loss": 1.4854,
"step": 32780
},
{
"epoch": 1260.9433962264152,
"grad_norm": 1.8501919367454238,
"learning_rate": 3.0125577613699926e-05,
"loss": 1.4929,
"step": 32800
},
{
"epoch": 1261.698113207547,
"grad_norm": 1.5790438820656068,
"learning_rate": 3.0084358668379155e-05,
"loss": 1.5055,
"step": 32820
},
{
"epoch": 1262.4528301886792,
"grad_norm": 1.5952733717783116,
"learning_rate": 3.004315656320806e-05,
"loss": 1.4907,
"step": 32840
},
{
"epoch": 1263.2075471698113,
"grad_norm": 1.6182930520428953,
"learning_rate": 3.0001971354071772e-05,
"loss": 1.4909,
"step": 32860
},
{
"epoch": 1263.9622641509434,
"grad_norm": 2.2886630268428663,
"learning_rate": 2.996080309683252e-05,
"loss": 1.4992,
"step": 32880
},
{
"epoch": 1264.7169811320755,
"grad_norm": 1.3793974197803296,
"learning_rate": 2.9919651847329483e-05,
"loss": 1.5061,
"step": 32900
},
{
"epoch": 1265.4716981132076,
"grad_norm": 1.39182833894468,
"learning_rate": 2.9878517661378828e-05,
"loss": 1.4591,
"step": 32920
},
{
"epoch": 1266.2264150943397,
"grad_norm": 1.6904437738848905,
"learning_rate": 2.9837400594773515e-05,
"loss": 1.5118,
"step": 32940
},
{
"epoch": 1266.9811320754718,
"grad_norm": 1.6447748796714898,
"learning_rate": 2.979630070328336e-05,
"loss": 1.4881,
"step": 32960
},
{
"epoch": 1267.7358490566037,
"grad_norm": 1.3512114550316146,
"learning_rate": 2.975521804265484e-05,
"loss": 1.4719,
"step": 32980
},
{
"epoch": 1268.4905660377358,
"grad_norm": 1.6317892668767962,
"learning_rate": 2.971415266861105e-05,
"loss": 1.5057,
"step": 33000
},
{
"epoch": 1269.245283018868,
"grad_norm": 1.6596450520295813,
"learning_rate": 2.967310463685166e-05,
"loss": 1.481,
"step": 33020
},
{
"epoch": 1270.0,
"grad_norm": 1.6548890468178368,
"learning_rate": 2.9632074003052808e-05,
"loss": 1.5136,
"step": 33040
},
{
"epoch": 1270.754716981132,
"grad_norm": 1.5074284840254797,
"learning_rate": 2.9591060822867042e-05,
"loss": 1.4971,
"step": 33060
},
{
"epoch": 1271.5094339622642,
"grad_norm": 1.5075074748556512,
"learning_rate": 2.9550065151923238e-05,
"loss": 1.4647,
"step": 33080
},
{
"epoch": 1272.2641509433963,
"grad_norm": 1.7144775848474376,
"learning_rate": 2.9509087045826505e-05,
"loss": 1.5145,
"step": 33100
},
{
"epoch": 1273.0188679245282,
"grad_norm": 1.5547570517351919,
"learning_rate": 2.946812656015815e-05,
"loss": 1.4806,
"step": 33120
},
{
"epoch": 1273.7735849056603,
"grad_norm": 1.91096744807036,
"learning_rate": 2.942718375047554e-05,
"loss": 1.4953,
"step": 33140
},
{
"epoch": 1274.5283018867924,
"grad_norm": 1.690681911094072,
"learning_rate": 2.9386258672312143e-05,
"loss": 1.5043,
"step": 33160
},
{
"epoch": 1275.2830188679245,
"grad_norm": 1.6094990513366627,
"learning_rate": 2.93453513811773e-05,
"loss": 1.4656,
"step": 33180
},
{
"epoch": 1276.0377358490566,
"grad_norm": 1.7166760221415358,
"learning_rate": 2.9304461932556262e-05,
"loss": 1.5049,
"step": 33200
},
{
"epoch": 1276.7924528301887,
"grad_norm": 1.4781436729661779,
"learning_rate": 2.9263590381910078e-05,
"loss": 1.4901,
"step": 33220
},
{
"epoch": 1277.5471698113208,
"grad_norm": 1.6055713664381628,
"learning_rate": 2.9222736784675506e-05,
"loss": 1.4744,
"step": 33240
},
{
"epoch": 1278.301886792453,
"grad_norm": 1.6185246350349134,
"learning_rate": 2.9181901196264983e-05,
"loss": 1.4809,
"step": 33260
},
{
"epoch": 1279.0566037735848,
"grad_norm": 1.876852753612874,
"learning_rate": 2.9141083672066472e-05,
"loss": 1.4737,
"step": 33280
},
{
"epoch": 1279.811320754717,
"grad_norm": 1.646333221814719,
"learning_rate": 2.910028426744349e-05,
"loss": 1.4807,
"step": 33300
},
{
"epoch": 1280.566037735849,
"grad_norm": 1.4950158846180641,
"learning_rate": 2.9059503037734925e-05,
"loss": 1.4871,
"step": 33320
},
{
"epoch": 1281.3207547169811,
"grad_norm": 2.5440304246025702,
"learning_rate": 2.9018740038255044e-05,
"loss": 1.4869,
"step": 33340
},
{
"epoch": 1282.0754716981132,
"grad_norm": 1.5221803613837093,
"learning_rate": 2.897799532429339e-05,
"loss": 1.4756,
"step": 33360
},
{
"epoch": 1282.8301886792453,
"grad_norm": 1.459833552438949,
"learning_rate": 2.8937268951114686e-05,
"loss": 1.4782,
"step": 33380
},
{
"epoch": 1283.5849056603774,
"grad_norm": 1.5193291412259906,
"learning_rate": 2.8896560973958796e-05,
"loss": 1.4925,
"step": 33400
},
{
"epoch": 1284.3396226415093,
"grad_norm": 1.457579254538571,
"learning_rate": 2.88558714480406e-05,
"loss": 1.4865,
"step": 33420
},
{
"epoch": 1285.0943396226414,
"grad_norm": 2.116390864572185,
"learning_rate": 2.8815200428549985e-05,
"loss": 1.4823,
"step": 33440
},
{
"epoch": 1285.8490566037735,
"grad_norm": 2.333973476529065,
"learning_rate": 2.8774547970651747e-05,
"loss": 1.4701,
"step": 33460
},
{
"epoch": 1286.6037735849056,
"grad_norm": 1.4347402180741313,
"learning_rate": 2.8733914129485457e-05,
"loss": 1.4964,
"step": 33480
},
{
"epoch": 1287.3584905660377,
"grad_norm": 1.5219049837257324,
"learning_rate": 2.8693298960165473e-05,
"loss": 1.4845,
"step": 33500
},
{
"epoch": 1288.1132075471698,
"grad_norm": 1.7295744561903763,
"learning_rate": 2.8652702517780815e-05,
"loss": 1.4729,
"step": 33520
},
{
"epoch": 1288.867924528302,
"grad_norm": 1.3491913340767474,
"learning_rate": 2.8612124857395097e-05,
"loss": 1.4734,
"step": 33540
},
{
"epoch": 1289.622641509434,
"grad_norm": 1.612399971127458,
"learning_rate": 2.8571566034046486e-05,
"loss": 1.4717,
"step": 33560
},
{
"epoch": 1290.377358490566,
"grad_norm": 1.523340229132746,
"learning_rate": 2.8531026102747552e-05,
"loss": 1.4784,
"step": 33580
},
{
"epoch": 1291.132075471698,
"grad_norm": 1.391650177787444,
"learning_rate": 2.849050511848529e-05,
"loss": 1.4968,
"step": 33600
},
{
"epoch": 1291.8867924528302,
"grad_norm": 1.631972432390494,
"learning_rate": 2.845000313622095e-05,
"loss": 1.4783,
"step": 33620
},
{
"epoch": 1292.6415094339623,
"grad_norm": 1.4676382942374402,
"learning_rate": 2.840952021089003e-05,
"loss": 1.4724,
"step": 33640
},
{
"epoch": 1293.3962264150944,
"grad_norm": 1.5025191965428788,
"learning_rate": 2.83690563974022e-05,
"loss": 1.4958,
"step": 33660
},
{
"epoch": 1294.1509433962265,
"grad_norm": 1.6379644083109945,
"learning_rate": 2.832861175064119e-05,
"loss": 1.4834,
"step": 33680
},
{
"epoch": 1294.9056603773586,
"grad_norm": 1.7312099049664693,
"learning_rate": 2.8288186325464705e-05,
"loss": 1.4941,
"step": 33700
},
{
"epoch": 1295.6603773584907,
"grad_norm": 1.5113721107585405,
"learning_rate": 2.8247780176704408e-05,
"loss": 1.4863,
"step": 33720
},
{
"epoch": 1296.4150943396226,
"grad_norm": 1.4187238404455875,
"learning_rate": 2.8207393359165837e-05,
"loss": 1.4635,
"step": 33740
},
{
"epoch": 1297.1698113207547,
"grad_norm": 1.5036198246572734,
"learning_rate": 2.8167025927628266e-05,
"loss": 1.4663,
"step": 33760
},
{
"epoch": 1297.9245283018868,
"grad_norm": 1.656299435435026,
"learning_rate": 2.8126677936844698e-05,
"loss": 1.4809,
"step": 33780
},
{
"epoch": 1298.6792452830189,
"grad_norm": 1.7227294745544,
"learning_rate": 2.808634944154176e-05,
"loss": 1.4518,
"step": 33800
},
{
"epoch": 1299.433962264151,
"grad_norm": 1.554440422068932,
"learning_rate": 2.8046040496419622e-05,
"loss": 1.4858,
"step": 33820
},
{
"epoch": 1300.188679245283,
"grad_norm": 1.5684395687858594,
"learning_rate": 2.8005751156151996e-05,
"loss": 1.4939,
"step": 33840
},
{
"epoch": 1300.9433962264152,
"grad_norm": 1.4791453327586883,
"learning_rate": 2.7965481475385922e-05,
"loss": 1.4981,
"step": 33860
},
{
"epoch": 1301.698113207547,
"grad_norm": 1.8682361890592045,
"learning_rate": 2.792523150874184e-05,
"loss": 1.485,
"step": 33880
},
{
"epoch": 1302.4528301886792,
"grad_norm": 1.4376784070576631,
"learning_rate": 2.7885001310813394e-05,
"loss": 1.4771,
"step": 33900
},
{
"epoch": 1303.2075471698113,
"grad_norm": 1.4919487782728726,
"learning_rate": 2.7844790936167448e-05,
"loss": 1.4818,
"step": 33920
},
{
"epoch": 1303.9622641509434,
"grad_norm": 1.5926644935407461,
"learning_rate": 2.7804600439344004e-05,
"loss": 1.481,
"step": 33940
},
{
"epoch": 1304.7169811320755,
"grad_norm": 2.129672326977145,
"learning_rate": 2.776442987485605e-05,
"loss": 1.4809,
"step": 33960
},
{
"epoch": 1305.4716981132076,
"grad_norm": 1.4661184798946012,
"learning_rate": 2.7724279297189564e-05,
"loss": 1.4734,
"step": 33980
},
{
"epoch": 1306.2264150943397,
"grad_norm": 1.6422416038082728,
"learning_rate": 2.7684148760803404e-05,
"loss": 1.4706,
"step": 34000
},
{
"epoch": 1306.9811320754718,
"grad_norm": 1.6788541325557527,
"learning_rate": 2.7644038320129247e-05,
"loss": 1.4734,
"step": 34020
},
{
"epoch": 1307.7358490566037,
"grad_norm": 1.5820996412366164,
"learning_rate": 2.7603948029571546e-05,
"loss": 1.4731,
"step": 34040
},
{
"epoch": 1308.4905660377358,
"grad_norm": 1.8093817496261688,
"learning_rate": 2.756387794350737e-05,
"loss": 1.4876,
"step": 34060
},
{
"epoch": 1309.245283018868,
"grad_norm": 1.4611414622430816,
"learning_rate": 2.7523828116286425e-05,
"loss": 1.4958,
"step": 34080
},
{
"epoch": 1310.0,
"grad_norm": 1.4982681857066789,
"learning_rate": 2.7483798602230905e-05,
"loss": 1.4713,
"step": 34100
},
{
"epoch": 1310.754716981132,
"grad_norm": 1.7049190400136933,
"learning_rate": 2.744378945563547e-05,
"loss": 1.4698,
"step": 34120
},
{
"epoch": 1311.5094339622642,
"grad_norm": 1.465072325468145,
"learning_rate": 2.7403800730767165e-05,
"loss": 1.4814,
"step": 34140
},
{
"epoch": 1312.2641509433963,
"grad_norm": 1.6806290813940998,
"learning_rate": 2.7363832481865326e-05,
"loss": 1.4623,
"step": 34160
},
{
"epoch": 1313.0188679245282,
"grad_norm": 1.422949129304357,
"learning_rate": 2.7323884763141494e-05,
"loss": 1.4798,
"step": 34180
},
{
"epoch": 1313.7735849056603,
"grad_norm": 1.5386955048633302,
"learning_rate": 2.728395762877941e-05,
"loss": 1.4588,
"step": 34200
},
{
"epoch": 1314.5283018867924,
"grad_norm": 1.5472770555424338,
"learning_rate": 2.7244051132934836e-05,
"loss": 1.451,
"step": 34220
},
{
"epoch": 1315.2830188679245,
"grad_norm": 1.435168914934391,
"learning_rate": 2.72041653297356e-05,
"loss": 1.4943,
"step": 34240
},
{
"epoch": 1316.0377358490566,
"grad_norm": 1.4183350034608622,
"learning_rate": 2.716430027328143e-05,
"loss": 1.4519,
"step": 34260
},
{
"epoch": 1316.7924528301887,
"grad_norm": 1.7134876611489063,
"learning_rate": 2.7124456017643914e-05,
"loss": 1.4658,
"step": 34280
},
{
"epoch": 1317.5471698113208,
"grad_norm": 1.4042660927164932,
"learning_rate": 2.7084632616866437e-05,
"loss": 1.4665,
"step": 34300
},
{
"epoch": 1318.301886792453,
"grad_norm": 1.7236176772036846,
"learning_rate": 2.7044830124964073e-05,
"loss": 1.4598,
"step": 34320
},
{
"epoch": 1319.0566037735848,
"grad_norm": 1.7345912564178498,
"learning_rate": 2.7005048595923597e-05,
"loss": 1.4941,
"step": 34340
},
{
"epoch": 1319.811320754717,
"grad_norm": 1.6553359599381614,
"learning_rate": 2.696528808370328e-05,
"loss": 1.448,
"step": 34360
},
{
"epoch": 1320.566037735849,
"grad_norm": 1.654924545197036,
"learning_rate": 2.6925548642232916e-05,
"loss": 1.453,
"step": 34380
},
{
"epoch": 1321.3207547169811,
"grad_norm": 1.966241914838029,
"learning_rate": 2.6885830325413732e-05,
"loss": 1.4791,
"step": 34400
},
{
"epoch": 1322.0754716981132,
"grad_norm": 1.613098173730771,
"learning_rate": 2.6846133187118266e-05,
"loss": 1.4456,
"step": 34420
},
{
"epoch": 1322.8301886792453,
"grad_norm": 1.694164161340185,
"learning_rate": 2.6806457281190392e-05,
"loss": 1.4697,
"step": 34440
},
{
"epoch": 1323.5849056603774,
"grad_norm": 1.7709910517494127,
"learning_rate": 2.6766802661445123e-05,
"loss": 1.4767,
"step": 34460
},
{
"epoch": 1324.3396226415093,
"grad_norm": 2.1757270130771547,
"learning_rate": 2.672716938166863e-05,
"loss": 1.5023,
"step": 34480
},
{
"epoch": 1325.0943396226414,
"grad_norm": 1.618966012864335,
"learning_rate": 2.66875574956181e-05,
"loss": 1.4459,
"step": 34500
},
{
"epoch": 1325.8490566037735,
"grad_norm": 1.6395370020860782,
"learning_rate": 2.6647967057021783e-05,
"loss": 1.4716,
"step": 34520
},
{
"epoch": 1326.6037735849056,
"grad_norm": 1.458865429611614,
"learning_rate": 2.6608398119578777e-05,
"loss": 1.4509,
"step": 34540
},
{
"epoch": 1327.3584905660377,
"grad_norm": 1.8785096977087146,
"learning_rate": 2.656885073695903e-05,
"loss": 1.4563,
"step": 34560
},
{
"epoch": 1328.1132075471698,
"grad_norm": 1.9390316222323336,
"learning_rate": 2.652932496280323e-05,
"loss": 1.4851,
"step": 34580
},
{
"epoch": 1328.867924528302,
"grad_norm": 5.310289949887802,
"learning_rate": 2.6489820850722802e-05,
"loss": 1.4768,
"step": 34600
},
{
"epoch": 1329.622641509434,
"grad_norm": 1.4684731158219795,
"learning_rate": 2.6450338454299786e-05,
"loss": 1.4516,
"step": 34620
},
{
"epoch": 1330.377358490566,
"grad_norm": 1.639577731583303,
"learning_rate": 2.641087782708672e-05,
"loss": 1.4654,
"step": 34640
},
{
"epoch": 1331.132075471698,
"grad_norm": 1.6849901015256106,
"learning_rate": 2.6371439022606665e-05,
"loss": 1.4615,
"step": 34660
},
{
"epoch": 1331.8867924528302,
"grad_norm": 1.619952725687253,
"learning_rate": 2.6332022094353024e-05,
"loss": 1.4461,
"step": 34680
},
{
"epoch": 1332.6415094339623,
"grad_norm": 1.5608967063706551,
"learning_rate": 2.6292627095789594e-05,
"loss": 1.4523,
"step": 34700
},
{
"epoch": 1333.3962264150944,
"grad_norm": 1.7568408459896505,
"learning_rate": 2.625325408035041e-05,
"loss": 1.4758,
"step": 34720
},
{
"epoch": 1334.1509433962265,
"grad_norm": 1.5186845485994895,
"learning_rate": 2.6213903101439668e-05,
"loss": 1.4527,
"step": 34740
},
{
"epoch": 1334.9056603773586,
"grad_norm": 1.9016010055715276,
"learning_rate": 2.6174574212431673e-05,
"loss": 1.4708,
"step": 34760
},
{
"epoch": 1335.6603773584907,
"grad_norm": 1.3914584691450766,
"learning_rate": 2.6135267466670776e-05,
"loss": 1.4519,
"step": 34780
},
{
"epoch": 1336.4150943396226,
"grad_norm": 1.7920706183325235,
"learning_rate": 2.6095982917471312e-05,
"loss": 1.4551,
"step": 34800
},
{
"epoch": 1337.1698113207547,
"grad_norm": 1.7415199040517522,
"learning_rate": 2.6056720618117508e-05,
"loss": 1.4618,
"step": 34820
},
{
"epoch": 1337.9245283018868,
"grad_norm": 2.0387577968023423,
"learning_rate": 2.6017480621863382e-05,
"loss": 1.4336,
"step": 34840
},
{
"epoch": 1338.6792452830189,
"grad_norm": 1.7452335041516622,
"learning_rate": 2.5978262981932716e-05,
"loss": 1.4845,
"step": 34860
},
{
"epoch": 1339.433962264151,
"grad_norm": 1.8221491527113842,
"learning_rate": 2.5939067751518968e-05,
"loss": 1.4509,
"step": 34880
},
{
"epoch": 1340.188679245283,
"grad_norm": 1.573534969706598,
"learning_rate": 2.58998949837852e-05,
"loss": 1.4597,
"step": 34900
},
{
"epoch": 1340.9433962264152,
"grad_norm": 1.7418894779202971,
"learning_rate": 2.5860744731864037e-05,
"loss": 1.4509,
"step": 34920
},
{
"epoch": 1341.698113207547,
"grad_norm": 2.3533748801857612,
"learning_rate": 2.5821617048857514e-05,
"loss": 1.4707,
"step": 34940
},
{
"epoch": 1342.4528301886792,
"grad_norm": 1.6384303594662744,
"learning_rate": 2.5782511987837087e-05,
"loss": 1.4483,
"step": 34960
},
{
"epoch": 1343.2075471698113,
"grad_norm": 1.7437935503570192,
"learning_rate": 2.5743429601843493e-05,
"loss": 1.4708,
"step": 34980
},
{
"epoch": 1343.9622641509434,
"grad_norm": 1.6299173329516294,
"learning_rate": 2.5704369943886763e-05,
"loss": 1.4487,
"step": 35000
},
{
"epoch": 1344.7169811320755,
"grad_norm": 1.5340708358576824,
"learning_rate": 2.5665333066946082e-05,
"loss": 1.4659,
"step": 35020
},
{
"epoch": 1345.4716981132076,
"grad_norm": 1.70280338168885,
"learning_rate": 2.5626319023969715e-05,
"loss": 1.4547,
"step": 35040
},
{
"epoch": 1346.2264150943397,
"grad_norm": 1.6585665666032239,
"learning_rate": 2.558732786787497e-05,
"loss": 1.4514,
"step": 35060
},
{
"epoch": 1346.9811320754718,
"grad_norm": 1.562613257380082,
"learning_rate": 2.5548359651548126e-05,
"loss": 1.4661,
"step": 35080
},
{
"epoch": 1347.7358490566037,
"grad_norm": 1.7392138600300024,
"learning_rate": 2.550941442784431e-05,
"loss": 1.4546,
"step": 35100
},
{
"epoch": 1348.4905660377358,
"grad_norm": 1.9111375288571992,
"learning_rate": 2.5470492249587522e-05,
"loss": 1.4478,
"step": 35120
},
{
"epoch": 1349.245283018868,
"grad_norm": 1.4950805686503206,
"learning_rate": 2.5431593169570446e-05,
"loss": 1.4535,
"step": 35140
},
{
"epoch": 1350.0,
"grad_norm": 2.553809298230812,
"learning_rate": 2.539271724055444e-05,
"loss": 1.464,
"step": 35160
},
{
"epoch": 1350.754716981132,
"grad_norm": 1.562798272416066,
"learning_rate": 2.5353864515269525e-05,
"loss": 1.4665,
"step": 35180
},
{
"epoch": 1351.5094339622642,
"grad_norm": 1.5956415565820565,
"learning_rate": 2.531503504641416e-05,
"loss": 1.4174,
"step": 35200
},
{
"epoch": 1352.2641509433963,
"grad_norm": 1.745867042261029,
"learning_rate": 2.5276228886655333e-05,
"loss": 1.4738,
"step": 35220
},
{
"epoch": 1353.0188679245282,
"grad_norm": 1.8454370598772634,
"learning_rate": 2.5237446088628384e-05,
"loss": 1.4407,
"step": 35240
},
{
"epoch": 1353.7735849056603,
"grad_norm": 1.70704191729437,
"learning_rate": 2.5198686704936945e-05,
"loss": 1.4617,
"step": 35260
},
{
"epoch": 1354.5283018867924,
"grad_norm": 1.410719238952515,
"learning_rate": 2.5159950788152942e-05,
"loss": 1.4397,
"step": 35280
},
{
"epoch": 1355.2830188679245,
"grad_norm": 1.811804083528806,
"learning_rate": 2.512123839081642e-05,
"loss": 1.443,
"step": 35300
},
{
"epoch": 1356.0377358490566,
"grad_norm": 1.659319824434148,
"learning_rate": 2.508254956543557e-05,
"loss": 1.4577,
"step": 35320
},
{
"epoch": 1356.7924528301887,
"grad_norm": 1.5084615900612242,
"learning_rate": 2.504388436448657e-05,
"loss": 1.4702,
"step": 35340
},
{
"epoch": 1357.5471698113208,
"grad_norm": 1.6272545133599885,
"learning_rate": 2.500524284041357e-05,
"loss": 1.4397,
"step": 35360
},
{
"epoch": 1358.301886792453,
"grad_norm": 1.470645864952112,
"learning_rate": 2.4966625045628615e-05,
"loss": 1.4435,
"step": 35380
},
{
"epoch": 1359.0566037735848,
"grad_norm": 1.455775463587072,
"learning_rate": 2.4928031032511544e-05,
"loss": 1.4554,
"step": 35400
},
{
"epoch": 1359.811320754717,
"grad_norm": 1.6787988136879601,
"learning_rate": 2.4889460853409974e-05,
"loss": 1.4692,
"step": 35420
},
{
"epoch": 1360.566037735849,
"grad_norm": 1.5640507822516196,
"learning_rate": 2.485091456063916e-05,
"loss": 1.4528,
"step": 35440
},
{
"epoch": 1361.3207547169811,
"grad_norm": 1.396621608357886,
"learning_rate": 2.4812392206481945e-05,
"loss": 1.4371,
"step": 35460
},
{
"epoch": 1362.0754716981132,
"grad_norm": 1.8537494645554213,
"learning_rate": 2.477389384318876e-05,
"loss": 1.4395,
"step": 35480
},
{
"epoch": 1362.8301886792453,
"grad_norm": 1.512928732642698,
"learning_rate": 2.4735419522977467e-05,
"loss": 1.4914,
"step": 35500
},
{
"epoch": 1363.5849056603774,
"grad_norm": 1.6340922613193214,
"learning_rate": 2.46969692980333e-05,
"loss": 1.4654,
"step": 35520
},
{
"epoch": 1364.3396226415093,
"grad_norm": 1.5378015561259157,
"learning_rate": 2.465854322050881e-05,
"loss": 1.4246,
"step": 35540
},
{
"epoch": 1365.0943396226414,
"grad_norm": 1.8471949838761705,
"learning_rate": 2.462014134252384e-05,
"loss": 1.4386,
"step": 35560
},
{
"epoch": 1365.8490566037735,
"grad_norm": 2.139477793232749,
"learning_rate": 2.4581763716165345e-05,
"loss": 1.4314,
"step": 35580
},
{
"epoch": 1366.6037735849056,
"grad_norm": 1.5366713623147805,
"learning_rate": 2.454341039348746e-05,
"loss": 1.4514,
"step": 35600
},
{
"epoch": 1367.3584905660377,
"grad_norm": 2.6632963736452018,
"learning_rate": 2.4505081426511286e-05,
"loss": 1.4244,
"step": 35620
},
{
"epoch": 1368.1132075471698,
"grad_norm": 1.7507517183403924,
"learning_rate": 2.4466776867224914e-05,
"loss": 1.4401,
"step": 35640
},
{
"epoch": 1368.867924528302,
"grad_norm": 1.7263277038654796,
"learning_rate": 2.4428496767583355e-05,
"loss": 1.4569,
"step": 35660
},
{
"epoch": 1369.622641509434,
"grad_norm": 1.664393561735168,
"learning_rate": 2.4390241179508404e-05,
"loss": 1.4387,
"step": 35680
},
{
"epoch": 1370.377358490566,
"grad_norm": 1.723479394345894,
"learning_rate": 2.435201015488865e-05,
"loss": 1.4411,
"step": 35700
},
{
"epoch": 1371.132075471698,
"grad_norm": 1.434976866992101,
"learning_rate": 2.4313803745579318e-05,
"loss": 1.4284,
"step": 35720
},
{
"epoch": 1371.8867924528302,
"grad_norm": 1.4785579710843697,
"learning_rate": 2.4275622003402272e-05,
"loss": 1.442,
"step": 35740
},
{
"epoch": 1372.6415094339623,
"grad_norm": 1.4377021405339876,
"learning_rate": 2.4237464980145938e-05,
"loss": 1.4585,
"step": 35760
},
{
"epoch": 1373.3962264150944,
"grad_norm": 1.4439423657468624,
"learning_rate": 2.4199332727565162e-05,
"loss": 1.4415,
"step": 35780
},
{
"epoch": 1374.1509433962265,
"grad_norm": 1.6286933767716432,
"learning_rate": 2.4161225297381257e-05,
"loss": 1.4191,
"step": 35800
},
{
"epoch": 1374.9056603773586,
"grad_norm": 1.8061947503706157,
"learning_rate": 2.412314274128181e-05,
"loss": 1.4328,
"step": 35820
},
{
"epoch": 1375.6603773584907,
"grad_norm": 1.4892866827277318,
"learning_rate": 2.408508511092069e-05,
"loss": 1.426,
"step": 35840
},
{
"epoch": 1376.4150943396226,
"grad_norm": 2.1944517889347206,
"learning_rate": 2.4047052457917976e-05,
"loss": 1.4383,
"step": 35860
},
{
"epoch": 1377.1698113207547,
"grad_norm": 1.657764612011157,
"learning_rate": 2.4009044833859837e-05,
"loss": 1.4335,
"step": 35880
},
{
"epoch": 1377.9245283018868,
"grad_norm": 1.6641457685651413,
"learning_rate": 2.397106229029853e-05,
"loss": 1.449,
"step": 35900
},
{
"epoch": 1378.6792452830189,
"grad_norm": 1.6180638342974163,
"learning_rate": 2.3933104878752255e-05,
"loss": 1.4531,
"step": 35920
},
{
"epoch": 1379.433962264151,
"grad_norm": 1.4294375910343768,
"learning_rate": 2.3895172650705135e-05,
"loss": 1.394,
"step": 35940
},
{
"epoch": 1380.188679245283,
"grad_norm": 1.8277501896092694,
"learning_rate": 2.3857265657607175e-05,
"loss": 1.3907,
"step": 35960
},
{
"epoch": 1380.9433962264152,
"grad_norm": 1.498142714401942,
"learning_rate": 2.381938395087408e-05,
"loss": 1.427,
"step": 35980
},
{
"epoch": 1381.698113207547,
"grad_norm": 1.6446695245077154,
"learning_rate": 2.3781527581887328e-05,
"loss": 1.4267,
"step": 36000
},
{
"epoch": 1382.4528301886792,
"grad_norm": 2.126047948088478,
"learning_rate": 2.3743696601993973e-05,
"loss": 1.4513,
"step": 36020
},
{
"epoch": 1383.2075471698113,
"grad_norm": 1.5906073184513956,
"learning_rate": 2.3705891062506686e-05,
"loss": 1.4468,
"step": 36040
},
{
"epoch": 1383.9622641509434,
"grad_norm": 1.6659051387541641,
"learning_rate": 2.366811101470359e-05,
"loss": 1.4397,
"step": 36060
},
{
"epoch": 1384.7169811320755,
"grad_norm": 1.7950603394090476,
"learning_rate": 2.363035650982822e-05,
"loss": 1.4314,
"step": 36080
},
{
"epoch": 1385.4716981132076,
"grad_norm": 1.7227503126171113,
"learning_rate": 2.359262759908953e-05,
"loss": 1.4305,
"step": 36100
},
{
"epoch": 1386.2264150943397,
"grad_norm": 1.5686879263532916,
"learning_rate": 2.355492433366169e-05,
"loss": 1.4606,
"step": 36120
},
{
"epoch": 1386.9811320754718,
"grad_norm": 1.6010165898998077,
"learning_rate": 2.3517246764684138e-05,
"loss": 1.441,
"step": 36140
},
{
"epoch": 1387.7358490566037,
"grad_norm": 3.491710911332113,
"learning_rate": 2.3479594943261428e-05,
"loss": 1.4341,
"step": 36160
},
{
"epoch": 1388.4905660377358,
"grad_norm": 1.6931483101249463,
"learning_rate": 2.3441968920463175e-05,
"loss": 1.4059,
"step": 36180
},
{
"epoch": 1389.245283018868,
"grad_norm": 1.5814288881168233,
"learning_rate": 2.340436874732406e-05,
"loss": 1.4494,
"step": 36200
},
{
"epoch": 1390.0,
"grad_norm": 1.7550476965929234,
"learning_rate": 2.3366794474843636e-05,
"loss": 1.4461,
"step": 36220
},
{
"epoch": 1390.754716981132,
"grad_norm": 1.6037325519139611,
"learning_rate": 2.332924615398638e-05,
"loss": 1.4324,
"step": 36240
},
{
"epoch": 1391.5094339622642,
"grad_norm": 1.5872440902961078,
"learning_rate": 2.3291723835681542e-05,
"loss": 1.4229,
"step": 36260
},
{
"epoch": 1392.2641509433963,
"grad_norm": 1.6075974238110624,
"learning_rate": 2.3254227570823088e-05,
"loss": 1.4319,
"step": 36280
},
{
"epoch": 1393.0188679245282,
"grad_norm": 1.664082496030561,
"learning_rate": 2.3216757410269688e-05,
"loss": 1.4133,
"step": 36300
},
{
"epoch": 1393.7735849056603,
"grad_norm": 1.868185444331913,
"learning_rate": 2.3179313404844556e-05,
"loss": 1.4303,
"step": 36320
},
{
"epoch": 1394.5283018867924,
"grad_norm": 1.5709216565532331,
"learning_rate": 2.314189560533549e-05,
"loss": 1.4136,
"step": 36340
},
{
"epoch": 1395.2830188679245,
"grad_norm": 1.6461901097795721,
"learning_rate": 2.3104504062494673e-05,
"loss": 1.4359,
"step": 36360
},
{
"epoch": 1396.0377358490566,
"grad_norm": 1.4737937485692245,
"learning_rate": 2.306713882703874e-05,
"loss": 1.4417,
"step": 36380
},
{
"epoch": 1396.7924528301887,
"grad_norm": 1.62600468664324,
"learning_rate": 2.3029799949648578e-05,
"loss": 1.4471,
"step": 36400
},
{
"epoch": 1397.5471698113208,
"grad_norm": 2.4473530264247914,
"learning_rate": 2.2992487480969405e-05,
"loss": 1.4239,
"step": 36420
},
{
"epoch": 1398.301886792453,
"grad_norm": 1.451788707298732,
"learning_rate": 2.295520147161054e-05,
"loss": 1.4213,
"step": 36440
},
{
"epoch": 1399.0566037735848,
"grad_norm": 1.6561495842890779,
"learning_rate": 2.2917941972145448e-05,
"loss": 1.4289,
"step": 36460
},
{
"epoch": 1399.811320754717,
"grad_norm": 1.7199804756862742,
"learning_rate": 2.288070903311165e-05,
"loss": 1.4089,
"step": 36480
},
{
"epoch": 1400.566037735849,
"grad_norm": 1.3767860468778748,
"learning_rate": 2.2843502705010602e-05,
"loss": 1.43,
"step": 36500
},
{
"epoch": 1401.3207547169811,
"grad_norm": 1.629044071752712,
"learning_rate": 2.2806323038307724e-05,
"loss": 1.4353,
"step": 36520
},
{
"epoch": 1402.0754716981132,
"grad_norm": 1.5402931594748135,
"learning_rate": 2.2769170083432224e-05,
"loss": 1.4002,
"step": 36540
},
{
"epoch": 1402.8301886792453,
"grad_norm": 1.6851610727649395,
"learning_rate": 2.273204389077707e-05,
"loss": 1.4303,
"step": 36560
},
{
"epoch": 1403.5849056603774,
"grad_norm": 1.6351932980143555,
"learning_rate": 2.2694944510698992e-05,
"loss": 1.4324,
"step": 36580
},
{
"epoch": 1404.3396226415093,
"grad_norm": 1.3360407707287731,
"learning_rate": 2.265787199351829e-05,
"loss": 1.4296,
"step": 36600
},
{
"epoch": 1405.0943396226414,
"grad_norm": 1.6229856547835415,
"learning_rate": 2.2620826389518878e-05,
"loss": 1.4132,
"step": 36620
},
{
"epoch": 1405.8490566037735,
"grad_norm": 1.5762261444691155,
"learning_rate": 2.258380774894813e-05,
"loss": 1.4189,
"step": 36640
},
{
"epoch": 1406.6037735849056,
"grad_norm": 1.6330786646124598,
"learning_rate": 2.254681612201684e-05,
"loss": 1.4229,
"step": 36660
},
{
"epoch": 1407.3584905660377,
"grad_norm": 1.6074464661210397,
"learning_rate": 2.2509851558899212e-05,
"loss": 1.4438,
"step": 36680
},
{
"epoch": 1408.1132075471698,
"grad_norm": 2.912277484153031,
"learning_rate": 2.2472914109732686e-05,
"loss": 1.4195,
"step": 36700
},
{
"epoch": 1408.867924528302,
"grad_norm": 1.6223740817719732,
"learning_rate": 2.2436003824617963e-05,
"loss": 1.4099,
"step": 36720
},
{
"epoch": 1409.622641509434,
"grad_norm": 1.766781857646511,
"learning_rate": 2.2399120753618896e-05,
"loss": 1.4168,
"step": 36740
},
{
"epoch": 1410.377358490566,
"grad_norm": 1.5296965456959557,
"learning_rate": 2.2362264946762392e-05,
"loss": 1.4118,
"step": 36760
},
{
"epoch": 1411.132075471698,
"grad_norm": 1.6610041335566879,
"learning_rate": 2.232543645403842e-05,
"loss": 1.4166,
"step": 36780
},
{
"epoch": 1411.8867924528302,
"grad_norm": 1.5205836616470723,
"learning_rate": 2.228863532539987e-05,
"loss": 1.4246,
"step": 36800
},
{
"epoch": 1412.6415094339623,
"grad_norm": 2.018497485986653,
"learning_rate": 2.2251861610762556e-05,
"loss": 1.4219,
"step": 36820
},
{
"epoch": 1413.3962264150944,
"grad_norm": 1.495393210690481,
"learning_rate": 2.221511536000505e-05,
"loss": 1.4201,
"step": 36840
},
{
"epoch": 1414.1509433962265,
"grad_norm": 1.5817177891641536,
"learning_rate": 2.2178396622968714e-05,
"loss": 1.4301,
"step": 36860
},
{
"epoch": 1414.9056603773586,
"grad_norm": 1.5602680564678848,
"learning_rate": 2.2141705449457588e-05,
"loss": 1.4246,
"step": 36880
},
{
"epoch": 1415.6603773584907,
"grad_norm": 1.5687723652001904,
"learning_rate": 2.2105041889238327e-05,
"loss": 1.4291,
"step": 36900
},
{
"epoch": 1416.4150943396226,
"grad_norm": 1.6516438298835592,
"learning_rate": 2.2068405992040127e-05,
"loss": 1.4186,
"step": 36920
},
{
"epoch": 1417.1698113207547,
"grad_norm": 1.6972963029742167,
"learning_rate": 2.2031797807554646e-05,
"loss": 1.4026,
"step": 36940
},
{
"epoch": 1417.9245283018868,
"grad_norm": 1.6936263753645908,
"learning_rate": 2.1995217385435962e-05,
"loss": 1.3882,
"step": 36960
},
{
"epoch": 1418.6792452830189,
"grad_norm": 1.697372534880421,
"learning_rate": 2.1958664775300517e-05,
"loss": 1.4228,
"step": 36980
},
{
"epoch": 1419.433962264151,
"grad_norm": 1.4972148012217616,
"learning_rate": 2.192214002672703e-05,
"loss": 1.3961,
"step": 37000
},
{
"epoch": 1420.188679245283,
"grad_norm": 1.6926137674291781,
"learning_rate": 2.1885643189256404e-05,
"loss": 1.4005,
"step": 37020
},
{
"epoch": 1420.9433962264152,
"grad_norm": 1.6182171449313734,
"learning_rate": 2.1849174312391693e-05,
"loss": 1.3939,
"step": 37040
},
{
"epoch": 1421.698113207547,
"grad_norm": 1.6235165658387523,
"learning_rate": 2.181273344559802e-05,
"loss": 1.414,
"step": 37060
},
{
"epoch": 1422.4528301886792,
"grad_norm": 1.7354641628437306,
"learning_rate": 2.1776320638302533e-05,
"loss": 1.4039,
"step": 37080
},
{
"epoch": 1423.2075471698113,
"grad_norm": 1.7598777416483105,
"learning_rate": 2.1739935939894332e-05,
"loss": 1.4319,
"step": 37100
},
{
"epoch": 1423.9622641509434,
"grad_norm": 1.6119817066147992,
"learning_rate": 2.170357939972436e-05,
"loss": 1.4083,
"step": 37120
},
{
"epoch": 1424.7169811320755,
"grad_norm": 1.5177195143064601,
"learning_rate": 2.1667251067105383e-05,
"loss": 1.4084,
"step": 37140
},
{
"epoch": 1425.4716981132076,
"grad_norm": 1.6531623474873094,
"learning_rate": 2.1630950991311884e-05,
"loss": 1.3961,
"step": 37160
},
{
"epoch": 1426.2264150943397,
"grad_norm": 1.9866189092494402,
"learning_rate": 2.159467922158006e-05,
"loss": 1.4205,
"step": 37180
},
{
"epoch": 1426.9811320754718,
"grad_norm": 1.6409536663163726,
"learning_rate": 2.15584358071077e-05,
"loss": 1.4065,
"step": 37200
},
{
"epoch": 1427.7358490566037,
"grad_norm": 1.5972136032609723,
"learning_rate": 2.1522220797054117e-05,
"loss": 1.3999,
"step": 37220
},
{
"epoch": 1428.4905660377358,
"grad_norm": 1.7176147072411343,
"learning_rate": 2.1486034240540095e-05,
"loss": 1.4077,
"step": 37240
},
{
"epoch": 1429.245283018868,
"grad_norm": 2.2258677114656655,
"learning_rate": 2.1449876186647868e-05,
"loss": 1.4174,
"step": 37260
},
{
"epoch": 1430.0,
"grad_norm": 1.702909141767608,
"learning_rate": 2.1413746684420938e-05,
"loss": 1.3745,
"step": 37280
},
{
"epoch": 1430.754716981132,
"grad_norm": 2.1998514759828915,
"learning_rate": 2.1377645782864164e-05,
"loss": 1.421,
"step": 37300
},
{
"epoch": 1431.5094339622642,
"grad_norm": 1.4634078104497494,
"learning_rate": 2.134157353094355e-05,
"loss": 1.4219,
"step": 37320
},
{
"epoch": 1432.2641509433963,
"grad_norm": 1.7232746233155163,
"learning_rate": 2.1305529977586244e-05,
"loss": 1.4236,
"step": 37340
},
{
"epoch": 1433.0188679245282,
"grad_norm": 1.7372503788909404,
"learning_rate": 2.1269515171680505e-05,
"loss": 1.391,
"step": 37360
},
{
"epoch": 1433.7735849056603,
"grad_norm": 1.8471672382610358,
"learning_rate": 2.1233529162075586e-05,
"loss": 1.4087,
"step": 37380
},
{
"epoch": 1434.5283018867924,
"grad_norm": 1.6217649320497987,
"learning_rate": 2.1197571997581665e-05,
"loss": 1.4239,
"step": 37400
},
{
"epoch": 1435.2830188679245,
"grad_norm": 1.5296478429731253,
"learning_rate": 2.1161643726969807e-05,
"loss": 1.3958,
"step": 37420
},
{
"epoch": 1436.0377358490566,
"grad_norm": 1.560888539193858,
"learning_rate": 2.1125744398971865e-05,
"loss": 1.3979,
"step": 37440
},
{
"epoch": 1436.7924528301887,
"grad_norm": 4.524184692042414,
"learning_rate": 2.1089874062280467e-05,
"loss": 1.4068,
"step": 37460
},
{
"epoch": 1437.5471698113208,
"grad_norm": 1.9907802095010148,
"learning_rate": 2.1054032765548943e-05,
"loss": 1.4128,
"step": 37480
},
{
"epoch": 1438.301886792453,
"grad_norm": 1.5158745007996666,
"learning_rate": 2.1018220557391152e-05,
"loss": 1.4206,
"step": 37500
},
{
"epoch": 1439.0566037735848,
"grad_norm": 1.922346737191028,
"learning_rate": 2.0982437486381567e-05,
"loss": 1.4155,
"step": 37520
},
{
"epoch": 1439.811320754717,
"grad_norm": 1.6671399833246607,
"learning_rate": 2.094668360105509e-05,
"loss": 1.4052,
"step": 37540
},
{
"epoch": 1440.566037735849,
"grad_norm": 1.6551308179910114,
"learning_rate": 2.0910958949907086e-05,
"loss": 1.3986,
"step": 37560
},
{
"epoch": 1441.3207547169811,
"grad_norm": 1.5091823329163863,
"learning_rate": 2.087526358139325e-05,
"loss": 1.3842,
"step": 37580
},
{
"epoch": 1442.0754716981132,
"grad_norm": 1.5775979624954766,
"learning_rate": 2.0839597543929547e-05,
"loss": 1.3695,
"step": 37600
},
{
"epoch": 1442.8301886792453,
"grad_norm": 1.65888589339979,
"learning_rate": 2.0803960885892166e-05,
"loss": 1.4212,
"step": 37620
},
{
"epoch": 1443.5849056603774,
"grad_norm": 1.7548961067858515,
"learning_rate": 2.0768353655617437e-05,
"loss": 1.4113,
"step": 37640
},
{
"epoch": 1444.3396226415093,
"grad_norm": 1.6568444527605615,
"learning_rate": 2.0732775901401787e-05,
"loss": 1.4097,
"step": 37660
},
{
"epoch": 1445.0943396226414,
"grad_norm": 1.569007851847122,
"learning_rate": 2.0697227671501686e-05,
"loss": 1.4025,
"step": 37680
},
{
"epoch": 1445.8490566037735,
"grad_norm": 2.054795832820314,
"learning_rate": 2.0661709014133507e-05,
"loss": 1.379,
"step": 37700
},
{
"epoch": 1446.6037735849056,
"grad_norm": 1.825419627550906,
"learning_rate": 2.0626219977473546e-05,
"loss": 1.4141,
"step": 37720
},
{
"epoch": 1447.3584905660377,
"grad_norm": 1.5477059924334846,
"learning_rate": 2.05907606096579e-05,
"loss": 1.3764,
"step": 37740
},
{
"epoch": 1448.1132075471698,
"grad_norm": 1.6756003225697567,
"learning_rate": 2.0555330958782456e-05,
"loss": 1.3943,
"step": 37760
},
{
"epoch": 1448.867924528302,
"grad_norm": 1.6181024178942431,
"learning_rate": 2.0519931072902775e-05,
"loss": 1.3828,
"step": 37780
},
{
"epoch": 1449.622641509434,
"grad_norm": 1.5075137093108786,
"learning_rate": 2.0484561000034048e-05,
"loss": 1.3993,
"step": 37800
},
{
"epoch": 1450.377358490566,
"grad_norm": 1.7611806649373956,
"learning_rate": 2.0449220788151017e-05,
"loss": 1.4025,
"step": 37820
},
{
"epoch": 1451.132075471698,
"grad_norm": 1.5899695714047575,
"learning_rate": 2.0413910485187918e-05,
"loss": 1.4011,
"step": 37840
},
{
"epoch": 1451.8867924528302,
"grad_norm": 1.9368764939203147,
"learning_rate": 2.0378630139038477e-05,
"loss": 1.3914,
"step": 37860
},
{
"epoch": 1452.6415094339623,
"grad_norm": 2.2298811573087938,
"learning_rate": 2.0343379797555718e-05,
"loss": 1.4096,
"step": 37880
},
{
"epoch": 1453.3962264150944,
"grad_norm": 1.8812446095800621,
"learning_rate": 2.0308159508552003e-05,
"loss": 1.3994,
"step": 37900
},
{
"epoch": 1454.1509433962265,
"grad_norm": 1.546640573436516,
"learning_rate": 2.0272969319798898e-05,
"loss": 1.3901,
"step": 37920
},
{
"epoch": 1454.9056603773586,
"grad_norm": 1.6385943250375863,
"learning_rate": 2.0237809279027187e-05,
"loss": 1.3954,
"step": 37940
},
{
"epoch": 1455.6603773584907,
"grad_norm": 1.5716563974399815,
"learning_rate": 2.0202679433926757e-05,
"loss": 1.3935,
"step": 37960
},
{
"epoch": 1456.4150943396226,
"grad_norm": 1.735984609092,
"learning_rate": 2.0167579832146505e-05,
"loss": 1.4118,
"step": 37980
},
{
"epoch": 1457.1698113207547,
"grad_norm": 1.7313373844094564,
"learning_rate": 2.013251052129433e-05,
"loss": 1.3767,
"step": 38000
},
{
"epoch": 1457.9245283018868,
"grad_norm": 1.7960534040522838,
"learning_rate": 2.0097471548937024e-05,
"loss": 1.3803,
"step": 38020
},
{
"epoch": 1458.6792452830189,
"grad_norm": 2.5283310401144434,
"learning_rate": 2.0062462962600258e-05,
"loss": 1.3763,
"step": 38040
},
{
"epoch": 1459.433962264151,
"grad_norm": 1.5401697781512245,
"learning_rate": 2.0027484809768506e-05,
"loss": 1.3768,
"step": 38060
},
{
"epoch": 1460.188679245283,
"grad_norm": 2.291171375246112,
"learning_rate": 1.9992537137884905e-05,
"loss": 1.389,
"step": 38080
},
{
"epoch": 1460.9433962264152,
"grad_norm": 1.6878149956470094,
"learning_rate": 1.9957619994351278e-05,
"loss": 1.3978,
"step": 38100
},
{
"epoch": 1461.698113207547,
"grad_norm": 1.9279881821004916,
"learning_rate": 1.9922733426528033e-05,
"loss": 1.3576,
"step": 38120
},
{
"epoch": 1462.4528301886792,
"grad_norm": 1.6593792348690906,
"learning_rate": 1.9887877481734122e-05,
"loss": 1.3827,
"step": 38140
},
{
"epoch": 1463.2075471698113,
"grad_norm": 1.6870370599265458,
"learning_rate": 1.9853052207246967e-05,
"loss": 1.3498,
"step": 38160
},
{
"epoch": 1463.9622641509434,
"grad_norm": 1.5201114526632646,
"learning_rate": 1.981825765030236e-05,
"loss": 1.3972,
"step": 38180
},
{
"epoch": 1464.7169811320755,
"grad_norm": 1.676738216954013,
"learning_rate": 1.9783493858094444e-05,
"loss": 1.3751,
"step": 38200
},
{
"epoch": 1465.4716981132076,
"grad_norm": 1.7174190166635537,
"learning_rate": 1.9748760877775622e-05,
"loss": 1.3723,
"step": 38220
},
{
"epoch": 1466.2264150943397,
"grad_norm": 1.6832142484740018,
"learning_rate": 1.9714058756456533e-05,
"loss": 1.383,
"step": 38240
},
{
"epoch": 1466.9811320754718,
"grad_norm": 1.6409050107329164,
"learning_rate": 1.9679387541205946e-05,
"loss": 1.3868,
"step": 38260
},
{
"epoch": 1467.7358490566037,
"grad_norm": 1.870348325922022,
"learning_rate": 1.96447472790507e-05,
"loss": 1.4093,
"step": 38280
},
{
"epoch": 1468.4905660377358,
"grad_norm": 1.622451000807429,
"learning_rate": 1.9610138016975643e-05,
"loss": 1.3908,
"step": 38300
},
{
"epoch": 1469.245283018868,
"grad_norm": 1.5580142525601877,
"learning_rate": 1.9575559801923602e-05,
"loss": 1.3519,
"step": 38320
},
{
"epoch": 1470.0,
"grad_norm": 5.308564996067893,
"learning_rate": 1.95410126807953e-05,
"loss": 1.3806,
"step": 38340
},
{
"epoch": 1470.754716981132,
"grad_norm": 1.697128624848967,
"learning_rate": 1.9506496700449247e-05,
"loss": 1.4021,
"step": 38360
},
{
"epoch": 1471.5094339622642,
"grad_norm": 1.6407506564774348,
"learning_rate": 1.9472011907701736e-05,
"loss": 1.3889,
"step": 38380
},
{
"epoch": 1472.2641509433963,
"grad_norm": 2.5227247039872567,
"learning_rate": 1.9437558349326745e-05,
"loss": 1.3656,
"step": 38400
},
{
"epoch": 1473.0188679245282,
"grad_norm": 2.0102914189329155,
"learning_rate": 1.9403136072055903e-05,
"loss": 1.3631,
"step": 38420
},
{
"epoch": 1473.7735849056603,
"grad_norm": 1.8549253610315775,
"learning_rate": 1.9368745122578427e-05,
"loss": 1.3835,
"step": 38440
},
{
"epoch": 1474.5283018867924,
"grad_norm": 1.7474837425802672,
"learning_rate": 1.9334385547541004e-05,
"loss": 1.3876,
"step": 38460
},
{
"epoch": 1475.2830188679245,
"grad_norm": 1.4221446206180013,
"learning_rate": 1.930005739354778e-05,
"loss": 1.3875,
"step": 38480
},
{
"epoch": 1476.0377358490566,
"grad_norm": 1.567704112230289,
"learning_rate": 1.926576070716028e-05,
"loss": 1.3787,
"step": 38500
},
{
"epoch": 1476.7924528301887,
"grad_norm": 1.8092743011121888,
"learning_rate": 1.9231495534897356e-05,
"loss": 1.3746,
"step": 38520
},
{
"epoch": 1477.5471698113208,
"grad_norm": 1.9871542434365639,
"learning_rate": 1.919726192323512e-05,
"loss": 1.4062,
"step": 38540
},
{
"epoch": 1478.301886792453,
"grad_norm": 2.100192891688555,
"learning_rate": 1.916305991860687e-05,
"loss": 1.372,
"step": 38560
},
{
"epoch": 1479.0566037735848,
"grad_norm": 1.689968827696773,
"learning_rate": 1.912888956740302e-05,
"loss": 1.3994,
"step": 38580
},
{
"epoch": 1479.811320754717,
"grad_norm": 1.59619952456533,
"learning_rate": 1.9094750915971053e-05,
"loss": 1.3547,
"step": 38600
},
{
"epoch": 1480.566037735849,
"grad_norm": 3.206605320072948,
"learning_rate": 1.9060644010615473e-05,
"loss": 1.4052,
"step": 38620
},
{
"epoch": 1481.3207547169811,
"grad_norm": 1.5795369303879008,
"learning_rate": 1.9026568897597735e-05,
"loss": 1.3921,
"step": 38640
},
{
"epoch": 1482.0754716981132,
"grad_norm": 2.1910690965934467,
"learning_rate": 1.8992525623136132e-05,
"loss": 1.3563,
"step": 38660
},
{
"epoch": 1482.8301886792453,
"grad_norm": 1.5353645456337577,
"learning_rate": 1.8958514233405793e-05,
"loss": 1.4077,
"step": 38680
},
{
"epoch": 1483.5849056603774,
"grad_norm": 1.7836996022414107,
"learning_rate": 1.8924534774538593e-05,
"loss": 1.3824,
"step": 38700
},
{
"epoch": 1484.3396226415093,
"grad_norm": 1.6136317181444138,
"learning_rate": 1.8890587292623113e-05,
"loss": 1.3511,
"step": 38720
},
{
"epoch": 1485.0943396226414,
"grad_norm": 1.8211866581007339,
"learning_rate": 1.8856671833704565e-05,
"loss": 1.3725,
"step": 38740
},
{
"epoch": 1485.8490566037735,
"grad_norm": 1.5979573815344084,
"learning_rate": 1.8822788443784704e-05,
"loss": 1.3571,
"step": 38760
},
{
"epoch": 1486.6037735849056,
"grad_norm": 1.6365464316772047,
"learning_rate": 1.878893716882177e-05,
"loss": 1.3588,
"step": 38780
},
{
"epoch": 1487.3584905660377,
"grad_norm": 2.811912712405292,
"learning_rate": 1.8755118054730514e-05,
"loss": 1.3823,
"step": 38800
},
{
"epoch": 1488.1132075471698,
"grad_norm": 1.684830593576563,
"learning_rate": 1.8721331147381986e-05,
"loss": 1.3604,
"step": 38820
},
{
"epoch": 1488.867924528302,
"grad_norm": 1.5101192259883982,
"learning_rate": 1.868757649260362e-05,
"loss": 1.3712,
"step": 38840
},
{
"epoch": 1489.622641509434,
"grad_norm": 1.8516527403548584,
"learning_rate": 1.8653854136179047e-05,
"loss": 1.3576,
"step": 38860
},
{
"epoch": 1490.377358490566,
"grad_norm": 1.5630443819078437,
"learning_rate": 1.8620164123848113e-05,
"loss": 1.3729,
"step": 38880
},
{
"epoch": 1491.132075471698,
"grad_norm": 1.9558078371048477,
"learning_rate": 1.8586506501306792e-05,
"loss": 1.3466,
"step": 38900
},
{
"epoch": 1491.8867924528302,
"grad_norm": 1.6581722869425195,
"learning_rate": 1.8552881314207158e-05,
"loss": 1.3547,
"step": 38920
},
{
"epoch": 1492.6415094339623,
"grad_norm": 1.9162311420660751,
"learning_rate": 1.8519288608157236e-05,
"loss": 1.3995,
"step": 38940
},
{
"epoch": 1493.3962264150944,
"grad_norm": 2.8463480242853874,
"learning_rate": 1.8485728428721025e-05,
"loss": 1.3609,
"step": 38960
},
{
"epoch": 1494.1509433962265,
"grad_norm": 1.7832047879021928,
"learning_rate": 1.845220082141838e-05,
"loss": 1.3966,
"step": 38980
},
{
"epoch": 1494.9056603773586,
"grad_norm": 1.628697490406908,
"learning_rate": 1.841870583172502e-05,
"loss": 1.3577,
"step": 39000
},
{
"epoch": 1495.6603773584907,
"grad_norm": 1.7499682485349517,
"learning_rate": 1.8385243505072403e-05,
"loss": 1.3634,
"step": 39020
},
{
"epoch": 1496.4150943396226,
"grad_norm": 2.369232208734949,
"learning_rate": 1.835181388684767e-05,
"loss": 1.3804,
"step": 39040
},
{
"epoch": 1497.1698113207547,
"grad_norm": 2.002186669217615,
"learning_rate": 1.8318417022393614e-05,
"loss": 1.3775,
"step": 39060
},
{
"epoch": 1497.9245283018868,
"grad_norm": 1.7745981101584183,
"learning_rate": 1.8285052957008572e-05,
"loss": 1.3678,
"step": 39080
},
{
"epoch": 1498.6792452830189,
"grad_norm": 2.3506034698380027,
"learning_rate": 1.825172173594644e-05,
"loss": 1.3819,
"step": 39100
},
{
"epoch": 1499.433962264151,
"grad_norm": 1.5587811175176152,
"learning_rate": 1.8218423404416543e-05,
"loss": 1.3623,
"step": 39120
},
{
"epoch": 1500.188679245283,
"grad_norm": 1.5407388891782507,
"learning_rate": 1.818515800758359e-05,
"loss": 1.3737,
"step": 39140
},
{
"epoch": 1500.9433962264152,
"grad_norm": 1.7105290658502008,
"learning_rate": 1.8151925590567624e-05,
"loss": 1.3416,
"step": 39160
},
{
"epoch": 1501.698113207547,
"grad_norm": 2.1160472894699973,
"learning_rate": 1.811872619844394e-05,
"loss": 1.3596,
"step": 39180
},
{
"epoch": 1502.4528301886792,
"grad_norm": 1.7134114803577327,
"learning_rate": 1.8085559876243068e-05,
"loss": 1.3486,
"step": 39200
},
{
"epoch": 1503.2075471698113,
"grad_norm": 1.5742520539626361,
"learning_rate": 1.805242666895068e-05,
"loss": 1.3737,
"step": 39220
},
{
"epoch": 1503.9622641509434,
"grad_norm": 1.6841300143409803,
"learning_rate": 1.8019326621507504e-05,
"loss": 1.3593,
"step": 39240
},
{
"epoch": 1504.7169811320755,
"grad_norm": 1.4678089134086005,
"learning_rate": 1.7986259778809304e-05,
"loss": 1.3332,
"step": 39260
},
{
"epoch": 1505.4716981132076,
"grad_norm": 1.5583137022685134,
"learning_rate": 1.7953226185706828e-05,
"loss": 1.3532,
"step": 39280
},
{
"epoch": 1506.2264150943397,
"grad_norm": 1.754522974870956,
"learning_rate": 1.7920225887005686e-05,
"loss": 1.3969,
"step": 39300
},
{
"epoch": 1506.9811320754718,
"grad_norm": 2.329959975485945,
"learning_rate": 1.788725892746638e-05,
"loss": 1.3693,
"step": 39320
},
{
"epoch": 1507.7358490566037,
"grad_norm": 1.9736365954487893,
"learning_rate": 1.7854325351804138e-05,
"loss": 1.3545,
"step": 39340
},
{
"epoch": 1508.4905660377358,
"grad_norm": 2.0873147005956274,
"learning_rate": 1.782142520468893e-05,
"loss": 1.357,
"step": 39360
},
{
"epoch": 1509.245283018868,
"grad_norm": 1.8565982391914584,
"learning_rate": 1.7788558530745406e-05,
"loss": 1.3574,
"step": 39380
},
{
"epoch": 1510.0,
"grad_norm": 1.3940404402455406,
"learning_rate": 1.7755725374552767e-05,
"loss": 1.3322,
"step": 39400
},
{
"epoch": 1510.754716981132,
"grad_norm": 1.6757375238937267,
"learning_rate": 1.772292578064481e-05,
"loss": 1.3562,
"step": 39420
},
{
"epoch": 1511.5094339622642,
"grad_norm": 2.021397727623104,
"learning_rate": 1.769015979350977e-05,
"loss": 1.3494,
"step": 39440
},
{
"epoch": 1512.2641509433963,
"grad_norm": 1.9073518931594837,
"learning_rate": 1.7657427457590277e-05,
"loss": 1.3469,
"step": 39460
},
{
"epoch": 1513.0188679245282,
"grad_norm": 1.5299281651503949,
"learning_rate": 1.7624728817283386e-05,
"loss": 1.3347,
"step": 39480
},
{
"epoch": 1513.7735849056603,
"grad_norm": 1.6171286973533487,
"learning_rate": 1.7592063916940385e-05,
"loss": 1.3781,
"step": 39500
},
{
"epoch": 1514.5283018867924,
"grad_norm": 2.3151167774892283,
"learning_rate": 1.7559432800866844e-05,
"loss": 1.3389,
"step": 39520
},
{
"epoch": 1515.2830188679245,
"grad_norm": 1.8404517143557557,
"learning_rate": 1.752683551332248e-05,
"loss": 1.3809,
"step": 39540
},
{
"epoch": 1516.0377358490566,
"grad_norm": 1.80165740372062,
"learning_rate": 1.749427209852112e-05,
"loss": 1.3647,
"step": 39560
},
{
"epoch": 1516.7924528301887,
"grad_norm": 1.5503005965319303,
"learning_rate": 1.7461742600630684e-05,
"loss": 1.3553,
"step": 39580
},
{
"epoch": 1517.5471698113208,
"grad_norm": 1.7389286642537964,
"learning_rate": 1.7429247063773047e-05,
"loss": 1.3566,
"step": 39600
},
{
"epoch": 1518.301886792453,
"grad_norm": 1.5514338805704833,
"learning_rate": 1.7396785532024062e-05,
"loss": 1.3771,
"step": 39620
},
{
"epoch": 1519.0566037735848,
"grad_norm": 1.738553891820026,
"learning_rate": 1.7364358049413427e-05,
"loss": 1.3608,
"step": 39640
},
{
"epoch": 1519.811320754717,
"grad_norm": 2.2590021667446476,
"learning_rate": 1.7331964659924647e-05,
"loss": 1.3594,
"step": 39660
},
{
"epoch": 1520.566037735849,
"grad_norm": 1.8008534873454645,
"learning_rate": 1.729960540749503e-05,
"loss": 1.3446,
"step": 39680
},
{
"epoch": 1521.3207547169811,
"grad_norm": 1.9823359457338208,
"learning_rate": 1.7267280336015543e-05,
"loss": 1.3604,
"step": 39700
},
{
"epoch": 1522.0754716981132,
"grad_norm": 1.6630862297023916,
"learning_rate": 1.723498948933081e-05,
"loss": 1.3831,
"step": 39720
},
{
"epoch": 1522.8301886792453,
"grad_norm": 1.9271729195919085,
"learning_rate": 1.720273291123901e-05,
"loss": 1.3571,
"step": 39740
},
{
"epoch": 1523.5849056603774,
"grad_norm": 1.6944904437475812,
"learning_rate": 1.7170510645491884e-05,
"loss": 1.3845,
"step": 39760
},
{
"epoch": 1524.3396226415093,
"grad_norm": 2.0111059446030164,
"learning_rate": 1.7138322735794582e-05,
"loss": 1.3464,
"step": 39780
},
{
"epoch": 1525.0943396226414,
"grad_norm": 1.6636863494806655,
"learning_rate": 1.7106169225805703e-05,
"loss": 1.3472,
"step": 39800
},
{
"epoch": 1525.8490566037735,
"grad_norm": 1.654778862655826,
"learning_rate": 1.7074050159137155e-05,
"loss": 1.3517,
"step": 39820
},
{
"epoch": 1526.6037735849056,
"grad_norm": 1.9255620043148591,
"learning_rate": 1.7041965579354115e-05,
"loss": 1.359,
"step": 39840
},
{
"epoch": 1527.3584905660377,
"grad_norm": 1.7612527068488755,
"learning_rate": 1.7009915529975046e-05,
"loss": 1.3535,
"step": 39860
},
{
"epoch": 1528.1132075471698,
"grad_norm": 1.6440495946289901,
"learning_rate": 1.69779000544715e-05,
"loss": 1.3275,
"step": 39880
},
{
"epoch": 1528.867924528302,
"grad_norm": 1.8436152132956103,
"learning_rate": 1.6945919196268195e-05,
"loss": 1.3269,
"step": 39900
},
{
"epoch": 1529.622641509434,
"grad_norm": 1.5249165119761414,
"learning_rate": 1.6913972998742855e-05,
"loss": 1.3528,
"step": 39920
},
{
"epoch": 1530.377358490566,
"grad_norm": 1.7116936271233165,
"learning_rate": 1.6882061505226197e-05,
"loss": 1.3351,
"step": 39940
},
{
"epoch": 1531.132075471698,
"grad_norm": 1.9127818443391411,
"learning_rate": 1.68501847590019e-05,
"loss": 1.3649,
"step": 39960
},
{
"epoch": 1531.8867924528302,
"grad_norm": 1.7968703663627887,
"learning_rate": 1.681834280330646e-05,
"loss": 1.3664,
"step": 39980
},
{
"epoch": 1532.6415094339623,
"grad_norm": 2.01086719476703,
"learning_rate": 1.6786535681329242e-05,
"loss": 1.3354,
"step": 40000
},
{
"epoch": 1533.3962264150944,
"grad_norm": 1.8971146877595166,
"learning_rate": 1.6754763436212318e-05,
"loss": 1.3459,
"step": 40020
},
{
"epoch": 1534.1509433962265,
"grad_norm": 1.5538558777058122,
"learning_rate": 1.6723026111050465e-05,
"loss": 1.348,
"step": 40040
},
{
"epoch": 1534.9056603773586,
"grad_norm": 1.6899684943437072,
"learning_rate": 1.6691323748891116e-05,
"loss": 1.3219,
"step": 40060
},
{
"epoch": 1535.6603773584907,
"grad_norm": 1.5696983044378243,
"learning_rate": 1.6659656392734248e-05,
"loss": 1.3523,
"step": 40080
},
{
"epoch": 1536.4150943396226,
"grad_norm": 2.008558955924781,
"learning_rate": 1.6628024085532394e-05,
"loss": 1.3507,
"step": 40100
},
{
"epoch": 1537.1698113207547,
"grad_norm": 1.6814099912171956,
"learning_rate": 1.6596426870190517e-05,
"loss": 1.3271,
"step": 40120
},
{
"epoch": 1537.9245283018868,
"grad_norm": 1.6392939792056798,
"learning_rate": 1.6564864789566017e-05,
"loss": 1.3628,
"step": 40140
},
{
"epoch": 1538.6792452830189,
"grad_norm": 1.6937327457602671,
"learning_rate": 1.6533337886468593e-05,
"loss": 1.3457,
"step": 40160
},
{
"epoch": 1539.433962264151,
"grad_norm": 2.0312461746808674,
"learning_rate": 1.650184620366025e-05,
"loss": 1.345,
"step": 40180
},
{
"epoch": 1540.188679245283,
"grad_norm": 1.6324403361347462,
"learning_rate": 1.647038978385525e-05,
"loss": 1.3614,
"step": 40200
},
{
"epoch": 1540.9433962264152,
"grad_norm": 1.8937794347785448,
"learning_rate": 1.643896866971998e-05,
"loss": 1.3485,
"step": 40220
},
{
"epoch": 1541.698113207547,
"grad_norm": 1.66458657626364,
"learning_rate": 1.6407582903872977e-05,
"loss": 1.3201,
"step": 40240
},
{
"epoch": 1542.4528301886792,
"grad_norm": 1.637256903291043,
"learning_rate": 1.637623252888481e-05,
"loss": 1.3287,
"step": 40260
},
{
"epoch": 1543.2075471698113,
"grad_norm": 1.771255607485422,
"learning_rate": 1.634491758727804e-05,
"loss": 1.3386,
"step": 40280
},
{
"epoch": 1543.9622641509434,
"grad_norm": 2.294826209947056,
"learning_rate": 1.6313638121527195e-05,
"loss": 1.3443,
"step": 40300
},
{
"epoch": 1544.7169811320755,
"grad_norm": 1.5369973618999444,
"learning_rate": 1.6282394174058652e-05,
"loss": 1.3199,
"step": 40320
},
{
"epoch": 1545.4716981132076,
"grad_norm": 1.7805574251016163,
"learning_rate": 1.6251185787250646e-05,
"loss": 1.3427,
"step": 40340
},
{
"epoch": 1546.2264150943397,
"grad_norm": 1.7055546669575088,
"learning_rate": 1.6220013003433163e-05,
"loss": 1.3595,
"step": 40360
},
{
"epoch": 1546.9811320754718,
"grad_norm": 1.6493151345521173,
"learning_rate": 1.618887586488787e-05,
"loss": 1.3417,
"step": 40380
},
{
"epoch": 1547.7358490566037,
"grad_norm": 1.7099299752279526,
"learning_rate": 1.6157774413848147e-05,
"loss": 1.3286,
"step": 40400
},
{
"epoch": 1548.4905660377358,
"grad_norm": 1.6461054638879455,
"learning_rate": 1.61267086924989e-05,
"loss": 1.3651,
"step": 40420
},
{
"epoch": 1549.245283018868,
"grad_norm": 2.239209937375333,
"learning_rate": 1.6095678742976643e-05,
"loss": 1.3402,
"step": 40440
},
{
"epoch": 1550.0,
"grad_norm": 1.9293560078530108,
"learning_rate": 1.6064684607369317e-05,
"loss": 1.3566,
"step": 40460
},
{
"epoch": 1550.754716981132,
"grad_norm": 1.7850904902946119,
"learning_rate": 1.603372632771629e-05,
"loss": 1.3522,
"step": 40480
},
{
"epoch": 1551.5094339622642,
"grad_norm": 1.8694667431709797,
"learning_rate": 1.6002803946008334e-05,
"loss": 1.3254,
"step": 40500
},
{
"epoch": 1552.2641509433963,
"grad_norm": 1.8970095488709016,
"learning_rate": 1.5971917504187483e-05,
"loss": 1.3456,
"step": 40520
},
{
"epoch": 1553.0188679245282,
"grad_norm": 1.473985828394077,
"learning_rate": 1.5941067044147068e-05,
"loss": 1.3425,
"step": 40540
},
{
"epoch": 1553.7735849056603,
"grad_norm": 2.4173810451888436,
"learning_rate": 1.591025260773159e-05,
"loss": 1.3616,
"step": 40560
},
{
"epoch": 1554.5283018867924,
"grad_norm": 4.978545098723231,
"learning_rate": 1.587947423673667e-05,
"loss": 1.3302,
"step": 40580
},
{
"epoch": 1555.2830188679245,
"grad_norm": 1.8210531663627934,
"learning_rate": 1.5848731972909058e-05,
"loss": 1.3208,
"step": 40600
},
{
"epoch": 1556.0377358490566,
"grad_norm": 1.7214332519076236,
"learning_rate": 1.5818025857946504e-05,
"loss": 1.3429,
"step": 40620
},
{
"epoch": 1556.7924528301887,
"grad_norm": 1.7715531344419837,
"learning_rate": 1.5787355933497722e-05,
"loss": 1.3236,
"step": 40640
},
{
"epoch": 1557.5471698113208,
"grad_norm": 1.6774415891925254,
"learning_rate": 1.5756722241162336e-05,
"loss": 1.3038,
"step": 40660
},
{
"epoch": 1558.301886792453,
"grad_norm": 1.7378768400910978,
"learning_rate": 1.5726124822490856e-05,
"loss": 1.3393,
"step": 40680
},
{
"epoch": 1559.0566037735848,
"grad_norm": 1.5050468895919773,
"learning_rate": 1.569556371898455e-05,
"loss": 1.3169,
"step": 40700
},
{
"epoch": 1559.811320754717,
"grad_norm": 1.7494465787076923,
"learning_rate": 1.5665038972095462e-05,
"loss": 1.3219,
"step": 40720
},
{
"epoch": 1560.566037735849,
"grad_norm": 1.942070137365104,
"learning_rate": 1.563455062322631e-05,
"loss": 1.3331,
"step": 40740
},
{
"epoch": 1561.3207547169811,
"grad_norm": 1.4760834184650184,
"learning_rate": 1.560409871373043e-05,
"loss": 1.3371,
"step": 40760
},
{
"epoch": 1562.0754716981132,
"grad_norm": 1.794169465456889,
"learning_rate": 1.5573683284911766e-05,
"loss": 1.361,
"step": 40780
},
{
"epoch": 1562.8301886792453,
"grad_norm": 1.5717564295595021,
"learning_rate": 1.5543304378024745e-05,
"loss": 1.3198,
"step": 40800
},
{
"epoch": 1563.5849056603774,
"grad_norm": 1.679663629392091,
"learning_rate": 1.5512962034274292e-05,
"loss": 1.3225,
"step": 40820
},
{
"epoch": 1564.3396226415093,
"grad_norm": 1.5963236435216681,
"learning_rate": 1.5482656294815706e-05,
"loss": 1.3475,
"step": 40840
},
{
"epoch": 1565.0943396226414,
"grad_norm": 1.6282234240786269,
"learning_rate": 1.5452387200754648e-05,
"loss": 1.341,
"step": 40860
},
{
"epoch": 1565.8490566037735,
"grad_norm": 1.8356401444891661,
"learning_rate": 1.542215479314709e-05,
"loss": 1.3093,
"step": 40880
},
{
"epoch": 1566.6037735849056,
"grad_norm": 1.802086287293627,
"learning_rate": 1.5391959112999222e-05,
"loss": 1.3234,
"step": 40900
},
{
"epoch": 1567.3584905660377,
"grad_norm": 1.596543322520551,
"learning_rate": 1.536180020126744e-05,
"loss": 1.3207,
"step": 40920
},
{
"epoch": 1568.1132075471698,
"grad_norm": 1.5925040590351016,
"learning_rate": 1.5331678098858253e-05,
"loss": 1.3434,
"step": 40940
},
{
"epoch": 1568.867924528302,
"grad_norm": 1.7805635796964523,
"learning_rate": 1.5301592846628236e-05,
"loss": 1.3189,
"step": 40960
},
{
"epoch": 1569.622641509434,
"grad_norm": 2.270882122194477,
"learning_rate": 1.5271544485384005e-05,
"loss": 1.3331,
"step": 40980
},
{
"epoch": 1570.377358490566,
"grad_norm": 1.5460374945916004,
"learning_rate": 1.524153305588211e-05,
"loss": 1.3307,
"step": 41000
},
{
"epoch": 1571.132075471698,
"grad_norm": 1.5240728933202146,
"learning_rate": 1.5211558598829046e-05,
"loss": 1.3261,
"step": 41020
},
{
"epoch": 1571.8867924528302,
"grad_norm": 1.6551356652204947,
"learning_rate": 1.518162115488113e-05,
"loss": 1.3444,
"step": 41040
},
{
"epoch": 1572.6415094339623,
"grad_norm": 2.3662389207443897,
"learning_rate": 1.5151720764644462e-05,
"loss": 1.3078,
"step": 41060
},
{
"epoch": 1573.3962264150944,
"grad_norm": 2.0201453815678336,
"learning_rate": 1.5121857468674923e-05,
"loss": 1.2931,
"step": 41080
},
{
"epoch": 1574.1509433962265,
"grad_norm": 2.1833616270471428,
"learning_rate": 1.509203130747807e-05,
"loss": 1.3113,
"step": 41100
},
{
"epoch": 1574.9056603773586,
"grad_norm": 1.6606152578025972,
"learning_rate": 1.506224232150908e-05,
"loss": 1.3488,
"step": 41120
},
{
"epoch": 1575.6603773584907,
"grad_norm": 2.2621888669728776,
"learning_rate": 1.5032490551172706e-05,
"loss": 1.293,
"step": 41140
},
{
"epoch": 1576.4150943396226,
"grad_norm": 1.8118753564672168,
"learning_rate": 1.5002776036823215e-05,
"loss": 1.3288,
"step": 41160
},
{
"epoch": 1577.1698113207547,
"grad_norm": 1.4675478833771125,
"learning_rate": 1.4973098818764368e-05,
"loss": 1.3181,
"step": 41180
},
{
"epoch": 1577.9245283018868,
"grad_norm": 1.5426718546178322,
"learning_rate": 1.4943458937249337e-05,
"loss": 1.3041,
"step": 41200
},
{
"epoch": 1578.6792452830189,
"grad_norm": 1.7915801444691424,
"learning_rate": 1.4913856432480624e-05,
"loss": 1.3244,
"step": 41220
},
{
"epoch": 1579.433962264151,
"grad_norm": 1.6284356095147676,
"learning_rate": 1.4884291344610055e-05,
"loss": 1.3623,
"step": 41240
},
{
"epoch": 1580.188679245283,
"grad_norm": 2.018808164067539,
"learning_rate": 1.4854763713738692e-05,
"loss": 1.3265,
"step": 41260
},
{
"epoch": 1580.9433962264152,
"grad_norm": 1.9982741446146173,
"learning_rate": 1.48252735799168e-05,
"loss": 1.3174,
"step": 41280
},
{
"epoch": 1581.698113207547,
"grad_norm": 1.6089408891188777,
"learning_rate": 1.4795820983143804e-05,
"loss": 1.3054,
"step": 41300
},
{
"epoch": 1582.4528301886792,
"grad_norm": 1.9322291254142352,
"learning_rate": 1.4766405963368183e-05,
"loss": 1.3288,
"step": 41320
},
{
"epoch": 1583.2075471698113,
"grad_norm": 2.1500428010731105,
"learning_rate": 1.4737028560487459e-05,
"loss": 1.3251,
"step": 41340
},
{
"epoch": 1583.9622641509434,
"grad_norm": 1.6754742169090076,
"learning_rate": 1.470768881434812e-05,
"loss": 1.3111,
"step": 41360
},
{
"epoch": 1584.7169811320755,
"grad_norm": 2.0456793462392864,
"learning_rate": 1.4678386764745604e-05,
"loss": 1.2852,
"step": 41380
},
{
"epoch": 1585.4716981132076,
"grad_norm": 1.6265186141557229,
"learning_rate": 1.4649122451424216e-05,
"loss": 1.3246,
"step": 41400
},
{
"epoch": 1586.2264150943397,
"grad_norm": 1.7034501168928484,
"learning_rate": 1.4619895914077052e-05,
"loss": 1.3061,
"step": 41420
},
{
"epoch": 1586.9811320754718,
"grad_norm": 2.5172683919046834,
"learning_rate": 1.459070719234599e-05,
"loss": 1.3287,
"step": 41440
},
{
"epoch": 1587.7358490566037,
"grad_norm": 1.681004851075849,
"learning_rate": 1.4561556325821593e-05,
"loss": 1.307,
"step": 41460
},
{
"epoch": 1588.4905660377358,
"grad_norm": 1.9545864689840218,
"learning_rate": 1.4532443354043108e-05,
"loss": 1.3015,
"step": 41480
},
{
"epoch": 1589.245283018868,
"grad_norm": 2.2238938165489186,
"learning_rate": 1.4503368316498385e-05,
"loss": 1.3323,
"step": 41500
},
{
"epoch": 1590.0,
"grad_norm": 2.0846781514374704,
"learning_rate": 1.4474331252623795e-05,
"loss": 1.3273,
"step": 41520
},
{
"epoch": 1590.754716981132,
"grad_norm": 1.677709576953602,
"learning_rate": 1.44453322018042e-05,
"loss": 1.3035,
"step": 41540
},
{
"epoch": 1591.5094339622642,
"grad_norm": 1.8283093943528037,
"learning_rate": 1.4416371203372931e-05,
"loss": 1.3261,
"step": 41560
},
{
"epoch": 1592.2641509433963,
"grad_norm": 1.5077647603013566,
"learning_rate": 1.4387448296611699e-05,
"loss": 1.3039,
"step": 41580
},
{
"epoch": 1593.0188679245282,
"grad_norm": 1.7410910545917078,
"learning_rate": 1.4358563520750539e-05,
"loss": 1.3073,
"step": 41600
},
{
"epoch": 1593.7735849056603,
"grad_norm": 1.7856680678107866,
"learning_rate": 1.4329716914967761e-05,
"loss": 1.3128,
"step": 41620
},
{
"epoch": 1594.5283018867924,
"grad_norm": 2.291591913470402,
"learning_rate": 1.4300908518389904e-05,
"loss": 1.3067,
"step": 41640
},
{
"epoch": 1595.2830188679245,
"grad_norm": 1.785529683540441,
"learning_rate": 1.42721383700917e-05,
"loss": 1.3338,
"step": 41660
},
{
"epoch": 1596.0377358490566,
"grad_norm": 1.5806566985978232,
"learning_rate": 1.4243406509096e-05,
"loss": 1.3212,
"step": 41680
},
{
"epoch": 1596.7924528301887,
"grad_norm": 1.6779162023356309,
"learning_rate": 1.4214712974373703e-05,
"loss": 1.304,
"step": 41700
},
{
"epoch": 1597.5471698113208,
"grad_norm": 1.7562533688231816,
"learning_rate": 1.418605780484373e-05,
"loss": 1.2875,
"step": 41720
},
{
"epoch": 1598.301886792453,
"grad_norm": 1.7567734296175508,
"learning_rate": 1.4157441039372966e-05,
"loss": 1.3145,
"step": 41740
},
{
"epoch": 1599.0566037735848,
"grad_norm": 1.722702892703264,
"learning_rate": 1.4128862716776218e-05,
"loss": 1.3019,
"step": 41760
},
{
"epoch": 1599.811320754717,
"grad_norm": 1.9956191020255551,
"learning_rate": 1.4100322875816148e-05,
"loss": 1.3114,
"step": 41780
},
{
"epoch": 1600.566037735849,
"grad_norm": 1.6531331340969986,
"learning_rate": 1.4071821555203213e-05,
"loss": 1.2966,
"step": 41800
},
{
"epoch": 1601.3207547169811,
"grad_norm": 2.0591826060061376,
"learning_rate": 1.4043358793595621e-05,
"loss": 1.3015,
"step": 41820
},
{
"epoch": 1602.0754716981132,
"grad_norm": 1.6984765409093496,
"learning_rate": 1.4014934629599273e-05,
"loss": 1.2995,
"step": 41840
},
{
"epoch": 1602.8301886792453,
"grad_norm": 2.2995147164644165,
"learning_rate": 1.3986549101767747e-05,
"loss": 1.3184,
"step": 41860
},
{
"epoch": 1603.5849056603774,
"grad_norm": 1.6563391450177631,
"learning_rate": 1.39582022486022e-05,
"loss": 1.3379,
"step": 41880
},
{
"epoch": 1604.3396226415093,
"grad_norm": 1.8150129139182571,
"learning_rate": 1.3929894108551327e-05,
"loss": 1.2831,
"step": 41900
},
{
"epoch": 1605.0943396226414,
"grad_norm": 1.6835080581169954,
"learning_rate": 1.390162472001131e-05,
"loss": 1.3275,
"step": 41920
},
{
"epoch": 1605.8490566037735,
"grad_norm": 1.6294691180875247,
"learning_rate": 1.3873394121325766e-05,
"loss": 1.2913,
"step": 41940
},
{
"epoch": 1606.6037735849056,
"grad_norm": 1.7180885898850626,
"learning_rate": 1.3845202350785745e-05,
"loss": 1.2965,
"step": 41960
},
{
"epoch": 1607.3584905660377,
"grad_norm": 1.5583008263797746,
"learning_rate": 1.3817049446629576e-05,
"loss": 1.2832,
"step": 41980
},
{
"epoch": 1608.1132075471698,
"grad_norm": 1.8079371040717394,
"learning_rate": 1.3788935447042895e-05,
"loss": 1.2954,
"step": 42000
},
{
"epoch": 1608.867924528302,
"grad_norm": 1.8337600854116936,
"learning_rate": 1.3760860390158554e-05,
"loss": 1.309,
"step": 42020
},
{
"epoch": 1609.622641509434,
"grad_norm": 1.6419903042017507,
"learning_rate": 1.3732824314056604e-05,
"loss": 1.3068,
"step": 42040
},
{
"epoch": 1610.377358490566,
"grad_norm": 1.948536117708095,
"learning_rate": 1.370482725676423e-05,
"loss": 1.3399,
"step": 42060
},
{
"epoch": 1611.132075471698,
"grad_norm": 1.9254279275726736,
"learning_rate": 1.3676869256255669e-05,
"loss": 1.3151,
"step": 42080
},
{
"epoch": 1611.8867924528302,
"grad_norm": 1.802137918813917,
"learning_rate": 1.3648950350452192e-05,
"loss": 1.2844,
"step": 42100
},
{
"epoch": 1612.6415094339623,
"grad_norm": 3.3181585562433358,
"learning_rate": 1.3621070577222036e-05,
"loss": 1.3125,
"step": 42120
},
{
"epoch": 1613.3962264150944,
"grad_norm": 1.6631495329844195,
"learning_rate": 1.3593229974380375e-05,
"loss": 1.2908,
"step": 42140
},
{
"epoch": 1614.1509433962265,
"grad_norm": 1.8984095064618975,
"learning_rate": 1.3565428579689256e-05,
"loss": 1.2937,
"step": 42160
},
{
"epoch": 1614.9056603773586,
"grad_norm": 1.8422792147059388,
"learning_rate": 1.3537666430857535e-05,
"loss": 1.284,
"step": 42180
},
{
"epoch": 1615.6603773584907,
"grad_norm": 1.6231527761557085,
"learning_rate": 1.3509943565540833e-05,
"loss": 1.297,
"step": 42200
},
{
"epoch": 1616.4150943396226,
"grad_norm": 1.9453735113539294,
"learning_rate": 1.3482260021341475e-05,
"loss": 1.2902,
"step": 42220
},
{
"epoch": 1617.1698113207547,
"grad_norm": 1.7641921684369601,
"learning_rate": 1.345461583580849e-05,
"loss": 1.282,
"step": 42240
},
{
"epoch": 1617.9245283018868,
"grad_norm": 2.0387885601326228,
"learning_rate": 1.3427011046437513e-05,
"loss": 1.2898,
"step": 42260
},
{
"epoch": 1618.6792452830189,
"grad_norm": 1.777342395003277,
"learning_rate": 1.3399445690670713e-05,
"loss": 1.3168,
"step": 42280
},
{
"epoch": 1619.433962264151,
"grad_norm": 1.6370450829023924,
"learning_rate": 1.33719198058968e-05,
"loss": 1.3075,
"step": 42300
},
{
"epoch": 1620.188679245283,
"grad_norm": 1.779662328060948,
"learning_rate": 1.334443342945093e-05,
"loss": 1.2919,
"step": 42320
},
{
"epoch": 1620.9433962264152,
"grad_norm": 1.7581747425039895,
"learning_rate": 1.3316986598614685e-05,
"loss": 1.3074,
"step": 42340
},
{
"epoch": 1621.698113207547,
"grad_norm": 1.8150800521846453,
"learning_rate": 1.3289579350616015e-05,
"loss": 1.2807,
"step": 42360
},
{
"epoch": 1622.4528301886792,
"grad_norm": 1.7354020247655273,
"learning_rate": 1.3262211722629166e-05,
"loss": 1.2826,
"step": 42380
},
{
"epoch": 1623.2075471698113,
"grad_norm": 1.468102174253192,
"learning_rate": 1.3234883751774644e-05,
"loss": 1.288,
"step": 42400
},
{
"epoch": 1623.9622641509434,
"grad_norm": 1.657454196093378,
"learning_rate": 1.3207595475119152e-05,
"loss": 1.3106,
"step": 42420
},
{
"epoch": 1624.7169811320755,
"grad_norm": 1.6557640300068772,
"learning_rate": 1.3180346929675611e-05,
"loss": 1.2731,
"step": 42440
},
{
"epoch": 1625.4716981132076,
"grad_norm": 1.7735353149818989,
"learning_rate": 1.3153138152402996e-05,
"loss": 1.2763,
"step": 42460
},
{
"epoch": 1626.2264150943397,
"grad_norm": 1.9382385391993158,
"learning_rate": 1.3125969180206349e-05,
"loss": 1.319,
"step": 42480
},
{
"epoch": 1626.9811320754718,
"grad_norm": 2.136894507958651,
"learning_rate": 1.3098840049936733e-05,
"loss": 1.2805,
"step": 42500
},
{
"epoch": 1627.7358490566037,
"grad_norm": 2.1483143587792366,
"learning_rate": 1.3071750798391171e-05,
"loss": 1.2853,
"step": 42520
},
{
"epoch": 1628.4905660377358,
"grad_norm": 1.710874022513826,
"learning_rate": 1.304470146231261e-05,
"loss": 1.2806,
"step": 42540
},
{
"epoch": 1629.245283018868,
"grad_norm": 1.8951457389499449,
"learning_rate": 1.3017692078389823e-05,
"loss": 1.2932,
"step": 42560
},
{
"epoch": 1630.0,
"grad_norm": 1.5984669511746095,
"learning_rate": 1.299072268325742e-05,
"loss": 1.2931,
"step": 42580
},
{
"epoch": 1630.754716981132,
"grad_norm": 1.8192427048671964,
"learning_rate": 1.2963793313495747e-05,
"loss": 1.2736,
"step": 42600
},
{
"epoch": 1631.5094339622642,
"grad_norm": 1.7556743408681688,
"learning_rate": 1.2936904005630886e-05,
"loss": 1.2844,
"step": 42620
},
{
"epoch": 1632.2641509433963,
"grad_norm": 1.9584621506348525,
"learning_rate": 1.2910054796134588e-05,
"loss": 1.2903,
"step": 42640
},
{
"epoch": 1633.0188679245282,
"grad_norm": 1.818910778704821,
"learning_rate": 1.2883245721424182e-05,
"loss": 1.2982,
"step": 42660
},
{
"epoch": 1633.7735849056603,
"grad_norm": 1.6564371207191282,
"learning_rate": 1.2856476817862578e-05,
"loss": 1.2719,
"step": 42680
},
{
"epoch": 1634.5283018867924,
"grad_norm": 2.176105338983291,
"learning_rate": 1.2829748121758186e-05,
"loss": 1.2703,
"step": 42700
},
{
"epoch": 1635.2830188679245,
"grad_norm": 2.053268530527867,
"learning_rate": 1.280305966936491e-05,
"loss": 1.2745,
"step": 42720
},
{
"epoch": 1636.0377358490566,
"grad_norm": 1.9471448193829715,
"learning_rate": 1.2776411496882053e-05,
"loss": 1.2924,
"step": 42740
},
{
"epoch": 1636.7924528301887,
"grad_norm": 1.7439666627316233,
"learning_rate": 1.2749803640454274e-05,
"loss": 1.2883,
"step": 42760
},
{
"epoch": 1637.5471698113208,
"grad_norm": 1.7616849064246298,
"learning_rate": 1.2723236136171557e-05,
"loss": 1.2901,
"step": 42780
},
{
"epoch": 1638.301886792453,
"grad_norm": 1.9785596426579124,
"learning_rate": 1.2696709020069137e-05,
"loss": 1.2806,
"step": 42800
},
{
"epoch": 1639.0566037735848,
"grad_norm": 1.8267869166287358,
"learning_rate": 1.2670222328127502e-05,
"loss": 1.2915,
"step": 42820
},
{
"epoch": 1639.811320754717,
"grad_norm": 1.8394119712662444,
"learning_rate": 1.2643776096272298e-05,
"loss": 1.2959,
"step": 42840
},
{
"epoch": 1640.566037735849,
"grad_norm": 1.5745567144671013,
"learning_rate": 1.2617370360374272e-05,
"loss": 1.2542,
"step": 42860
},
{
"epoch": 1641.3207547169811,
"grad_norm": 1.759927673486542,
"learning_rate": 1.2591005156249265e-05,
"loss": 1.2957,
"step": 42880
},
{
"epoch": 1642.0754716981132,
"grad_norm": 1.8212375700098509,
"learning_rate": 1.2564680519658124e-05,
"loss": 1.2911,
"step": 42900
},
{
"epoch": 1642.8301886792453,
"grad_norm": 1.7170892376270965,
"learning_rate": 1.2538396486306685e-05,
"loss": 1.2815,
"step": 42920
},
{
"epoch": 1643.5849056603774,
"grad_norm": 1.9707121868823774,
"learning_rate": 1.2512153091845724e-05,
"loss": 1.2817,
"step": 42940
},
{
"epoch": 1644.3396226415093,
"grad_norm": 1.6048036207691687,
"learning_rate": 1.2485950371870873e-05,
"loss": 1.2318,
"step": 42960
},
{
"epoch": 1645.0943396226414,
"grad_norm": 1.7616652416059821,
"learning_rate": 1.2459788361922582e-05,
"loss": 1.2482,
"step": 42980
},
{
"epoch": 1645.8490566037735,
"grad_norm": 1.971912319619838,
"learning_rate": 1.2433667097486137e-05,
"loss": 1.2732,
"step": 43000
},
{
"epoch": 1646.6037735849056,
"grad_norm": 1.8554872676424141,
"learning_rate": 1.2407586613991493e-05,
"loss": 1.2862,
"step": 43020
},
{
"epoch": 1647.3584905660377,
"grad_norm": 1.699680830714332,
"learning_rate": 1.2381546946813345e-05,
"loss": 1.2783,
"step": 43040
},
{
"epoch": 1648.1132075471698,
"grad_norm": 2.0246817924294174,
"learning_rate": 1.2355548131271e-05,
"loss": 1.2913,
"step": 43060
},
{
"epoch": 1648.867924528302,
"grad_norm": 1.8248553236680727,
"learning_rate": 1.2329590202628339e-05,
"loss": 1.2982,
"step": 43080
},
{
"epoch": 1649.622641509434,
"grad_norm": 1.5949265423747017,
"learning_rate": 1.2303673196093838e-05,
"loss": 1.2682,
"step": 43100
},
{
"epoch": 1650.377358490566,
"grad_norm": 1.8162411778047456,
"learning_rate": 1.2277797146820398e-05,
"loss": 1.2775,
"step": 43120
},
{
"epoch": 1651.132075471698,
"grad_norm": 1.8713447665462608,
"learning_rate": 1.225196208990544e-05,
"loss": 1.2816,
"step": 43140
},
{
"epoch": 1651.8867924528302,
"grad_norm": 1.7680462074180785,
"learning_rate": 1.2226168060390733e-05,
"loss": 1.2583,
"step": 43160
},
{
"epoch": 1652.6415094339623,
"grad_norm": 1.7924027918393708,
"learning_rate": 1.2200415093262394e-05,
"loss": 1.2631,
"step": 43180
},
{
"epoch": 1653.3962264150944,
"grad_norm": 1.8736863718504475,
"learning_rate": 1.2174703223450895e-05,
"loss": 1.2841,
"step": 43200
},
{
"epoch": 1654.1509433962265,
"grad_norm": 2.4263803178613257,
"learning_rate": 1.2149032485830917e-05,
"loss": 1.2549,
"step": 43220
},
{
"epoch": 1654.9056603773586,
"grad_norm": 2.0941904006879746,
"learning_rate": 1.212340291522137e-05,
"loss": 1.2723,
"step": 43240
},
{
"epoch": 1655.6603773584907,
"grad_norm": 1.8402209538224543,
"learning_rate": 1.2097814546385328e-05,
"loss": 1.2974,
"step": 43260
},
{
"epoch": 1656.4150943396226,
"grad_norm": 1.6993013644974577,
"learning_rate": 1.2072267414029963e-05,
"loss": 1.2513,
"step": 43280
},
{
"epoch": 1657.1698113207547,
"grad_norm": 1.7170522867791609,
"learning_rate": 1.2046761552806534e-05,
"loss": 1.2805,
"step": 43300
},
{
"epoch": 1657.9245283018868,
"grad_norm": 2.0350355441018007,
"learning_rate": 1.2021296997310335e-05,
"loss": 1.2705,
"step": 43320
},
{
"epoch": 1658.6792452830189,
"grad_norm": 2.4347287101096127,
"learning_rate": 1.1995873782080597e-05,
"loss": 1.3121,
"step": 43340
},
{
"epoch": 1659.433962264151,
"grad_norm": 2.4941517416955423,
"learning_rate": 1.1970491941600483e-05,
"loss": 1.283,
"step": 43360
},
{
"epoch": 1660.188679245283,
"grad_norm": 1.5936769777901518,
"learning_rate": 1.1945151510297077e-05,
"loss": 1.3007,
"step": 43380
},
{
"epoch": 1660.9433962264152,
"grad_norm": 1.7694343184984302,
"learning_rate": 1.191985252254125e-05,
"loss": 1.2624,
"step": 43400
},
{
"epoch": 1661.698113207547,
"grad_norm": 1.795958862054882,
"learning_rate": 1.1894595012647705e-05,
"loss": 1.2845,
"step": 43420
},
{
"epoch": 1662.4528301886792,
"grad_norm": 1.7885172471497286,
"learning_rate": 1.1869379014874838e-05,
"loss": 1.2917,
"step": 43440
},
{
"epoch": 1663.2075471698113,
"grad_norm": 2.120293693613178,
"learning_rate": 1.1844204563424761e-05,
"loss": 1.2772,
"step": 43460
},
{
"epoch": 1663.9622641509434,
"grad_norm": 1.9597227989228387,
"learning_rate": 1.1819071692443259e-05,
"loss": 1.2795,
"step": 43480
},
{
"epoch": 1664.7169811320755,
"grad_norm": 1.6400049564311938,
"learning_rate": 1.1793980436019665e-05,
"loss": 1.2698,
"step": 43500
},
{
"epoch": 1665.4716981132076,
"grad_norm": 1.8644080059944403,
"learning_rate": 1.1768930828186929e-05,
"loss": 1.2587,
"step": 43520
},
{
"epoch": 1666.2264150943397,
"grad_norm": 1.8282116731915254,
"learning_rate": 1.1743922902921463e-05,
"loss": 1.3132,
"step": 43540
},
{
"epoch": 1666.9811320754718,
"grad_norm": 1.7249443500233614,
"learning_rate": 1.1718956694143148e-05,
"loss": 1.2723,
"step": 43560
},
{
"epoch": 1667.7358490566037,
"grad_norm": 2.2697663843146665,
"learning_rate": 1.1694032235715316e-05,
"loss": 1.2568,
"step": 43580
},
{
"epoch": 1668.4905660377358,
"grad_norm": 1.530647627622187,
"learning_rate": 1.1669149561444626e-05,
"loss": 1.2717,
"step": 43600
},
{
"epoch": 1669.245283018868,
"grad_norm": 1.9077370898342472,
"learning_rate": 1.1644308705081098e-05,
"loss": 1.252,
"step": 43620
},
{
"epoch": 1670.0,
"grad_norm": 2.44039353912842,
"learning_rate": 1.1619509700318012e-05,
"loss": 1.2829,
"step": 43640
},
{
"epoch": 1670.754716981132,
"grad_norm": 1.7743292552737207,
"learning_rate": 1.159475258079188e-05,
"loss": 1.2831,
"step": 43660
},
{
"epoch": 1671.5094339622642,
"grad_norm": 1.747479187441525,
"learning_rate": 1.1570037380082422e-05,
"loss": 1.2933,
"step": 43680
},
{
"epoch": 1672.2641509433963,
"grad_norm": 1.6300209643052874,
"learning_rate": 1.154536413171247e-05,
"loss": 1.262,
"step": 43700
},
{
"epoch": 1673.0188679245282,
"grad_norm": 1.6492759638436003,
"learning_rate": 1.1520732869147992e-05,
"loss": 1.2733,
"step": 43720
},
{
"epoch": 1673.7735849056603,
"grad_norm": 1.8650633960066672,
"learning_rate": 1.149614362579798e-05,
"loss": 1.2536,
"step": 43740
},
{
"epoch": 1674.5283018867924,
"grad_norm": 2.1343348774015154,
"learning_rate": 1.1471596435014422e-05,
"loss": 1.2813,
"step": 43760
},
{
"epoch": 1675.2830188679245,
"grad_norm": 1.8893198944715273,
"learning_rate": 1.144709133009231e-05,
"loss": 1.2563,
"step": 43780
},
{
"epoch": 1676.0377358490566,
"grad_norm": 2.1120967291275416,
"learning_rate": 1.1422628344269509e-05,
"loss": 1.2821,
"step": 43800
},
{
"epoch": 1676.7924528301887,
"grad_norm": 1.9495522685303381,
"learning_rate": 1.1398207510726789e-05,
"loss": 1.2517,
"step": 43820
},
{
"epoch": 1677.5471698113208,
"grad_norm": 2.467904007991342,
"learning_rate": 1.1373828862587707e-05,
"loss": 1.2561,
"step": 43840
},
{
"epoch": 1678.301886792453,
"grad_norm": 2.0894802109018364,
"learning_rate": 1.1349492432918656e-05,
"loss": 1.2435,
"step": 43860
},
{
"epoch": 1679.0566037735848,
"grad_norm": 1.828171013483477,
"learning_rate": 1.1325198254728714e-05,
"loss": 1.2622,
"step": 43880
},
{
"epoch": 1679.811320754717,
"grad_norm": 1.801681773387057,
"learning_rate": 1.1300946360969663e-05,
"loss": 1.2558,
"step": 43900
},
{
"epoch": 1680.566037735849,
"grad_norm": 1.6735648229781173,
"learning_rate": 1.127673678453596e-05,
"loss": 1.2758,
"step": 43920
},
{
"epoch": 1681.3207547169811,
"grad_norm": 1.8439021982026453,
"learning_rate": 1.1252569558264623e-05,
"loss": 1.2711,
"step": 43940
},
{
"epoch": 1682.0754716981132,
"grad_norm": 2.1505206283933016,
"learning_rate": 1.1228444714935267e-05,
"loss": 1.2654,
"step": 43960
},
{
"epoch": 1682.8301886792453,
"grad_norm": 2.3535064036412763,
"learning_rate": 1.1204362287269989e-05,
"loss": 1.2864,
"step": 43980
},
{
"epoch": 1683.5849056603774,
"grad_norm": 1.9848669397447662,
"learning_rate": 1.1180322307933367e-05,
"loss": 1.2678,
"step": 44000
},
{
"epoch": 1684.3396226415093,
"grad_norm": 1.9913924876704123,
"learning_rate": 1.1156324809532414e-05,
"loss": 1.2676,
"step": 44020
},
{
"epoch": 1685.0943396226414,
"grad_norm": 1.7514208349630622,
"learning_rate": 1.1132369824616499e-05,
"loss": 1.2616,
"step": 44040
},
{
"epoch": 1685.8490566037735,
"grad_norm": 2.2320848012235888,
"learning_rate": 1.1108457385677357e-05,
"loss": 1.2342,
"step": 44060
},
{
"epoch": 1686.6037735849056,
"grad_norm": 1.5942465466549,
"learning_rate": 1.1084587525148977e-05,
"loss": 1.2645,
"step": 44080
},
{
"epoch": 1687.3584905660377,
"grad_norm": 1.7930621881455668,
"learning_rate": 1.1060760275407643e-05,
"loss": 1.2534,
"step": 44100
},
{
"epoch": 1688.1132075471698,
"grad_norm": 1.9526833117644506,
"learning_rate": 1.1036975668771807e-05,
"loss": 1.2609,
"step": 44120
},
{
"epoch": 1688.867924528302,
"grad_norm": 2.7437511360247084,
"learning_rate": 1.1013233737502087e-05,
"loss": 1.2343,
"step": 44140
},
{
"epoch": 1689.622641509434,
"grad_norm": 1.9223463912813794,
"learning_rate": 1.098953451380124e-05,
"loss": 1.2442,
"step": 44160
},
{
"epoch": 1690.377358490566,
"grad_norm": 1.8610095181805815,
"learning_rate": 1.0965878029814056e-05,
"loss": 1.2754,
"step": 44180
},
{
"epoch": 1691.132075471698,
"grad_norm": 1.8877853703015002,
"learning_rate": 1.0942264317627406e-05,
"loss": 1.2491,
"step": 44200
},
{
"epoch": 1691.8867924528302,
"grad_norm": 1.7714097265467896,
"learning_rate": 1.09186934092701e-05,
"loss": 1.2405,
"step": 44220
},
{
"epoch": 1692.6415094339623,
"grad_norm": 1.7637174019223203,
"learning_rate": 1.0895165336712904e-05,
"loss": 1.2829,
"step": 44240
},
{
"epoch": 1693.3962264150944,
"grad_norm": 1.7656304793121242,
"learning_rate": 1.087168013186851e-05,
"loss": 1.2702,
"step": 44260
},
{
"epoch": 1694.1509433962265,
"grad_norm": 1.7808808271916323,
"learning_rate": 1.0848237826591417e-05,
"loss": 1.2587,
"step": 44280
},
{
"epoch": 1694.9056603773586,
"grad_norm": 2.0266053428110538,
"learning_rate": 1.0824838452677987e-05,
"loss": 1.2926,
"step": 44300
},
{
"epoch": 1695.6603773584907,
"grad_norm": 1.8115058261411354,
"learning_rate": 1.0801482041866307e-05,
"loss": 1.2694,
"step": 44320
},
{
"epoch": 1696.4150943396226,
"grad_norm": 2.4945233871026526,
"learning_rate": 1.0778168625836231e-05,
"loss": 1.2699,
"step": 44340
},
{
"epoch": 1697.1698113207547,
"grad_norm": 1.6502310134668141,
"learning_rate": 1.0754898236209268e-05,
"loss": 1.2614,
"step": 44360
},
{
"epoch": 1697.9245283018868,
"grad_norm": 1.8453532789577662,
"learning_rate": 1.0731670904548564e-05,
"loss": 1.2823,
"step": 44380
},
{
"epoch": 1698.6792452830189,
"grad_norm": 1.860653653518071,
"learning_rate": 1.070848666235889e-05,
"loss": 1.2641,
"step": 44400
},
{
"epoch": 1699.433962264151,
"grad_norm": 2.5399359657927856,
"learning_rate": 1.0685345541086543e-05,
"loss": 1.2654,
"step": 44420
},
{
"epoch": 1700.188679245283,
"grad_norm": 2.0018966910355798,
"learning_rate": 1.0662247572119366e-05,
"loss": 1.2504,
"step": 44440
},
{
"epoch": 1700.9433962264152,
"grad_norm": 1.9371452956267547,
"learning_rate": 1.0639192786786632e-05,
"loss": 1.2599,
"step": 44460
},
{
"epoch": 1701.698113207547,
"grad_norm": 1.6521207456435931,
"learning_rate": 1.061618121635906e-05,
"loss": 1.2391,
"step": 44480
},
{
"epoch": 1702.4528301886792,
"grad_norm": 1.8656871033709692,
"learning_rate": 1.0593212892048769e-05,
"loss": 1.2724,
"step": 44500
},
{
"epoch": 1703.2075471698113,
"grad_norm": 1.8848695534792095,
"learning_rate": 1.0570287845009191e-05,
"loss": 1.2528,
"step": 44520
},
{
"epoch": 1703.9622641509434,
"grad_norm": 1.9187788004054305,
"learning_rate": 1.0547406106335084e-05,
"loss": 1.2518,
"step": 44540
},
{
"epoch": 1704.7169811320755,
"grad_norm": 1.6729690153958676,
"learning_rate": 1.0524567707062449e-05,
"loss": 1.2437,
"step": 44560
},
{
"epoch": 1705.4716981132076,
"grad_norm": 1.8430409315959264,
"learning_rate": 1.0501772678168493e-05,
"loss": 1.2467,
"step": 44580
},
{
"epoch": 1706.2264150943397,
"grad_norm": 1.8213698778380842,
"learning_rate": 1.0479021050571638e-05,
"loss": 1.261,
"step": 44600
},
{
"epoch": 1706.9811320754718,
"grad_norm": 1.7756575044684015,
"learning_rate": 1.0456312855131388e-05,
"loss": 1.2278,
"step": 44620
},
{
"epoch": 1707.7358490566037,
"grad_norm": 1.4850849315300283,
"learning_rate": 1.0433648122648373e-05,
"loss": 1.242,
"step": 44640
},
{
"epoch": 1708.4905660377358,
"grad_norm": 1.9352490992820244,
"learning_rate": 1.0411026883864254e-05,
"loss": 1.2507,
"step": 44660
},
{
"epoch": 1709.245283018868,
"grad_norm": 2.2842368933958634,
"learning_rate": 1.0388449169461693e-05,
"loss": 1.2614,
"step": 44680
},
{
"epoch": 1710.0,
"grad_norm": 1.716195015782983,
"learning_rate": 1.0365915010064342e-05,
"loss": 1.2467,
"step": 44700
},
{
"epoch": 1710.754716981132,
"grad_norm": 2.1393035099583524,
"learning_rate": 1.0343424436236746e-05,
"loss": 1.2697,
"step": 44720
},
{
"epoch": 1711.5094339622642,
"grad_norm": 2.0773856374828354,
"learning_rate": 1.0320977478484364e-05,
"loss": 1.2642,
"step": 44740
},
{
"epoch": 1712.2641509433963,
"grad_norm": 1.862877983575214,
"learning_rate": 1.0298574167253475e-05,
"loss": 1.2269,
"step": 44760
},
{
"epoch": 1713.0188679245282,
"grad_norm": 1.8342101414521328,
"learning_rate": 1.0276214532931146e-05,
"loss": 1.2535,
"step": 44780
},
{
"epoch": 1713.7735849056603,
"grad_norm": 1.9601396356391216,
"learning_rate": 1.0253898605845225e-05,
"loss": 1.2327,
"step": 44800
},
{
"epoch": 1714.5283018867924,
"grad_norm": 4.66393766300096,
"learning_rate": 1.0231626416264286e-05,
"loss": 1.2503,
"step": 44820
},
{
"epoch": 1715.2830188679245,
"grad_norm": 1.8180258292414466,
"learning_rate": 1.020939799439755e-05,
"loss": 1.2401,
"step": 44840
},
{
"epoch": 1716.0377358490566,
"grad_norm": 2.106671537780403,
"learning_rate": 1.0187213370394877e-05,
"loss": 1.2536,
"step": 44860
},
{
"epoch": 1716.7924528301887,
"grad_norm": 2.006353528787222,
"learning_rate": 1.016507257434674e-05,
"loss": 1.2669,
"step": 44880
},
{
"epoch": 1717.5471698113208,
"grad_norm": 1.9080849159374786,
"learning_rate": 1.0142975636284143e-05,
"loss": 1.2509,
"step": 44900
},
{
"epoch": 1718.301886792453,
"grad_norm": 1.8773707581872159,
"learning_rate": 1.0120922586178633e-05,
"loss": 1.2675,
"step": 44920
},
{
"epoch": 1719.0566037735848,
"grad_norm": 2.0076660138565647,
"learning_rate": 1.00989134539422e-05,
"loss": 1.2534,
"step": 44940
},
{
"epoch": 1719.811320754717,
"grad_norm": 2.33419651636862,
"learning_rate": 1.0076948269427267e-05,
"loss": 1.2397,
"step": 44960
},
{
"epoch": 1720.566037735849,
"grad_norm": 2.1404808355187552,
"learning_rate": 1.0055027062426677e-05,
"loss": 1.2533,
"step": 44980
},
{
"epoch": 1721.3207547169811,
"grad_norm": 1.8480400039657447,
"learning_rate": 1.003314986267358e-05,
"loss": 1.2493,
"step": 45000
},
{
"epoch": 1722.0754716981132,
"grad_norm": 1.8827968491873732,
"learning_rate": 1.0011316699841473e-05,
"loss": 1.2622,
"step": 45020
},
{
"epoch": 1722.8301886792453,
"grad_norm": 1.805703534242214,
"learning_rate": 9.989527603544106e-06,
"loss": 1.2363,
"step": 45040
},
{
"epoch": 1723.5849056603774,
"grad_norm": 1.808082360236483,
"learning_rate": 9.967782603335458e-06,
"loss": 1.2487,
"step": 45060
},
{
"epoch": 1724.3396226415093,
"grad_norm": 1.7375867158357146,
"learning_rate": 9.946081728709704e-06,
"loss": 1.2495,
"step": 45080
},
{
"epoch": 1725.0943396226414,
"grad_norm": 1.9612535567440743,
"learning_rate": 9.92442500910116e-06,
"loss": 1.229,
"step": 45100
},
{
"epoch": 1725.8490566037735,
"grad_norm": 1.7862147453258874,
"learning_rate": 9.902812473884265e-06,
"loss": 1.257,
"step": 45120
},
{
"epoch": 1726.6037735849056,
"grad_norm": 1.9883007002332853,
"learning_rate": 9.881244152373517e-06,
"loss": 1.2449,
"step": 45140
},
{
"epoch": 1727.3584905660377,
"grad_norm": 1.9502309547963228,
"learning_rate": 9.859720073823439e-06,
"loss": 1.224,
"step": 45160
},
{
"epoch": 1728.1132075471698,
"grad_norm": 1.896645829195727,
"learning_rate": 9.838240267428569e-06,
"loss": 1.2396,
"step": 45180
},
{
"epoch": 1728.867924528302,
"grad_norm": 1.629877819405046,
"learning_rate": 9.816804762323362e-06,
"loss": 1.2227,
"step": 45200
},
{
"epoch": 1729.622641509434,
"grad_norm": 1.929708983579025,
"learning_rate": 9.795413587582212e-06,
"loss": 1.2516,
"step": 45220
},
{
"epoch": 1730.377358490566,
"grad_norm": 2.0413627464070543,
"learning_rate": 9.77406677221937e-06,
"loss": 1.2514,
"step": 45240
},
{
"epoch": 1731.132075471698,
"grad_norm": 1.9912183520226578,
"learning_rate": 9.75276434518892e-06,
"loss": 1.2414,
"step": 45260
},
{
"epoch": 1731.8867924528302,
"grad_norm": 1.9880956960393557,
"learning_rate": 9.731506335384743e-06,
"loss": 1.2419,
"step": 45280
},
{
"epoch": 1732.6415094339623,
"grad_norm": 1.7890362722548563,
"learning_rate": 9.710292771640488e-06,
"loss": 1.2369,
"step": 45300
},
{
"epoch": 1733.3962264150944,
"grad_norm": 1.7651257661243038,
"learning_rate": 9.689123682729494e-06,
"loss": 1.2311,
"step": 45320
},
{
"epoch": 1734.1509433962265,
"grad_norm": 2.2714752518928596,
"learning_rate": 9.667999097364786e-06,
"loss": 1.2367,
"step": 45340
},
{
"epoch": 1734.9056603773586,
"grad_norm": 1.6713670044384341,
"learning_rate": 9.646919044199022e-06,
"loss": 1.2404,
"step": 45360
},
{
"epoch": 1735.6603773584907,
"grad_norm": 1.6792236680717407,
"learning_rate": 9.625883551824463e-06,
"loss": 1.2196,
"step": 45380
},
{
"epoch": 1736.4150943396226,
"grad_norm": 2.115412691262234,
"learning_rate": 9.604892648772943e-06,
"loss": 1.266,
"step": 45400
},
{
"epoch": 1737.1698113207547,
"grad_norm": 1.8051416403865777,
"learning_rate": 9.583946363515793e-06,
"loss": 1.2226,
"step": 45420
},
{
"epoch": 1737.9245283018868,
"grad_norm": 1.6009792370635079,
"learning_rate": 9.563044724463834e-06,
"loss": 1.2201,
"step": 45440
},
{
"epoch": 1738.6792452830189,
"grad_norm": 1.6768644470720357,
"learning_rate": 9.542187759967324e-06,
"loss": 1.2421,
"step": 45460
},
{
"epoch": 1739.433962264151,
"grad_norm": 1.8206836356027367,
"learning_rate": 9.521375498315946e-06,
"loss": 1.2329,
"step": 45480
},
{
"epoch": 1740.188679245283,
"grad_norm": 1.938456827512391,
"learning_rate": 9.500607967738736e-06,
"loss": 1.2325,
"step": 45500
},
{
"epoch": 1740.9433962264152,
"grad_norm": 2.2438430820956277,
"learning_rate": 9.47988519640406e-06,
"loss": 1.2354,
"step": 45520
},
{
"epoch": 1741.698113207547,
"grad_norm": 1.8583994537682718,
"learning_rate": 9.459207212419571e-06,
"loss": 1.235,
"step": 45540
},
{
"epoch": 1742.4528301886792,
"grad_norm": 1.917428400078255,
"learning_rate": 9.438574043832166e-06,
"loss": 1.224,
"step": 45560
},
{
"epoch": 1743.2075471698113,
"grad_norm": 1.9824261322614047,
"learning_rate": 9.417985718627978e-06,
"loss": 1.2129,
"step": 45580
},
{
"epoch": 1743.9622641509434,
"grad_norm": 1.612460364379856,
"learning_rate": 9.397442264732312e-06,
"loss": 1.2377,
"step": 45600
},
{
"epoch": 1744.7169811320755,
"grad_norm": 1.887246888937515,
"learning_rate": 9.376943710009596e-06,
"loss": 1.239,
"step": 45620
},
{
"epoch": 1745.4716981132076,
"grad_norm": 1.8662303877058588,
"learning_rate": 9.35649008226336e-06,
"loss": 1.2157,
"step": 45640
},
{
"epoch": 1746.2264150943397,
"grad_norm": 1.8071426126559238,
"learning_rate": 9.336081409236198e-06,
"loss": 1.2428,
"step": 45660
},
{
"epoch": 1746.9811320754718,
"grad_norm": 2.0833393122383828,
"learning_rate": 9.315717718609757e-06,
"loss": 1.2492,
"step": 45680
},
{
"epoch": 1747.7358490566037,
"grad_norm": 1.6715168273471837,
"learning_rate": 9.295399038004633e-06,
"loss": 1.2266,
"step": 45700
},
{
"epoch": 1748.4905660377358,
"grad_norm": 2.195357416639307,
"learning_rate": 9.275125394980386e-06,
"loss": 1.2253,
"step": 45720
},
{
"epoch": 1749.245283018868,
"grad_norm": 1.6874680664457093,
"learning_rate": 9.254896817035483e-06,
"loss": 1.2173,
"step": 45740
},
{
"epoch": 1750.0,
"grad_norm": 1.711392020263029,
"learning_rate": 9.234713331607285e-06,
"loss": 1.2454,
"step": 45760
},
{
"epoch": 1750.754716981132,
"grad_norm": 2.3706684674067713,
"learning_rate": 9.214574966071978e-06,
"loss": 1.2308,
"step": 45780
},
{
"epoch": 1751.5094339622642,
"grad_norm": 1.6753069249066932,
"learning_rate": 9.19448174774455e-06,
"loss": 1.2413,
"step": 45800
},
{
"epoch": 1752.2641509433963,
"grad_norm": 1.769095060241516,
"learning_rate": 9.174433703878748e-06,
"loss": 1.2319,
"step": 45820
},
{
"epoch": 1753.0188679245282,
"grad_norm": 2.3962310618426756,
"learning_rate": 9.154430861667043e-06,
"loss": 1.2352,
"step": 45840
},
{
"epoch": 1753.7735849056603,
"grad_norm": 1.9412455526945074,
"learning_rate": 9.134473248240613e-06,
"loss": 1.2102,
"step": 45860
},
{
"epoch": 1754.5283018867924,
"grad_norm": 1.8463733972517142,
"learning_rate": 9.114560890669284e-06,
"loss": 1.2332,
"step": 45880
},
{
"epoch": 1755.2830188679245,
"grad_norm": 2.04310291045162,
"learning_rate": 9.094693815961489e-06,
"loss": 1.214,
"step": 45900
},
{
"epoch": 1756.0377358490566,
"grad_norm": 1.6171015922091012,
"learning_rate": 9.074872051064247e-06,
"loss": 1.2699,
"step": 45920
},
{
"epoch": 1756.7924528301887,
"grad_norm": 1.9834386299608673,
"learning_rate": 9.05509562286311e-06,
"loss": 1.2278,
"step": 45940
},
{
"epoch": 1757.5471698113208,
"grad_norm": 1.821750331122801,
"learning_rate": 9.035364558182156e-06,
"loss": 1.2209,
"step": 45960
},
{
"epoch": 1758.301886792453,
"grad_norm": 2.014697149659363,
"learning_rate": 9.01567888378393e-06,
"loss": 1.2467,
"step": 45980
},
{
"epoch": 1759.0566037735848,
"grad_norm": 1.7691564521949696,
"learning_rate": 8.9960386263694e-06,
"loss": 1.2387,
"step": 46000
},
{
"epoch": 1759.811320754717,
"grad_norm": 2.3970514513874353,
"learning_rate": 8.976443812577933e-06,
"loss": 1.2356,
"step": 46020
},
{
"epoch": 1760.566037735849,
"grad_norm": 1.7866985162824316,
"learning_rate": 8.956894468987255e-06,
"loss": 1.2192,
"step": 46040
},
{
"epoch": 1761.3207547169811,
"grad_norm": 1.4793276251372218,
"learning_rate": 8.93739062211343e-06,
"loss": 1.2255,
"step": 46060
},
{
"epoch": 1762.0754716981132,
"grad_norm": 1.629080653433639,
"learning_rate": 8.917932298410821e-06,
"loss": 1.2293,
"step": 46080
},
{
"epoch": 1762.8301886792453,
"grad_norm": 1.9159436924110016,
"learning_rate": 8.898519524272015e-06,
"loss": 1.2401,
"step": 46100
},
{
"epoch": 1763.5849056603774,
"grad_norm": 2.067014892731833,
"learning_rate": 8.879152326027837e-06,
"loss": 1.2344,
"step": 46120
},
{
"epoch": 1764.3396226415093,
"grad_norm": 1.8696210113324339,
"learning_rate": 8.859830729947271e-06,
"loss": 1.2223,
"step": 46140
},
{
"epoch": 1765.0943396226414,
"grad_norm": 2.110486612271203,
"learning_rate": 8.840554762237504e-06,
"loss": 1.243,
"step": 46160
},
{
"epoch": 1765.8490566037735,
"grad_norm": 2.123761822878677,
"learning_rate": 8.821324449043775e-06,
"loss": 1.219,
"step": 46180
},
{
"epoch": 1766.6037735849056,
"grad_norm": 1.9704006034099235,
"learning_rate": 8.802139816449425e-06,
"loss": 1.2274,
"step": 46200
},
{
"epoch": 1767.3584905660377,
"grad_norm": 2.577400619765411,
"learning_rate": 8.783000890475817e-06,
"loss": 1.2215,
"step": 46220
},
{
"epoch": 1768.1132075471698,
"grad_norm": 1.9304613762583265,
"learning_rate": 8.763907697082349e-06,
"loss": 1.2278,
"step": 46240
},
{
"epoch": 1768.867924528302,
"grad_norm": 9.077022357816322,
"learning_rate": 8.744860262166374e-06,
"loss": 1.2376,
"step": 46260
},
{
"epoch": 1769.622641509434,
"grad_norm": 1.7013870498396941,
"learning_rate": 8.72585861156318e-06,
"loss": 1.2435,
"step": 46280
},
{
"epoch": 1770.377358490566,
"grad_norm": 2.2733345450497597,
"learning_rate": 8.706902771045942e-06,
"loss": 1.2491,
"step": 46300
},
{
"epoch": 1771.132075471698,
"grad_norm": 1.7197101765888114,
"learning_rate": 8.687992766325712e-06,
"loss": 1.2308,
"step": 46320
},
{
"epoch": 1771.8867924528302,
"grad_norm": 1.722161318565123,
"learning_rate": 8.669128623051374e-06,
"loss": 1.2153,
"step": 46340
},
{
"epoch": 1772.6415094339623,
"grad_norm": 1.768434935423491,
"learning_rate": 8.650310366809618e-06,
"loss": 1.231,
"step": 46360
},
{
"epoch": 1773.3962264150944,
"grad_norm": 1.754239611346281,
"learning_rate": 8.631538023124864e-06,
"loss": 1.2132,
"step": 46380
},
{
"epoch": 1774.1509433962265,
"grad_norm": 1.8552614353082573,
"learning_rate": 8.612811617459285e-06,
"loss": 1.2112,
"step": 46400
},
{
"epoch": 1774.9056603773586,
"grad_norm": 2.0773862798469467,
"learning_rate": 8.594131175212718e-06,
"loss": 1.2189,
"step": 46420
},
{
"epoch": 1775.6603773584907,
"grad_norm": 2.3280607387947905,
"learning_rate": 8.57549672172269e-06,
"loss": 1.238,
"step": 46440
},
{
"epoch": 1776.4150943396226,
"grad_norm": 2.1755774262596717,
"learning_rate": 8.556908282264332e-06,
"loss": 1.2024,
"step": 46460
},
{
"epoch": 1777.1698113207547,
"grad_norm": 1.7187738055157478,
"learning_rate": 8.538365882050364e-06,
"loss": 1.2234,
"step": 46480
},
{
"epoch": 1777.9245283018868,
"grad_norm": 3.1309450039543165,
"learning_rate": 8.51986954623106e-06,
"loss": 1.2081,
"step": 46500
},
{
"epoch": 1778.6792452830189,
"grad_norm": 2.1042177578345567,
"learning_rate": 8.501419299894205e-06,
"loss": 1.1976,
"step": 46520
},
{
"epoch": 1779.433962264151,
"grad_norm": 2.4039696113928586,
"learning_rate": 8.483015168065095e-06,
"loss": 1.2068,
"step": 46540
},
{
"epoch": 1780.188679245283,
"grad_norm": 2.0537571832378605,
"learning_rate": 8.464657175706461e-06,
"loss": 1.2143,
"step": 46560
},
{
"epoch": 1780.9433962264152,
"grad_norm": 1.9918815720142324,
"learning_rate": 8.44634534771845e-06,
"loss": 1.2019,
"step": 46580
},
{
"epoch": 1781.698113207547,
"grad_norm": 3.5070134161926214,
"learning_rate": 8.428079708938597e-06,
"loss": 1.2117,
"step": 46600
},
{
"epoch": 1782.4528301886792,
"grad_norm": 1.9332698868995186,
"learning_rate": 8.409860284141776e-06,
"loss": 1.2109,
"step": 46620
},
{
"epoch": 1783.2075471698113,
"grad_norm": 1.8649611050997916,
"learning_rate": 8.391687098040202e-06,
"loss": 1.2127,
"step": 46640
},
{
"epoch": 1783.9622641509434,
"grad_norm": 2.1126115309707276,
"learning_rate": 8.373560175283366e-06,
"loss": 1.2071,
"step": 46660
},
{
"epoch": 1784.7169811320755,
"grad_norm": 2.1198410570984145,
"learning_rate": 8.355479540457997e-06,
"loss": 1.2136,
"step": 46680
},
{
"epoch": 1785.4716981132076,
"grad_norm": 1.6900109710024558,
"learning_rate": 8.337445218088043e-06,
"loss": 1.2524,
"step": 46700
},
{
"epoch": 1786.2264150943397,
"grad_norm": 1.8630113220385771,
"learning_rate": 8.31945723263464e-06,
"loss": 1.2265,
"step": 46720
},
{
"epoch": 1786.9811320754718,
"grad_norm": 1.8874455281957463,
"learning_rate": 8.301515608496088e-06,
"loss": 1.2177,
"step": 46740
},
{
"epoch": 1787.7358490566037,
"grad_norm": 2.014600854617101,
"learning_rate": 8.283620370007777e-06,
"loss": 1.2181,
"step": 46760
},
{
"epoch": 1788.4905660377358,
"grad_norm": 2.0564703961686885,
"learning_rate": 8.2657715414422e-06,
"loss": 1.234,
"step": 46780
},
{
"epoch": 1789.245283018868,
"grad_norm": 1.7463019171504772,
"learning_rate": 8.247969147008883e-06,
"loss": 1.2357,
"step": 46800
},
{
"epoch": 1790.0,
"grad_norm": 2.0207773867855345,
"learning_rate": 8.230213210854395e-06,
"loss": 1.2148,
"step": 46820
},
{
"epoch": 1790.754716981132,
"grad_norm": 2.3337870810525168,
"learning_rate": 8.21250375706228e-06,
"loss": 1.237,
"step": 46840
},
{
"epoch": 1791.5094339622642,
"grad_norm": 2.1435617881979563,
"learning_rate": 8.194840809653027e-06,
"loss": 1.2374,
"step": 46860
},
{
"epoch": 1792.2641509433963,
"grad_norm": 1.9102469560838522,
"learning_rate": 8.177224392584056e-06,
"loss": 1.209,
"step": 46880
},
{
"epoch": 1793.0188679245282,
"grad_norm": 2.1795923550151737,
"learning_rate": 8.159654529749662e-06,
"loss": 1.2063,
"step": 46900
},
{
"epoch": 1793.7735849056603,
"grad_norm": 1.823175394536622,
"learning_rate": 8.142131244981005e-06,
"loss": 1.1934,
"step": 46920
},
{
"epoch": 1794.5283018867924,
"grad_norm": 1.8053211353930545,
"learning_rate": 8.12465456204608e-06,
"loss": 1.2198,
"step": 46940
},
{
"epoch": 1795.2830188679245,
"grad_norm": 2.2947577379489195,
"learning_rate": 8.107224504649651e-06,
"loss": 1.2309,
"step": 46960
},
{
"epoch": 1796.0377358490566,
"grad_norm": 1.8475992608945049,
"learning_rate": 8.089841096433251e-06,
"loss": 1.2087,
"step": 46980
},
{
"epoch": 1796.7924528301887,
"grad_norm": 1.8272879309025556,
"learning_rate": 8.072504360975127e-06,
"loss": 1.2136,
"step": 47000
},
{
"epoch": 1797.5471698113208,
"grad_norm": 1.8165782997861282,
"learning_rate": 8.055214321790241e-06,
"loss": 1.1889,
"step": 47020
},
{
"epoch": 1798.301886792453,
"grad_norm": 2.3340672269584726,
"learning_rate": 8.03797100233022e-06,
"loss": 1.221,
"step": 47040
},
{
"epoch": 1799.0566037735848,
"grad_norm": 2.092467100741215,
"learning_rate": 8.020774425983296e-06,
"loss": 1.2128,
"step": 47060
},
{
"epoch": 1799.811320754717,
"grad_norm": 2.3746119444632,
"learning_rate": 8.003624616074315e-06,
"loss": 1.2182,
"step": 47080
},
{
"epoch": 1800.566037735849,
"grad_norm": 1.8281656528438364,
"learning_rate": 7.9865215958647e-06,
"loss": 1.2263,
"step": 47100
},
{
"epoch": 1801.3207547169811,
"grad_norm": 1.7918154594625133,
"learning_rate": 7.969465388552383e-06,
"loss": 1.2213,
"step": 47120
},
{
"epoch": 1802.0754716981132,
"grad_norm": 1.6967922986825377,
"learning_rate": 7.95245601727184e-06,
"loss": 1.2138,
"step": 47140
},
{
"epoch": 1802.8301886792453,
"grad_norm": 2.1758444336626437,
"learning_rate": 7.935493505093988e-06,
"loss": 1.2148,
"step": 47160
},
{
"epoch": 1803.5849056603774,
"grad_norm": 2.065548344188712,
"learning_rate": 7.918577875026188e-06,
"loss": 1.225,
"step": 47180
},
{
"epoch": 1804.3396226415093,
"grad_norm": 2.285598146488397,
"learning_rate": 7.901709150012234e-06,
"loss": 1.2029,
"step": 47200
},
{
"epoch": 1805.0943396226414,
"grad_norm": 1.845588983749011,
"learning_rate": 7.884887352932272e-06,
"loss": 1.2197,
"step": 47220
},
{
"epoch": 1805.8490566037735,
"grad_norm": 2.1058361117020095,
"learning_rate": 7.868112506602826e-06,
"loss": 1.2153,
"step": 47240
},
{
"epoch": 1806.6037735849056,
"grad_norm": 2.465710936967516,
"learning_rate": 7.851384633776713e-06,
"loss": 1.228,
"step": 47260
},
{
"epoch": 1807.3584905660377,
"grad_norm": 2.5705709977723905,
"learning_rate": 7.834703757143039e-06,
"loss": 1.2098,
"step": 47280
},
{
"epoch": 1808.1132075471698,
"grad_norm": 2.2374731183447105,
"learning_rate": 7.818069899327187e-06,
"loss": 1.2129,
"step": 47300
},
{
"epoch": 1808.867924528302,
"grad_norm": 1.9504362821950096,
"learning_rate": 7.801483082890734e-06,
"loss": 1.1901,
"step": 47320
},
{
"epoch": 1809.622641509434,
"grad_norm": 2.3420693435077813,
"learning_rate": 7.784943330331486e-06,
"loss": 1.211,
"step": 47340
},
{
"epoch": 1810.377358490566,
"grad_norm": 2.031062915249881,
"learning_rate": 7.768450664083389e-06,
"loss": 1.2156,
"step": 47360
},
{
"epoch": 1811.132075471698,
"grad_norm": 1.6147973470014159,
"learning_rate": 7.752005106516516e-06,
"loss": 1.2246,
"step": 47380
},
{
"epoch": 1811.8867924528302,
"grad_norm": 3.287052917624636,
"learning_rate": 7.735606679937075e-06,
"loss": 1.2064,
"step": 47400
},
{
"epoch": 1812.6415094339623,
"grad_norm": 2.2761297016427178,
"learning_rate": 7.719255406587317e-06,
"loss": 1.212,
"step": 47420
},
{
"epoch": 1813.3962264150944,
"grad_norm": 1.7704387481824377,
"learning_rate": 7.702951308645558e-06,
"loss": 1.2085,
"step": 47440
},
{
"epoch": 1814.1509433962265,
"grad_norm": 1.8350242336586524,
"learning_rate": 7.68669440822611e-06,
"loss": 1.222,
"step": 47460
},
{
"epoch": 1814.9056603773586,
"grad_norm": 2.2387441733555202,
"learning_rate": 7.67048472737927e-06,
"loss": 1.2227,
"step": 47480
},
{
"epoch": 1815.6603773584907,
"grad_norm": 2.0331586599518863,
"learning_rate": 7.654322288091307e-06,
"loss": 1.2105,
"step": 47500
},
{
"epoch": 1816.4150943396226,
"grad_norm": 1.7302486664188137,
"learning_rate": 7.638207112284387e-06,
"loss": 1.2006,
"step": 47520
},
{
"epoch": 1817.1698113207547,
"grad_norm": 2.102196000276882,
"learning_rate": 7.622139221816588e-06,
"loss": 1.2129,
"step": 47540
},
{
"epoch": 1817.9245283018868,
"grad_norm": 2.367853919945459,
"learning_rate": 7.606118638481834e-06,
"loss": 1.2137,
"step": 47560
},
{
"epoch": 1818.6792452830189,
"grad_norm": 1.7313717857059043,
"learning_rate": 7.5901453840099084e-06,
"loss": 1.1895,
"step": 47580
},
{
"epoch": 1819.433962264151,
"grad_norm": 1.90549898399535,
"learning_rate": 7.574219480066374e-06,
"loss": 1.2056,
"step": 47600
},
{
"epoch": 1820.188679245283,
"grad_norm": 2.037261933639343,
"learning_rate": 7.55834094825259e-06,
"loss": 1.2174,
"step": 47620
},
{
"epoch": 1820.9433962264152,
"grad_norm": 2.106635441636325,
"learning_rate": 7.542509810105648e-06,
"loss": 1.1982,
"step": 47640
},
{
"epoch": 1821.698113207547,
"grad_norm": 2.092038104009338,
"learning_rate": 7.526726087098354e-06,
"loss": 1.2218,
"step": 47660
},
{
"epoch": 1822.4528301886792,
"grad_norm": 1.997516919579926,
"learning_rate": 7.51098980063922e-06,
"loss": 1.2219,
"step": 47680
},
{
"epoch": 1823.2075471698113,
"grad_norm": 1.8136562199600643,
"learning_rate": 7.49530097207239e-06,
"loss": 1.1796,
"step": 47700
},
{
"epoch": 1823.9622641509434,
"grad_norm": 2.5616204147227934,
"learning_rate": 7.47965962267767e-06,
"loss": 1.1939,
"step": 47720
},
{
"epoch": 1824.7169811320755,
"grad_norm": 2.1387597203680815,
"learning_rate": 7.464065773670437e-06,
"loss": 1.1602,
"step": 47740
},
{
"epoch": 1825.4716981132076,
"grad_norm": 1.7803739365612326,
"learning_rate": 7.448519446201648e-06,
"loss": 1.2392,
"step": 47760
},
{
"epoch": 1826.2264150943397,
"grad_norm": 1.8173426913493826,
"learning_rate": 7.433020661357822e-06,
"loss": 1.1921,
"step": 47780
},
{
"epoch": 1826.9811320754718,
"grad_norm": 1.7763793595591069,
"learning_rate": 7.417569440160968e-06,
"loss": 1.2139,
"step": 47800
},
{
"epoch": 1827.7358490566037,
"grad_norm": 2.151890225358,
"learning_rate": 7.402165803568603e-06,
"loss": 1.1918,
"step": 47820
},
{
"epoch": 1828.4905660377358,
"grad_norm": 2.241696527607786,
"learning_rate": 7.386809772473682e-06,
"loss": 1.199,
"step": 47840
},
{
"epoch": 1829.245283018868,
"grad_norm": 1.904140122730207,
"learning_rate": 7.371501367704594e-06,
"loss": 1.175,
"step": 47860
},
{
"epoch": 1830.0,
"grad_norm": 2.2057960272933035,
"learning_rate": 7.356240610025147e-06,
"loss": 1.2026,
"step": 47880
},
{
"epoch": 1830.754716981132,
"grad_norm": 1.992268245473379,
"learning_rate": 7.341027520134496e-06,
"loss": 1.2226,
"step": 47900
},
{
"epoch": 1831.5094339622642,
"grad_norm": 1.8558557959544568,
"learning_rate": 7.325862118667166e-06,
"loss": 1.1879,
"step": 47920
},
{
"epoch": 1832.2641509433963,
"grad_norm": 2.649917575243484,
"learning_rate": 7.3107444261929805e-06,
"loss": 1.2128,
"step": 47940
},
{
"epoch": 1833.0188679245282,
"grad_norm": 1.838198084617481,
"learning_rate": 7.295674463217053e-06,
"loss": 1.1932,
"step": 47960
},
{
"epoch": 1833.7735849056603,
"grad_norm": 1.6953634210974582,
"learning_rate": 7.280652250179774e-06,
"loss": 1.1964,
"step": 47980
},
{
"epoch": 1834.5283018867924,
"grad_norm": 1.6312994813012875,
"learning_rate": 7.26567780745675e-06,
"loss": 1.1941,
"step": 48000
},
{
"epoch": 1835.2830188679245,
"grad_norm": 2.0702401300059528,
"learning_rate": 7.250751155358808e-06,
"loss": 1.2005,
"step": 48020
},
{
"epoch": 1836.0377358490566,
"grad_norm": 2.2642885375841395,
"learning_rate": 7.2358723141319396e-06,
"loss": 1.1894,
"step": 48040
},
{
"epoch": 1836.7924528301887,
"grad_norm": 1.9527405946827057,
"learning_rate": 7.2210413039573e-06,
"loss": 1.182,
"step": 48060
},
{
"epoch": 1837.5471698113208,
"grad_norm": 2.3498816913200984,
"learning_rate": 7.206258144951163e-06,
"loss": 1.1913,
"step": 48080
},
{
"epoch": 1838.301886792453,
"grad_norm": 2.123796744980879,
"learning_rate": 7.1915228571648876e-06,
"loss": 1.2076,
"step": 48100
},
{
"epoch": 1839.0566037735848,
"grad_norm": 1.7252206439090503,
"learning_rate": 7.176835460584927e-06,
"loss": 1.1861,
"step": 48120
},
{
"epoch": 1839.811320754717,
"grad_norm": 1.8734365315429182,
"learning_rate": 7.162195975132747e-06,
"loss": 1.1826,
"step": 48140
},
{
"epoch": 1840.566037735849,
"grad_norm": 2.316186234026582,
"learning_rate": 7.147604420664858e-06,
"loss": 1.177,
"step": 48160
},
{
"epoch": 1841.3207547169811,
"grad_norm": 1.788911685930357,
"learning_rate": 7.133060816972735e-06,
"loss": 1.1844,
"step": 48180
},
{
"epoch": 1842.0754716981132,
"grad_norm": 1.9701957941688446,
"learning_rate": 7.118565183782816e-06,
"loss": 1.211,
"step": 48200
},
{
"epoch": 1842.8301886792453,
"grad_norm": 1.721205749217039,
"learning_rate": 7.104117540756494e-06,
"loss": 1.2045,
"step": 48220
},
{
"epoch": 1843.5849056603774,
"grad_norm": 1.73943762799037,
"learning_rate": 7.089717907490048e-06,
"loss": 1.2005,
"step": 48240
},
{
"epoch": 1844.3396226415093,
"grad_norm": 2.2562973087741587,
"learning_rate": 7.07536630351465e-06,
"loss": 1.21,
"step": 48260
},
{
"epoch": 1845.0943396226414,
"grad_norm": 2.10020840598067,
"learning_rate": 7.061062748296323e-06,
"loss": 1.191,
"step": 48280
},
{
"epoch": 1845.8490566037735,
"grad_norm": 1.8070339824697725,
"learning_rate": 7.0468072612359105e-06,
"loss": 1.193,
"step": 48300
},
{
"epoch": 1846.6037735849056,
"grad_norm": 1.997808139499102,
"learning_rate": 7.032599861669077e-06,
"loss": 1.2014,
"step": 48320
},
{
"epoch": 1847.3584905660377,
"grad_norm": 2.0691188983956277,
"learning_rate": 7.018440568866245e-06,
"loss": 1.1966,
"step": 48340
},
{
"epoch": 1848.1132075471698,
"grad_norm": 2.403979236635362,
"learning_rate": 7.004329402032594e-06,
"loss": 1.1782,
"step": 48360
},
{
"epoch": 1848.867924528302,
"grad_norm": 2.6004080171234385,
"learning_rate": 6.9902663803080305e-06,
"loss": 1.1804,
"step": 48380
},
{
"epoch": 1849.622641509434,
"grad_norm": 1.9209297506929766,
"learning_rate": 6.976251522767146e-06,
"loss": 1.1743,
"step": 48400
},
{
"epoch": 1850.377358490566,
"grad_norm": 1.870852128682101,
"learning_rate": 6.962284848419221e-06,
"loss": 1.1968,
"step": 48420
},
{
"epoch": 1851.132075471698,
"grad_norm": 2.335271083007723,
"learning_rate": 6.948366376208161e-06,
"loss": 1.1848,
"step": 48440
},
{
"epoch": 1851.8867924528302,
"grad_norm": 1.6640538101181919,
"learning_rate": 6.93449612501252e-06,
"loss": 1.201,
"step": 48460
},
{
"epoch": 1852.6415094339623,
"grad_norm": 1.8790103798312214,
"learning_rate": 6.920674113645418e-06,
"loss": 1.167,
"step": 48480
},
{
"epoch": 1853.3962264150944,
"grad_norm": 1.7578289543420753,
"learning_rate": 6.906900360854565e-06,
"loss": 1.2007,
"step": 48500
},
{
"epoch": 1854.1509433962265,
"grad_norm": 1.8615274702658844,
"learning_rate": 6.893174885322198e-06,
"loss": 1.2056,
"step": 48520
},
{
"epoch": 1854.9056603773586,
"grad_norm": 2.3236275376684143,
"learning_rate": 6.879497705665089e-06,
"loss": 1.1716,
"step": 48540
},
{
"epoch": 1855.6603773584907,
"grad_norm": 2.1963583105378213,
"learning_rate": 6.865868840434493e-06,
"loss": 1.1769,
"step": 48560
},
{
"epoch": 1856.4150943396226,
"grad_norm": 2.129799150022101,
"learning_rate": 6.852288308116133e-06,
"loss": 1.1861,
"step": 48580
},
{
"epoch": 1857.1698113207547,
"grad_norm": 1.978798574769679,
"learning_rate": 6.8387561271301765e-06,
"loss": 1.1971,
"step": 48600
},
{
"epoch": 1857.9245283018868,
"grad_norm": 2.2936585531474596,
"learning_rate": 6.8252723158312055e-06,
"loss": 1.1911,
"step": 48620
},
{
"epoch": 1858.6792452830189,
"grad_norm": 1.860277938482895,
"learning_rate": 6.81183689250821e-06,
"loss": 1.1566,
"step": 48640
},
{
"epoch": 1859.433962264151,
"grad_norm": 2.017669140863562,
"learning_rate": 6.79844987538453e-06,
"loss": 1.1728,
"step": 48660
},
{
"epoch": 1860.188679245283,
"grad_norm": 2.098145579199566,
"learning_rate": 6.785111282617849e-06,
"loss": 1.1934,
"step": 48680
},
{
"epoch": 1860.9433962264152,
"grad_norm": 1.835823989245946,
"learning_rate": 6.771821132300191e-06,
"loss": 1.1621,
"step": 48700
},
{
"epoch": 1861.698113207547,
"grad_norm": 1.9612631399268534,
"learning_rate": 6.7585794424578464e-06,
"loss": 1.1911,
"step": 48720
},
{
"epoch": 1862.4528301886792,
"grad_norm": 2.0755400141270464,
"learning_rate": 6.745386231051399e-06,
"loss": 1.1804,
"step": 48740
},
{
"epoch": 1863.2075471698113,
"grad_norm": 2.2075989618364984,
"learning_rate": 6.732241515975663e-06,
"loss": 1.1933,
"step": 48760
},
{
"epoch": 1863.9622641509434,
"grad_norm": 1.9214092744343696,
"learning_rate": 6.719145315059678e-06,
"loss": 1.1913,
"step": 48780
},
{
"epoch": 1864.7169811320755,
"grad_norm": 2.3557809190891703,
"learning_rate": 6.7060976460666846e-06,
"loss": 1.1905,
"step": 48800
},
{
"epoch": 1865.4716981132076,
"grad_norm": 2.114305919520162,
"learning_rate": 6.693098526694083e-06,
"loss": 1.2047,
"step": 48820
},
{
"epoch": 1866.2264150943397,
"grad_norm": 1.8242775313878226,
"learning_rate": 6.680147974573452e-06,
"loss": 1.1933,
"step": 48840
},
{
"epoch": 1866.9811320754718,
"grad_norm": 2.1056639763813956,
"learning_rate": 6.66724600727046e-06,
"loss": 1.1808,
"step": 48860
},
{
"epoch": 1867.7358490566037,
"grad_norm": 1.7165725449830957,
"learning_rate": 6.654392642284892e-06,
"loss": 1.1782,
"step": 48880
},
{
"epoch": 1868.4905660377358,
"grad_norm": 1.7341902387718784,
"learning_rate": 6.6415878970506175e-06,
"loss": 1.179,
"step": 48900
},
{
"epoch": 1869.245283018868,
"grad_norm": 1.7667425869444906,
"learning_rate": 6.6288317889355535e-06,
"loss": 1.1754,
"step": 48920
},
{
"epoch": 1870.0,
"grad_norm": 2.2994252135110655,
"learning_rate": 6.616124335241648e-06,
"loss": 1.1992,
"step": 48940
},
{
"epoch": 1870.754716981132,
"grad_norm": 1.7046658246235185,
"learning_rate": 6.603465553204852e-06,
"loss": 1.1811,
"step": 48960
},
{
"epoch": 1871.5094339622642,
"grad_norm": 2.1898673540015428,
"learning_rate": 6.5908554599951e-06,
"loss": 1.178,
"step": 48980
},
{
"epoch": 1872.2641509433963,
"grad_norm": 1.9804369829197095,
"learning_rate": 6.578294072716292e-06,
"loss": 1.1989,
"step": 49000
},
{
"epoch": 1873.0188679245282,
"grad_norm": 2.065726596455928,
"learning_rate": 6.565781408406267e-06,
"loss": 1.1931,
"step": 49020
},
{
"epoch": 1873.7735849056603,
"grad_norm": 2.2467788121970123,
"learning_rate": 6.553317484036772e-06,
"loss": 1.2074,
"step": 49040
},
{
"epoch": 1874.5283018867924,
"grad_norm": 1.991691603079823,
"learning_rate": 6.5409023165134424e-06,
"loss": 1.1983,
"step": 49060
},
{
"epoch": 1875.2830188679245,
"grad_norm": 2.106299625577455,
"learning_rate": 6.528535922675781e-06,
"loss": 1.1956,
"step": 49080
},
{
"epoch": 1876.0377358490566,
"grad_norm": 2.037693251120139,
"learning_rate": 6.516218319297147e-06,
"loss": 1.185,
"step": 49100
},
{
"epoch": 1876.7924528301887,
"grad_norm": 2.3718612692091763,
"learning_rate": 6.503949523084718e-06,
"loss": 1.1859,
"step": 49120
},
{
"epoch": 1877.5471698113208,
"grad_norm": 1.9858435056818156,
"learning_rate": 6.491729550679461e-06,
"loss": 1.2076,
"step": 49140
},
{
"epoch": 1878.301886792453,
"grad_norm": 2.147814028235424,
"learning_rate": 6.479558418656134e-06,
"loss": 1.1682,
"step": 49160
},
{
"epoch": 1879.0566037735848,
"grad_norm": 1.7628164718106505,
"learning_rate": 6.467436143523228e-06,
"loss": 1.1791,
"step": 49180
},
{
"epoch": 1879.811320754717,
"grad_norm": 1.9837896355936764,
"learning_rate": 6.455362741722995e-06,
"loss": 1.1977,
"step": 49200
},
{
"epoch": 1880.566037735849,
"grad_norm": 2.0613808893064327,
"learning_rate": 6.44333822963138e-06,
"loss": 1.1738,
"step": 49220
},
{
"epoch": 1881.3207547169811,
"grad_norm": 1.6736931908615154,
"learning_rate": 6.431362623558018e-06,
"loss": 1.1774,
"step": 49240
},
{
"epoch": 1882.0754716981132,
"grad_norm": 1.691911714014794,
"learning_rate": 6.4194359397462055e-06,
"loss": 1.1666,
"step": 49260
},
{
"epoch": 1882.8301886792453,
"grad_norm": 2.1345911027894138,
"learning_rate": 6.4075581943728944e-06,
"loss": 1.1973,
"step": 49280
},
{
"epoch": 1883.5849056603774,
"grad_norm": 1.9512349129787812,
"learning_rate": 6.395729403548645e-06,
"loss": 1.1672,
"step": 49300
},
{
"epoch": 1884.3396226415093,
"grad_norm": 2.6451924153676125,
"learning_rate": 6.383949583317629e-06,
"loss": 1.1695,
"step": 49320
},
{
"epoch": 1885.0943396226414,
"grad_norm": 1.783294063259621,
"learning_rate": 6.372218749657584e-06,
"loss": 1.1648,
"step": 49340
},
{
"epoch": 1885.8490566037735,
"grad_norm": 2.3799777683561967,
"learning_rate": 6.360536918479806e-06,
"loss": 1.1776,
"step": 49360
},
{
"epoch": 1886.6037735849056,
"grad_norm": 2.0124780882138347,
"learning_rate": 6.348904105629139e-06,
"loss": 1.1884,
"step": 49380
},
{
"epoch": 1887.3584905660377,
"grad_norm": 1.8426672524927896,
"learning_rate": 6.3373203268839345e-06,
"loss": 1.1842,
"step": 49400
},
{
"epoch": 1888.1132075471698,
"grad_norm": 2.0963675882931274,
"learning_rate": 6.325785597956021e-06,
"loss": 1.1807,
"step": 49420
},
{
"epoch": 1888.867924528302,
"grad_norm": 1.9137633109249375,
"learning_rate": 6.314299934490717e-06,
"loss": 1.1932,
"step": 49440
},
{
"epoch": 1889.622641509434,
"grad_norm": 2.2927620018796033,
"learning_rate": 6.3028633520667744e-06,
"loss": 1.186,
"step": 49460
},
{
"epoch": 1890.377358490566,
"grad_norm": 2.7433533031518182,
"learning_rate": 6.291475866196384e-06,
"loss": 1.1363,
"step": 49480
},
{
"epoch": 1891.132075471698,
"grad_norm": 1.563437023715403,
"learning_rate": 6.280137492325147e-06,
"loss": 1.2093,
"step": 49500
},
{
"epoch": 1891.8867924528302,
"grad_norm": 2.793822111662886,
"learning_rate": 6.2688482458320434e-06,
"loss": 1.1751,
"step": 49520
},
{
"epoch": 1892.6415094339623,
"grad_norm": 1.8418670948197584,
"learning_rate": 6.25760814202941e-06,
"loss": 1.1658,
"step": 49540
},
{
"epoch": 1893.3962264150944,
"grad_norm": 1.8332744098429328,
"learning_rate": 6.246417196162944e-06,
"loss": 1.1654,
"step": 49560
},
{
"epoch": 1894.1509433962265,
"grad_norm": 1.7817661421186255,
"learning_rate": 6.235275423411659e-06,
"loss": 1.1764,
"step": 49580
},
{
"epoch": 1894.9056603773586,
"grad_norm": 1.9495189221186473,
"learning_rate": 6.224182838887876e-06,
"loss": 1.1529,
"step": 49600
},
{
"epoch": 1895.6603773584907,
"grad_norm": 2.4039058315851447,
"learning_rate": 6.213139457637196e-06,
"loss": 1.1747,
"step": 49620
},
{
"epoch": 1896.4150943396226,
"grad_norm": 1.875771973172552,
"learning_rate": 6.202145294638478e-06,
"loss": 1.1821,
"step": 49640
},
{
"epoch": 1897.1698113207547,
"grad_norm": 2.1999372490425393,
"learning_rate": 6.191200364803824e-06,
"loss": 1.1813,
"step": 49660
},
{
"epoch": 1897.9245283018868,
"grad_norm": 11.988202176475387,
"learning_rate": 6.180304682978568e-06,
"loss": 1.1569,
"step": 49680
},
{
"epoch": 1898.6792452830189,
"grad_norm": 1.8662829336756046,
"learning_rate": 6.169458263941242e-06,
"loss": 1.1816,
"step": 49700
},
{
"epoch": 1899.433962264151,
"grad_norm": 2.3098966440534294,
"learning_rate": 6.158661122403553e-06,
"loss": 1.1581,
"step": 49720
},
{
"epoch": 1900.188679245283,
"grad_norm": 2.332658522584547,
"learning_rate": 6.1479132730103704e-06,
"loss": 1.1946,
"step": 49740
},
{
"epoch": 1900.9433962264152,
"grad_norm": 1.7105735490477962,
"learning_rate": 6.137214730339707e-06,
"loss": 1.1868,
"step": 49760
},
{
"epoch": 1901.698113207547,
"grad_norm": 2.243808666742797,
"learning_rate": 6.126565508902698e-06,
"loss": 1.1599,
"step": 49780
},
{
"epoch": 1902.4528301886792,
"grad_norm": 1.8783312097697262,
"learning_rate": 6.115965623143589e-06,
"loss": 1.1621,
"step": 49800
},
{
"epoch": 1903.2075471698113,
"grad_norm": 1.7166955372139616,
"learning_rate": 6.105415087439699e-06,
"loss": 1.1862,
"step": 49820
},
{
"epoch": 1903.9622641509434,
"grad_norm": 2.0340234917391524,
"learning_rate": 6.094913916101413e-06,
"loss": 1.1561,
"step": 49840
},
{
"epoch": 1904.7169811320755,
"grad_norm": 1.8220266868042787,
"learning_rate": 6.084462123372144e-06,
"loss": 1.1749,
"step": 49860
},
{
"epoch": 1905.4716981132076,
"grad_norm": 2.3373227334868973,
"learning_rate": 6.07405972342837e-06,
"loss": 1.2081,
"step": 49880
},
{
"epoch": 1906.2264150943397,
"grad_norm": 2.31770817514565,
"learning_rate": 6.063706730379534e-06,
"loss": 1.1705,
"step": 49900
},
{
"epoch": 1906.9811320754718,
"grad_norm": 1.749701769225866,
"learning_rate": 6.053403158268086e-06,
"loss": 1.1732,
"step": 49920
},
{
"epoch": 1907.7358490566037,
"grad_norm": 2.0136702275524736,
"learning_rate": 6.043149021069432e-06,
"loss": 1.1789,
"step": 49940
},
{
"epoch": 1908.4905660377358,
"grad_norm": 1.8991267563990468,
"learning_rate": 6.032944332691932e-06,
"loss": 1.1691,
"step": 49960
},
{
"epoch": 1909.245283018868,
"grad_norm": 2.178284333271757,
"learning_rate": 6.02278910697688e-06,
"loss": 1.1698,
"step": 49980
},
{
"epoch": 1910.0,
"grad_norm": 2.270879262505861,
"learning_rate": 6.012683357698476e-06,
"loss": 1.1424,
"step": 50000
},
{
"epoch": 1910.754716981132,
"grad_norm": 1.6800724855002753,
"learning_rate": 6.0026270985638094e-06,
"loss": 1.1405,
"step": 50020
},
{
"epoch": 1911.5094339622642,
"grad_norm": 1.9982510142589247,
"learning_rate": 5.9926203432128405e-06,
"loss": 1.1811,
"step": 50040
},
{
"epoch": 1912.2641509433963,
"grad_norm": 2.072677046394058,
"learning_rate": 5.98266310521839e-06,
"loss": 1.1832,
"step": 50060
},
{
"epoch": 1913.0188679245282,
"grad_norm": 1.8518177038658126,
"learning_rate": 5.972755398086119e-06,
"loss": 1.1768,
"step": 50080
},
{
"epoch": 1913.7735849056603,
"grad_norm": 1.7233232781661019,
"learning_rate": 5.9628972352545016e-06,
"loss": 1.1916,
"step": 50100
},
{
"epoch": 1914.5283018867924,
"grad_norm": 2.2438247684764776,
"learning_rate": 5.953088630094804e-06,
"loss": 1.1965,
"step": 50120
},
{
"epoch": 1915.2830188679245,
"grad_norm": 2.354329582753457,
"learning_rate": 5.943329595911085e-06,
"loss": 1.1657,
"step": 50140
},
{
"epoch": 1916.0377358490566,
"grad_norm": 2.0821470705714558,
"learning_rate": 5.933620145940163e-06,
"loss": 1.1733,
"step": 50160
},
{
"epoch": 1916.7924528301887,
"grad_norm": 2.3851614247004513,
"learning_rate": 5.92396029335161e-06,
"loss": 1.1973,
"step": 50180
},
{
"epoch": 1917.5471698113208,
"grad_norm": 2.165021041548156,
"learning_rate": 5.91435005124771e-06,
"loss": 1.1605,
"step": 50200
},
{
"epoch": 1918.301886792453,
"grad_norm": 1.8316074131304803,
"learning_rate": 5.904789432663471e-06,
"loss": 1.175,
"step": 50220
},
{
"epoch": 1919.0566037735848,
"grad_norm": 2.2444762984325517,
"learning_rate": 5.8952784505665775e-06,
"loss": 1.1546,
"step": 50240
},
{
"epoch": 1919.811320754717,
"grad_norm": 1.990381226210719,
"learning_rate": 5.885817117857409e-06,
"loss": 1.1734,
"step": 50260
},
{
"epoch": 1920.566037735849,
"grad_norm": 2.1012346230151935,
"learning_rate": 5.876405447368989e-06,
"loss": 1.1726,
"step": 50280
},
{
"epoch": 1921.3207547169811,
"grad_norm": 2.036740240000707,
"learning_rate": 5.867043451866989e-06,
"loss": 1.1858,
"step": 50300
},
{
"epoch": 1922.0754716981132,
"grad_norm": 1.9828813541843844,
"learning_rate": 5.85773114404969e-06,
"loss": 1.1523,
"step": 50320
},
{
"epoch": 1922.8301886792453,
"grad_norm": 2.1278126328460196,
"learning_rate": 5.848468536547991e-06,
"loss": 1.1886,
"step": 50340
},
{
"epoch": 1923.5849056603774,
"grad_norm": 2.315788726027488,
"learning_rate": 5.8392556419253755e-06,
"loss": 1.1686,
"step": 50360
},
{
"epoch": 1924.3396226415093,
"grad_norm": 2.3735624423680117,
"learning_rate": 5.830092472677899e-06,
"loss": 1.1584,
"step": 50380
},
{
"epoch": 1925.0943396226414,
"grad_norm": 2.3872016424634093,
"learning_rate": 5.820979041234169e-06,
"loss": 1.1859,
"step": 50400
},
{
"epoch": 1925.8490566037735,
"grad_norm": 2.0316120352053115,
"learning_rate": 5.811915359955322e-06,
"loss": 1.1578,
"step": 50420
},
{
"epoch": 1926.6037735849056,
"grad_norm": 1.9955741026809004,
"learning_rate": 5.8029014411350336e-06,
"loss": 1.1699,
"step": 50440
},
{
"epoch": 1927.3584905660377,
"grad_norm": 2.671513853147586,
"learning_rate": 5.793937296999476e-06,
"loss": 1.1613,
"step": 50460
},
{
"epoch": 1928.1132075471698,
"grad_norm": 2.085910842457962,
"learning_rate": 5.785022939707302e-06,
"loss": 1.1919,
"step": 50480
},
{
"epoch": 1928.867924528302,
"grad_norm": 2.15354309947986,
"learning_rate": 5.77615838134964e-06,
"loss": 1.1766,
"step": 50500
},
{
"epoch": 1929.622641509434,
"grad_norm": 2.5693650339132463,
"learning_rate": 5.76734363395007e-06,
"loss": 1.175,
"step": 50520
},
{
"epoch": 1930.377358490566,
"grad_norm": 3.2479427163076533,
"learning_rate": 5.7585787094646196e-06,
"loss": 1.1703,
"step": 50540
},
{
"epoch": 1931.132075471698,
"grad_norm": 2.1161416369904695,
"learning_rate": 5.749863619781723e-06,
"loss": 1.1657,
"step": 50560
},
{
"epoch": 1931.8867924528302,
"grad_norm": 1.7714808950845444,
"learning_rate": 5.7411983767222415e-06,
"loss": 1.1717,
"step": 50580
},
{
"epoch": 1932.6415094339623,
"grad_norm": 2.535706381586084,
"learning_rate": 5.732582992039398e-06,
"loss": 1.1553,
"step": 50600
},
{
"epoch": 1933.3962264150944,
"grad_norm": 2.1037009756450527,
"learning_rate": 5.724017477418814e-06,
"loss": 1.1771,
"step": 50620
},
{
"epoch": 1934.1509433962265,
"grad_norm": 1.849746935628885,
"learning_rate": 5.7155018444784526e-06,
"loss": 1.1422,
"step": 50640
},
{
"epoch": 1934.9056603773586,
"grad_norm": 1.9210522037566025,
"learning_rate": 5.707036104768635e-06,
"loss": 1.1756,
"step": 50660
},
{
"epoch": 1935.6603773584907,
"grad_norm": 1.819206038769788,
"learning_rate": 5.698620269771997e-06,
"loss": 1.1916,
"step": 50680
},
{
"epoch": 1936.4150943396226,
"grad_norm": 2.5377205844625417,
"learning_rate": 5.690254350903488e-06,
"loss": 1.1619,
"step": 50700
},
{
"epoch": 1937.1698113207547,
"grad_norm": 2.2007521096063902,
"learning_rate": 5.681938359510347e-06,
"loss": 1.1846,
"step": 50720
},
{
"epoch": 1937.9245283018868,
"grad_norm": 2.192606880082283,
"learning_rate": 5.673672306872103e-06,
"loss": 1.1699,
"step": 50740
},
{
"epoch": 1938.6792452830189,
"grad_norm": 2.1766069540448436,
"learning_rate": 5.665456204200552e-06,
"loss": 1.1871,
"step": 50760
},
{
"epoch": 1939.433962264151,
"grad_norm": 1.9751649291014899,
"learning_rate": 5.657290062639727e-06,
"loss": 1.1474,
"step": 50780
},
{
"epoch": 1940.188679245283,
"grad_norm": 1.890603847246591,
"learning_rate": 5.6491738932659e-06,
"loss": 1.1559,
"step": 50800
},
{
"epoch": 1940.9433962264152,
"grad_norm": 1.975301210235016,
"learning_rate": 5.641107707087573e-06,
"loss": 1.1521,
"step": 50820
},
{
"epoch": 1941.698113207547,
"grad_norm": 1.8441779800267277,
"learning_rate": 5.6330915150454375e-06,
"loss": 1.145,
"step": 50840
},
{
"epoch": 1942.4528301886792,
"grad_norm": 2.4451642203033064,
"learning_rate": 5.625125328012387e-06,
"loss": 1.1791,
"step": 50860
},
{
"epoch": 1943.2075471698113,
"grad_norm": 2.127622782788785,
"learning_rate": 5.617209156793476e-06,
"loss": 1.1471,
"step": 50880
},
{
"epoch": 1943.9622641509434,
"grad_norm": 2.0779587981444427,
"learning_rate": 5.609343012125934e-06,
"loss": 1.1537,
"step": 50900
},
{
"epoch": 1944.7169811320755,
"grad_norm": 2.1654459900473872,
"learning_rate": 5.601526904679125e-06,
"loss": 1.1609,
"step": 50920
},
{
"epoch": 1945.4716981132076,
"grad_norm": 1.8696254811746238,
"learning_rate": 5.593760845054552e-06,
"loss": 1.1523,
"step": 50940
},
{
"epoch": 1946.2264150943397,
"grad_norm": 1.7856722997496786,
"learning_rate": 5.586044843785832e-06,
"loss": 1.2012,
"step": 50960
},
{
"epoch": 1946.9811320754718,
"grad_norm": 1.9401895010628936,
"learning_rate": 5.578378911338684e-06,
"loss": 1.1384,
"step": 50980
},
{
"epoch": 1947.7358490566037,
"grad_norm": 2.1643993698581077,
"learning_rate": 5.570763058110911e-06,
"loss": 1.1645,
"step": 51000
},
{
"epoch": 1948.4905660377358,
"grad_norm": 2.180473463448981,
"learning_rate": 5.563197294432395e-06,
"loss": 1.1382,
"step": 51020
},
{
"epoch": 1949.245283018868,
"grad_norm": 1.9596617879790443,
"learning_rate": 5.555681630565088e-06,
"loss": 1.1539,
"step": 51040
},
{
"epoch": 1950.0,
"grad_norm": 1.8975845056567062,
"learning_rate": 5.548216076702974e-06,
"loss": 1.144,
"step": 51060
},
{
"epoch": 1950.754716981132,
"grad_norm": 2.188450696803476,
"learning_rate": 5.540800642972071e-06,
"loss": 1.1532,
"step": 51080
},
{
"epoch": 1951.5094339622642,
"grad_norm": 1.8285975742024299,
"learning_rate": 5.533435339430416e-06,
"loss": 1.1949,
"step": 51100
},
{
"epoch": 1952.2641509433963,
"grad_norm": 1.812418268110745,
"learning_rate": 5.526120176068055e-06,
"loss": 1.1613,
"step": 51120
},
{
"epoch": 1953.0188679245282,
"grad_norm": 1.9694834187837782,
"learning_rate": 5.518855162807036e-06,
"loss": 1.1749,
"step": 51140
},
{
"epoch": 1953.7735849056603,
"grad_norm": 2.014411336027095,
"learning_rate": 5.511640309501359e-06,
"loss": 1.1364,
"step": 51160
},
{
"epoch": 1954.5283018867924,
"grad_norm": 1.663001146626253,
"learning_rate": 5.504475625937011e-06,
"loss": 1.1469,
"step": 51180
},
{
"epoch": 1955.2830188679245,
"grad_norm": 3.7966652139269756,
"learning_rate": 5.497361121831918e-06,
"loss": 1.1634,
"step": 51200
},
{
"epoch": 1956.0377358490566,
"grad_norm": 1.6321676665862368,
"learning_rate": 5.490296806835955e-06,
"loss": 1.1747,
"step": 51220
},
{
"epoch": 1956.7924528301887,
"grad_norm": 1.8693551118320602,
"learning_rate": 5.483282690530914e-06,
"loss": 1.1513,
"step": 51240
},
{
"epoch": 1957.5471698113208,
"grad_norm": 1.7254591597688926,
"learning_rate": 5.476318782430499e-06,
"loss": 1.1384,
"step": 51260
},
{
"epoch": 1958.301886792453,
"grad_norm": 2.1059814541036284,
"learning_rate": 5.469405091980319e-06,
"loss": 1.145,
"step": 51280
},
{
"epoch": 1959.0566037735848,
"grad_norm": 2.4150150564267956,
"learning_rate": 5.462541628557862e-06,
"loss": 1.1727,
"step": 51300
},
{
"epoch": 1959.811320754717,
"grad_norm": 2.072986751089322,
"learning_rate": 5.4557284014725005e-06,
"loss": 1.1632,
"step": 51320
},
{
"epoch": 1960.566037735849,
"grad_norm": 1.7011080715428424,
"learning_rate": 5.448965419965458e-06,
"loss": 1.1719,
"step": 51340
},
{
"epoch": 1961.3207547169811,
"grad_norm": 2.050321684694806,
"learning_rate": 5.442252693209813e-06,
"loss": 1.1523,
"step": 51360
},
{
"epoch": 1962.0754716981132,
"grad_norm": 2.3154046947609603,
"learning_rate": 5.4355902303104744e-06,
"loss": 1.1365,
"step": 51380
},
{
"epoch": 1962.8301886792453,
"grad_norm": 2.292815295745735,
"learning_rate": 5.4289780403041805e-06,
"loss": 1.1595,
"step": 51400
},
{
"epoch": 1963.5849056603774,
"grad_norm": 2.1444563447901253,
"learning_rate": 5.422416132159477e-06,
"loss": 1.1609,
"step": 51420
},
{
"epoch": 1964.3396226415093,
"grad_norm": 1.8223405112306774,
"learning_rate": 5.415904514776712e-06,
"loss": 1.128,
"step": 51440
},
{
"epoch": 1965.0943396226414,
"grad_norm": 1.9601698616488796,
"learning_rate": 5.40944319698802e-06,
"loss": 1.1785,
"step": 51460
},
{
"epoch": 1965.8490566037735,
"grad_norm": 2.570580210466246,
"learning_rate": 5.403032187557308e-06,
"loss": 1.147,
"step": 51480
},
{
"epoch": 1966.6037735849056,
"grad_norm": 2.2935471508100553,
"learning_rate": 5.396671495180257e-06,
"loss": 1.1777,
"step": 51500
},
{
"epoch": 1967.3584905660377,
"grad_norm": 1.987678318177208,
"learning_rate": 5.390361128484278e-06,
"loss": 1.1283,
"step": 51520
},
{
"epoch": 1968.1132075471698,
"grad_norm": 1.9732671384472393,
"learning_rate": 5.38410109602855e-06,
"loss": 1.1631,
"step": 51540
},
{
"epoch": 1968.867924528302,
"grad_norm": 2.031999390800106,
"learning_rate": 5.37789140630396e-06,
"loss": 1.1498,
"step": 51560
},
{
"epoch": 1969.622641509434,
"grad_norm": 3.612362619019224,
"learning_rate": 5.3717320677331165e-06,
"loss": 1.1449,
"step": 51580
},
{
"epoch": 1970.377358490566,
"grad_norm": 2.7552728484649216,
"learning_rate": 5.365623088670337e-06,
"loss": 1.1221,
"step": 51600
},
{
"epoch": 1971.132075471698,
"grad_norm": 2.095240315052155,
"learning_rate": 5.359564477401625e-06,
"loss": 1.1635,
"step": 51620
},
{
"epoch": 1971.8867924528302,
"grad_norm": 2.051375314186468,
"learning_rate": 5.353556242144684e-06,
"loss": 1.1768,
"step": 51640
},
{
"epoch": 1972.6415094339623,
"grad_norm": 1.7681282740339075,
"learning_rate": 5.3475983910488705e-06,
"loss": 1.1524,
"step": 51660
},
{
"epoch": 1973.3962264150944,
"grad_norm": 1.9784394422194775,
"learning_rate": 5.34169093219521e-06,
"loss": 1.1694,
"step": 51680
},
{
"epoch": 1974.1509433962265,
"grad_norm": 2.1190098778323887,
"learning_rate": 5.3358338735963825e-06,
"loss": 1.1546,
"step": 51700
},
{
"epoch": 1974.9056603773586,
"grad_norm": 1.6461495183245571,
"learning_rate": 5.3300272231966895e-06,
"loss": 1.1597,
"step": 51720
},
{
"epoch": 1975.6603773584907,
"grad_norm": 2.287937258261333,
"learning_rate": 5.3242709888720875e-06,
"loss": 1.1565,
"step": 51740
},
{
"epoch": 1976.4150943396226,
"grad_norm": 1.971738312330891,
"learning_rate": 5.318565178430121e-06,
"loss": 1.1646,
"step": 51760
},
{
"epoch": 1977.1698113207547,
"grad_norm": 1.733242389596805,
"learning_rate": 5.312909799609962e-06,
"loss": 1.1507,
"step": 51780
},
{
"epoch": 1977.9245283018868,
"grad_norm": 2.2381830913006486,
"learning_rate": 5.307304860082375e-06,
"loss": 1.161,
"step": 51800
},
{
"epoch": 1978.6792452830189,
"grad_norm": 1.7639744175828544,
"learning_rate": 5.3017503674497e-06,
"loss": 1.1639,
"step": 51820
},
{
"epoch": 1979.433962264151,
"grad_norm": 2.142096606558369,
"learning_rate": 5.296246329245867e-06,
"loss": 1.145,
"step": 51840
},
{
"epoch": 1980.188679245283,
"grad_norm": 2.1364940736571905,
"learning_rate": 5.29079275293636e-06,
"loss": 1.1445,
"step": 51860
},
{
"epoch": 1980.9433962264152,
"grad_norm": 2.0408860512130063,
"learning_rate": 5.285389645918224e-06,
"loss": 1.1684,
"step": 51880
},
{
"epoch": 1981.698113207547,
"grad_norm": 2.1484279394512984,
"learning_rate": 5.280037015520047e-06,
"loss": 1.1427,
"step": 51900
},
{
"epoch": 1982.4528301886792,
"grad_norm": 1.8875817727112376,
"learning_rate": 5.27473486900196e-06,
"loss": 1.127,
"step": 51920
},
{
"epoch": 1983.2075471698113,
"grad_norm": 1.9694696435513541,
"learning_rate": 5.269483213555604e-06,
"loss": 1.1631,
"step": 51940
},
{
"epoch": 1983.9622641509434,
"grad_norm": 1.8852852930999937,
"learning_rate": 5.264282056304144e-06,
"loss": 1.1476,
"step": 51960
},
{
"epoch": 1984.7169811320755,
"grad_norm": 2.0442189239889488,
"learning_rate": 5.259131404302259e-06,
"loss": 1.1772,
"step": 51980
},
{
"epoch": 1985.4716981132076,
"grad_norm": 2.115147564108749,
"learning_rate": 5.254031264536109e-06,
"loss": 1.1451,
"step": 52000
}
],
"logging_steps": 20,
"max_steps": 54000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2077,
"save_steps": 4000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8771778183168000.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}