| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 13.0, |
| "eval_steps": 500, |
| "global_step": 1807, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03597122302158273, |
| "grad_norm": 2.5657113026110396, |
| "learning_rate": 2.2099447513812156e-07, |
| "loss": 1.0307, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.07194244604316546, |
| "grad_norm": 2.4709120913894567, |
| "learning_rate": 4.972375690607735e-07, |
| "loss": 0.999, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1079136690647482, |
| "grad_norm": 2.4404034208735927, |
| "learning_rate": 7.734806629834254e-07, |
| "loss": 0.9694, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.14388489208633093, |
| "grad_norm": 2.441872944336153, |
| "learning_rate": 1.0497237569060774e-06, |
| "loss": 1.0269, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.17985611510791366, |
| "grad_norm": 1.6991451599323688, |
| "learning_rate": 1.3259668508287293e-06, |
| "loss": 0.957, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.2158273381294964, |
| "grad_norm": 1.3307484799446745, |
| "learning_rate": 1.6022099447513815e-06, |
| "loss": 0.9355, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2517985611510791, |
| "grad_norm": 1.2496799611826757, |
| "learning_rate": 1.8784530386740332e-06, |
| "loss": 0.9666, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.28776978417266186, |
| "grad_norm": 1.1356552936883524, |
| "learning_rate": 2.1546961325966854e-06, |
| "loss": 0.921, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3237410071942446, |
| "grad_norm": 1.1527057549470892, |
| "learning_rate": 2.430939226519337e-06, |
| "loss": 0.852, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.3597122302158273, |
| "grad_norm": 1.0249903652912056, |
| "learning_rate": 2.707182320441989e-06, |
| "loss": 0.8769, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.39568345323741005, |
| "grad_norm": 1.1043714979095989, |
| "learning_rate": 2.983425414364641e-06, |
| "loss": 0.8719, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.4316546762589928, |
| "grad_norm": 1.0874576236402984, |
| "learning_rate": 3.2596685082872933e-06, |
| "loss": 0.8416, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.4676258992805755, |
| "grad_norm": 1.127326980338214, |
| "learning_rate": 3.535911602209945e-06, |
| "loss": 0.8216, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5035971223021583, |
| "grad_norm": 0.9717363036593373, |
| "learning_rate": 3.812154696132597e-06, |
| "loss": 0.8, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.539568345323741, |
| "grad_norm": 1.0927788503087883, |
| "learning_rate": 4.088397790055249e-06, |
| "loss": 0.8177, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5755395683453237, |
| "grad_norm": 1.0523015167479852, |
| "learning_rate": 4.364640883977901e-06, |
| "loss": 0.8068, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6115107913669064, |
| "grad_norm": 1.0693768198193012, |
| "learning_rate": 4.640883977900552e-06, |
| "loss": 0.7852, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.6474820143884892, |
| "grad_norm": 1.046777484175068, |
| "learning_rate": 4.9171270718232054e-06, |
| "loss": 0.7906, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6834532374100719, |
| "grad_norm": 1.0197896252518375, |
| "learning_rate": 5.193370165745857e-06, |
| "loss": 0.7802, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.7194244604316546, |
| "grad_norm": 1.0435304344679766, |
| "learning_rate": 5.469613259668509e-06, |
| "loss": 0.7939, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7553956834532374, |
| "grad_norm": 1.1192401846344067, |
| "learning_rate": 5.74585635359116e-06, |
| "loss": 0.7666, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.7913669064748201, |
| "grad_norm": 1.0649149685291524, |
| "learning_rate": 6.0220994475138124e-06, |
| "loss": 0.7701, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8273381294964028, |
| "grad_norm": 0.9973750867415139, |
| "learning_rate": 6.298342541436464e-06, |
| "loss": 0.7771, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.8633093525179856, |
| "grad_norm": 0.9416210367748032, |
| "learning_rate": 6.574585635359117e-06, |
| "loss": 0.7638, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.8992805755395683, |
| "grad_norm": 0.8869763185560768, |
| "learning_rate": 6.850828729281769e-06, |
| "loss": 0.7391, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.935251798561151, |
| "grad_norm": 0.9496059833390906, |
| "learning_rate": 7.12707182320442e-06, |
| "loss": 0.7351, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9712230215827338, |
| "grad_norm": 0.9400545014058165, |
| "learning_rate": 7.4033149171270724e-06, |
| "loss": 0.7645, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.0071942446043165, |
| "grad_norm": 0.839356585689632, |
| "learning_rate": 7.679558011049725e-06, |
| "loss": 0.7526, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0431654676258992, |
| "grad_norm": 0.808772401804446, |
| "learning_rate": 7.955801104972377e-06, |
| "loss": 0.7108, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.079136690647482, |
| "grad_norm": 0.9195777885535735, |
| "learning_rate": 8.232044198895029e-06, |
| "loss": 0.7375, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.1151079136690647, |
| "grad_norm": 0.80938147816933, |
| "learning_rate": 8.508287292817681e-06, |
| "loss": 0.7267, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.1510791366906474, |
| "grad_norm": 0.8267082776521085, |
| "learning_rate": 8.784530386740332e-06, |
| "loss": 0.6438, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.1870503597122302, |
| "grad_norm": 0.8810718593300568, |
| "learning_rate": 9.060773480662984e-06, |
| "loss": 0.6825, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.223021582733813, |
| "grad_norm": 0.8978348613093966, |
| "learning_rate": 9.337016574585636e-06, |
| "loss": 0.6717, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.2589928057553956, |
| "grad_norm": 1.652966094402784, |
| "learning_rate": 9.613259668508288e-06, |
| "loss": 0.6801, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.2949640287769784, |
| "grad_norm": 0.8344736270472325, |
| "learning_rate": 9.88950276243094e-06, |
| "loss": 0.6605, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.330935251798561, |
| "grad_norm": 0.7122691201582964, |
| "learning_rate": 9.999916007605012e-06, |
| "loss": 0.6418, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.3669064748201438, |
| "grad_norm": 0.8256873807773062, |
| "learning_rate": 9.999402730965894e-06, |
| "loss": 0.6919, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.4028776978417266, |
| "grad_norm": 1.007866860643835, |
| "learning_rate": 9.9984228879725e-06, |
| "loss": 0.6635, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.4388489208633093, |
| "grad_norm": 0.9234823911775631, |
| "learning_rate": 9.99697657006811e-06, |
| "loss": 0.6714, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.474820143884892, |
| "grad_norm": 0.8634620569129478, |
| "learning_rate": 9.995063912229499e-06, |
| "loss": 0.6859, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.5107913669064748, |
| "grad_norm": 0.9085461363247348, |
| "learning_rate": 9.992685092954347e-06, |
| "loss": 0.6493, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.5467625899280577, |
| "grad_norm": 0.8821359520671455, |
| "learning_rate": 9.989840334244583e-06, |
| "loss": 0.6507, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.5827338129496402, |
| "grad_norm": 0.8741181648998291, |
| "learning_rate": 9.98652990158566e-06, |
| "loss": 0.6321, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.6187050359712232, |
| "grad_norm": 0.8647283657532671, |
| "learning_rate": 9.98275410392178e-06, |
| "loss": 0.623, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.6546762589928057, |
| "grad_norm": 0.8020886077383842, |
| "learning_rate": 9.978513293627068e-06, |
| "loss": 0.652, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.6906474820143886, |
| "grad_norm": 0.9087355156431727, |
| "learning_rate": 9.973807866472679e-06, |
| "loss": 0.6684, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.7266187050359711, |
| "grad_norm": 0.9656812695394805, |
| "learning_rate": 9.968638261589866e-06, |
| "loss": 0.6981, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.762589928057554, |
| "grad_norm": 0.825927566467652, |
| "learning_rate": 9.963004961429004e-06, |
| "loss": 0.6539, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.7985611510791366, |
| "grad_norm": 0.8013671458538759, |
| "learning_rate": 9.956908491714552e-06, |
| "loss": 0.6622, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.8345323741007196, |
| "grad_norm": 0.8459870643511322, |
| "learning_rate": 9.950349421396004e-06, |
| "loss": 0.666, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.870503597122302, |
| "grad_norm": 0.8165693120838837, |
| "learning_rate": 9.943328362594788e-06, |
| "loss": 0.6513, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.906474820143885, |
| "grad_norm": 0.7543912284760205, |
| "learning_rate": 9.935845970547133e-06, |
| "loss": 0.6335, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.9424460431654675, |
| "grad_norm": 0.8724447270007475, |
| "learning_rate": 9.927902943542932e-06, |
| "loss": 0.6575, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.9784172661870505, |
| "grad_norm": 0.8973865160814858, |
| "learning_rate": 9.919500022860559e-06, |
| "loss": 0.6434, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.014388489208633, |
| "grad_norm": 0.8246254108343064, |
| "learning_rate": 9.910637992697707e-06, |
| "loss": 0.6219, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.050359712230216, |
| "grad_norm": 0.9316036149555057, |
| "learning_rate": 9.901317680098187e-06, |
| "loss": 0.552, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.0863309352517985, |
| "grad_norm": 0.7747424028512833, |
| "learning_rate": 9.891539954874758e-06, |
| "loss": 0.5254, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.1223021582733814, |
| "grad_norm": 0.765023536746088, |
| "learning_rate": 9.881305729527944e-06, |
| "loss": 0.5417, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.158273381294964, |
| "grad_norm": 0.7589194364572505, |
| "learning_rate": 9.870615959160876e-06, |
| "loss": 0.5683, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.194244604316547, |
| "grad_norm": 0.6742604327945994, |
| "learning_rate": 9.859471641390161e-06, |
| "loss": 0.5241, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.2302158273381294, |
| "grad_norm": 0.8215048068090698, |
| "learning_rate": 9.84787381625278e-06, |
| "loss": 0.5223, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.2661870503597124, |
| "grad_norm": 0.7846331525765837, |
| "learning_rate": 9.83582356610902e-06, |
| "loss": 0.5091, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.302158273381295, |
| "grad_norm": 0.839958631503289, |
| "learning_rate": 9.823322015541474e-06, |
| "loss": 0.5768, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.338129496402878, |
| "grad_norm": 0.7523314646946071, |
| "learning_rate": 9.810370331250082e-06, |
| "loss": 0.534, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.3741007194244603, |
| "grad_norm": 0.8094520756368656, |
| "learning_rate": 9.796969721943257e-06, |
| "loss": 0.5416, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.4100719424460433, |
| "grad_norm": 0.8079898872263164, |
| "learning_rate": 9.783121438225069e-06, |
| "loss": 0.5407, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.446043165467626, |
| "grad_norm": 0.7534024146698997, |
| "learning_rate": 9.76882677247855e-06, |
| "loss": 0.5129, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.4820143884892087, |
| "grad_norm": 0.6770277707875069, |
| "learning_rate": 9.754087058745074e-06, |
| "loss": 0.5538, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.5179856115107913, |
| "grad_norm": 0.7346186223847041, |
| "learning_rate": 9.738903672599858e-06, |
| "loss": 0.547, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.553956834532374, |
| "grad_norm": 0.8072986015854965, |
| "learning_rate": 9.723278031023587e-06, |
| "loss": 0.5206, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.5899280575539567, |
| "grad_norm": 0.747891183883854, |
| "learning_rate": 9.707211592270183e-06, |
| "loss": 0.5204, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.6258992805755397, |
| "grad_norm": 0.7849096675174403, |
| "learning_rate": 9.690705855730704e-06, |
| "loss": 0.5519, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.661870503597122, |
| "grad_norm": 0.8069083086893393, |
| "learning_rate": 9.673762361793418e-06, |
| "loss": 0.5368, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.697841726618705, |
| "grad_norm": 0.740470534449088, |
| "learning_rate": 9.656382691700053e-06, |
| "loss": 0.5352, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.7338129496402876, |
| "grad_norm": 0.837590597245089, |
| "learning_rate": 9.638568467398215e-06, |
| "loss": 0.5254, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.7697841726618706, |
| "grad_norm": 0.8025317262533782, |
| "learning_rate": 9.620321351390037e-06, |
| "loss": 0.5187, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.805755395683453, |
| "grad_norm": 0.7569128415023125, |
| "learning_rate": 9.601643046577014e-06, |
| "loss": 0.5164, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.841726618705036, |
| "grad_norm": 0.7785055236138303, |
| "learning_rate": 9.582535296101088e-06, |
| "loss": 0.4887, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.8776978417266186, |
| "grad_norm": 0.7719458883382703, |
| "learning_rate": 9.562999883181968e-06, |
| "loss": 0.5173, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.9136690647482015, |
| "grad_norm": 0.8209235363580544, |
| "learning_rate": 9.543038630950706e-06, |
| "loss": 0.511, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.949640287769784, |
| "grad_norm": 0.7253247761806964, |
| "learning_rate": 9.52265340227957e-06, |
| "loss": 0.5388, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.985611510791367, |
| "grad_norm": 0.8010150911294615, |
| "learning_rate": 9.501846099608178e-06, |
| "loss": 0.5279, |
| "step": 415 |
| }, |
| { |
| "epoch": 3.0215827338129495, |
| "grad_norm": 0.7799763170983607, |
| "learning_rate": 9.480618664765956e-06, |
| "loss": 0.444, |
| "step": 420 |
| }, |
| { |
| "epoch": 3.0575539568345325, |
| "grad_norm": 0.8473689333796085, |
| "learning_rate": 9.458973078790925e-06, |
| "loss": 0.4316, |
| "step": 425 |
| }, |
| { |
| "epoch": 3.093525179856115, |
| "grad_norm": 0.7435775172722132, |
| "learning_rate": 9.436911361744817e-06, |
| "loss": 0.3797, |
| "step": 430 |
| }, |
| { |
| "epoch": 3.129496402877698, |
| "grad_norm": 0.8351745729159173, |
| "learning_rate": 9.414435572524551e-06, |
| "loss": 0.3939, |
| "step": 435 |
| }, |
| { |
| "epoch": 3.1654676258992804, |
| "grad_norm": 0.7390963756725633, |
| "learning_rate": 9.391547808670097e-06, |
| "loss": 0.3908, |
| "step": 440 |
| }, |
| { |
| "epoch": 3.2014388489208634, |
| "grad_norm": 0.750139478955199, |
| "learning_rate": 9.368250206168712e-06, |
| "loss": 0.3841, |
| "step": 445 |
| }, |
| { |
| "epoch": 3.237410071942446, |
| "grad_norm": 0.8335176332743932, |
| "learning_rate": 9.344544939255608e-06, |
| "loss": 0.4053, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.273381294964029, |
| "grad_norm": 0.9346298960554003, |
| "learning_rate": 9.320434220211046e-06, |
| "loss": 0.4098, |
| "step": 455 |
| }, |
| { |
| "epoch": 3.3093525179856114, |
| "grad_norm": 0.82446257107031, |
| "learning_rate": 9.295920299153863e-06, |
| "loss": 0.4098, |
| "step": 460 |
| }, |
| { |
| "epoch": 3.3453237410071943, |
| "grad_norm": 0.8355789027376981, |
| "learning_rate": 9.2710054638315e-06, |
| "loss": 0.4131, |
| "step": 465 |
| }, |
| { |
| "epoch": 3.381294964028777, |
| "grad_norm": 0.7578251758484424, |
| "learning_rate": 9.24569203940648e-06, |
| "loss": 0.4199, |
| "step": 470 |
| }, |
| { |
| "epoch": 3.41726618705036, |
| "grad_norm": 2.2996955508669257, |
| "learning_rate": 9.219982388239426e-06, |
| "loss": 0.4351, |
| "step": 475 |
| }, |
| { |
| "epoch": 3.4532374100719423, |
| "grad_norm": 0.7779867034535775, |
| "learning_rate": 9.193878909668591e-06, |
| "loss": 0.407, |
| "step": 480 |
| }, |
| { |
| "epoch": 3.4892086330935252, |
| "grad_norm": 0.7862144605775059, |
| "learning_rate": 9.167384039785943e-06, |
| "loss": 0.3808, |
| "step": 485 |
| }, |
| { |
| "epoch": 3.5251798561151078, |
| "grad_norm": 0.7666253828866978, |
| "learning_rate": 9.140500251209813e-06, |
| "loss": 0.3926, |
| "step": 490 |
| }, |
| { |
| "epoch": 3.5611510791366907, |
| "grad_norm": 0.7839175938965846, |
| "learning_rate": 9.113230052854148e-06, |
| "loss": 0.4075, |
| "step": 495 |
| }, |
| { |
| "epoch": 3.597122302158273, |
| "grad_norm": 0.7521082431006533, |
| "learning_rate": 9.085575989694358e-06, |
| "loss": 0.4119, |
| "step": 500 |
| }, |
| { |
| "epoch": 3.633093525179856, |
| "grad_norm": 0.7916504768929141, |
| "learning_rate": 9.057540642529816e-06, |
| "loss": 0.4028, |
| "step": 505 |
| }, |
| { |
| "epoch": 3.6690647482014387, |
| "grad_norm": 0.8092849334210818, |
| "learning_rate": 9.029126627743003e-06, |
| "loss": 0.4045, |
| "step": 510 |
| }, |
| { |
| "epoch": 3.7050359712230216, |
| "grad_norm": 0.8376631711045777, |
| "learning_rate": 9.000336597055335e-06, |
| "loss": 0.4067, |
| "step": 515 |
| }, |
| { |
| "epoch": 3.741007194244604, |
| "grad_norm": 0.7860913819423667, |
| "learning_rate": 8.971173237279693e-06, |
| "loss": 0.3975, |
| "step": 520 |
| }, |
| { |
| "epoch": 3.776978417266187, |
| "grad_norm": 0.7642723973824054, |
| "learning_rate": 8.941639270069678e-06, |
| "loss": 0.3805, |
| "step": 525 |
| }, |
| { |
| "epoch": 3.81294964028777, |
| "grad_norm": 0.7998252232015214, |
| "learning_rate": 8.911737451665616e-06, |
| "loss": 0.3919, |
| "step": 530 |
| }, |
| { |
| "epoch": 3.8489208633093526, |
| "grad_norm": 0.8048664536386205, |
| "learning_rate": 8.881470572637331e-06, |
| "loss": 0.4037, |
| "step": 535 |
| }, |
| { |
| "epoch": 3.884892086330935, |
| "grad_norm": 0.7500962887822149, |
| "learning_rate": 8.85084145762372e-06, |
| "loss": 0.399, |
| "step": 540 |
| }, |
| { |
| "epoch": 3.920863309352518, |
| "grad_norm": 0.9025166412441733, |
| "learning_rate": 8.819852965069135e-06, |
| "loss": 0.3896, |
| "step": 545 |
| }, |
| { |
| "epoch": 3.956834532374101, |
| "grad_norm": 0.8301594036050014, |
| "learning_rate": 8.788507986956639e-06, |
| "loss": 0.4238, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.9928057553956835, |
| "grad_norm": 0.787139826401505, |
| "learning_rate": 8.756809448538091e-06, |
| "loss": 0.4173, |
| "step": 555 |
| }, |
| { |
| "epoch": 4.028776978417266, |
| "grad_norm": 0.7382914056190027, |
| "learning_rate": 8.724760308061172e-06, |
| "loss": 0.2962, |
| "step": 560 |
| }, |
| { |
| "epoch": 4.0647482014388485, |
| "grad_norm": 0.8624964895203346, |
| "learning_rate": 8.692363556493288e-06, |
| "loss": 0.2789, |
| "step": 565 |
| }, |
| { |
| "epoch": 4.100719424460432, |
| "grad_norm": 0.8180011592364103, |
| "learning_rate": 8.65962221724245e-06, |
| "loss": 0.2595, |
| "step": 570 |
| }, |
| { |
| "epoch": 4.136690647482014, |
| "grad_norm": 0.9258676683243418, |
| "learning_rate": 8.626539345875114e-06, |
| "loss": 0.2799, |
| "step": 575 |
| }, |
| { |
| "epoch": 4.172661870503597, |
| "grad_norm": 0.7487403598173822, |
| "learning_rate": 8.593118029831025e-06, |
| "loss": 0.271, |
| "step": 580 |
| }, |
| { |
| "epoch": 4.2086330935251794, |
| "grad_norm": 0.8833477103510577, |
| "learning_rate": 8.559361388135079e-06, |
| "loss": 0.282, |
| "step": 585 |
| }, |
| { |
| "epoch": 4.244604316546763, |
| "grad_norm": 0.7782248198105488, |
| "learning_rate": 8.525272571106242e-06, |
| "loss": 0.289, |
| "step": 590 |
| }, |
| { |
| "epoch": 4.280575539568345, |
| "grad_norm": 0.8034461020363071, |
| "learning_rate": 8.490854760063551e-06, |
| "loss": 0.2616, |
| "step": 595 |
| }, |
| { |
| "epoch": 4.316546762589928, |
| "grad_norm": 0.793816496134317, |
| "learning_rate": 8.456111167029219e-06, |
| "loss": 0.2813, |
| "step": 600 |
| }, |
| { |
| "epoch": 4.35251798561151, |
| "grad_norm": 0.8190581140556358, |
| "learning_rate": 8.421045034428871e-06, |
| "loss": 0.3232, |
| "step": 605 |
| }, |
| { |
| "epoch": 4.388489208633094, |
| "grad_norm": 0.8718874337244386, |
| "learning_rate": 8.385659634788959e-06, |
| "loss": 0.2989, |
| "step": 610 |
| }, |
| { |
| "epoch": 4.424460431654676, |
| "grad_norm": 0.7812963827584564, |
| "learning_rate": 8.349958270431331e-06, |
| "loss": 0.252, |
| "step": 615 |
| }, |
| { |
| "epoch": 4.460431654676259, |
| "grad_norm": 0.7877942550600477, |
| "learning_rate": 8.313944273165068e-06, |
| "loss": 0.2898, |
| "step": 620 |
| }, |
| { |
| "epoch": 4.496402877697841, |
| "grad_norm": 0.9163627849897873, |
| "learning_rate": 8.277621003975538e-06, |
| "loss": 0.2677, |
| "step": 625 |
| }, |
| { |
| "epoch": 4.532374100719425, |
| "grad_norm": 0.7629297663386118, |
| "learning_rate": 8.240991852710724e-06, |
| "loss": 0.2932, |
| "step": 630 |
| }, |
| { |
| "epoch": 4.568345323741007, |
| "grad_norm": 0.8142905446688702, |
| "learning_rate": 8.204060237764881e-06, |
| "loss": 0.2975, |
| "step": 635 |
| }, |
| { |
| "epoch": 4.60431654676259, |
| "grad_norm": 0.8952608329134608, |
| "learning_rate": 8.166829605759507e-06, |
| "loss": 0.2833, |
| "step": 640 |
| }, |
| { |
| "epoch": 4.640287769784173, |
| "grad_norm": 0.7138017190167328, |
| "learning_rate": 8.1293034312217e-06, |
| "loss": 0.2578, |
| "step": 645 |
| }, |
| { |
| "epoch": 4.676258992805756, |
| "grad_norm": 0.8550352663430013, |
| "learning_rate": 8.091485216259886e-06, |
| "loss": 0.2962, |
| "step": 650 |
| }, |
| { |
| "epoch": 4.712230215827338, |
| "grad_norm": 0.8260458421966028, |
| "learning_rate": 8.053378490236998e-06, |
| "loss": 0.3018, |
| "step": 655 |
| }, |
| { |
| "epoch": 4.748201438848921, |
| "grad_norm": 0.7578084834418747, |
| "learning_rate": 8.014986809441093e-06, |
| "loss": 0.3008, |
| "step": 660 |
| }, |
| { |
| "epoch": 4.784172661870503, |
| "grad_norm": 0.8541676466900879, |
| "learning_rate": 7.976313756753474e-06, |
| "loss": 0.2947, |
| "step": 665 |
| }, |
| { |
| "epoch": 4.820143884892087, |
| "grad_norm": 0.771785543725891, |
| "learning_rate": 7.9373629413143e-06, |
| "loss": 0.2777, |
| "step": 670 |
| }, |
| { |
| "epoch": 4.856115107913669, |
| "grad_norm": 0.7674809820247032, |
| "learning_rate": 7.898137998185788e-06, |
| "loss": 0.2772, |
| "step": 675 |
| }, |
| { |
| "epoch": 4.892086330935252, |
| "grad_norm": 0.8225717543186113, |
| "learning_rate": 7.858642588012957e-06, |
| "loss": 0.2989, |
| "step": 680 |
| }, |
| { |
| "epoch": 4.928057553956835, |
| "grad_norm": 0.7245903343028117, |
| "learning_rate": 7.818880396682007e-06, |
| "loss": 0.2796, |
| "step": 685 |
| }, |
| { |
| "epoch": 4.9640287769784175, |
| "grad_norm": 0.8489860259719797, |
| "learning_rate": 7.778855134976334e-06, |
| "loss": 0.2796, |
| "step": 690 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.7746624242136518, |
| "learning_rate": 7.73857053823023e-06, |
| "loss": 0.2891, |
| "step": 695 |
| }, |
| { |
| "epoch": 5.0359712230215825, |
| "grad_norm": 1.0291353857874774, |
| "learning_rate": 7.698030365980265e-06, |
| "loss": 0.2075, |
| "step": 700 |
| }, |
| { |
| "epoch": 5.071942446043165, |
| "grad_norm": 0.9042435786419071, |
| "learning_rate": 7.657238401614459e-06, |
| "loss": 0.1749, |
| "step": 705 |
| }, |
| { |
| "epoch": 5.107913669064748, |
| "grad_norm": 0.895579817335833, |
| "learning_rate": 7.616198452019176e-06, |
| "loss": 0.1904, |
| "step": 710 |
| }, |
| { |
| "epoch": 5.143884892086331, |
| "grad_norm": 0.790487071528411, |
| "learning_rate": 7.57491434722386e-06, |
| "loss": 0.172, |
| "step": 715 |
| }, |
| { |
| "epoch": 5.179856115107913, |
| "grad_norm": 1.0172179899933327, |
| "learning_rate": 7.5333899400435986e-06, |
| "loss": 0.2023, |
| "step": 720 |
| }, |
| { |
| "epoch": 5.215827338129497, |
| "grad_norm": 0.8101299548770012, |
| "learning_rate": 7.491629105719559e-06, |
| "loss": 0.1732, |
| "step": 725 |
| }, |
| { |
| "epoch": 5.251798561151079, |
| "grad_norm": 0.8041008787827414, |
| "learning_rate": 7.44963574155733e-06, |
| "loss": 0.1901, |
| "step": 730 |
| }, |
| { |
| "epoch": 5.287769784172662, |
| "grad_norm": 0.7679456105085443, |
| "learning_rate": 7.407413766563218e-06, |
| "loss": 0.1987, |
| "step": 735 |
| }, |
| { |
| "epoch": 5.323741007194244, |
| "grad_norm": 0.9189882418553729, |
| "learning_rate": 7.3649671210785024e-06, |
| "loss": 0.1756, |
| "step": 740 |
| }, |
| { |
| "epoch": 5.359712230215827, |
| "grad_norm": 0.8739708430814714, |
| "learning_rate": 7.322299766411702e-06, |
| "loss": 0.1858, |
| "step": 745 |
| }, |
| { |
| "epoch": 5.39568345323741, |
| "grad_norm": 0.7930479843078312, |
| "learning_rate": 7.279415684468893e-06, |
| "loss": 0.1985, |
| "step": 750 |
| }, |
| { |
| "epoch": 5.431654676258993, |
| "grad_norm": 0.8224192025147354, |
| "learning_rate": 7.236318877382098e-06, |
| "loss": 0.1773, |
| "step": 755 |
| }, |
| { |
| "epoch": 5.467625899280575, |
| "grad_norm": 0.8202045692138589, |
| "learning_rate": 7.1930133671357915e-06, |
| "loss": 0.1917, |
| "step": 760 |
| }, |
| { |
| "epoch": 5.503597122302159, |
| "grad_norm": 0.9216022917795681, |
| "learning_rate": 7.149503195191553e-06, |
| "loss": 0.1868, |
| "step": 765 |
| }, |
| { |
| "epoch": 5.539568345323741, |
| "grad_norm": 0.8282102212656368, |
| "learning_rate": 7.1057924221108856e-06, |
| "loss": 0.1793, |
| "step": 770 |
| }, |
| { |
| "epoch": 5.575539568345324, |
| "grad_norm": 0.9944130581062844, |
| "learning_rate": 7.061885127176285e-06, |
| "loss": 0.195, |
| "step": 775 |
| }, |
| { |
| "epoch": 5.611510791366906, |
| "grad_norm": 0.8384690598165658, |
| "learning_rate": 7.017785408010533e-06, |
| "loss": 0.1733, |
| "step": 780 |
| }, |
| { |
| "epoch": 5.647482014388489, |
| "grad_norm": 0.8126766061557951, |
| "learning_rate": 6.973497380194292e-06, |
| "loss": 0.1893, |
| "step": 785 |
| }, |
| { |
| "epoch": 5.683453237410072, |
| "grad_norm": 0.7781648778019523, |
| "learning_rate": 6.929025176882016e-06, |
| "loss": 0.1671, |
| "step": 790 |
| }, |
| { |
| "epoch": 5.719424460431655, |
| "grad_norm": 0.8950618409041426, |
| "learning_rate": 6.884372948416232e-06, |
| "loss": 0.1844, |
| "step": 795 |
| }, |
| { |
| "epoch": 5.755395683453237, |
| "grad_norm": 0.8460453893006227, |
| "learning_rate": 6.839544861940214e-06, |
| "loss": 0.1927, |
| "step": 800 |
| }, |
| { |
| "epoch": 5.7913669064748206, |
| "grad_norm": 0.7684170747964488, |
| "learning_rate": 6.794545101009074e-06, |
| "loss": 0.1856, |
| "step": 805 |
| }, |
| { |
| "epoch": 5.827338129496403, |
| "grad_norm": 0.7353554154462942, |
| "learning_rate": 6.74937786519935e-06, |
| "loss": 0.1827, |
| "step": 810 |
| }, |
| { |
| "epoch": 5.863309352517986, |
| "grad_norm": 1.2438204201134049, |
| "learning_rate": 6.704047369717075e-06, |
| "loss": 0.1789, |
| "step": 815 |
| }, |
| { |
| "epoch": 5.899280575539568, |
| "grad_norm": 0.8460577382893343, |
| "learning_rate": 6.65855784500439e-06, |
| "loss": 0.1892, |
| "step": 820 |
| }, |
| { |
| "epoch": 5.935251798561151, |
| "grad_norm": 0.7772437606617016, |
| "learning_rate": 6.612913536344755e-06, |
| "loss": 0.2007, |
| "step": 825 |
| }, |
| { |
| "epoch": 5.971223021582734, |
| "grad_norm": 0.8274969656085451, |
| "learning_rate": 6.5671187034667465e-06, |
| "loss": 0.1804, |
| "step": 830 |
| }, |
| { |
| "epoch": 6.0071942446043165, |
| "grad_norm": 0.8049472498410428, |
| "learning_rate": 6.521177620146525e-06, |
| "loss": 0.1698, |
| "step": 835 |
| }, |
| { |
| "epoch": 6.043165467625899, |
| "grad_norm": 0.8195061687563975, |
| "learning_rate": 6.475094573808994e-06, |
| "loss": 0.1189, |
| "step": 840 |
| }, |
| { |
| "epoch": 6.079136690647482, |
| "grad_norm": 0.7117012258140467, |
| "learning_rate": 6.42887386512767e-06, |
| "loss": 0.1092, |
| "step": 845 |
| }, |
| { |
| "epoch": 6.115107913669065, |
| "grad_norm": 0.8585323527604425, |
| "learning_rate": 6.3825198076233255e-06, |
| "loss": 0.1172, |
| "step": 850 |
| }, |
| { |
| "epoch": 6.151079136690647, |
| "grad_norm": 0.719310146465657, |
| "learning_rate": 6.336036727261438e-06, |
| "loss": 0.1284, |
| "step": 855 |
| }, |
| { |
| "epoch": 6.18705035971223, |
| "grad_norm": 0.7584056325173733, |
| "learning_rate": 6.289428962048467e-06, |
| "loss": 0.0971, |
| "step": 860 |
| }, |
| { |
| "epoch": 6.223021582733813, |
| "grad_norm": 0.7585595993348573, |
| "learning_rate": 6.242700861627015e-06, |
| "loss": 0.1151, |
| "step": 865 |
| }, |
| { |
| "epoch": 6.258992805755396, |
| "grad_norm": 0.7847975616110491, |
| "learning_rate": 6.195856786869893e-06, |
| "loss": 0.1069, |
| "step": 870 |
| }, |
| { |
| "epoch": 6.294964028776978, |
| "grad_norm": 0.8845845513208976, |
| "learning_rate": 6.148901109473153e-06, |
| "loss": 0.1069, |
| "step": 875 |
| }, |
| { |
| "epoch": 6.330935251798561, |
| "grad_norm": 0.755964158625991, |
| "learning_rate": 6.101838211548099e-06, |
| "loss": 0.113, |
| "step": 880 |
| }, |
| { |
| "epoch": 6.366906474820144, |
| "grad_norm": 0.7457589645048261, |
| "learning_rate": 6.054672485212327e-06, |
| "loss": 0.1206, |
| "step": 885 |
| }, |
| { |
| "epoch": 6.402877697841727, |
| "grad_norm": 0.8137239720399341, |
| "learning_rate": 6.007408332179836e-06, |
| "loss": 0.1096, |
| "step": 890 |
| }, |
| { |
| "epoch": 6.438848920863309, |
| "grad_norm": 0.7371197763823699, |
| "learning_rate": 5.960050163350235e-06, |
| "loss": 0.1028, |
| "step": 895 |
| }, |
| { |
| "epoch": 6.474820143884892, |
| "grad_norm": 0.7124693107117961, |
| "learning_rate": 5.9126023983971114e-06, |
| "loss": 0.1128, |
| "step": 900 |
| }, |
| { |
| "epoch": 6.510791366906475, |
| "grad_norm": 0.7750858789601416, |
| "learning_rate": 5.865069465355551e-06, |
| "loss": 0.1297, |
| "step": 905 |
| }, |
| { |
| "epoch": 6.546762589928058, |
| "grad_norm": 0.8213615097610627, |
| "learning_rate": 5.817455800208901e-06, |
| "loss": 0.1277, |
| "step": 910 |
| }, |
| { |
| "epoch": 6.58273381294964, |
| "grad_norm": 0.7304317963820385, |
| "learning_rate": 5.769765846474794e-06, |
| "loss": 0.1125, |
| "step": 915 |
| }, |
| { |
| "epoch": 6.618705035971223, |
| "grad_norm": 0.8292934519593511, |
| "learning_rate": 5.722004054790442e-06, |
| "loss": 0.1238, |
| "step": 920 |
| }, |
| { |
| "epoch": 6.654676258992806, |
| "grad_norm": 0.7036770476021397, |
| "learning_rate": 5.674174882497297e-06, |
| "loss": 0.1105, |
| "step": 925 |
| }, |
| { |
| "epoch": 6.690647482014389, |
| "grad_norm": 0.8766920039902681, |
| "learning_rate": 5.626282793225066e-06, |
| "loss": 0.1134, |
| "step": 930 |
| }, |
| { |
| "epoch": 6.726618705035971, |
| "grad_norm": 0.8803786508866736, |
| "learning_rate": 5.578332256475144e-06, |
| "loss": 0.116, |
| "step": 935 |
| }, |
| { |
| "epoch": 6.762589928057554, |
| "grad_norm": 0.8141270371349804, |
| "learning_rate": 5.530327747203507e-06, |
| "loss": 0.1175, |
| "step": 940 |
| }, |
| { |
| "epoch": 6.798561151079137, |
| "grad_norm": 0.7327559370982355, |
| "learning_rate": 5.482273745403082e-06, |
| "loss": 0.1084, |
| "step": 945 |
| }, |
| { |
| "epoch": 6.83453237410072, |
| "grad_norm": 0.6975665306165219, |
| "learning_rate": 5.434174735685658e-06, |
| "loss": 0.125, |
| "step": 950 |
| }, |
| { |
| "epoch": 6.870503597122302, |
| "grad_norm": 0.7213427193750023, |
| "learning_rate": 5.3860352068633635e-06, |
| "loss": 0.1078, |
| "step": 955 |
| }, |
| { |
| "epoch": 6.906474820143885, |
| "grad_norm": 0.7912624637000346, |
| "learning_rate": 5.337859651529747e-06, |
| "loss": 0.1089, |
| "step": 960 |
| }, |
| { |
| "epoch": 6.942446043165468, |
| "grad_norm": 0.8661381989776245, |
| "learning_rate": 5.289652565640513e-06, |
| "loss": 0.1201, |
| "step": 965 |
| }, |
| { |
| "epoch": 6.9784172661870505, |
| "grad_norm": 0.8889995059859764, |
| "learning_rate": 5.241418448093931e-06, |
| "loss": 0.1045, |
| "step": 970 |
| }, |
| { |
| "epoch": 7.014388489208633, |
| "grad_norm": 0.6848728107959661, |
| "learning_rate": 5.193161800310991e-06, |
| "loss": 0.0868, |
| "step": 975 |
| }, |
| { |
| "epoch": 7.0503597122302155, |
| "grad_norm": 0.8191951918685765, |
| "learning_rate": 5.144887125815301e-06, |
| "loss": 0.0688, |
| "step": 980 |
| }, |
| { |
| "epoch": 7.086330935251799, |
| "grad_norm": 0.7083429689515068, |
| "learning_rate": 5.0965989298128e-06, |
| "loss": 0.0717, |
| "step": 985 |
| }, |
| { |
| "epoch": 7.122302158273381, |
| "grad_norm": 0.7638610219919097, |
| "learning_rate": 5.048301718771317e-06, |
| "loss": 0.0695, |
| "step": 990 |
| }, |
| { |
| "epoch": 7.158273381294964, |
| "grad_norm": 0.7485748648028515, |
| "learning_rate": 5e-06, |
| "loss": 0.0687, |
| "step": 995 |
| }, |
| { |
| "epoch": 7.194244604316546, |
| "grad_norm": 0.7050027098157597, |
| "learning_rate": 4.951698281228686e-06, |
| "loss": 0.0729, |
| "step": 1000 |
| }, |
| { |
| "epoch": 7.23021582733813, |
| "grad_norm": 0.657912930608705, |
| "learning_rate": 4.903401070187201e-06, |
| "loss": 0.0704, |
| "step": 1005 |
| }, |
| { |
| "epoch": 7.266187050359712, |
| "grad_norm": 0.5398206206893736, |
| "learning_rate": 4.855112874184701e-06, |
| "loss": 0.058, |
| "step": 1010 |
| }, |
| { |
| "epoch": 7.302158273381295, |
| "grad_norm": 0.5495044551987519, |
| "learning_rate": 4.806838199689009e-06, |
| "loss": 0.0684, |
| "step": 1015 |
| }, |
| { |
| "epoch": 7.338129496402877, |
| "grad_norm": 0.661299793154578, |
| "learning_rate": 4.75858155190607e-06, |
| "loss": 0.067, |
| "step": 1020 |
| }, |
| { |
| "epoch": 7.374100719424461, |
| "grad_norm": 0.6484353795344787, |
| "learning_rate": 4.710347434359489e-06, |
| "loss": 0.0649, |
| "step": 1025 |
| }, |
| { |
| "epoch": 7.410071942446043, |
| "grad_norm": 0.6667433332665962, |
| "learning_rate": 4.662140348470253e-06, |
| "loss": 0.066, |
| "step": 1030 |
| }, |
| { |
| "epoch": 7.446043165467626, |
| "grad_norm": 0.6727307298334666, |
| "learning_rate": 4.613964793136637e-06, |
| "loss": 0.0677, |
| "step": 1035 |
| }, |
| { |
| "epoch": 7.482014388489208, |
| "grad_norm": 0.6730404475842242, |
| "learning_rate": 4.565825264314344e-06, |
| "loss": 0.072, |
| "step": 1040 |
| }, |
| { |
| "epoch": 7.517985611510792, |
| "grad_norm": 0.5740431223544631, |
| "learning_rate": 4.51772625459692e-06, |
| "loss": 0.0679, |
| "step": 1045 |
| }, |
| { |
| "epoch": 7.553956834532374, |
| "grad_norm": 0.6895089066435401, |
| "learning_rate": 4.469672252796495e-06, |
| "loss": 0.0689, |
| "step": 1050 |
| }, |
| { |
| "epoch": 7.589928057553957, |
| "grad_norm": 0.781118218856585, |
| "learning_rate": 4.421667743524856e-06, |
| "loss": 0.0698, |
| "step": 1055 |
| }, |
| { |
| "epoch": 7.625899280575539, |
| "grad_norm": 0.5852743633262958, |
| "learning_rate": 4.373717206774935e-06, |
| "loss": 0.0661, |
| "step": 1060 |
| }, |
| { |
| "epoch": 7.661870503597123, |
| "grad_norm": 0.8465001999729229, |
| "learning_rate": 4.3258251175027036e-06, |
| "loss": 0.0699, |
| "step": 1065 |
| }, |
| { |
| "epoch": 7.697841726618705, |
| "grad_norm": 0.6063388727288704, |
| "learning_rate": 4.277995945209558e-06, |
| "loss": 0.0627, |
| "step": 1070 |
| }, |
| { |
| "epoch": 7.733812949640288, |
| "grad_norm": 0.6267594676130663, |
| "learning_rate": 4.230234153525207e-06, |
| "loss": 0.0729, |
| "step": 1075 |
| }, |
| { |
| "epoch": 7.76978417266187, |
| "grad_norm": 0.9079983203188293, |
| "learning_rate": 4.182544199791102e-06, |
| "loss": 0.0584, |
| "step": 1080 |
| }, |
| { |
| "epoch": 7.805755395683454, |
| "grad_norm": 0.7011901007125456, |
| "learning_rate": 4.1349305346444515e-06, |
| "loss": 0.072, |
| "step": 1085 |
| }, |
| { |
| "epoch": 7.841726618705036, |
| "grad_norm": 0.5894372195866899, |
| "learning_rate": 4.08739760160289e-06, |
| "loss": 0.0681, |
| "step": 1090 |
| }, |
| { |
| "epoch": 7.877697841726619, |
| "grad_norm": 0.7312384684995438, |
| "learning_rate": 4.039949836649765e-06, |
| "loss": 0.0652, |
| "step": 1095 |
| }, |
| { |
| "epoch": 7.913669064748201, |
| "grad_norm": 0.7889140825844604, |
| "learning_rate": 3.992591667820166e-06, |
| "loss": 0.0675, |
| "step": 1100 |
| }, |
| { |
| "epoch": 7.9496402877697845, |
| "grad_norm": 0.566611163942455, |
| "learning_rate": 3.945327514787676e-06, |
| "loss": 0.0624, |
| "step": 1105 |
| }, |
| { |
| "epoch": 7.985611510791367, |
| "grad_norm": 0.5476455607834616, |
| "learning_rate": 3.8981617884519015e-06, |
| "loss": 0.0747, |
| "step": 1110 |
| }, |
| { |
| "epoch": 8.02158273381295, |
| "grad_norm": 0.5147724688346386, |
| "learning_rate": 3.851098890526848e-06, |
| "loss": 0.0548, |
| "step": 1115 |
| }, |
| { |
| "epoch": 8.057553956834532, |
| "grad_norm": 0.6656915502998506, |
| "learning_rate": 3.80414321313011e-06, |
| "loss": 0.0439, |
| "step": 1120 |
| }, |
| { |
| "epoch": 8.093525179856115, |
| "grad_norm": 0.5212083633131025, |
| "learning_rate": 3.7572991383729855e-06, |
| "loss": 0.0416, |
| "step": 1125 |
| }, |
| { |
| "epoch": 8.129496402877697, |
| "grad_norm": 0.7168392620555522, |
| "learning_rate": 3.7105710379515335e-06, |
| "loss": 0.0489, |
| "step": 1130 |
| }, |
| { |
| "epoch": 8.16546762589928, |
| "grad_norm": 0.5342340402870694, |
| "learning_rate": 3.6639632727385616e-06, |
| "loss": 0.0453, |
| "step": 1135 |
| }, |
| { |
| "epoch": 8.201438848920864, |
| "grad_norm": 0.4424742834921625, |
| "learning_rate": 3.6174801923766762e-06, |
| "loss": 0.0438, |
| "step": 1140 |
| }, |
| { |
| "epoch": 8.237410071942445, |
| "grad_norm": 0.4371446386393137, |
| "learning_rate": 3.5711261348723327e-06, |
| "loss": 0.0421, |
| "step": 1145 |
| }, |
| { |
| "epoch": 8.273381294964029, |
| "grad_norm": 0.44606778279830905, |
| "learning_rate": 3.5249054261910067e-06, |
| "loss": 0.0449, |
| "step": 1150 |
| }, |
| { |
| "epoch": 8.309352517985612, |
| "grad_norm": 0.4196749502255892, |
| "learning_rate": 3.478822379853477e-06, |
| "loss": 0.0456, |
| "step": 1155 |
| }, |
| { |
| "epoch": 8.345323741007194, |
| "grad_norm": 0.45404549332701183, |
| "learning_rate": 3.432881296533257e-06, |
| "loss": 0.0446, |
| "step": 1160 |
| }, |
| { |
| "epoch": 8.381294964028777, |
| "grad_norm": 0.47041138885456785, |
| "learning_rate": 3.3870864636552468e-06, |
| "loss": 0.0429, |
| "step": 1165 |
| }, |
| { |
| "epoch": 8.417266187050359, |
| "grad_norm": 0.4574468532910372, |
| "learning_rate": 3.3414421549956115e-06, |
| "loss": 0.0485, |
| "step": 1170 |
| }, |
| { |
| "epoch": 8.453237410071942, |
| "grad_norm": 0.5297931614784003, |
| "learning_rate": 3.2959526302829257e-06, |
| "loss": 0.0361, |
| "step": 1175 |
| }, |
| { |
| "epoch": 8.489208633093526, |
| "grad_norm": 0.49346890066053606, |
| "learning_rate": 3.250622134800651e-06, |
| "loss": 0.0352, |
| "step": 1180 |
| }, |
| { |
| "epoch": 8.525179856115107, |
| "grad_norm": 0.5225771130962942, |
| "learning_rate": 3.205454898990928e-06, |
| "loss": 0.0481, |
| "step": 1185 |
| }, |
| { |
| "epoch": 8.56115107913669, |
| "grad_norm": 0.44436107460781493, |
| "learning_rate": 3.160455138059788e-06, |
| "loss": 0.0405, |
| "step": 1190 |
| }, |
| { |
| "epoch": 8.597122302158274, |
| "grad_norm": 0.44898570613304095, |
| "learning_rate": 3.115627051583768e-06, |
| "loss": 0.0438, |
| "step": 1195 |
| }, |
| { |
| "epoch": 8.633093525179856, |
| "grad_norm": 0.5492491751490948, |
| "learning_rate": 3.070974823117986e-06, |
| "loss": 0.0396, |
| "step": 1200 |
| }, |
| { |
| "epoch": 8.66906474820144, |
| "grad_norm": 0.565956815075197, |
| "learning_rate": 3.026502619805709e-06, |
| "loss": 0.048, |
| "step": 1205 |
| }, |
| { |
| "epoch": 8.70503597122302, |
| "grad_norm": 0.5284087475275612, |
| "learning_rate": 2.9822145919894676e-06, |
| "loss": 0.0384, |
| "step": 1210 |
| }, |
| { |
| "epoch": 8.741007194244604, |
| "grad_norm": 0.5103352561656974, |
| "learning_rate": 2.938114872823716e-06, |
| "loss": 0.0407, |
| "step": 1215 |
| }, |
| { |
| "epoch": 8.776978417266188, |
| "grad_norm": 0.6098332646000372, |
| "learning_rate": 2.8942075778891153e-06, |
| "loss": 0.0414, |
| "step": 1220 |
| }, |
| { |
| "epoch": 8.81294964028777, |
| "grad_norm": 0.5038330297794213, |
| "learning_rate": 2.8504968048084492e-06, |
| "loss": 0.0414, |
| "step": 1225 |
| }, |
| { |
| "epoch": 8.848920863309353, |
| "grad_norm": 0.5302618793861339, |
| "learning_rate": 2.806986632864208e-06, |
| "loss": 0.0377, |
| "step": 1230 |
| }, |
| { |
| "epoch": 8.884892086330936, |
| "grad_norm": 0.5568603382860081, |
| "learning_rate": 2.7636811226179027e-06, |
| "loss": 0.0433, |
| "step": 1235 |
| }, |
| { |
| "epoch": 8.920863309352518, |
| "grad_norm": 0.43414177083233707, |
| "learning_rate": 2.7205843155311098e-06, |
| "loss": 0.0407, |
| "step": 1240 |
| }, |
| { |
| "epoch": 8.956834532374101, |
| "grad_norm": 0.4216923605954666, |
| "learning_rate": 2.6777002335882996e-06, |
| "loss": 0.0432, |
| "step": 1245 |
| }, |
| { |
| "epoch": 8.992805755395683, |
| "grad_norm": 0.37758354262761024, |
| "learning_rate": 2.6350328789215e-06, |
| "loss": 0.0437, |
| "step": 1250 |
| }, |
| { |
| "epoch": 9.028776978417266, |
| "grad_norm": 0.3581546154815773, |
| "learning_rate": 2.5925862334367813e-06, |
| "loss": 0.035, |
| "step": 1255 |
| }, |
| { |
| "epoch": 9.06474820143885, |
| "grad_norm": 0.3170867641515182, |
| "learning_rate": 2.550364258442671e-06, |
| "loss": 0.0329, |
| "step": 1260 |
| }, |
| { |
| "epoch": 9.100719424460431, |
| "grad_norm": 0.38183737001651963, |
| "learning_rate": 2.5083708942804446e-06, |
| "loss": 0.0313, |
| "step": 1265 |
| }, |
| { |
| "epoch": 9.136690647482014, |
| "grad_norm": 0.2622381036741509, |
| "learning_rate": 2.466610059956401e-06, |
| "loss": 0.0299, |
| "step": 1270 |
| }, |
| { |
| "epoch": 9.172661870503598, |
| "grad_norm": 0.3649107007802287, |
| "learning_rate": 2.425085652776141e-06, |
| "loss": 0.0298, |
| "step": 1275 |
| }, |
| { |
| "epoch": 9.20863309352518, |
| "grad_norm": 0.3672806177221026, |
| "learning_rate": 2.383801547980826e-06, |
| "loss": 0.0313, |
| "step": 1280 |
| }, |
| { |
| "epoch": 9.244604316546763, |
| "grad_norm": 0.2692943801711555, |
| "learning_rate": 2.342761598385543e-06, |
| "loss": 0.0299, |
| "step": 1285 |
| }, |
| { |
| "epoch": 9.280575539568344, |
| "grad_norm": 0.5182589947207659, |
| "learning_rate": 2.3019696340197358e-06, |
| "loss": 0.0315, |
| "step": 1290 |
| }, |
| { |
| "epoch": 9.316546762589928, |
| "grad_norm": 0.3084054383118723, |
| "learning_rate": 2.2614294617697718e-06, |
| "loss": 0.031, |
| "step": 1295 |
| }, |
| { |
| "epoch": 9.352517985611511, |
| "grad_norm": 0.31260300284142956, |
| "learning_rate": 2.221144865023666e-06, |
| "loss": 0.0355, |
| "step": 1300 |
| }, |
| { |
| "epoch": 9.388489208633093, |
| "grad_norm": 0.41419458958369776, |
| "learning_rate": 2.181119603317994e-06, |
| "loss": 0.0303, |
| "step": 1305 |
| }, |
| { |
| "epoch": 9.424460431654676, |
| "grad_norm": 0.3777963995786534, |
| "learning_rate": 2.141357411987044e-06, |
| "loss": 0.0346, |
| "step": 1310 |
| }, |
| { |
| "epoch": 9.46043165467626, |
| "grad_norm": 0.36001008845633387, |
| "learning_rate": 2.1018620018142145e-06, |
| "loss": 0.0304, |
| "step": 1315 |
| }, |
| { |
| "epoch": 9.496402877697841, |
| "grad_norm": 0.3374166061680025, |
| "learning_rate": 2.062637058685701e-06, |
| "loss": 0.0321, |
| "step": 1320 |
| }, |
| { |
| "epoch": 9.532374100719425, |
| "grad_norm": 0.3122428132703056, |
| "learning_rate": 2.023686243246527e-06, |
| "loss": 0.0311, |
| "step": 1325 |
| }, |
| { |
| "epoch": 9.568345323741006, |
| "grad_norm": 0.31657772369937653, |
| "learning_rate": 1.9850131905589065e-06, |
| "loss": 0.0352, |
| "step": 1330 |
| }, |
| { |
| "epoch": 9.60431654676259, |
| "grad_norm": 0.3556383226917608, |
| "learning_rate": 1.9466215097630027e-06, |
| "loss": 0.0307, |
| "step": 1335 |
| }, |
| { |
| "epoch": 9.640287769784173, |
| "grad_norm": 0.3018207938690679, |
| "learning_rate": 1.908514783740114e-06, |
| "loss": 0.0304, |
| "step": 1340 |
| }, |
| { |
| "epoch": 9.676258992805755, |
| "grad_norm": 0.33472427448796227, |
| "learning_rate": 1.8706965687783013e-06, |
| "loss": 0.0318, |
| "step": 1345 |
| }, |
| { |
| "epoch": 9.712230215827338, |
| "grad_norm": 0.3014237413240292, |
| "learning_rate": 1.8331703942404932e-06, |
| "loss": 0.0345, |
| "step": 1350 |
| }, |
| { |
| "epoch": 9.748201438848922, |
| "grad_norm": 0.2795954718654732, |
| "learning_rate": 1.7959397622351199e-06, |
| "loss": 0.0321, |
| "step": 1355 |
| }, |
| { |
| "epoch": 9.784172661870503, |
| "grad_norm": 0.3404458491839953, |
| "learning_rate": 1.7590081472892779e-06, |
| "loss": 0.026, |
| "step": 1360 |
| }, |
| { |
| "epoch": 9.820143884892087, |
| "grad_norm": 0.27845491491806207, |
| "learning_rate": 1.7223789960244636e-06, |
| "loss": 0.0296, |
| "step": 1365 |
| }, |
| { |
| "epoch": 9.85611510791367, |
| "grad_norm": 0.3612513367851547, |
| "learning_rate": 1.686055726834932e-06, |
| "loss": 0.03, |
| "step": 1370 |
| }, |
| { |
| "epoch": 9.892086330935252, |
| "grad_norm": 0.4797636206277336, |
| "learning_rate": 1.6500417295686705e-06, |
| "loss": 0.03, |
| "step": 1375 |
| }, |
| { |
| "epoch": 9.928057553956835, |
| "grad_norm": 0.36211393385671814, |
| "learning_rate": 1.614340365211044e-06, |
| "loss": 0.0317, |
| "step": 1380 |
| }, |
| { |
| "epoch": 9.964028776978417, |
| "grad_norm": 0.4460467422949885, |
| "learning_rate": 1.5789549655711283e-06, |
| "loss": 0.0308, |
| "step": 1385 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.3268330088868262, |
| "learning_rate": 1.5438888329707824e-06, |
| "loss": 0.0315, |
| "step": 1390 |
| }, |
| { |
| "epoch": 10.035971223021583, |
| "grad_norm": 0.25099763594150687, |
| "learning_rate": 1.5091452399364514e-06, |
| "loss": 0.0259, |
| "step": 1395 |
| }, |
| { |
| "epoch": 10.071942446043165, |
| "grad_norm": 0.21916155615378147, |
| "learning_rate": 1.4747274288937597e-06, |
| "loss": 0.0262, |
| "step": 1400 |
| }, |
| { |
| "epoch": 10.107913669064748, |
| "grad_norm": 0.1703480324817164, |
| "learning_rate": 1.4406386118649219e-06, |
| "loss": 0.0264, |
| "step": 1405 |
| }, |
| { |
| "epoch": 10.14388489208633, |
| "grad_norm": 0.19666841057864862, |
| "learning_rate": 1.4068819701689761e-06, |
| "loss": 0.0271, |
| "step": 1410 |
| }, |
| { |
| "epoch": 10.179856115107913, |
| "grad_norm": 0.22016830192324838, |
| "learning_rate": 1.3734606541248868e-06, |
| "loss": 0.0251, |
| "step": 1415 |
| }, |
| { |
| "epoch": 10.215827338129497, |
| "grad_norm": 0.2717923478176765, |
| "learning_rate": 1.3403777827575515e-06, |
| "loss": 0.0275, |
| "step": 1420 |
| }, |
| { |
| "epoch": 10.251798561151078, |
| "grad_norm": 0.37675768752343775, |
| "learning_rate": 1.3076364435067145e-06, |
| "loss": 0.026, |
| "step": 1425 |
| }, |
| { |
| "epoch": 10.287769784172662, |
| "grad_norm": 0.21221208648318335, |
| "learning_rate": 1.2752396919388293e-06, |
| "loss": 0.0273, |
| "step": 1430 |
| }, |
| { |
| "epoch": 10.323741007194245, |
| "grad_norm": 0.19655351295102655, |
| "learning_rate": 1.2431905514619092e-06, |
| "loss": 0.0282, |
| "step": 1435 |
| }, |
| { |
| "epoch": 10.359712230215827, |
| "grad_norm": 0.20261972203630707, |
| "learning_rate": 1.2114920130433644e-06, |
| "loss": 0.0246, |
| "step": 1440 |
| }, |
| { |
| "epoch": 10.39568345323741, |
| "grad_norm": 0.2010958074650419, |
| "learning_rate": 1.1801470349308664e-06, |
| "loss": 0.0252, |
| "step": 1445 |
| }, |
| { |
| "epoch": 10.431654676258994, |
| "grad_norm": 0.19932681605206862, |
| "learning_rate": 1.1491585423762818e-06, |
| "loss": 0.0251, |
| "step": 1450 |
| }, |
| { |
| "epoch": 10.467625899280575, |
| "grad_norm": 0.20849253670352091, |
| "learning_rate": 1.1185294273626685e-06, |
| "loss": 0.0261, |
| "step": 1455 |
| }, |
| { |
| "epoch": 10.503597122302159, |
| "grad_norm": 0.1639683839169035, |
| "learning_rate": 1.0882625483343846e-06, |
| "loss": 0.0257, |
| "step": 1460 |
| }, |
| { |
| "epoch": 10.53956834532374, |
| "grad_norm": 0.2736139772755203, |
| "learning_rate": 1.0583607299303245e-06, |
| "loss": 0.0274, |
| "step": 1465 |
| }, |
| { |
| "epoch": 10.575539568345324, |
| "grad_norm": 0.22918950850642805, |
| "learning_rate": 1.028826762720308e-06, |
| "loss": 0.0275, |
| "step": 1470 |
| }, |
| { |
| "epoch": 10.611510791366907, |
| "grad_norm": 0.314745772340147, |
| "learning_rate": 9.996634029446672e-07, |
| "loss": 0.0272, |
| "step": 1475 |
| }, |
| { |
| "epoch": 10.647482014388489, |
| "grad_norm": 0.17606113078594732, |
| "learning_rate": 9.708733722569996e-07, |
| "loss": 0.0239, |
| "step": 1480 |
| }, |
| { |
| "epoch": 10.683453237410072, |
| "grad_norm": 0.17429980721967395, |
| "learning_rate": 9.424593574701845e-07, |
| "loss": 0.0301, |
| "step": 1485 |
| }, |
| { |
| "epoch": 10.719424460431654, |
| "grad_norm": 0.1852236456926952, |
| "learning_rate": 9.144240103056439e-07, |
| "loss": 0.0246, |
| "step": 1490 |
| }, |
| { |
| "epoch": 10.755395683453237, |
| "grad_norm": 0.22805051647340693, |
| "learning_rate": 8.867699471458541e-07, |
| "loss": 0.0235, |
| "step": 1495 |
| }, |
| { |
| "epoch": 10.79136690647482, |
| "grad_norm": 0.19411564486758986, |
| "learning_rate": 8.59499748790188e-07, |
| "loss": 0.0273, |
| "step": 1500 |
| }, |
| { |
| "epoch": 10.827338129496402, |
| "grad_norm": 0.241482451282526, |
| "learning_rate": 8.326159602140594e-07, |
| "loss": 0.0283, |
| "step": 1505 |
| }, |
| { |
| "epoch": 10.863309352517986, |
| "grad_norm": 0.2345245699793013, |
| "learning_rate": 8.061210903314104e-07, |
| "loss": 0.0291, |
| "step": 1510 |
| }, |
| { |
| "epoch": 10.899280575539569, |
| "grad_norm": 0.47802038072543573, |
| "learning_rate": 7.800176117605762e-07, |
| "loss": 0.0272, |
| "step": 1515 |
| }, |
| { |
| "epoch": 10.93525179856115, |
| "grad_norm": 0.19335201088686405, |
| "learning_rate": 7.543079605935222e-07, |
| "loss": 0.0255, |
| "step": 1520 |
| }, |
| { |
| "epoch": 10.971223021582734, |
| "grad_norm": 0.16860350286261763, |
| "learning_rate": 7.289945361685013e-07, |
| "loss": 0.0257, |
| "step": 1525 |
| }, |
| { |
| "epoch": 11.007194244604317, |
| "grad_norm": 0.2725776570715531, |
| "learning_rate": 7.040797008461386e-07, |
| "loss": 0.0253, |
| "step": 1530 |
| }, |
| { |
| "epoch": 11.043165467625899, |
| "grad_norm": 0.19705015027897385, |
| "learning_rate": 6.795657797889555e-07, |
| "loss": 0.0248, |
| "step": 1535 |
| }, |
| { |
| "epoch": 11.079136690647482, |
| "grad_norm": 0.18399504559736504, |
| "learning_rate": 6.554550607443932e-07, |
| "loss": 0.0259, |
| "step": 1540 |
| }, |
| { |
| "epoch": 11.115107913669064, |
| "grad_norm": 0.22286270188259397, |
| "learning_rate": 6.317497938312905e-07, |
| "loss": 0.0265, |
| "step": 1545 |
| }, |
| { |
| "epoch": 11.151079136690647, |
| "grad_norm": 0.16493904987971855, |
| "learning_rate": 6.08452191329903e-07, |
| "loss": 0.0261, |
| "step": 1550 |
| }, |
| { |
| "epoch": 11.18705035971223, |
| "grad_norm": 0.1274225765011938, |
| "learning_rate": 5.855644274754485e-07, |
| "loss": 0.0231, |
| "step": 1555 |
| }, |
| { |
| "epoch": 11.223021582733812, |
| "grad_norm": 0.17013387891479795, |
| "learning_rate": 5.630886382551843e-07, |
| "loss": 0.0258, |
| "step": 1560 |
| }, |
| { |
| "epoch": 11.258992805755396, |
| "grad_norm": 0.16983637374927996, |
| "learning_rate": 5.410269212090757e-07, |
| "loss": 0.023, |
| "step": 1565 |
| }, |
| { |
| "epoch": 11.29496402877698, |
| "grad_norm": 0.1623168682381631, |
| "learning_rate": 5.193813352340448e-07, |
| "loss": 0.0227, |
| "step": 1570 |
| }, |
| { |
| "epoch": 11.33093525179856, |
| "grad_norm": 0.15643198279648218, |
| "learning_rate": 4.981539003918235e-07, |
| "loss": 0.0264, |
| "step": 1575 |
| }, |
| { |
| "epoch": 11.366906474820144, |
| "grad_norm": 0.15647879083727156, |
| "learning_rate": 4.773465977204311e-07, |
| "loss": 0.0241, |
| "step": 1580 |
| }, |
| { |
| "epoch": 11.402877697841726, |
| "grad_norm": 0.33238336836597426, |
| "learning_rate": 4.5696136904929464e-07, |
| "loss": 0.025, |
| "step": 1585 |
| }, |
| { |
| "epoch": 11.43884892086331, |
| "grad_norm": 0.17002067755615763, |
| "learning_rate": 4.3700011681803436e-07, |
| "loss": 0.0236, |
| "step": 1590 |
| }, |
| { |
| "epoch": 11.474820143884893, |
| "grad_norm": 0.32709296265366894, |
| "learning_rate": 4.1746470389891323e-07, |
| "loss": 0.0234, |
| "step": 1595 |
| }, |
| { |
| "epoch": 11.510791366906474, |
| "grad_norm": 0.1902707074643716, |
| "learning_rate": 3.9835695342298643e-07, |
| "loss": 0.025, |
| "step": 1600 |
| }, |
| { |
| "epoch": 11.546762589928058, |
| "grad_norm": 0.35433032752427657, |
| "learning_rate": 3.796786486099635e-07, |
| "loss": 0.0261, |
| "step": 1605 |
| }, |
| { |
| "epoch": 11.582733812949641, |
| "grad_norm": 0.16376293850363066, |
| "learning_rate": 3.6143153260178586e-07, |
| "loss": 0.025, |
| "step": 1610 |
| }, |
| { |
| "epoch": 11.618705035971223, |
| "grad_norm": 0.21222268474299327, |
| "learning_rate": 3.436173082999489e-07, |
| "loss": 0.0253, |
| "step": 1615 |
| }, |
| { |
| "epoch": 11.654676258992806, |
| "grad_norm": 0.24007081237773362, |
| "learning_rate": 3.262376382065824e-07, |
| "loss": 0.0257, |
| "step": 1620 |
| }, |
| { |
| "epoch": 11.690647482014388, |
| "grad_norm": 0.17120988541747317, |
| "learning_rate": 3.092941442692976e-07, |
| "loss": 0.0248, |
| "step": 1625 |
| }, |
| { |
| "epoch": 11.726618705035971, |
| "grad_norm": 0.1887089507451267, |
| "learning_rate": 2.927884077298182e-07, |
| "loss": 0.0231, |
| "step": 1630 |
| }, |
| { |
| "epoch": 11.762589928057555, |
| "grad_norm": 0.16792432822698705, |
| "learning_rate": 2.7672196897641336e-07, |
| "loss": 0.0262, |
| "step": 1635 |
| }, |
| { |
| "epoch": 11.798561151079136, |
| "grad_norm": 0.17705501184744799, |
| "learning_rate": 2.610963274001438e-07, |
| "loss": 0.0243, |
| "step": 1640 |
| }, |
| { |
| "epoch": 11.83453237410072, |
| "grad_norm": 0.16452397650807815, |
| "learning_rate": 2.459129412549266e-07, |
| "loss": 0.023, |
| "step": 1645 |
| }, |
| { |
| "epoch": 11.870503597122303, |
| "grad_norm": 0.1649667452272, |
| "learning_rate": 2.311732275214501e-07, |
| "loss": 0.0224, |
| "step": 1650 |
| }, |
| { |
| "epoch": 11.906474820143885, |
| "grad_norm": 0.17302393323353582, |
| "learning_rate": 2.1687856177493137e-07, |
| "loss": 0.0241, |
| "step": 1655 |
| }, |
| { |
| "epoch": 11.942446043165468, |
| "grad_norm": 0.16106116378893284, |
| "learning_rate": 2.0303027805674447e-07, |
| "loss": 0.0218, |
| "step": 1660 |
| }, |
| { |
| "epoch": 11.97841726618705, |
| "grad_norm": 0.18701658079265462, |
| "learning_rate": 1.8962966874991773e-07, |
| "loss": 0.0232, |
| "step": 1665 |
| }, |
| { |
| "epoch": 12.014388489208633, |
| "grad_norm": 0.13530271509575517, |
| "learning_rate": 1.7667798445852703e-07, |
| "loss": 0.0216, |
| "step": 1670 |
| }, |
| { |
| "epoch": 12.050359712230216, |
| "grad_norm": 0.19532436363008074, |
| "learning_rate": 1.6417643389098182e-07, |
| "loss": 0.0221, |
| "step": 1675 |
| }, |
| { |
| "epoch": 12.086330935251798, |
| "grad_norm": 0.1412049337424891, |
| "learning_rate": 1.5212618374722155e-07, |
| "loss": 0.0204, |
| "step": 1680 |
| }, |
| { |
| "epoch": 12.122302158273381, |
| "grad_norm": 0.15939325045792674, |
| "learning_rate": 1.4052835860983937e-07, |
| "loss": 0.0248, |
| "step": 1685 |
| }, |
| { |
| "epoch": 12.158273381294965, |
| "grad_norm": 0.15143043177780832, |
| "learning_rate": 1.2938404083912502e-07, |
| "loss": 0.0226, |
| "step": 1690 |
| }, |
| { |
| "epoch": 12.194244604316546, |
| "grad_norm": 0.1658493522364899, |
| "learning_rate": 1.1869427047205673e-07, |
| "loss": 0.0223, |
| "step": 1695 |
| }, |
| { |
| "epoch": 12.23021582733813, |
| "grad_norm": 0.17505367364240096, |
| "learning_rate": 1.084600451252421e-07, |
| "loss": 0.0261, |
| "step": 1700 |
| }, |
| { |
| "epoch": 12.266187050359711, |
| "grad_norm": 0.14470298755684347, |
| "learning_rate": 9.868231990181332e-08, |
| "loss": 0.0225, |
| "step": 1705 |
| }, |
| { |
| "epoch": 12.302158273381295, |
| "grad_norm": 0.1492823965956767, |
| "learning_rate": 8.936200730229439e-08, |
| "loss": 0.0253, |
| "step": 1710 |
| }, |
| { |
| "epoch": 12.338129496402878, |
| "grad_norm": 0.19794064778842438, |
| "learning_rate": 8.049997713944158e-08, |
| "loss": 0.0262, |
| "step": 1715 |
| }, |
| { |
| "epoch": 12.37410071942446, |
| "grad_norm": 0.14141923113991967, |
| "learning_rate": 7.209705645706944e-08, |
| "loss": 0.0229, |
| "step": 1720 |
| }, |
| { |
| "epoch": 12.410071942446043, |
| "grad_norm": 0.1710020025729561, |
| "learning_rate": 6.415402945286698e-08, |
| "loss": 0.0245, |
| "step": 1725 |
| }, |
| { |
| "epoch": 12.446043165467627, |
| "grad_norm": 0.14036902238695775, |
| "learning_rate": 5.6671637405212865e-08, |
| "loss": 0.021, |
| "step": 1730 |
| }, |
| { |
| "epoch": 12.482014388489208, |
| "grad_norm": 0.18979536208011222, |
| "learning_rate": 4.9650578603996355e-08, |
| "loss": 0.0227, |
| "step": 1735 |
| }, |
| { |
| "epoch": 12.517985611510792, |
| "grad_norm": 0.16609096182431196, |
| "learning_rate": 4.309150828544939e-08, |
| "loss": 0.0237, |
| "step": 1740 |
| }, |
| { |
| "epoch": 12.553956834532373, |
| "grad_norm": 0.16297966249868379, |
| "learning_rate": 3.699503857099829e-08, |
| "loss": 0.0226, |
| "step": 1745 |
| }, |
| { |
| "epoch": 12.589928057553957, |
| "grad_norm": 0.14779137892803101, |
| "learning_rate": 3.1361738410133905e-08, |
| "loss": 0.0236, |
| "step": 1750 |
| }, |
| { |
| "epoch": 12.62589928057554, |
| "grad_norm": 0.15356487001181282, |
| "learning_rate": 2.619213352732186e-08, |
| "loss": 0.0256, |
| "step": 1755 |
| }, |
| { |
| "epoch": 12.661870503597122, |
| "grad_norm": 0.16653231778754538, |
| "learning_rate": 2.1486706372932375e-08, |
| "loss": 0.0229, |
| "step": 1760 |
| }, |
| { |
| "epoch": 12.697841726618705, |
| "grad_norm": 0.15764673944415686, |
| "learning_rate": 1.7245896078220135e-08, |
| "loss": 0.022, |
| "step": 1765 |
| }, |
| { |
| "epoch": 12.733812949640289, |
| "grad_norm": 0.16016051420414304, |
| "learning_rate": 1.3470098414340993e-08, |
| "loss": 0.0226, |
| "step": 1770 |
| }, |
| { |
| "epoch": 12.76978417266187, |
| "grad_norm": 0.24481997751094148, |
| "learning_rate": 1.0159665755417603e-08, |
| "loss": 0.0264, |
| "step": 1775 |
| }, |
| { |
| "epoch": 12.805755395683454, |
| "grad_norm": 0.22542723588692715, |
| "learning_rate": 7.314907045653519e-09, |
| "loss": 0.0253, |
| "step": 1780 |
| }, |
| { |
| "epoch": 12.841726618705035, |
| "grad_norm": 0.157525745290468, |
| "learning_rate": 4.936087770502917e-09, |
| "loss": 0.0239, |
| "step": 1785 |
| }, |
| { |
| "epoch": 12.877697841726619, |
| "grad_norm": 0.17750732441905473, |
| "learning_rate": 3.0234299318909755e-09, |
| "loss": 0.022, |
| "step": 1790 |
| }, |
| { |
| "epoch": 12.913669064748202, |
| "grad_norm": 0.16333347276082377, |
| "learning_rate": 1.5771120274993278e-09, |
| "loss": 0.0255, |
| "step": 1795 |
| }, |
| { |
| "epoch": 12.949640287769784, |
| "grad_norm": 0.14651237067290176, |
| "learning_rate": 5.972690341066178e-10, |
| "loss": 0.0251, |
| "step": 1800 |
| }, |
| { |
| "epoch": 12.985611510791367, |
| "grad_norm": 0.15396045493776508, |
| "learning_rate": 8.39923949891297e-11, |
| "loss": 0.0252, |
| "step": 1805 |
| }, |
| { |
| "epoch": 13.0, |
| "step": 1807, |
| "total_flos": 722931058671616.0, |
| "train_loss": 0.25000700822350524, |
| "train_runtime": 34410.5906, |
| "train_samples_per_second": 1.679, |
| "train_steps_per_second": 0.053 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1807, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 13, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 722931058671616.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |