| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9999711408040172, |
| "eval_steps": 500, |
| "global_step": 17325, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005771839196559984, |
| "grad_norm": 15.207763525645962, |
| "learning_rate": 9.995959129481038e-06, |
| "loss": 2.6, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0011543678393119967, |
| "grad_norm": 12.615616581147215, |
| "learning_rate": 9.990186457311089e-06, |
| "loss": 0.7554, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0017315517589679952, |
| "grad_norm": 38.969453309289854, |
| "learning_rate": 9.984413785141142e-06, |
| "loss": 0.6704, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0023087356786239935, |
| "grad_norm": 22.432338112146017, |
| "learning_rate": 9.978641112971195e-06, |
| "loss": 0.6122, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0028859195982799918, |
| "grad_norm": 6.366544502174357, |
| "learning_rate": 9.972868440801248e-06, |
| "loss": 0.5704, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0034631035179359905, |
| "grad_norm": 5.991950420382146, |
| "learning_rate": 9.967095768631301e-06, |
| "loss": 0.5725, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.004040287437591989, |
| "grad_norm": 27.360506966186097, |
| "learning_rate": 9.961323096461352e-06, |
| "loss": 0.5616, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.004617471357247987, |
| "grad_norm": 7.6551554884885835, |
| "learning_rate": 9.955550424291405e-06, |
| "loss": 0.5581, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.005194655276903985, |
| "grad_norm": 5.380738579039347, |
| "learning_rate": 9.949777752121458e-06, |
| "loss": 0.5386, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0057718391965599835, |
| "grad_norm": 14.701566820920394, |
| "learning_rate": 9.944005079951511e-06, |
| "loss": 0.5277, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.006349023116215982, |
| "grad_norm": 24.2926776932226, |
| "learning_rate": 9.938232407781563e-06, |
| "loss": 0.534, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.006926207035871981, |
| "grad_norm": 18.333541125312422, |
| "learning_rate": 9.932459735611616e-06, |
| "loss": 0.5357, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.007503390955527979, |
| "grad_norm": 4.413124779621301, |
| "learning_rate": 9.926687063441667e-06, |
| "loss": 0.5189, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.008080574875183977, |
| "grad_norm": 6.736449877624926, |
| "learning_rate": 9.920914391271722e-06, |
| "loss": 0.5089, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.008657758794839977, |
| "grad_norm": 7.499085213610092, |
| "learning_rate": 9.915141719101773e-06, |
| "loss": 0.515, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.009234942714495974, |
| "grad_norm": 7.213309423254868, |
| "learning_rate": 9.909369046931826e-06, |
| "loss": 0.516, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.009812126634151973, |
| "grad_norm": 6.224375486360128, |
| "learning_rate": 9.903596374761877e-06, |
| "loss": 0.4787, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.01038931055380797, |
| "grad_norm": 4.666329365397574, |
| "learning_rate": 9.89782370259193e-06, |
| "loss": 0.5457, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.01096649447346397, |
| "grad_norm": 7.269438501572431, |
| "learning_rate": 9.892051030421983e-06, |
| "loss": 0.4999, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.011543678393119967, |
| "grad_norm": 3.4282676515510633, |
| "learning_rate": 9.886278358252037e-06, |
| "loss": 0.4979, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.012120862312775966, |
| "grad_norm": 6.398684245798025, |
| "learning_rate": 9.880505686082088e-06, |
| "loss": 0.5097, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.012698046232431964, |
| "grad_norm": 9.308090815837401, |
| "learning_rate": 9.874733013912141e-06, |
| "loss": 0.5003, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.013275230152087963, |
| "grad_norm": 6.572806422530607, |
| "learning_rate": 9.868960341742194e-06, |
| "loss": 0.5151, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.013852414071743962, |
| "grad_norm": 3.9790215956458206, |
| "learning_rate": 9.863187669572247e-06, |
| "loss": 0.5036, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.01442959799139996, |
| "grad_norm": 9.129499916264713, |
| "learning_rate": 9.857414997402298e-06, |
| "loss": 0.4993, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.015006781911055958, |
| "grad_norm": 5.341270073182352, |
| "learning_rate": 9.851642325232351e-06, |
| "loss": 0.5068, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.015583965830711956, |
| "grad_norm": 8.88171344021306, |
| "learning_rate": 9.845869653062403e-06, |
| "loss": 0.5106, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.016161149750367955, |
| "grad_norm": 6.256421713727477, |
| "learning_rate": 9.840096980892456e-06, |
| "loss": 0.5012, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.016738333670023954, |
| "grad_norm": 14.828284045011356, |
| "learning_rate": 9.834324308722509e-06, |
| "loss": 0.4916, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.017315517589679953, |
| "grad_norm": 13.134769047818303, |
| "learning_rate": 9.828551636552562e-06, |
| "loss": 0.5023, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.01789270150933595, |
| "grad_norm": 13.132579195124707, |
| "learning_rate": 9.822778964382613e-06, |
| "loss": 0.4979, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.018469885428991948, |
| "grad_norm": 7.799223760078651, |
| "learning_rate": 9.817006292212666e-06, |
| "loss": 0.5047, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.019047069348647947, |
| "grad_norm": 9.15638603603503, |
| "learning_rate": 9.811233620042719e-06, |
| "loss": 0.5089, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.019624253268303946, |
| "grad_norm": 4.959585555459444, |
| "learning_rate": 9.805460947872772e-06, |
| "loss": 0.4952, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.020201437187959942, |
| "grad_norm": 11.68075308396199, |
| "learning_rate": 9.799688275702823e-06, |
| "loss": 0.4947, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.02077862110761594, |
| "grad_norm": 5.031742433330389, |
| "learning_rate": 9.793915603532876e-06, |
| "loss": 0.4722, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.02135580502727194, |
| "grad_norm": 7.396670940592179, |
| "learning_rate": 9.788142931362928e-06, |
| "loss": 0.4784, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.02193298894692794, |
| "grad_norm": 12.81464819318774, |
| "learning_rate": 9.78237025919298e-06, |
| "loss": 0.5016, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.02251017286658394, |
| "grad_norm": 6.952612493218752, |
| "learning_rate": 9.776597587023034e-06, |
| "loss": 0.486, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.023087356786239934, |
| "grad_norm": 3.8994319647851134, |
| "learning_rate": 9.770824914853087e-06, |
| "loss": 0.5066, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.023664540705895933, |
| "grad_norm": 8.754480087072261, |
| "learning_rate": 9.765052242683138e-06, |
| "loss": 0.477, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.024241724625551932, |
| "grad_norm": 14.00837557848952, |
| "learning_rate": 9.759279570513191e-06, |
| "loss": 0.4698, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.02481890854520793, |
| "grad_norm": 17.668700193812345, |
| "learning_rate": 9.753506898343244e-06, |
| "loss": 0.4809, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.025396092464863927, |
| "grad_norm": 7.562292913968297, |
| "learning_rate": 9.747734226173297e-06, |
| "loss": 0.5051, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.025973276384519926, |
| "grad_norm": 10.300306620163619, |
| "learning_rate": 9.741961554003348e-06, |
| "loss": 0.5036, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.026550460304175925, |
| "grad_norm": 10.310969163346487, |
| "learning_rate": 9.736188881833401e-06, |
| "loss": 0.5029, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.027127644223831925, |
| "grad_norm": 5.858481709305026, |
| "learning_rate": 9.730416209663453e-06, |
| "loss": 0.5021, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.027704828143487924, |
| "grad_norm": 92.82221880785818, |
| "learning_rate": 9.724643537493506e-06, |
| "loss": 0.487, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.02828201206314392, |
| "grad_norm": 5.060105328004147, |
| "learning_rate": 9.718870865323559e-06, |
| "loss": 0.5053, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.02885919598279992, |
| "grad_norm": 5.269210555899195, |
| "learning_rate": 9.713098193153612e-06, |
| "loss": 0.5028, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.029436379902455918, |
| "grad_norm": 2.809713279481296, |
| "learning_rate": 9.707325520983663e-06, |
| "loss": 0.4789, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.030013563822111917, |
| "grad_norm": 5.364421325217877, |
| "learning_rate": 9.701552848813716e-06, |
| "loss": 0.488, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.030590747741767916, |
| "grad_norm": 81.42225421545997, |
| "learning_rate": 9.69578017664377e-06, |
| "loss": 0.4923, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.03116793166142391, |
| "grad_norm": 5.472841855533951, |
| "learning_rate": 9.690007504473822e-06, |
| "loss": 0.4801, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.03174511558107991, |
| "grad_norm": 9.314853911550161, |
| "learning_rate": 9.684234832303874e-06, |
| "loss": 0.4889, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.03232229950073591, |
| "grad_norm": 5.657736368331141, |
| "learning_rate": 9.678462160133927e-06, |
| "loss": 0.468, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.03289948342039191, |
| "grad_norm": 5.316343227859894, |
| "learning_rate": 9.672689487963978e-06, |
| "loss": 0.4636, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.03347666734004791, |
| "grad_norm": 6.818051213532348, |
| "learning_rate": 9.666916815794033e-06, |
| "loss": 0.4785, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.03405385125970391, |
| "grad_norm": 5.515733833721638, |
| "learning_rate": 9.661144143624086e-06, |
| "loss": 0.4676, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.034631035179359906, |
| "grad_norm": 4.414342893400196, |
| "learning_rate": 9.655371471454137e-06, |
| "loss": 0.4536, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.0352082190990159, |
| "grad_norm": 5.5253782212489355, |
| "learning_rate": 9.64959879928419e-06, |
| "loss": 0.4831, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.0357854030186719, |
| "grad_norm": 3.47774426972913, |
| "learning_rate": 9.643826127114241e-06, |
| "loss": 0.4877, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.0363625869383279, |
| "grad_norm": 7.150841155138223, |
| "learning_rate": 9.638053454944294e-06, |
| "loss": 0.4629, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.036939770857983896, |
| "grad_norm": 3.7894713504219357, |
| "learning_rate": 9.632280782774347e-06, |
| "loss": 0.4816, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.037516954777639895, |
| "grad_norm": 5.346386715612294, |
| "learning_rate": 9.6265081106044e-06, |
| "loss": 0.4861, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.038094138697295894, |
| "grad_norm": 4.822544974681145, |
| "learning_rate": 9.620735438434452e-06, |
| "loss": 0.4756, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.03867132261695189, |
| "grad_norm": 5.810598113453792, |
| "learning_rate": 9.614962766264505e-06, |
| "loss": 0.4752, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.03924850653660789, |
| "grad_norm": 19.319543215067025, |
| "learning_rate": 9.609190094094558e-06, |
| "loss": 0.4664, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.03982569045626389, |
| "grad_norm": 3.6009359491010864, |
| "learning_rate": 9.60341742192461e-06, |
| "loss": 0.481, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.040402874375919884, |
| "grad_norm": 5.102687170049628, |
| "learning_rate": 9.597644749754662e-06, |
| "loss": 0.4751, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.04098005829557588, |
| "grad_norm": 6.202922207392348, |
| "learning_rate": 9.591872077584715e-06, |
| "loss": 0.4849, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.04155724221523188, |
| "grad_norm": 7.8448485511355965, |
| "learning_rate": 9.586099405414766e-06, |
| "loss": 0.4947, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.04213442613488788, |
| "grad_norm": 3.9340219935160863, |
| "learning_rate": 9.58032673324482e-06, |
| "loss": 0.4948, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.04271161005454388, |
| "grad_norm": 4.596617570306747, |
| "learning_rate": 9.574554061074873e-06, |
| "loss": 0.4743, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.04328879397419988, |
| "grad_norm": 2.1114969175976923, |
| "learning_rate": 9.568781388904926e-06, |
| "loss": 0.4717, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.04386597789385588, |
| "grad_norm": 5.18107474670299, |
| "learning_rate": 9.563008716734977e-06, |
| "loss": 0.468, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.04444316181351188, |
| "grad_norm": 4.705370463352637, |
| "learning_rate": 9.55723604456503e-06, |
| "loss": 0.4594, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.04502034573316788, |
| "grad_norm": 4.477204626343746, |
| "learning_rate": 9.551463372395083e-06, |
| "loss": 0.4726, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.04559752965282387, |
| "grad_norm": 5.11055150918499, |
| "learning_rate": 9.545690700225136e-06, |
| "loss": 0.4878, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.04617471357247987, |
| "grad_norm": 5.92970242815697, |
| "learning_rate": 9.539918028055187e-06, |
| "loss": 0.4562, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.04675189749213587, |
| "grad_norm": 32.04814941479638, |
| "learning_rate": 9.53414535588524e-06, |
| "loss": 0.4668, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.047329081411791867, |
| "grad_norm": 7.483269872312162, |
| "learning_rate": 9.528372683715292e-06, |
| "loss": 0.4593, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.047906265331447866, |
| "grad_norm": 3.0548203710383026, |
| "learning_rate": 9.522600011545345e-06, |
| "loss": 0.4734, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.048483449251103865, |
| "grad_norm": 2.846647873568613, |
| "learning_rate": 9.516827339375398e-06, |
| "loss": 0.4583, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.049060633170759864, |
| "grad_norm": 2.9958931469528753, |
| "learning_rate": 9.51105466720545e-06, |
| "loss": 0.4503, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.04963781709041586, |
| "grad_norm": 3.669267079128399, |
| "learning_rate": 9.505281995035502e-06, |
| "loss": 0.4543, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.05021500101007186, |
| "grad_norm": 4.959841634234083, |
| "learning_rate": 9.499509322865555e-06, |
| "loss": 0.4638, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.050792184929727854, |
| "grad_norm": 6.2400754071583355, |
| "learning_rate": 9.493736650695608e-06, |
| "loss": 0.4675, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.051369368849383854, |
| "grad_norm": 4.8904554144848325, |
| "learning_rate": 9.487963978525661e-06, |
| "loss": 0.4795, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.05194655276903985, |
| "grad_norm": 5.718936473226969, |
| "learning_rate": 9.482191306355712e-06, |
| "loss": 0.475, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.05252373668869585, |
| "grad_norm": 2.813275923923208, |
| "learning_rate": 9.476418634185765e-06, |
| "loss": 0.4588, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.05310092060835185, |
| "grad_norm": 3.7622872130635825, |
| "learning_rate": 9.470645962015817e-06, |
| "loss": 0.4738, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.05367810452800785, |
| "grad_norm": 5.251759558774021, |
| "learning_rate": 9.464873289845871e-06, |
| "loss": 0.4548, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.05425528844766385, |
| "grad_norm": 4.650175199113373, |
| "learning_rate": 9.459100617675923e-06, |
| "loss": 0.469, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.05483247236731985, |
| "grad_norm": 19.07121359753558, |
| "learning_rate": 9.453327945505976e-06, |
| "loss": 0.4593, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.05540965628697585, |
| "grad_norm": 4.899239522938927, |
| "learning_rate": 9.447555273336027e-06, |
| "loss": 0.4693, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.05598684020663185, |
| "grad_norm": 11.615659586114845, |
| "learning_rate": 9.44178260116608e-06, |
| "loss": 0.4447, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.05656402412628784, |
| "grad_norm": 10.04941518596728, |
| "learning_rate": 9.436009928996133e-06, |
| "loss": 0.4617, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.05714120804594384, |
| "grad_norm": 8.509458377026059, |
| "learning_rate": 9.430237256826186e-06, |
| "loss": 0.4449, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.05771839196559984, |
| "grad_norm": 7.280354559581083, |
| "learning_rate": 9.424464584656238e-06, |
| "loss": 0.4473, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.058295575885255836, |
| "grad_norm": 6.178315104553298, |
| "learning_rate": 9.41869191248629e-06, |
| "loss": 0.4489, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.058872759804911835, |
| "grad_norm": 4.326714170134293, |
| "learning_rate": 9.412919240316344e-06, |
| "loss": 0.4421, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.059449943724567834, |
| "grad_norm": 5.121692072940591, |
| "learning_rate": 9.407146568146397e-06, |
| "loss": 0.4474, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.060027127644223834, |
| "grad_norm": 9.081808249358685, |
| "learning_rate": 9.401373895976448e-06, |
| "loss": 0.4425, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.06060431156387983, |
| "grad_norm": 3.1699426939861644, |
| "learning_rate": 9.395601223806501e-06, |
| "loss": 0.4527, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.06118149548353583, |
| "grad_norm": 4.867955044244513, |
| "learning_rate": 9.389828551636552e-06, |
| "loss": 0.4364, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.061758679403191824, |
| "grad_norm": 4.757339575426131, |
| "learning_rate": 9.384055879466605e-06, |
| "loss": 0.4744, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.06233586332284782, |
| "grad_norm": 7.0039435743401235, |
| "learning_rate": 9.378283207296658e-06, |
| "loss": 0.4542, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.06291304724250382, |
| "grad_norm": 3.3067436767056964, |
| "learning_rate": 9.372510535126711e-06, |
| "loss": 0.4434, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.06349023116215982, |
| "grad_norm": 5.215962094810896, |
| "learning_rate": 9.366737862956763e-06, |
| "loss": 0.4413, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.06406741508181582, |
| "grad_norm": 5.30554157155974, |
| "learning_rate": 9.360965190786816e-06, |
| "loss": 0.4492, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.06464459900147182, |
| "grad_norm": 9.282132535227714, |
| "learning_rate": 9.355192518616869e-06, |
| "loss": 0.4552, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.06522178292112782, |
| "grad_norm": 4.436227742317887, |
| "learning_rate": 9.349419846446922e-06, |
| "loss": 0.4629, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.06579896684078382, |
| "grad_norm": 5.072582901364672, |
| "learning_rate": 9.343647174276975e-06, |
| "loss": 0.4394, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.06637615076043982, |
| "grad_norm": 3.6484490415265287, |
| "learning_rate": 9.337874502107026e-06, |
| "loss": 0.4404, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.06695333468009582, |
| "grad_norm": 5.179047870445265, |
| "learning_rate": 9.332101829937079e-06, |
| "loss": 0.4401, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.06753051859975182, |
| "grad_norm": 5.1113945913805345, |
| "learning_rate": 9.32632915776713e-06, |
| "loss": 0.4642, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.06810770251940781, |
| "grad_norm": 13.536539735261888, |
| "learning_rate": 9.320556485597183e-06, |
| "loss": 0.44, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.06868488643906381, |
| "grad_norm": 39.38178075944957, |
| "learning_rate": 9.314783813427236e-06, |
| "loss": 0.4401, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.06926207035871981, |
| "grad_norm": 5.347846132280397, |
| "learning_rate": 9.30901114125729e-06, |
| "loss": 0.4279, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.0698392542783758, |
| "grad_norm": 17.998657342305947, |
| "learning_rate": 9.30323846908734e-06, |
| "loss": 0.4289, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.0704164381980318, |
| "grad_norm": 3.352558494444376, |
| "learning_rate": 9.297465796917394e-06, |
| "loss": 0.4345, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.0709936221176878, |
| "grad_norm": 5.425024450686575, |
| "learning_rate": 9.291693124747447e-06, |
| "loss": 0.4551, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.0715708060373438, |
| "grad_norm": 5.053276208988115, |
| "learning_rate": 9.2859204525775e-06, |
| "loss": 0.455, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.0721479899569998, |
| "grad_norm": 4.369475575649534, |
| "learning_rate": 9.280147780407551e-06, |
| "loss": 0.4359, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.0727251738766558, |
| "grad_norm": 7.966064775548766, |
| "learning_rate": 9.274375108237604e-06, |
| "loss": 0.4408, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.07330235779631179, |
| "grad_norm": 2.9840558262546626, |
| "learning_rate": 9.268602436067656e-06, |
| "loss": 0.445, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.07387954171596779, |
| "grad_norm": 6.923266339159757, |
| "learning_rate": 9.26282976389771e-06, |
| "loss": 0.4319, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.07445672563562379, |
| "grad_norm": 5.445323456459659, |
| "learning_rate": 9.257057091727762e-06, |
| "loss": 0.4649, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.07503390955527979, |
| "grad_norm": 2.258392109239514, |
| "learning_rate": 9.251284419557815e-06, |
| "loss": 0.4435, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.07561109347493579, |
| "grad_norm": 6.963934239231957, |
| "learning_rate": 9.245511747387866e-06, |
| "loss": 0.4516, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.07618827739459179, |
| "grad_norm": 3.741922936601378, |
| "learning_rate": 9.239739075217919e-06, |
| "loss": 0.4457, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.07676546131424779, |
| "grad_norm": 4.697657485962023, |
| "learning_rate": 9.233966403047972e-06, |
| "loss": 0.4585, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.07734264523390379, |
| "grad_norm": 2.7800726567106886, |
| "learning_rate": 9.228193730878025e-06, |
| "loss": 0.4303, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.07791982915355979, |
| "grad_norm": 2.374574795518818, |
| "learning_rate": 9.222421058708076e-06, |
| "loss": 0.4468, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.07849701307321579, |
| "grad_norm": 3.379706889838213, |
| "learning_rate": 9.21664838653813e-06, |
| "loss": 0.4468, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.07907419699287178, |
| "grad_norm": 5.02793266796178, |
| "learning_rate": 9.210875714368182e-06, |
| "loss": 0.4541, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.07965138091252778, |
| "grad_norm": 5.377287835326619, |
| "learning_rate": 9.205103042198235e-06, |
| "loss": 0.4577, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.08022856483218377, |
| "grad_norm": 4.118238939930749, |
| "learning_rate": 9.199330370028287e-06, |
| "loss": 0.4535, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.08080574875183977, |
| "grad_norm": 3.6462071059200785, |
| "learning_rate": 9.19355769785834e-06, |
| "loss": 0.452, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.08138293267149577, |
| "grad_norm": 2.023709004387077, |
| "learning_rate": 9.187785025688391e-06, |
| "loss": 0.4385, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.08196011659115177, |
| "grad_norm": 2.718469268180074, |
| "learning_rate": 9.182012353518444e-06, |
| "loss": 0.4565, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.08253730051080777, |
| "grad_norm": 4.494982724398743, |
| "learning_rate": 9.176239681348497e-06, |
| "loss": 0.455, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.08311448443046376, |
| "grad_norm": 3.008004247279657, |
| "learning_rate": 9.17046700917855e-06, |
| "loss": 0.4338, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.08369166835011976, |
| "grad_norm": 4.471511152653035, |
| "learning_rate": 9.164694337008601e-06, |
| "loss": 0.4498, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.08426885226977576, |
| "grad_norm": 41.046308308564996, |
| "learning_rate": 9.158921664838654e-06, |
| "loss": 0.444, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.08484603618943176, |
| "grad_norm": 2.2817565591543087, |
| "learning_rate": 9.153148992668707e-06, |
| "loss": 0.4524, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.08542322010908776, |
| "grad_norm": 2.7552178530343974, |
| "learning_rate": 9.14737632049876e-06, |
| "loss": 0.4395, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.08600040402874376, |
| "grad_norm": 7.350119226751549, |
| "learning_rate": 9.141603648328812e-06, |
| "loss": 0.4439, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.08657758794839976, |
| "grad_norm": 4.927329771744563, |
| "learning_rate": 9.135830976158865e-06, |
| "loss": 0.4435, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.08715477186805576, |
| "grad_norm": 3.2061592885446433, |
| "learning_rate": 9.130058303988916e-06, |
| "loss": 0.4551, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.08773195578771176, |
| "grad_norm": 2.81815402264405, |
| "learning_rate": 9.12428563181897e-06, |
| "loss": 0.4541, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.08830913970736776, |
| "grad_norm": 1.8995196764329627, |
| "learning_rate": 9.118512959649022e-06, |
| "loss": 0.426, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.08888632362702376, |
| "grad_norm": 4.57597268577496, |
| "learning_rate": 9.112740287479075e-06, |
| "loss": 0.4388, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.08946350754667975, |
| "grad_norm": 4.888253541319005, |
| "learning_rate": 9.106967615309127e-06, |
| "loss": 0.4557, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.09004069146633575, |
| "grad_norm": 2.7824915648882853, |
| "learning_rate": 9.10119494313918e-06, |
| "loss": 0.4544, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.09061787538599175, |
| "grad_norm": 4.3535000068694645, |
| "learning_rate": 9.095422270969233e-06, |
| "loss": 0.4545, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.09119505930564774, |
| "grad_norm": 3.465323902631204, |
| "learning_rate": 9.089649598799286e-06, |
| "loss": 0.4343, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.09177224322530374, |
| "grad_norm": 3.685921584283666, |
| "learning_rate": 9.083876926629337e-06, |
| "loss": 0.4571, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.09234942714495974, |
| "grad_norm": 3.7270367548310457, |
| "learning_rate": 9.07810425445939e-06, |
| "loss": 0.4368, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.09292661106461574, |
| "grad_norm": 4.635316351567143, |
| "learning_rate": 9.072331582289441e-06, |
| "loss": 0.4333, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.09350379498427173, |
| "grad_norm": 7.835128472709014, |
| "learning_rate": 9.066558910119494e-06, |
| "loss": 0.4371, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.09408097890392773, |
| "grad_norm": 3.497453269659607, |
| "learning_rate": 9.060786237949547e-06, |
| "loss": 0.441, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.09465816282358373, |
| "grad_norm": 8.942601742638523, |
| "learning_rate": 9.0550135657796e-06, |
| "loss": 0.4381, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.09523534674323973, |
| "grad_norm": 4.589054052495082, |
| "learning_rate": 9.049240893609653e-06, |
| "loss": 0.4382, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.09581253066289573, |
| "grad_norm": 5.380659756568862, |
| "learning_rate": 9.043468221439705e-06, |
| "loss": 0.4409, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.09638971458255173, |
| "grad_norm": 2.868711750692323, |
| "learning_rate": 9.037695549269758e-06, |
| "loss": 0.4456, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.09696689850220773, |
| "grad_norm": 2.5446684388172884, |
| "learning_rate": 9.03192287709981e-06, |
| "loss": 0.4491, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.09754408242186373, |
| "grad_norm": 2.258241750087974, |
| "learning_rate": 9.026150204929864e-06, |
| "loss": 0.4366, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.09812126634151973, |
| "grad_norm": 2.602005403010541, |
| "learning_rate": 9.020377532759915e-06, |
| "loss": 0.4336, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.09869845026117573, |
| "grad_norm": 2.209187153695394, |
| "learning_rate": 9.014604860589968e-06, |
| "loss": 0.4438, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.09927563418083173, |
| "grad_norm": 3.0657261742371205, |
| "learning_rate": 9.008832188420021e-06, |
| "loss": 0.4438, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.09985281810048773, |
| "grad_norm": 3.199466707600606, |
| "learning_rate": 9.003059516250074e-06, |
| "loss": 0.4315, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.10043000202014372, |
| "grad_norm": 2.6315666639919657, |
| "learning_rate": 8.997286844080125e-06, |
| "loss": 0.4309, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.10100718593979972, |
| "grad_norm": 2.438482770459306, |
| "learning_rate": 8.991514171910178e-06, |
| "loss": 0.4385, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.10158436985945571, |
| "grad_norm": 17.29321542355523, |
| "learning_rate": 8.98574149974023e-06, |
| "loss": 0.4408, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.10216155377911171, |
| "grad_norm": 4.982684643457552, |
| "learning_rate": 8.979968827570283e-06, |
| "loss": 0.4298, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.10273873769876771, |
| "grad_norm": 6.799624435849738, |
| "learning_rate": 8.974196155400336e-06, |
| "loss": 0.4334, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.1033159216184237, |
| "grad_norm": 3.774388255492694, |
| "learning_rate": 8.968423483230389e-06, |
| "loss": 0.4308, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.1038931055380797, |
| "grad_norm": 10.033128674066695, |
| "learning_rate": 8.96265081106044e-06, |
| "loss": 0.4341, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.1044702894577357, |
| "grad_norm": 2.5219113776584123, |
| "learning_rate": 8.956878138890493e-06, |
| "loss": 0.4362, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.1050474733773917, |
| "grad_norm": 2.419786699423789, |
| "learning_rate": 8.951105466720546e-06, |
| "loss": 0.4242, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.1056246572970477, |
| "grad_norm": 60.08295959626865, |
| "learning_rate": 8.9453327945506e-06, |
| "loss": 0.4385, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.1062018412167037, |
| "grad_norm": 3.096217392102352, |
| "learning_rate": 8.93956012238065e-06, |
| "loss": 0.4241, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.1067790251363597, |
| "grad_norm": 2.5832709944291503, |
| "learning_rate": 8.933787450210704e-06, |
| "loss": 0.4361, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.1073562090560157, |
| "grad_norm": 2.3859955361814302, |
| "learning_rate": 8.928014778040755e-06, |
| "loss": 0.4213, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.1079333929756717, |
| "grad_norm": 3.223331997025027, |
| "learning_rate": 8.922242105870808e-06, |
| "loss": 0.4489, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.1085105768953277, |
| "grad_norm": 4.411834387897919, |
| "learning_rate": 8.916469433700861e-06, |
| "loss": 0.4336, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.1090877608149837, |
| "grad_norm": 9.832585313890915, |
| "learning_rate": 8.910696761530914e-06, |
| "loss": 0.4441, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.1096649447346397, |
| "grad_norm": 5.365037688190915, |
| "learning_rate": 8.904924089360965e-06, |
| "loss": 0.4367, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.1102421286542957, |
| "grad_norm": 3.819456675382363, |
| "learning_rate": 8.899151417191018e-06, |
| "loss": 0.4272, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.1108193125739517, |
| "grad_norm": 3.036048623046518, |
| "learning_rate": 8.893378745021071e-06, |
| "loss": 0.4338, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.1113964964936077, |
| "grad_norm": 3.2508150432730196, |
| "learning_rate": 8.887606072851124e-06, |
| "loss": 0.4469, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.1119736804132637, |
| "grad_norm": 2.9078235731412305, |
| "learning_rate": 8.881833400681176e-06, |
| "loss": 0.4452, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.11255086433291968, |
| "grad_norm": 2.4501912449235648, |
| "learning_rate": 8.876060728511229e-06, |
| "loss": 0.4246, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.11312804825257568, |
| "grad_norm": 6.242529869364432, |
| "learning_rate": 8.87028805634128e-06, |
| "loss": 0.4459, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.11370523217223168, |
| "grad_norm": 25.285733450369438, |
| "learning_rate": 8.864515384171333e-06, |
| "loss": 0.4377, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.11428241609188768, |
| "grad_norm": 3.177411911875137, |
| "learning_rate": 8.858742712001386e-06, |
| "loss": 0.4197, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.11485960001154367, |
| "grad_norm": 3.117021809901308, |
| "learning_rate": 8.852970039831439e-06, |
| "loss": 0.4147, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.11543678393119967, |
| "grad_norm": 3.919723512318409, |
| "learning_rate": 8.84719736766149e-06, |
| "loss": 0.4414, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.11601396785085567, |
| "grad_norm": 2.9342199285272796, |
| "learning_rate": 8.841424695491543e-06, |
| "loss": 0.427, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.11659115177051167, |
| "grad_norm": 1.918258053028722, |
| "learning_rate": 8.835652023321596e-06, |
| "loss": 0.4381, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.11716833569016767, |
| "grad_norm": 1.7751748211927454, |
| "learning_rate": 8.82987935115165e-06, |
| "loss": 0.4228, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.11774551960982367, |
| "grad_norm": 2.2012455830534385, |
| "learning_rate": 8.8241066789817e-06, |
| "loss": 0.4209, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.11832270352947967, |
| "grad_norm": 2.8237417214584544, |
| "learning_rate": 8.818334006811754e-06, |
| "loss": 0.4177, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.11889988744913567, |
| "grad_norm": 4.6392306678295805, |
| "learning_rate": 8.812561334641805e-06, |
| "loss": 0.4087, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.11947707136879167, |
| "grad_norm": 2.735063304359889, |
| "learning_rate": 8.80678866247186e-06, |
| "loss": 0.4272, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.12005425528844767, |
| "grad_norm": 3.0299802933466657, |
| "learning_rate": 8.801015990301911e-06, |
| "loss": 0.4371, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.12063143920810367, |
| "grad_norm": 5.124807390829953, |
| "learning_rate": 8.795243318131964e-06, |
| "loss": 0.4165, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.12120862312775967, |
| "grad_norm": 5.1154449505887385, |
| "learning_rate": 8.789470645962016e-06, |
| "loss": 0.4346, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.12178580704741566, |
| "grad_norm": 2.203947517060104, |
| "learning_rate": 8.783697973792069e-06, |
| "loss": 0.4225, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.12236299096707166, |
| "grad_norm": 2.1610133845450683, |
| "learning_rate": 8.777925301622122e-06, |
| "loss": 0.4121, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.12294017488672765, |
| "grad_norm": 13.953365114471524, |
| "learning_rate": 8.772152629452175e-06, |
| "loss": 0.4304, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.12351735880638365, |
| "grad_norm": 8.125499458140256, |
| "learning_rate": 8.766379957282226e-06, |
| "loss": 0.4351, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.12409454272603965, |
| "grad_norm": 10.554186301118584, |
| "learning_rate": 8.760607285112279e-06, |
| "loss": 0.4266, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.12467172664569565, |
| "grad_norm": 4.270037728186706, |
| "learning_rate": 8.754834612942332e-06, |
| "loss": 0.438, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.12524891056535165, |
| "grad_norm": 8.246386750659413, |
| "learning_rate": 8.749061940772385e-06, |
| "loss": 0.4283, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.12582609448500764, |
| "grad_norm": 3.506946209841679, |
| "learning_rate": 8.743289268602438e-06, |
| "loss": 0.4106, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.12640327840466364, |
| "grad_norm": 6.297671982703174, |
| "learning_rate": 8.73751659643249e-06, |
| "loss": 0.4329, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.12698046232431964, |
| "grad_norm": 3.18530097096491, |
| "learning_rate": 8.731743924262542e-06, |
| "loss": 0.4185, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.12755764624397564, |
| "grad_norm": 8.542498824615011, |
| "learning_rate": 8.725971252092594e-06, |
| "loss": 0.4255, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.12813483016363164, |
| "grad_norm": 4.533535099713348, |
| "learning_rate": 8.720198579922647e-06, |
| "loss": 0.423, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.12871201408328764, |
| "grad_norm": 3.889530684586912, |
| "learning_rate": 8.7144259077527e-06, |
| "loss": 0.4235, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.12928919800294364, |
| "grad_norm": 3.6486853451512777, |
| "learning_rate": 8.708653235582753e-06, |
| "loss": 0.4211, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.12986638192259964, |
| "grad_norm": 4.615918028596828, |
| "learning_rate": 8.702880563412804e-06, |
| "loss": 0.4088, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.13044356584225564, |
| "grad_norm": 3.850082033721987, |
| "learning_rate": 8.697107891242857e-06, |
| "loss": 0.4282, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.13102074976191164, |
| "grad_norm": 14.518871261878948, |
| "learning_rate": 8.69133521907291e-06, |
| "loss": 0.431, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.13159793368156764, |
| "grad_norm": 3.5577521941063797, |
| "learning_rate": 8.685562546902963e-06, |
| "loss": 0.4269, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.13217511760122364, |
| "grad_norm": 4.319284164265623, |
| "learning_rate": 8.679789874733014e-06, |
| "loss": 0.4151, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.13275230152087963, |
| "grad_norm": 5.95977091224079, |
| "learning_rate": 8.674017202563068e-06, |
| "loss": 0.434, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.13332948544053563, |
| "grad_norm": 5.717322004077494, |
| "learning_rate": 8.668244530393119e-06, |
| "loss": 0.412, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.13390666936019163, |
| "grad_norm": 4.1757793177667315, |
| "learning_rate": 8.662471858223172e-06, |
| "loss": 0.4154, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.13448385327984763, |
| "grad_norm": 2.4222311786523822, |
| "learning_rate": 8.656699186053225e-06, |
| "loss": 0.4075, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.13506103719950363, |
| "grad_norm": 4.366879694604865, |
| "learning_rate": 8.650926513883278e-06, |
| "loss": 0.4212, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.13563822111915963, |
| "grad_norm": 4.573113527280892, |
| "learning_rate": 8.64515384171333e-06, |
| "loss": 0.4235, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.13621540503881563, |
| "grad_norm": 2.7183246240824612, |
| "learning_rate": 8.639381169543382e-06, |
| "loss": 0.4067, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.13679258895847163, |
| "grad_norm": 2.858161612405957, |
| "learning_rate": 8.633608497373435e-06, |
| "loss": 0.404, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.13736977287812763, |
| "grad_norm": 4.083715288704242, |
| "learning_rate": 8.627835825203488e-06, |
| "loss": 0.4063, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.13794695679778363, |
| "grad_norm": 5.0777931108973595, |
| "learning_rate": 8.62206315303354e-06, |
| "loss": 0.422, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.13852414071743963, |
| "grad_norm": 7.507110618573909, |
| "learning_rate": 8.616290480863593e-06, |
| "loss": 0.421, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.1391013246370956, |
| "grad_norm": 5.833761997884338, |
| "learning_rate": 8.610517808693644e-06, |
| "loss": 0.4149, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.1396785085567516, |
| "grad_norm": 2.9291582156343523, |
| "learning_rate": 8.604745136523697e-06, |
| "loss": 0.4223, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.1402556924764076, |
| "grad_norm": 3.8527608900197245, |
| "learning_rate": 8.59897246435375e-06, |
| "loss": 0.4381, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.1408328763960636, |
| "grad_norm": 5.009308450797531, |
| "learning_rate": 8.593199792183803e-06, |
| "loss": 0.4265, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.1414100603157196, |
| "grad_norm": 4.305682778167611, |
| "learning_rate": 8.587427120013854e-06, |
| "loss": 0.422, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.1419872442353756, |
| "grad_norm": 4.119426729774013, |
| "learning_rate": 8.581654447843907e-06, |
| "loss": 0.4079, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.1425644281550316, |
| "grad_norm": 6.546817474930748, |
| "learning_rate": 8.57588177567396e-06, |
| "loss": 0.42, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.1431416120746876, |
| "grad_norm": 3.668016204374388, |
| "learning_rate": 8.570109103504013e-06, |
| "loss": 0.4323, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.1437187959943436, |
| "grad_norm": 4.577088984604785, |
| "learning_rate": 8.564336431334065e-06, |
| "loss": 0.412, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.1442959799139996, |
| "grad_norm": 5.57584596543327, |
| "learning_rate": 8.558563759164118e-06, |
| "loss": 0.4263, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.1448731638336556, |
| "grad_norm": 2.695769154250156, |
| "learning_rate": 8.55279108699417e-06, |
| "loss": 0.4107, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.1454503477533116, |
| "grad_norm": 13.95443843109925, |
| "learning_rate": 8.547018414824224e-06, |
| "loss": 0.4213, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.1460275316729676, |
| "grad_norm": 7.136056655011844, |
| "learning_rate": 8.541245742654275e-06, |
| "loss": 0.4274, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.14660471559262359, |
| "grad_norm": 6.544753723165297, |
| "learning_rate": 8.535473070484328e-06, |
| "loss": 0.414, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.14718189951227958, |
| "grad_norm": 5.397598920832979, |
| "learning_rate": 8.52970039831438e-06, |
| "loss": 0.4204, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.14775908343193558, |
| "grad_norm": 4.756636071432368, |
| "learning_rate": 8.523927726144432e-06, |
| "loss": 0.4124, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.14833626735159158, |
| "grad_norm": 14.516917457987418, |
| "learning_rate": 8.518155053974486e-06, |
| "loss": 0.4287, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.14891345127124758, |
| "grad_norm": 8.042684749735274, |
| "learning_rate": 8.512382381804539e-06, |
| "loss": 0.399, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.14949063519090358, |
| "grad_norm": 7.4003473379136775, |
| "learning_rate": 8.50660970963459e-06, |
| "loss": 0.4113, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.15006781911055958, |
| "grad_norm": 3.771759855408406, |
| "learning_rate": 8.500837037464643e-06, |
| "loss": 0.4056, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.15064500303021558, |
| "grad_norm": 3.6286193403799682, |
| "learning_rate": 8.495064365294696e-06, |
| "loss": 0.4246, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.15122218694987158, |
| "grad_norm": 3.863418428458885, |
| "learning_rate": 8.489291693124749e-06, |
| "loss": 0.4085, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.15179937086952758, |
| "grad_norm": 4.263367210964064, |
| "learning_rate": 8.4835190209548e-06, |
| "loss": 0.4223, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.15237655478918358, |
| "grad_norm": 3.337620340539389, |
| "learning_rate": 8.477746348784853e-06, |
| "loss": 0.4175, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.15295373870883958, |
| "grad_norm": 3.0267657042390788, |
| "learning_rate": 8.471973676614905e-06, |
| "loss": 0.4113, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.15353092262849558, |
| "grad_norm": 2.6106010567447893, |
| "learning_rate": 8.466201004444958e-06, |
| "loss": 0.4157, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.15410810654815157, |
| "grad_norm": 2.264152062153991, |
| "learning_rate": 8.46042833227501e-06, |
| "loss": 0.4338, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.15468529046780757, |
| "grad_norm": 1.9215310401757064, |
| "learning_rate": 8.454655660105064e-06, |
| "loss": 0.4114, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.15526247438746357, |
| "grad_norm": 2.615154051211967, |
| "learning_rate": 8.448882987935117e-06, |
| "loss": 0.4216, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.15583965830711957, |
| "grad_norm": 2.491915544143726, |
| "learning_rate": 8.443110315765168e-06, |
| "loss": 0.411, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.15641684222677557, |
| "grad_norm": 10.894040933901527, |
| "learning_rate": 8.437337643595221e-06, |
| "loss": 0.43, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.15699402614643157, |
| "grad_norm": 7.186343591522965, |
| "learning_rate": 8.431564971425274e-06, |
| "loss": 0.4119, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.15757121006608757, |
| "grad_norm": 11.15516113836552, |
| "learning_rate": 8.425792299255327e-06, |
| "loss": 0.4206, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.15814839398574357, |
| "grad_norm": 3.3849670049000626, |
| "learning_rate": 8.420019627085378e-06, |
| "loss": 0.4098, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.15872557790539957, |
| "grad_norm": 2.3478329302680176, |
| "learning_rate": 8.414246954915431e-06, |
| "loss": 0.4358, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.15930276182505557, |
| "grad_norm": 5.368826342998146, |
| "learning_rate": 8.408474282745483e-06, |
| "loss": 0.4114, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.15987994574471157, |
| "grad_norm": 2.778388386877281, |
| "learning_rate": 8.402701610575536e-06, |
| "loss": 0.4149, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.16045712966436754, |
| "grad_norm": 6.269723327157733, |
| "learning_rate": 8.396928938405589e-06, |
| "loss": 0.4059, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.16103431358402354, |
| "grad_norm": 3.51009855789622, |
| "learning_rate": 8.391156266235642e-06, |
| "loss": 0.4073, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.16161149750367954, |
| "grad_norm": 2.84216423707538, |
| "learning_rate": 8.385383594065693e-06, |
| "loss": 0.4249, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.16218868142333553, |
| "grad_norm": 4.458775881028344, |
| "learning_rate": 8.379610921895746e-06, |
| "loss": 0.4248, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.16276586534299153, |
| "grad_norm": 4.7717011271745875, |
| "learning_rate": 8.3738382497258e-06, |
| "loss": 0.4031, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.16334304926264753, |
| "grad_norm": 2.339306133151702, |
| "learning_rate": 8.368065577555852e-06, |
| "loss": 0.4089, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.16392023318230353, |
| "grad_norm": 2.242140863987944, |
| "learning_rate": 8.362292905385904e-06, |
| "loss": 0.4183, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.16449741710195953, |
| "grad_norm": 2.50085114867505, |
| "learning_rate": 8.356520233215957e-06, |
| "loss": 0.4209, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.16507460102161553, |
| "grad_norm": 2.426762382327375, |
| "learning_rate": 8.350747561046008e-06, |
| "loss": 0.4009, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.16565178494127153, |
| "grad_norm": 3.535990085619889, |
| "learning_rate": 8.344974888876063e-06, |
| "loss": 0.4145, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.16622896886092753, |
| "grad_norm": 1.9434196504603485, |
| "learning_rate": 8.339202216706114e-06, |
| "loss": 0.4029, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.16680615278058353, |
| "grad_norm": 3.647575465250371, |
| "learning_rate": 8.333429544536167e-06, |
| "loss": 0.4165, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.16738333670023953, |
| "grad_norm": 3.21796325121302, |
| "learning_rate": 8.327656872366218e-06, |
| "loss": 0.419, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.16796052061989553, |
| "grad_norm": 2.49775247695972, |
| "learning_rate": 8.321884200196271e-06, |
| "loss": 0.4179, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.16853770453955152, |
| "grad_norm": 1.992198339323461, |
| "learning_rate": 8.316111528026324e-06, |
| "loss": 0.4062, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.16911488845920752, |
| "grad_norm": 2.82923364634973, |
| "learning_rate": 8.310338855856377e-06, |
| "loss": 0.3997, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.16969207237886352, |
| "grad_norm": 2.414622342801528, |
| "learning_rate": 8.304566183686429e-06, |
| "loss": 0.3978, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.17026925629851952, |
| "grad_norm": 3.500929543134566, |
| "learning_rate": 8.298793511516482e-06, |
| "loss": 0.4214, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.17084644021817552, |
| "grad_norm": 4.533770301559384, |
| "learning_rate": 8.293020839346535e-06, |
| "loss": 0.4003, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.17142362413783152, |
| "grad_norm": 3.463538477874354, |
| "learning_rate": 8.287248167176588e-06, |
| "loss": 0.4049, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.17200080805748752, |
| "grad_norm": 2.6643523986040023, |
| "learning_rate": 8.281475495006639e-06, |
| "loss": 0.4157, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.17257799197714352, |
| "grad_norm": 5.1202436242415175, |
| "learning_rate": 8.275702822836692e-06, |
| "loss": 0.4119, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.17315517589679952, |
| "grad_norm": 4.375642688680107, |
| "learning_rate": 8.269930150666743e-06, |
| "loss": 0.4241, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.17373235981645552, |
| "grad_norm": 13.184415266130015, |
| "learning_rate": 8.264157478496796e-06, |
| "loss": 0.3931, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.17430954373611152, |
| "grad_norm": 3.5119329436961872, |
| "learning_rate": 8.25838480632685e-06, |
| "loss": 0.4332, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.17488672765576752, |
| "grad_norm": 2.057788212146213, |
| "learning_rate": 8.252612134156902e-06, |
| "loss": 0.4026, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.17546391157542351, |
| "grad_norm": 2.4025914280540293, |
| "learning_rate": 8.246839461986954e-06, |
| "loss": 0.4297, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.1760410954950795, |
| "grad_norm": 2.3273476646183187, |
| "learning_rate": 8.241066789817007e-06, |
| "loss": 0.4256, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.1766182794147355, |
| "grad_norm": 3.968034409940849, |
| "learning_rate": 8.23529411764706e-06, |
| "loss": 0.3911, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.1771954633343915, |
| "grad_norm": 3.5987187085577648, |
| "learning_rate": 8.229521445477113e-06, |
| "loss": 0.4137, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.1777726472540475, |
| "grad_norm": 1.6685524653169845, |
| "learning_rate": 8.223748773307164e-06, |
| "loss": 0.4115, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.1783498311737035, |
| "grad_norm": 15.206856747451255, |
| "learning_rate": 8.217976101137217e-06, |
| "loss": 0.3993, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.1789270150933595, |
| "grad_norm": 3.2248196017828685, |
| "learning_rate": 8.212203428967268e-06, |
| "loss": 0.4263, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.1795041990130155, |
| "grad_norm": 5.06230398614513, |
| "learning_rate": 8.206430756797322e-06, |
| "loss": 0.4241, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.1800813829326715, |
| "grad_norm": 24.83780832952265, |
| "learning_rate": 8.200658084627375e-06, |
| "loss": 0.4139, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.1806585668523275, |
| "grad_norm": 2.8321722632943094, |
| "learning_rate": 8.194885412457428e-06, |
| "loss": 0.4233, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.1812357507719835, |
| "grad_norm": 3.0260534676681727, |
| "learning_rate": 8.189112740287479e-06, |
| "loss": 0.4142, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.18181293469163948, |
| "grad_norm": 4.68099176672326, |
| "learning_rate": 8.183340068117532e-06, |
| "loss": 0.4118, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.18239011861129548, |
| "grad_norm": 11.310308197618612, |
| "learning_rate": 8.177567395947585e-06, |
| "loss": 0.415, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.18296730253095148, |
| "grad_norm": 4.572818458115979, |
| "learning_rate": 8.171794723777638e-06, |
| "loss": 0.4015, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.18354448645060747, |
| "grad_norm": 6.450421868687323, |
| "learning_rate": 8.16602205160769e-06, |
| "loss": 0.4263, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.18412167037026347, |
| "grad_norm": 3.4296934325228263, |
| "learning_rate": 8.160249379437742e-06, |
| "loss": 0.4124, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.18469885428991947, |
| "grad_norm": 9.959813392971029, |
| "learning_rate": 8.154476707267794e-06, |
| "loss": 0.4015, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.18527603820957547, |
| "grad_norm": 3.3029855110594695, |
| "learning_rate": 8.148704035097847e-06, |
| "loss": 0.4009, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.18585322212923147, |
| "grad_norm": 2.8097447076161273, |
| "learning_rate": 8.142931362927901e-06, |
| "loss": 0.4002, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.18643040604888747, |
| "grad_norm": 34.98571611934199, |
| "learning_rate": 8.137158690757953e-06, |
| "loss": 0.405, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.18700758996854347, |
| "grad_norm": 9.148426684066074, |
| "learning_rate": 8.131386018588006e-06, |
| "loss": 0.4044, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.18758477388819947, |
| "grad_norm": 4.173548086962396, |
| "learning_rate": 8.125613346418057e-06, |
| "loss": 0.4217, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.18816195780785547, |
| "grad_norm": 53.37411486094812, |
| "learning_rate": 8.11984067424811e-06, |
| "loss": 0.4138, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.18873914172751147, |
| "grad_norm": 2.3480379802050733, |
| "learning_rate": 8.114068002078163e-06, |
| "loss": 0.4033, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.18931632564716747, |
| "grad_norm": 2.221291345204337, |
| "learning_rate": 8.108295329908216e-06, |
| "loss": 0.3914, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.18989350956682347, |
| "grad_norm": 4.990540206022043, |
| "learning_rate": 8.102522657738267e-06, |
| "loss": 0.3985, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.19047069348647946, |
| "grad_norm": 4.025051683089965, |
| "learning_rate": 8.09674998556832e-06, |
| "loss": 0.415, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.19104787740613546, |
| "grad_norm": 2.5251837331957607, |
| "learning_rate": 8.090977313398373e-06, |
| "loss": 0.4133, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.19162506132579146, |
| "grad_norm": 2.380391015882718, |
| "learning_rate": 8.085204641228426e-06, |
| "loss": 0.3926, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.19220224524544746, |
| "grad_norm": 4.41017165770354, |
| "learning_rate": 8.079431969058478e-06, |
| "loss": 0.4211, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.19277942916510346, |
| "grad_norm": 2.3063890276035846, |
| "learning_rate": 8.07365929688853e-06, |
| "loss": 0.4138, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.19335661308475946, |
| "grad_norm": 4.271620366325266, |
| "learning_rate": 8.067886624718582e-06, |
| "loss": 0.4093, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.19393379700441546, |
| "grad_norm": 32.70807377417423, |
| "learning_rate": 8.062113952548635e-06, |
| "loss": 0.4035, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.19451098092407146, |
| "grad_norm": 2.4018179935448276, |
| "learning_rate": 8.056341280378688e-06, |
| "loss": 0.4126, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.19508816484372746, |
| "grad_norm": 3.1209658711505552, |
| "learning_rate": 8.050568608208741e-06, |
| "loss": 0.4073, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.19566534876338346, |
| "grad_norm": 3.6351629831980024, |
| "learning_rate": 8.044795936038793e-06, |
| "loss": 0.4041, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.19624253268303946, |
| "grad_norm": 3.652368336229329, |
| "learning_rate": 8.039023263868846e-06, |
| "loss": 0.4112, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.19681971660269545, |
| "grad_norm": 2.984404633116569, |
| "learning_rate": 8.033250591698899e-06, |
| "loss": 0.4134, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.19739690052235145, |
| "grad_norm": 2.4526774932392024, |
| "learning_rate": 8.027477919528952e-06, |
| "loss": 0.4011, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.19797408444200745, |
| "grad_norm": 4.571072305725846, |
| "learning_rate": 8.021705247359003e-06, |
| "loss": 0.4097, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.19855126836166345, |
| "grad_norm": 2.6003306321869895, |
| "learning_rate": 8.015932575189056e-06, |
| "loss": 0.406, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.19912845228131945, |
| "grad_norm": 4.171122512202554, |
| "learning_rate": 8.010159903019107e-06, |
| "loss": 0.4083, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.19970563620097545, |
| "grad_norm": 3.2250910237655006, |
| "learning_rate": 8.00438723084916e-06, |
| "loss": 0.415, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.20028282012063145, |
| "grad_norm": 4.871724300699273, |
| "learning_rate": 7.998614558679213e-06, |
| "loss": 0.4296, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.20086000404028745, |
| "grad_norm": 2.9738579217002887, |
| "learning_rate": 7.992841886509266e-06, |
| "loss": 0.4109, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.20143718795994345, |
| "grad_norm": 5.488557624180365, |
| "learning_rate": 7.987069214339318e-06, |
| "loss": 0.4203, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.20201437187959945, |
| "grad_norm": 6.30148557707432, |
| "learning_rate": 7.98129654216937e-06, |
| "loss": 0.4166, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.20259155579925545, |
| "grad_norm": 4.159946548536443, |
| "learning_rate": 7.975523869999424e-06, |
| "loss": 0.4126, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.20316873971891142, |
| "grad_norm": 15.334889223997754, |
| "learning_rate": 7.969751197829477e-06, |
| "loss": 0.3947, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.20374592363856742, |
| "grad_norm": 3.025285897677573, |
| "learning_rate": 7.963978525659528e-06, |
| "loss": 0.403, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.20432310755822342, |
| "grad_norm": 4.262956488693212, |
| "learning_rate": 7.958205853489581e-06, |
| "loss": 0.4066, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.20490029147787941, |
| "grad_norm": 2.5780055567960143, |
| "learning_rate": 7.952433181319632e-06, |
| "loss": 0.4032, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.20547747539753541, |
| "grad_norm": 4.956342345823864, |
| "learning_rate": 7.946660509149685e-06, |
| "loss": 0.4022, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.2060546593171914, |
| "grad_norm": 3.89966793053596, |
| "learning_rate": 7.940887836979738e-06, |
| "loss": 0.4059, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.2066318432368474, |
| "grad_norm": 2.598383914711163, |
| "learning_rate": 7.935115164809791e-06, |
| "loss": 0.3988, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.2072090271565034, |
| "grad_norm": 3.673402619499888, |
| "learning_rate": 7.929342492639843e-06, |
| "loss": 0.4117, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.2077862110761594, |
| "grad_norm": 5.7308049694271235, |
| "learning_rate": 7.923569820469896e-06, |
| "loss": 0.4008, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.2083633949958154, |
| "grad_norm": 7.935773567013425, |
| "learning_rate": 7.917797148299949e-06, |
| "loss": 0.3877, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.2089405789154714, |
| "grad_norm": 6.009048915243287, |
| "learning_rate": 7.912024476130002e-06, |
| "loss": 0.4065, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.2095177628351274, |
| "grad_norm": 2.95995624359623, |
| "learning_rate": 7.906251803960053e-06, |
| "loss": 0.401, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.2100949467547834, |
| "grad_norm": 3.1037746592103255, |
| "learning_rate": 7.900479131790106e-06, |
| "loss": 0.4013, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.2106721306744394, |
| "grad_norm": 6.029705929267264, |
| "learning_rate": 7.894706459620158e-06, |
| "loss": 0.4085, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.2112493145940954, |
| "grad_norm": 5.81081103172759, |
| "learning_rate": 7.888933787450212e-06, |
| "loss": 0.4062, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.2118264985137514, |
| "grad_norm": 6.6915581684773935, |
| "learning_rate": 7.883161115280264e-06, |
| "loss": 0.4146, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.2124036824334074, |
| "grad_norm": 4.483451936143915, |
| "learning_rate": 7.877388443110317e-06, |
| "loss": 0.3964, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.2129808663530634, |
| "grad_norm": 4.847478023953505, |
| "learning_rate": 7.871615770940368e-06, |
| "loss": 0.3968, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.2135580502727194, |
| "grad_norm": 4.734123259756348, |
| "learning_rate": 7.865843098770421e-06, |
| "loss": 0.411, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.2141352341923754, |
| "grad_norm": 67.93482554004888, |
| "learning_rate": 7.860070426600474e-06, |
| "loss": 0.4171, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.2147124181120314, |
| "grad_norm": 4.427723281232474, |
| "learning_rate": 7.854297754430527e-06, |
| "loss": 0.3901, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.2152896020316874, |
| "grad_norm": 5.970970375519135, |
| "learning_rate": 7.848525082260578e-06, |
| "loss": 0.392, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.2158667859513434, |
| "grad_norm": 3.364616367304386, |
| "learning_rate": 7.842752410090631e-06, |
| "loss": 0.4262, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.2164439698709994, |
| "grad_norm": 14.709875574791262, |
| "learning_rate": 7.836979737920684e-06, |
| "loss": 0.4189, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.2170211537906554, |
| "grad_norm": 34.85728037006575, |
| "learning_rate": 7.831207065750737e-06, |
| "loss": 0.4281, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.2175983377103114, |
| "grad_norm": 3.8349995808560835, |
| "learning_rate": 7.82543439358079e-06, |
| "loss": 0.4197, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.2181755216299674, |
| "grad_norm": 9.826012532840847, |
| "learning_rate": 7.819661721410842e-06, |
| "loss": 0.411, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.2187527055496234, |
| "grad_norm": 5.148779185873613, |
| "learning_rate": 7.813889049240895e-06, |
| "loss": 0.408, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.2193298894692794, |
| "grad_norm": 6.034690169401536, |
| "learning_rate": 7.808116377070946e-06, |
| "loss": 0.389, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.2199070733889354, |
| "grad_norm": 7.98093273312519, |
| "learning_rate": 7.802343704900999e-06, |
| "loss": 0.4087, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.2204842573085914, |
| "grad_norm": 20.795705430196968, |
| "learning_rate": 7.796571032731052e-06, |
| "loss": 0.4164, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.2210614412282474, |
| "grad_norm": 4.546394166434191, |
| "learning_rate": 7.790798360561105e-06, |
| "loss": 0.4198, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.2216386251479034, |
| "grad_norm": 2.8073314189395457, |
| "learning_rate": 7.785025688391156e-06, |
| "loss": 0.4201, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.2222158090675594, |
| "grad_norm": 24.430024193972454, |
| "learning_rate": 7.77925301622121e-06, |
| "loss": 0.3916, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.2227929929872154, |
| "grad_norm": 3.7314691801815525, |
| "learning_rate": 7.773480344051262e-06, |
| "loss": 0.4034, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.2233701769068714, |
| "grad_norm": 4.463974565740604, |
| "learning_rate": 7.767707671881316e-06, |
| "loss": 0.4177, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.2239473608265274, |
| "grad_norm": 3.935819483400635, |
| "learning_rate": 7.761934999711367e-06, |
| "loss": 0.4186, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.22452454474618336, |
| "grad_norm": 2.7465902804726343, |
| "learning_rate": 7.75616232754142e-06, |
| "loss": 0.4355, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.22510172866583936, |
| "grad_norm": 2.59329896630652, |
| "learning_rate": 7.750389655371471e-06, |
| "loss": 0.4139, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.22567891258549536, |
| "grad_norm": 2.8540002686300214, |
| "learning_rate": 7.744616983201524e-06, |
| "loss": 0.4062, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.22625609650515136, |
| "grad_norm": 6.48349060337823, |
| "learning_rate": 7.738844311031577e-06, |
| "loss": 0.4002, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.22683328042480735, |
| "grad_norm": 4.668616924928805, |
| "learning_rate": 7.73307163886163e-06, |
| "loss": 0.4041, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.22741046434446335, |
| "grad_norm": 6.539752215401261, |
| "learning_rate": 7.727298966691682e-06, |
| "loss": 0.3994, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.22798764826411935, |
| "grad_norm": 3.3595541678536156, |
| "learning_rate": 7.721526294521735e-06, |
| "loss": 0.3995, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.22856483218377535, |
| "grad_norm": 2.8441727594832886, |
| "learning_rate": 7.715753622351788e-06, |
| "loss": 0.4, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.22914201610343135, |
| "grad_norm": 2.849353128300574, |
| "learning_rate": 7.70998095018184e-06, |
| "loss": 0.4048, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.22971920002308735, |
| "grad_norm": 4.881214366450045, |
| "learning_rate": 7.704208278011892e-06, |
| "loss": 0.4054, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.23029638394274335, |
| "grad_norm": 15.46486836243805, |
| "learning_rate": 7.698435605841945e-06, |
| "loss": 0.3923, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.23087356786239935, |
| "grad_norm": 3.847585477525563, |
| "learning_rate": 7.692662933671996e-06, |
| "loss": 0.393, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.23145075178205535, |
| "grad_norm": 38.3399434241166, |
| "learning_rate": 7.686890261502051e-06, |
| "loss": 0.3926, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.23202793570171135, |
| "grad_norm": 2.6580858390650466, |
| "learning_rate": 7.681117589332102e-06, |
| "loss": 0.3948, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.23260511962136735, |
| "grad_norm": 4.466615319467433, |
| "learning_rate": 7.675344917162155e-06, |
| "loss": 0.402, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.23318230354102334, |
| "grad_norm": 6.7414838396917505, |
| "learning_rate": 7.669572244992207e-06, |
| "loss": 0.3904, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.23375948746067934, |
| "grad_norm": 4.03997703346681, |
| "learning_rate": 7.66379957282226e-06, |
| "loss": 0.3938, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.23433667138033534, |
| "grad_norm": 4.258742526842458, |
| "learning_rate": 7.658026900652313e-06, |
| "loss": 0.3998, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.23491385529999134, |
| "grad_norm": 5.100923170543839, |
| "learning_rate": 7.652254228482366e-06, |
| "loss": 0.3968, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.23549103921964734, |
| "grad_norm": 8.332021271422962, |
| "learning_rate": 7.646481556312417e-06, |
| "loss": 0.4017, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.23606822313930334, |
| "grad_norm": 5.001487618559788, |
| "learning_rate": 7.64070888414247e-06, |
| "loss": 0.3929, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.23664540705895934, |
| "grad_norm": 6.605470741420995, |
| "learning_rate": 7.634936211972523e-06, |
| "loss": 0.3995, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.23722259097861534, |
| "grad_norm": 4.352594377363156, |
| "learning_rate": 7.629163539802575e-06, |
| "loss": 0.4008, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.23779977489827134, |
| "grad_norm": 8.143604964743357, |
| "learning_rate": 7.6233908676326275e-06, |
| "loss": 0.3987, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.23837695881792734, |
| "grad_norm": 3.9869800783007427, |
| "learning_rate": 7.6176181954626805e-06, |
| "loss": 0.3874, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.23895414273758334, |
| "grad_norm": 3.4272782347037207, |
| "learning_rate": 7.611845523292733e-06, |
| "loss": 0.3869, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.23953132665723934, |
| "grad_norm": 2.964033904850173, |
| "learning_rate": 7.606072851122786e-06, |
| "loss": 0.3962, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.24010851057689533, |
| "grad_norm": 55.68370603421082, |
| "learning_rate": 7.600300178952838e-06, |
| "loss": 0.3981, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.24068569449655133, |
| "grad_norm": 4.402682573412378, |
| "learning_rate": 7.594527506782891e-06, |
| "loss": 0.4152, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.24126287841620733, |
| "grad_norm": 2.8760794787596997, |
| "learning_rate": 7.588754834612942e-06, |
| "loss": 0.3881, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.24184006233586333, |
| "grad_norm": 2.0239240283122575, |
| "learning_rate": 7.582982162442995e-06, |
| "loss": 0.4023, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.24241724625551933, |
| "grad_norm": 2.5930742840295986, |
| "learning_rate": 7.577209490273047e-06, |
| "loss": 0.4146, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.24299443017517533, |
| "grad_norm": 12.25806910583576, |
| "learning_rate": 7.5714368181031e-06, |
| "loss": 0.4268, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.24357161409483133, |
| "grad_norm": 9.121606156996025, |
| "learning_rate": 7.565664145933153e-06, |
| "loss": 0.3926, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.24414879801448733, |
| "grad_norm": 9.12741140098973, |
| "learning_rate": 7.559891473763206e-06, |
| "loss": 0.3908, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.24472598193414333, |
| "grad_norm": 2.988966863298224, |
| "learning_rate": 7.554118801593258e-06, |
| "loss": 0.4077, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.24530316585379933, |
| "grad_norm": 4.755614024652895, |
| "learning_rate": 7.548346129423311e-06, |
| "loss": 0.3923, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.2458803497734553, |
| "grad_norm": 3.286123151497483, |
| "learning_rate": 7.542573457253363e-06, |
| "loss": 0.3842, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.2464575336931113, |
| "grad_norm": 24.75179268960897, |
| "learning_rate": 7.536800785083416e-06, |
| "loss": 0.4348, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.2470347176127673, |
| "grad_norm": 2.1639276020923064, |
| "learning_rate": 7.531028112913469e-06, |
| "loss": 0.4064, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.2476119015324233, |
| "grad_norm": 1.9508786971501029, |
| "learning_rate": 7.525255440743521e-06, |
| "loss": 0.4085, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.2481890854520793, |
| "grad_norm": 1.8580731020746826, |
| "learning_rate": 7.519482768573574e-06, |
| "loss": 0.4111, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.2487662693717353, |
| "grad_norm": 2.6928452549457407, |
| "learning_rate": 7.5137100964036255e-06, |
| "loss": 0.4115, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.2493434532913913, |
| "grad_norm": 2.054083128359146, |
| "learning_rate": 7.5079374242336786e-06, |
| "loss": 0.4121, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.2499206372110473, |
| "grad_norm": 2.6316000209561663, |
| "learning_rate": 7.502164752063731e-06, |
| "loss": 0.4186, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.2504978211307033, |
| "grad_norm": 1.8771833797620388, |
| "learning_rate": 7.496392079893784e-06, |
| "loss": 0.4001, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.2510750050503593, |
| "grad_norm": 9.221358255158016, |
| "learning_rate": 7.490619407723836e-06, |
| "loss": 0.4177, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.2516521889700153, |
| "grad_norm": 1.8539444504213582, |
| "learning_rate": 7.484846735553889e-06, |
| "loss": 0.4055, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.2522293728896713, |
| "grad_norm": 2.284022523834839, |
| "learning_rate": 7.479074063383941e-06, |
| "loss": 0.4228, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.2528065568093273, |
| "grad_norm": 1.8613795057409426, |
| "learning_rate": 7.473301391213994e-06, |
| "loss": 0.3886, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.2533837407289833, |
| "grad_norm": 3.7779942998503855, |
| "learning_rate": 7.467528719044046e-06, |
| "loss": 0.4158, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.2539609246486393, |
| "grad_norm": 3.7840065627156365, |
| "learning_rate": 7.461756046874099e-06, |
| "loss": 0.4187, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.2545381085682953, |
| "grad_norm": 5.099586666363089, |
| "learning_rate": 7.455983374704151e-06, |
| "loss": 0.4098, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.2551152924879513, |
| "grad_norm": 2.6870891278248337, |
| "learning_rate": 7.450210702534204e-06, |
| "loss": 0.3986, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.2556924764076073, |
| "grad_norm": 2.7248868412027583, |
| "learning_rate": 7.444438030364256e-06, |
| "loss": 0.4038, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.2562696603272633, |
| "grad_norm": 3.714403433710303, |
| "learning_rate": 7.438665358194309e-06, |
| "loss": 0.4045, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.2568468442469193, |
| "grad_norm": 2.9240340589059644, |
| "learning_rate": 7.432892686024361e-06, |
| "loss": 0.3911, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.2574240281665753, |
| "grad_norm": 4.331854988527969, |
| "learning_rate": 7.427120013854414e-06, |
| "loss": 0.394, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.2580012120862313, |
| "grad_norm": 6.84340943547103, |
| "learning_rate": 7.421347341684466e-06, |
| "loss": 0.3884, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.2585783960058873, |
| "grad_norm": 26.71610826157837, |
| "learning_rate": 7.415574669514519e-06, |
| "loss": 0.4126, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.2591555799255433, |
| "grad_norm": 3.691538028091923, |
| "learning_rate": 7.4098019973445714e-06, |
| "loss": 0.3997, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.2597327638451993, |
| "grad_norm": 3.6466304992174527, |
| "learning_rate": 7.4040293251746245e-06, |
| "loss": 0.387, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.2603099477648553, |
| "grad_norm": 3.069337367045407, |
| "learning_rate": 7.398256653004677e-06, |
| "loss": 0.3925, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.2608871316845113, |
| "grad_norm": 17.7941278304272, |
| "learning_rate": 7.39248398083473e-06, |
| "loss": 0.3904, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.2614643156041673, |
| "grad_norm": 4.010302222583594, |
| "learning_rate": 7.386711308664781e-06, |
| "loss": 0.4, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.2620414995238233, |
| "grad_norm": 2.891669458141575, |
| "learning_rate": 7.380938636494834e-06, |
| "loss": 0.3916, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.2626186834434793, |
| "grad_norm": 3.5781925847929736, |
| "learning_rate": 7.375165964324886e-06, |
| "loss": 0.3901, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.26319586736313527, |
| "grad_norm": 2.8217409668695814, |
| "learning_rate": 7.369393292154939e-06, |
| "loss": 0.41, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.26377305128279127, |
| "grad_norm": 4.326569507185014, |
| "learning_rate": 7.363620619984991e-06, |
| "loss": 0.4091, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.26435023520244727, |
| "grad_norm": 5.515247686980751, |
| "learning_rate": 7.357847947815044e-06, |
| "loss": 0.4203, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.26492741912210327, |
| "grad_norm": 6.561612435080219, |
| "learning_rate": 7.3520752756450966e-06, |
| "loss": 0.3951, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.26550460304175927, |
| "grad_norm": 2.241772546310698, |
| "learning_rate": 7.34630260347515e-06, |
| "loss": 0.3985, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.26608178696141527, |
| "grad_norm": 2.524827575292416, |
| "learning_rate": 7.340529931305202e-06, |
| "loss": 0.3981, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.26665897088107127, |
| "grad_norm": 2.4686565154848106, |
| "learning_rate": 7.334757259135255e-06, |
| "loss": 0.3948, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.26723615480072727, |
| "grad_norm": 5.055286394352697, |
| "learning_rate": 7.328984586965306e-06, |
| "loss": 0.3966, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.26781333872038326, |
| "grad_norm": 2.4105713306719023, |
| "learning_rate": 7.323211914795359e-06, |
| "loss": 0.3896, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.26839052264003926, |
| "grad_norm": 8.884358381031186, |
| "learning_rate": 7.317439242625411e-06, |
| "loss": 0.4043, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.26896770655969526, |
| "grad_norm": 3.3786577911171465, |
| "learning_rate": 7.311666570455464e-06, |
| "loss": 0.4087, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.26954489047935126, |
| "grad_norm": 2.241166004757874, |
| "learning_rate": 7.3058938982855165e-06, |
| "loss": 0.3961, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.27012207439900726, |
| "grad_norm": 3.7398767727731195, |
| "learning_rate": 7.3001212261155695e-06, |
| "loss": 0.4025, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.27069925831866326, |
| "grad_norm": 2.4627601331961024, |
| "learning_rate": 7.294348553945622e-06, |
| "loss": 0.3967, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.27127644223831926, |
| "grad_norm": 2.9400014965222243, |
| "learning_rate": 7.288575881775675e-06, |
| "loss": 0.398, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.27185362615797526, |
| "grad_norm": 2.371642161881622, |
| "learning_rate": 7.282803209605727e-06, |
| "loss": 0.3878, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.27243081007763126, |
| "grad_norm": 2.1217448647861943, |
| "learning_rate": 7.27703053743578e-06, |
| "loss": 0.3961, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.27300799399728726, |
| "grad_norm": 6.480519075871927, |
| "learning_rate": 7.271257865265832e-06, |
| "loss": 0.3904, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.27358517791694326, |
| "grad_norm": 3.7782562668503292, |
| "learning_rate": 7.265485193095885e-06, |
| "loss": 0.4015, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.27416236183659926, |
| "grad_norm": 27.34719190524476, |
| "learning_rate": 7.259712520925936e-06, |
| "loss": 0.3995, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.27473954575625525, |
| "grad_norm": 2.5984426690848044, |
| "learning_rate": 7.2539398487559894e-06, |
| "loss": 0.4091, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.27531672967591125, |
| "grad_norm": 2.269931164816007, |
| "learning_rate": 7.248167176586042e-06, |
| "loss": 0.391, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.27589391359556725, |
| "grad_norm": 2.0949435472109443, |
| "learning_rate": 7.242394504416095e-06, |
| "loss": 0.3867, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.27647109751522325, |
| "grad_norm": 2.1688865736563794, |
| "learning_rate": 7.236621832246147e-06, |
| "loss": 0.3819, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.27704828143487925, |
| "grad_norm": 3.6275115123885744, |
| "learning_rate": 7.2308491600762e-06, |
| "loss": 0.403, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.27762546535453525, |
| "grad_norm": 2.7044630613298204, |
| "learning_rate": 7.225076487906253e-06, |
| "loss": 0.401, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.2782026492741912, |
| "grad_norm": 3.6256795573786853, |
| "learning_rate": 7.219303815736305e-06, |
| "loss": 0.4008, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.2787798331938472, |
| "grad_norm": 2.3560879474365595, |
| "learning_rate": 7.213531143566358e-06, |
| "loss": 0.382, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.2793570171135032, |
| "grad_norm": 6.363609259389832, |
| "learning_rate": 7.20775847139641e-06, |
| "loss": 0.3889, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.2799342010331592, |
| "grad_norm": 2.447343796783594, |
| "learning_rate": 7.201985799226463e-06, |
| "loss": 0.3819, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.2805113849528152, |
| "grad_norm": 4.454942195776334, |
| "learning_rate": 7.1962131270565146e-06, |
| "loss": 0.4029, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.2810885688724712, |
| "grad_norm": 4.7341175135353675, |
| "learning_rate": 7.1904404548865684e-06, |
| "loss": 0.3962, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.2816657527921272, |
| "grad_norm": 1.8389081163765115, |
| "learning_rate": 7.18466778271662e-06, |
| "loss": 0.3962, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.2822429367117832, |
| "grad_norm": 3.2296499793817612, |
| "learning_rate": 7.178895110546673e-06, |
| "loss": 0.3942, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.2828201206314392, |
| "grad_norm": 3.947947823306894, |
| "learning_rate": 7.173122438376725e-06, |
| "loss": 0.3976, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.2833973045510952, |
| "grad_norm": 3.562193655967395, |
| "learning_rate": 7.167349766206778e-06, |
| "loss": 0.3876, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.2839744884707512, |
| "grad_norm": 2.0441973593521086, |
| "learning_rate": 7.16157709403683e-06, |
| "loss": 0.404, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.2845516723904072, |
| "grad_norm": 2.3917393046670075, |
| "learning_rate": 7.155804421866883e-06, |
| "loss": 0.4169, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.2851288563100632, |
| "grad_norm": 2.328855705516118, |
| "learning_rate": 7.150031749696935e-06, |
| "loss": 0.3886, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.2857060402297192, |
| "grad_norm": 5.150276479313902, |
| "learning_rate": 7.144259077526988e-06, |
| "loss": 0.401, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.2862832241493752, |
| "grad_norm": 2.4042125393360907, |
| "learning_rate": 7.1384864053570405e-06, |
| "loss": 0.4003, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.2868604080690312, |
| "grad_norm": 2.8838052574781257, |
| "learning_rate": 7.1327137331870936e-06, |
| "loss": 0.3992, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.2874375919886872, |
| "grad_norm": 3.3196900990562646, |
| "learning_rate": 7.126941061017145e-06, |
| "loss": 0.4025, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.2880147759083432, |
| "grad_norm": 5.299768426314854, |
| "learning_rate": 7.121168388847198e-06, |
| "loss": 0.3994, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.2885919598279992, |
| "grad_norm": 27.899807155688983, |
| "learning_rate": 7.11539571667725e-06, |
| "loss": 0.402, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.2891691437476552, |
| "grad_norm": 2.2901385928116484, |
| "learning_rate": 7.109623044507303e-06, |
| "loss": 0.3936, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.2897463276673112, |
| "grad_norm": 5.293699045227353, |
| "learning_rate": 7.103850372337355e-06, |
| "loss": 0.3977, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.2903235115869672, |
| "grad_norm": 2.2638394900698624, |
| "learning_rate": 7.098077700167408e-06, |
| "loss": 0.3932, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.2909006955066232, |
| "grad_norm": 2.2021953017040317, |
| "learning_rate": 7.0923050279974605e-06, |
| "loss": 0.4063, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.2914778794262792, |
| "grad_norm": 3.0086020205487363, |
| "learning_rate": 7.0865323558275135e-06, |
| "loss": 0.3948, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.2920550633459352, |
| "grad_norm": 2.1426519571708615, |
| "learning_rate": 7.080759683657566e-06, |
| "loss": 0.3967, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.29263224726559117, |
| "grad_norm": 2.396350457849943, |
| "learning_rate": 7.074987011487619e-06, |
| "loss": 0.3699, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.29320943118524717, |
| "grad_norm": 48.09761207732283, |
| "learning_rate": 7.06921433931767e-06, |
| "loss": 0.3783, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.29378661510490317, |
| "grad_norm": 1.9858233601346664, |
| "learning_rate": 7.063441667147724e-06, |
| "loss": 0.3885, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.29436379902455917, |
| "grad_norm": 1.8720378976655367, |
| "learning_rate": 7.057668994977775e-06, |
| "loss": 0.408, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.29494098294421517, |
| "grad_norm": 1.9939149039664568, |
| "learning_rate": 7.051896322807828e-06, |
| "loss": 0.3954, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.29551816686387117, |
| "grad_norm": 1.654582540560213, |
| "learning_rate": 7.04612365063788e-06, |
| "loss": 0.3914, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.29609535078352717, |
| "grad_norm": 3.06484715205326, |
| "learning_rate": 7.040350978467933e-06, |
| "loss": 0.3941, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.29667253470318317, |
| "grad_norm": 4.663889722159032, |
| "learning_rate": 7.034578306297986e-06, |
| "loss": 0.3864, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.29724971862283917, |
| "grad_norm": 7.729000917336516, |
| "learning_rate": 7.028805634128039e-06, |
| "loss": 0.383, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.29782690254249516, |
| "grad_norm": 3.2930128906393357, |
| "learning_rate": 7.023032961958091e-06, |
| "loss": 0.4141, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.29840408646215116, |
| "grad_norm": 2.410906648107205, |
| "learning_rate": 7.017260289788144e-06, |
| "loss": 0.3954, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.29898127038180716, |
| "grad_norm": 2.334082494973446, |
| "learning_rate": 7.011487617618196e-06, |
| "loss": 0.3946, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.29955845430146316, |
| "grad_norm": 2.5351433587502568, |
| "learning_rate": 7.005714945448249e-06, |
| "loss": 0.3793, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.30013563822111916, |
| "grad_norm": 4.293591510370919, |
| "learning_rate": 6.9999422732783e-06, |
| "loss": 0.3932, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.30071282214077516, |
| "grad_norm": 2.7812977150432032, |
| "learning_rate": 6.994169601108353e-06, |
| "loss": 0.4031, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.30129000606043116, |
| "grad_norm": 2.6445327211143392, |
| "learning_rate": 6.9883969289384055e-06, |
| "loss": 0.3971, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.30186718998008716, |
| "grad_norm": 3.667055707656093, |
| "learning_rate": 6.9826242567684585e-06, |
| "loss": 0.3863, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.30244437389974316, |
| "grad_norm": 7.251793531417223, |
| "learning_rate": 6.976851584598511e-06, |
| "loss": 0.3957, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.30302155781939916, |
| "grad_norm": 2.3512832022952415, |
| "learning_rate": 6.971078912428564e-06, |
| "loss": 0.3795, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.30359874173905516, |
| "grad_norm": 3.8467854833379103, |
| "learning_rate": 6.965306240258616e-06, |
| "loss": 0.4022, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.30417592565871115, |
| "grad_norm": 3.452869510119873, |
| "learning_rate": 6.959533568088669e-06, |
| "loss": 0.367, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.30475310957836715, |
| "grad_norm": 1.7928454544936765, |
| "learning_rate": 6.953760895918721e-06, |
| "loss": 0.3835, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.30533029349802315, |
| "grad_norm": 3.3141188092724057, |
| "learning_rate": 6.947988223748774e-06, |
| "loss": 0.3841, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.30590747741767915, |
| "grad_norm": 7.1200801655892905, |
| "learning_rate": 6.942215551578826e-06, |
| "loss": 0.3851, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.30648466133733515, |
| "grad_norm": 2.6078657985349563, |
| "learning_rate": 6.936442879408879e-06, |
| "loss": 0.3851, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.30706184525699115, |
| "grad_norm": 3.8742908966217873, |
| "learning_rate": 6.930670207238931e-06, |
| "loss": 0.3657, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.30763902917664715, |
| "grad_norm": 2.0698861633639885, |
| "learning_rate": 6.924897535068984e-06, |
| "loss": 0.3911, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.30821621309630315, |
| "grad_norm": 2.5491051422292412, |
| "learning_rate": 6.919124862899037e-06, |
| "loss": 0.3765, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.30879339701595915, |
| "grad_norm": 3.734949482545124, |
| "learning_rate": 6.913352190729089e-06, |
| "loss": 0.3895, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.30937058093561515, |
| "grad_norm": 3.6071556886180356, |
| "learning_rate": 6.907579518559142e-06, |
| "loss": 0.3855, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.30994776485527115, |
| "grad_norm": 2.048967073465003, |
| "learning_rate": 6.901806846389194e-06, |
| "loss": 0.3955, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.31052494877492715, |
| "grad_norm": 3.6739243647918016, |
| "learning_rate": 6.896034174219247e-06, |
| "loss": 0.3886, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.31110213269458314, |
| "grad_norm": 6.86781835267949, |
| "learning_rate": 6.890261502049299e-06, |
| "loss": 0.3865, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.31167931661423914, |
| "grad_norm": 2.3848433309003445, |
| "learning_rate": 6.884488829879352e-06, |
| "loss": 0.382, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.31225650053389514, |
| "grad_norm": 4.973233732358959, |
| "learning_rate": 6.8787161577094044e-06, |
| "loss": 0.3902, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.31283368445355114, |
| "grad_norm": 4.047034417439155, |
| "learning_rate": 6.8729434855394575e-06, |
| "loss": 0.3848, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.31341086837320714, |
| "grad_norm": 2.0502326573281464, |
| "learning_rate": 6.867170813369509e-06, |
| "loss": 0.3775, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.31398805229286314, |
| "grad_norm": 2.3429460144586747, |
| "learning_rate": 6.861398141199563e-06, |
| "loss": 0.3854, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.31456523621251914, |
| "grad_norm": 2.366729120218485, |
| "learning_rate": 6.855625469029614e-06, |
| "loss": 0.392, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.31514242013217514, |
| "grad_norm": 2.846824703607735, |
| "learning_rate": 6.849852796859667e-06, |
| "loss": 0.3899, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.31571960405183114, |
| "grad_norm": 4.059470208038008, |
| "learning_rate": 6.844080124689719e-06, |
| "loss": 0.3824, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.31629678797148714, |
| "grad_norm": 3.060450364883157, |
| "learning_rate": 6.838307452519772e-06, |
| "loss": 0.386, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.31687397189114314, |
| "grad_norm": 6.0419702046843735, |
| "learning_rate": 6.832534780349824e-06, |
| "loss": 0.3989, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.31745115581079913, |
| "grad_norm": 148.73139965795136, |
| "learning_rate": 6.826762108179877e-06, |
| "loss": 0.3972, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.31802833973045513, |
| "grad_norm": 4.457525994355414, |
| "learning_rate": 6.8209894360099296e-06, |
| "loss": 0.3917, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.31860552365011113, |
| "grad_norm": 2.172441867365423, |
| "learning_rate": 6.815216763839983e-06, |
| "loss": 0.3877, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.31918270756976713, |
| "grad_norm": 3.287076044270403, |
| "learning_rate": 6.809444091670035e-06, |
| "loss": 0.3697, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.31975989148942313, |
| "grad_norm": 2.0889311701748747, |
| "learning_rate": 6.803671419500088e-06, |
| "loss": 0.3744, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.32033707540907913, |
| "grad_norm": 3.2176355512083616, |
| "learning_rate": 6.797898747330139e-06, |
| "loss": 0.3936, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.3209142593287351, |
| "grad_norm": 2.16842859391982, |
| "learning_rate": 6.792126075160192e-06, |
| "loss": 0.3945, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.3214914432483911, |
| "grad_norm": 3.8670603387604134, |
| "learning_rate": 6.786353402990244e-06, |
| "loss": 0.3883, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.3220686271680471, |
| "grad_norm": 16.096137517550215, |
| "learning_rate": 6.780580730820297e-06, |
| "loss": 0.3982, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.32264581108770307, |
| "grad_norm": 3.253563357226522, |
| "learning_rate": 6.7748080586503495e-06, |
| "loss": 0.3971, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.32322299500735907, |
| "grad_norm": 5.849409027392066, |
| "learning_rate": 6.7690353864804025e-06, |
| "loss": 0.3966, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.32380017892701507, |
| "grad_norm": 4.446139033898797, |
| "learning_rate": 6.763262714310455e-06, |
| "loss": 0.3871, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.32437736284667107, |
| "grad_norm": 2.4319593332542953, |
| "learning_rate": 6.757490042140508e-06, |
| "loss": 0.381, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.32495454676632707, |
| "grad_norm": 4.734141112466146, |
| "learning_rate": 6.75171736997056e-06, |
| "loss": 0.3688, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.32553173068598307, |
| "grad_norm": 3.0227214340364386, |
| "learning_rate": 6.745944697800613e-06, |
| "loss": 0.3939, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.32610891460563907, |
| "grad_norm": 6.9528804097069274, |
| "learning_rate": 6.740172025630664e-06, |
| "loss": 0.3859, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.32668609852529507, |
| "grad_norm": 2.4438240264660527, |
| "learning_rate": 6.734399353460718e-06, |
| "loss": 0.3929, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.32726328244495106, |
| "grad_norm": 7.001401722934106, |
| "learning_rate": 6.728626681290769e-06, |
| "loss": 0.3878, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.32784046636460706, |
| "grad_norm": 4.631134068889566, |
| "learning_rate": 6.7228540091208224e-06, |
| "loss": 0.3886, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.32841765028426306, |
| "grad_norm": 3.0416584434332274, |
| "learning_rate": 6.717081336950875e-06, |
| "loss": 0.3732, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.32899483420391906, |
| "grad_norm": 3.189118603447828, |
| "learning_rate": 6.711308664780928e-06, |
| "loss": 0.3788, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.32957201812357506, |
| "grad_norm": 4.412271751435158, |
| "learning_rate": 6.70553599261098e-06, |
| "loss": 0.3903, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.33014920204323106, |
| "grad_norm": 5.781124605717443, |
| "learning_rate": 6.699763320441033e-06, |
| "loss": 0.3742, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.33072638596288706, |
| "grad_norm": 2.38240681303682, |
| "learning_rate": 6.693990648271085e-06, |
| "loss": 0.3846, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.33130356988254306, |
| "grad_norm": 5.627940078972001, |
| "learning_rate": 6.688217976101138e-06, |
| "loss": 0.383, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.33188075380219906, |
| "grad_norm": 2.5562400913295695, |
| "learning_rate": 6.68244530393119e-06, |
| "loss": 0.388, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.33245793772185506, |
| "grad_norm": 2.009018555010131, |
| "learning_rate": 6.676672631761243e-06, |
| "loss": 0.3863, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.33303512164151106, |
| "grad_norm": 2.6584190178994223, |
| "learning_rate": 6.6708999595912945e-06, |
| "loss": 0.382, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.33361230556116706, |
| "grad_norm": 1.6637756869209672, |
| "learning_rate": 6.6651272874213476e-06, |
| "loss": 0.384, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.33418948948082305, |
| "grad_norm": 2.3195781804624174, |
| "learning_rate": 6.6593546152514e-06, |
| "loss": 0.3766, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.33476667340047905, |
| "grad_norm": 3.760084084073311, |
| "learning_rate": 6.653581943081453e-06, |
| "loss": 0.4035, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.33534385732013505, |
| "grad_norm": 2.1527295119213607, |
| "learning_rate": 6.647809270911505e-06, |
| "loss": 0.3746, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.33592104123979105, |
| "grad_norm": 2.50518271064483, |
| "learning_rate": 6.642036598741558e-06, |
| "loss": 0.3788, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.33649822515944705, |
| "grad_norm": 3.4692947058918895, |
| "learning_rate": 6.63626392657161e-06, |
| "loss": 0.3893, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.33707540907910305, |
| "grad_norm": 2.512775623033029, |
| "learning_rate": 6.630491254401663e-06, |
| "loss": 0.3719, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.33765259299875905, |
| "grad_norm": 1.9666671304858914, |
| "learning_rate": 6.624718582231716e-06, |
| "loss": 0.3785, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.33822977691841505, |
| "grad_norm": 2.724605715859374, |
| "learning_rate": 6.618945910061768e-06, |
| "loss": 0.4004, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.33880696083807105, |
| "grad_norm": 2.7489712656132044, |
| "learning_rate": 6.613173237891821e-06, |
| "loss": 0.3771, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.33938414475772705, |
| "grad_norm": 2.599485813176942, |
| "learning_rate": 6.6074005657218735e-06, |
| "loss": 0.4003, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.33996132867738305, |
| "grad_norm": 2.746831225729797, |
| "learning_rate": 6.6016278935519266e-06, |
| "loss": 0.3699, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.34053851259703904, |
| "grad_norm": 10.192368895005096, |
| "learning_rate": 6.595855221381978e-06, |
| "loss": 0.3825, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.34111569651669504, |
| "grad_norm": 2.3362074486231785, |
| "learning_rate": 6.590082549212031e-06, |
| "loss": 0.3816, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.34169288043635104, |
| "grad_norm": 3.2505345689597194, |
| "learning_rate": 6.584309877042083e-06, |
| "loss": 0.4147, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.34227006435600704, |
| "grad_norm": 2.771813125028045, |
| "learning_rate": 6.578537204872136e-06, |
| "loss": 0.3822, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.34284724827566304, |
| "grad_norm": 2.600143172725041, |
| "learning_rate": 6.572764532702188e-06, |
| "loss": 0.3718, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.34342443219531904, |
| "grad_norm": 3.904138501463535, |
| "learning_rate": 6.566991860532241e-06, |
| "loss": 0.3886, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.34400161611497504, |
| "grad_norm": 5.0319871188012515, |
| "learning_rate": 6.5612191883622935e-06, |
| "loss": 0.3877, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.34457880003463104, |
| "grad_norm": 13.625359588311552, |
| "learning_rate": 6.5554465161923465e-06, |
| "loss": 0.3687, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.34515598395428704, |
| "grad_norm": 2.920042053925227, |
| "learning_rate": 6.549673844022399e-06, |
| "loss": 0.3859, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.34573316787394304, |
| "grad_norm": 3.5277767201369223, |
| "learning_rate": 6.543901171852452e-06, |
| "loss": 0.3789, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.34631035179359904, |
| "grad_norm": 2.7899995578571426, |
| "learning_rate": 6.538128499682503e-06, |
| "loss": 0.3668, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.34688753571325504, |
| "grad_norm": 5.64453785605591, |
| "learning_rate": 6.532355827512557e-06, |
| "loss": 0.3824, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.34746471963291103, |
| "grad_norm": 3.440174338768187, |
| "learning_rate": 6.526583155342608e-06, |
| "loss": 0.3844, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.34804190355256703, |
| "grad_norm": 3.9486486049020635, |
| "learning_rate": 6.520810483172661e-06, |
| "loss": 0.3897, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.34861908747222303, |
| "grad_norm": 7.253991235141298, |
| "learning_rate": 6.515037811002713e-06, |
| "loss": 0.3677, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.34919627139187903, |
| "grad_norm": 7.656685618930045, |
| "learning_rate": 6.509265138832766e-06, |
| "loss": 0.388, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.34977345531153503, |
| "grad_norm": 2.213875264653562, |
| "learning_rate": 6.503492466662819e-06, |
| "loss": 0.3851, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.35035063923119103, |
| "grad_norm": 5.474678610165808, |
| "learning_rate": 6.497719794492872e-06, |
| "loss": 0.3777, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.35092782315084703, |
| "grad_norm": 4.538506198098333, |
| "learning_rate": 6.491947122322924e-06, |
| "loss": 0.3606, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.35150500707050303, |
| "grad_norm": 2.7367624612828627, |
| "learning_rate": 6.486174450152977e-06, |
| "loss": 0.3824, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.352082190990159, |
| "grad_norm": 3.9600996597048432, |
| "learning_rate": 6.480401777983029e-06, |
| "loss": 0.3865, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.352659374909815, |
| "grad_norm": 6.138903729575434, |
| "learning_rate": 6.474629105813082e-06, |
| "loss": 0.3836, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.353236558829471, |
| "grad_norm": 3.1110082739843676, |
| "learning_rate": 6.468856433643133e-06, |
| "loss": 0.3941, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.353813742749127, |
| "grad_norm": 3.0125957250660997, |
| "learning_rate": 6.463083761473186e-06, |
| "loss": 0.3907, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.354390926668783, |
| "grad_norm": 5.777332948819984, |
| "learning_rate": 6.4573110893032385e-06, |
| "loss": 0.3926, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.354968110588439, |
| "grad_norm": 10.69646287431515, |
| "learning_rate": 6.4515384171332915e-06, |
| "loss": 0.3892, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.355545294508095, |
| "grad_norm": 5.389336302495197, |
| "learning_rate": 6.445765744963344e-06, |
| "loss": 0.3957, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.356122478427751, |
| "grad_norm": 12.802235491479053, |
| "learning_rate": 6.439993072793397e-06, |
| "loss": 0.3742, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.356699662347407, |
| "grad_norm": 4.196338750242119, |
| "learning_rate": 6.434220400623449e-06, |
| "loss": 0.3878, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.357276846267063, |
| "grad_norm": 3.7684375534000276, |
| "learning_rate": 6.428447728453502e-06, |
| "loss": 0.3724, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.357854030186719, |
| "grad_norm": 2.4825477710744446, |
| "learning_rate": 6.422675056283554e-06, |
| "loss": 0.3665, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.358431214106375, |
| "grad_norm": 2.5273547043428244, |
| "learning_rate": 6.416902384113607e-06, |
| "loss": 0.3682, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.359008398026031, |
| "grad_norm": 3.3691141387535453, |
| "learning_rate": 6.4111297119436584e-06, |
| "loss": 0.3884, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.359585581945687, |
| "grad_norm": 3.986041227799815, |
| "learning_rate": 6.405357039773712e-06, |
| "loss": 0.3793, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.360162765865343, |
| "grad_norm": 4.388692532796717, |
| "learning_rate": 6.399584367603764e-06, |
| "loss": 0.3826, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.360739949784999, |
| "grad_norm": 5.61124433419293, |
| "learning_rate": 6.393811695433817e-06, |
| "loss": 0.3725, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.361317133704655, |
| "grad_norm": 5.79217310796387, |
| "learning_rate": 6.388039023263869e-06, |
| "loss": 0.3803, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.361894317624311, |
| "grad_norm": 3.3092133128777017, |
| "learning_rate": 6.382266351093922e-06, |
| "loss": 0.3727, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.362471501543967, |
| "grad_norm": 2.6436967571480308, |
| "learning_rate": 6.376493678923974e-06, |
| "loss": 0.3869, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.363048685463623, |
| "grad_norm": 4.870192599092706, |
| "learning_rate": 6.370721006754027e-06, |
| "loss": 0.3711, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.36362586938327895, |
| "grad_norm": 6.412850489358521, |
| "learning_rate": 6.364948334584079e-06, |
| "loss": 0.3778, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.36420305330293495, |
| "grad_norm": 6.723734658950526, |
| "learning_rate": 6.359175662414132e-06, |
| "loss": 0.375, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.36478023722259095, |
| "grad_norm": 2.9855811704461916, |
| "learning_rate": 6.353402990244184e-06, |
| "loss": 0.3968, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.36535742114224695, |
| "grad_norm": 4.253318369577758, |
| "learning_rate": 6.3476303180742374e-06, |
| "loss": 0.3835, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.36593460506190295, |
| "grad_norm": 3.350507256204714, |
| "learning_rate": 6.341857645904289e-06, |
| "loss": 0.3934, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.36651178898155895, |
| "grad_norm": 7.6596745260829024, |
| "learning_rate": 6.336084973734342e-06, |
| "loss": 0.3695, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.36708897290121495, |
| "grad_norm": 5.170501025053992, |
| "learning_rate": 6.330312301564394e-06, |
| "loss": 0.3822, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.36766615682087095, |
| "grad_norm": 3.3572394366722342, |
| "learning_rate": 6.324539629394447e-06, |
| "loss": 0.3756, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.36824334074052695, |
| "grad_norm": 4.222113472184697, |
| "learning_rate": 6.3187669572245e-06, |
| "loss": 0.3934, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.36882052466018295, |
| "grad_norm": 4.239041230078147, |
| "learning_rate": 6.312994285054552e-06, |
| "loss": 0.3841, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.36939770857983895, |
| "grad_norm": 3.8859530952931425, |
| "learning_rate": 6.307221612884605e-06, |
| "loss": 0.3747, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.36997489249949495, |
| "grad_norm": 5.0107950962438315, |
| "learning_rate": 6.301448940714657e-06, |
| "loss": 0.3781, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.37055207641915094, |
| "grad_norm": 19.339302178387896, |
| "learning_rate": 6.29567626854471e-06, |
| "loss": 0.3804, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.37112926033880694, |
| "grad_norm": 103.6902739739912, |
| "learning_rate": 6.2899035963747626e-06, |
| "loss": 0.3614, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.37170644425846294, |
| "grad_norm": 9.795833830078546, |
| "learning_rate": 6.284130924204816e-06, |
| "loss": 0.3806, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.37228362817811894, |
| "grad_norm": 2.835184422290757, |
| "learning_rate": 6.278358252034868e-06, |
| "loss": 0.3635, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.37286081209777494, |
| "grad_norm": 17.867434742010555, |
| "learning_rate": 6.272585579864921e-06, |
| "loss": 0.3825, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.37343799601743094, |
| "grad_norm": 3.5064803159499554, |
| "learning_rate": 6.266812907694972e-06, |
| "loss": 0.3777, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.37401517993708694, |
| "grad_norm": 6.18556265562049, |
| "learning_rate": 6.261040235525025e-06, |
| "loss": 0.354, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.37459236385674294, |
| "grad_norm": 5.3643329344286, |
| "learning_rate": 6.255267563355077e-06, |
| "loss": 0.3653, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.37516954777639894, |
| "grad_norm": 4.370066633666132, |
| "learning_rate": 6.24949489118513e-06, |
| "loss": 0.3799, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.37574673169605494, |
| "grad_norm": 3.802802160469247, |
| "learning_rate": 6.2437222190151825e-06, |
| "loss": 0.3771, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.37632391561571094, |
| "grad_norm": 2.999312565631662, |
| "learning_rate": 6.2379495468452355e-06, |
| "loss": 0.3761, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.37690109953536693, |
| "grad_norm": 7.852310497644898, |
| "learning_rate": 6.232176874675288e-06, |
| "loss": 0.3823, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.37747828345502293, |
| "grad_norm": 4.876630434197547, |
| "learning_rate": 6.226404202505341e-06, |
| "loss": 0.3704, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.37805546737467893, |
| "grad_norm": 4.989568751322678, |
| "learning_rate": 6.220631530335393e-06, |
| "loss": 0.3836, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.37863265129433493, |
| "grad_norm": 3.7548796873491135, |
| "learning_rate": 6.214858858165446e-06, |
| "loss": 0.3759, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.37920983521399093, |
| "grad_norm": 2.7376006597130513, |
| "learning_rate": 6.209086185995497e-06, |
| "loss": 0.3825, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.37978701913364693, |
| "grad_norm": 3.835955921955649, |
| "learning_rate": 6.203313513825551e-06, |
| "loss": 0.3892, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.38036420305330293, |
| "grad_norm": 2.728138816573778, |
| "learning_rate": 6.197540841655602e-06, |
| "loss": 0.3806, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.38094138697295893, |
| "grad_norm": 6.331854012865428, |
| "learning_rate": 6.1917681694856554e-06, |
| "loss": 0.3849, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.3815185708926149, |
| "grad_norm": 2.7712160688455394, |
| "learning_rate": 6.185995497315708e-06, |
| "loss": 0.366, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.3820957548122709, |
| "grad_norm": 2.2697691277132406, |
| "learning_rate": 6.180222825145761e-06, |
| "loss": 0.3625, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.3826729387319269, |
| "grad_norm": 4.51763189851551, |
| "learning_rate": 6.174450152975813e-06, |
| "loss": 0.3745, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.3832501226515829, |
| "grad_norm": 22.00920716007038, |
| "learning_rate": 6.168677480805866e-06, |
| "loss": 0.379, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.3838273065712389, |
| "grad_norm": 8.50487988964264, |
| "learning_rate": 6.162904808635918e-06, |
| "loss": 0.3798, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.3844044904908949, |
| "grad_norm": 2.756542308650349, |
| "learning_rate": 6.157132136465971e-06, |
| "loss": 0.3777, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.3849816744105509, |
| "grad_norm": 1.8967629666152492, |
| "learning_rate": 6.151359464296023e-06, |
| "loss": 0.3536, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.3855588583302069, |
| "grad_norm": 2.208647530669507, |
| "learning_rate": 6.145586792126076e-06, |
| "loss": 0.3757, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.3861360422498629, |
| "grad_norm": 3.18500818944882, |
| "learning_rate": 6.1398141199561275e-06, |
| "loss": 0.381, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.3867132261695189, |
| "grad_norm": 3.7319272107204267, |
| "learning_rate": 6.1340414477861806e-06, |
| "loss": 0.3895, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.3872904100891749, |
| "grad_norm": 6.233879379077169, |
| "learning_rate": 6.128268775616233e-06, |
| "loss": 0.3931, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.3878675940088309, |
| "grad_norm": 2.5172058960090147, |
| "learning_rate": 6.122496103446286e-06, |
| "loss": 0.3696, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.3884447779284869, |
| "grad_norm": 2.4821687545852544, |
| "learning_rate": 6.116723431276338e-06, |
| "loss": 0.3783, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.3890219618481429, |
| "grad_norm": 2.5811379708324984, |
| "learning_rate": 6.110950759106391e-06, |
| "loss": 0.3883, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.3895991457677989, |
| "grad_norm": 4.606721510393016, |
| "learning_rate": 6.105178086936443e-06, |
| "loss": 0.364, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.3901763296874549, |
| "grad_norm": 5.353229433626119, |
| "learning_rate": 6.099405414766496e-06, |
| "loss": 0.3882, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.3907535136071109, |
| "grad_norm": 2.3516345262109617, |
| "learning_rate": 6.093632742596548e-06, |
| "loss": 0.3788, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.3913306975267669, |
| "grad_norm": 11.487680253286674, |
| "learning_rate": 6.087860070426601e-06, |
| "loss": 0.3889, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.3919078814464229, |
| "grad_norm": 3.18290202413646, |
| "learning_rate": 6.082087398256653e-06, |
| "loss": 0.3607, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.3924850653660789, |
| "grad_norm": 2.7380986355865917, |
| "learning_rate": 6.0763147260867065e-06, |
| "loss": 0.3809, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.3930622492857349, |
| "grad_norm": 2.985403565819371, |
| "learning_rate": 6.070542053916758e-06, |
| "loss": 0.3785, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.3936394332053909, |
| "grad_norm": 7.257850197480963, |
| "learning_rate": 6.064769381746811e-06, |
| "loss": 0.3813, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.3942166171250469, |
| "grad_norm": 2.651981631840983, |
| "learning_rate": 6.058996709576863e-06, |
| "loss": 0.3911, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.3947938010447029, |
| "grad_norm": 3.007540853480136, |
| "learning_rate": 6.053224037406916e-06, |
| "loss": 0.3787, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.3953709849643589, |
| "grad_norm": 4.967113215124695, |
| "learning_rate": 6.047451365236968e-06, |
| "loss": 0.3729, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.3959481688840149, |
| "grad_norm": 2.4113519734571627, |
| "learning_rate": 6.041678693067021e-06, |
| "loss": 0.3576, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.3965253528036709, |
| "grad_norm": 1.5215990778439656, |
| "learning_rate": 6.0359060208970734e-06, |
| "loss": 0.3813, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.3971025367233269, |
| "grad_norm": 1.9980571139407164, |
| "learning_rate": 6.0301333487271265e-06, |
| "loss": 0.3764, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.3976797206429829, |
| "grad_norm": 3.851850368869639, |
| "learning_rate": 6.024360676557179e-06, |
| "loss": 0.3793, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.3982569045626389, |
| "grad_norm": 2.819413612633571, |
| "learning_rate": 6.018588004387232e-06, |
| "loss": 0.3915, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.3988340884822949, |
| "grad_norm": 2.2801532893497733, |
| "learning_rate": 6.012815332217285e-06, |
| "loss": 0.3927, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.3994112724019509, |
| "grad_norm": 3.0396536780138734, |
| "learning_rate": 6.007042660047336e-06, |
| "loss": 0.3787, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.3999884563216069, |
| "grad_norm": 1.9908365824878806, |
| "learning_rate": 6.00126998787739e-06, |
| "loss": 0.3751, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.4005656402412629, |
| "grad_norm": 2.5287281468598817, |
| "learning_rate": 5.995497315707441e-06, |
| "loss": 0.3739, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.4011428241609189, |
| "grad_norm": 2.3877649628077404, |
| "learning_rate": 5.989724643537494e-06, |
| "loss": 0.3931, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.4017200080805749, |
| "grad_norm": 1.751274625854989, |
| "learning_rate": 5.983951971367546e-06, |
| "loss": 0.3725, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.4022971920002309, |
| "grad_norm": 7.097265852789204, |
| "learning_rate": 5.978179299197599e-06, |
| "loss": 0.3771, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.4028743759198869, |
| "grad_norm": 1.5751898011207688, |
| "learning_rate": 5.972406627027652e-06, |
| "loss": 0.3804, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.4034515598395429, |
| "grad_norm": 3.487612778253862, |
| "learning_rate": 5.966633954857705e-06, |
| "loss": 0.3677, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.4040287437591989, |
| "grad_norm": 2.622057972311098, |
| "learning_rate": 5.960861282687757e-06, |
| "loss": 0.3778, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.4046059276788549, |
| "grad_norm": 2.7368469799858532, |
| "learning_rate": 5.95508861051781e-06, |
| "loss": 0.3768, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.4051831115985109, |
| "grad_norm": 1.6133398127083427, |
| "learning_rate": 5.949315938347862e-06, |
| "loss": 0.3799, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.4057602955181669, |
| "grad_norm": 3.191334805976918, |
| "learning_rate": 5.943543266177915e-06, |
| "loss": 0.3813, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.40633747943782283, |
| "grad_norm": 2.8991810624406784, |
| "learning_rate": 5.937770594007966e-06, |
| "loss": 0.376, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.40691466335747883, |
| "grad_norm": 2.0785390805202684, |
| "learning_rate": 5.931997921838019e-06, |
| "loss": 0.3729, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.40749184727713483, |
| "grad_norm": 1.9512094562324862, |
| "learning_rate": 5.9262252496680715e-06, |
| "loss": 0.3732, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.40806903119679083, |
| "grad_norm": 3.3176725840902206, |
| "learning_rate": 5.9204525774981245e-06, |
| "loss": 0.3874, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.40864621511644683, |
| "grad_norm": 3.000837724994079, |
| "learning_rate": 5.914679905328177e-06, |
| "loss": 0.3745, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.40922339903610283, |
| "grad_norm": 1.8158962267665133, |
| "learning_rate": 5.90890723315823e-06, |
| "loss": 0.3756, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.40980058295575883, |
| "grad_norm": 2.324389501252935, |
| "learning_rate": 5.903134560988282e-06, |
| "loss": 0.3886, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.41037776687541483, |
| "grad_norm": 2.894571332845524, |
| "learning_rate": 5.897361888818335e-06, |
| "loss": 0.3869, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.41095495079507083, |
| "grad_norm": 2.629677485680801, |
| "learning_rate": 5.891589216648387e-06, |
| "loss": 0.3615, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.4115321347147268, |
| "grad_norm": 13.759434005163566, |
| "learning_rate": 5.88581654447844e-06, |
| "loss": 0.3678, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.4121093186343828, |
| "grad_norm": 3.187249272214218, |
| "learning_rate": 5.8800438723084915e-06, |
| "loss": 0.3496, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.4126865025540388, |
| "grad_norm": 25.01828326406148, |
| "learning_rate": 5.874271200138545e-06, |
| "loss": 0.3831, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.4132636864736948, |
| "grad_norm": 4.28899556920541, |
| "learning_rate": 5.868498527968597e-06, |
| "loss": 0.3847, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.4138408703933508, |
| "grad_norm": 5.017592395582479, |
| "learning_rate": 5.86272585579865e-06, |
| "loss": 0.3528, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.4144180543130068, |
| "grad_norm": 3.6138133944499686, |
| "learning_rate": 5.856953183628702e-06, |
| "loss": 0.3615, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.4149952382326628, |
| "grad_norm": 11.345281193048963, |
| "learning_rate": 5.851180511458755e-06, |
| "loss": 0.36, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.4155724221523188, |
| "grad_norm": 4.1575514029124525, |
| "learning_rate": 5.845407839288807e-06, |
| "loss": 0.3707, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.4161496060719748, |
| "grad_norm": 5.184879687211155, |
| "learning_rate": 5.83963516711886e-06, |
| "loss": 0.3584, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.4167267899916308, |
| "grad_norm": 3.6353294525038256, |
| "learning_rate": 5.833862494948912e-06, |
| "loss": 0.3922, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.4173039739112868, |
| "grad_norm": 10.083912587939164, |
| "learning_rate": 5.828089822778965e-06, |
| "loss": 0.358, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.4178811578309428, |
| "grad_norm": 3.795430776940293, |
| "learning_rate": 5.822317150609017e-06, |
| "loss": 0.3584, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.4184583417505988, |
| "grad_norm": 2.735432424886805, |
| "learning_rate": 5.8165444784390704e-06, |
| "loss": 0.3628, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.4190355256702548, |
| "grad_norm": 3.7538394849350034, |
| "learning_rate": 5.810771806269122e-06, |
| "loss": 0.3808, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.4196127095899108, |
| "grad_norm": 3.486146744000872, |
| "learning_rate": 5.804999134099175e-06, |
| "loss": 0.3677, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.4201898935095668, |
| "grad_norm": 6.482596192596544, |
| "learning_rate": 5.799226461929227e-06, |
| "loss": 0.3715, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.4207670774292228, |
| "grad_norm": 15.287481532081374, |
| "learning_rate": 5.79345378975928e-06, |
| "loss": 0.3607, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.4213442613488788, |
| "grad_norm": 5.756011268210783, |
| "learning_rate": 5.787681117589332e-06, |
| "loss": 0.3829, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.4219214452685348, |
| "grad_norm": 5.238271188240731, |
| "learning_rate": 5.781908445419385e-06, |
| "loss": 0.3685, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.4224986291881908, |
| "grad_norm": 5.072523904302979, |
| "learning_rate": 5.776135773249437e-06, |
| "loss": 0.3785, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.4230758131078468, |
| "grad_norm": 2.7230926250144494, |
| "learning_rate": 5.77036310107949e-06, |
| "loss": 0.3586, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.4236529970275028, |
| "grad_norm": 3.1651643016202438, |
| "learning_rate": 5.7645904289095425e-06, |
| "loss": 0.3675, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.4242301809471588, |
| "grad_norm": 5.575569273909336, |
| "learning_rate": 5.7588177567395956e-06, |
| "loss": 0.3659, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.4248073648668148, |
| "grad_norm": 3.4372405530276686, |
| "learning_rate": 5.753045084569647e-06, |
| "loss": 0.3562, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.4253845487864708, |
| "grad_norm": 3.1380962366302203, |
| "learning_rate": 5.747272412399701e-06, |
| "loss": 0.3665, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.4259617327061268, |
| "grad_norm": 4.195020514299469, |
| "learning_rate": 5.741499740229752e-06, |
| "loss": 0.3834, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.4265389166257828, |
| "grad_norm": 2.5201571788814103, |
| "learning_rate": 5.735727068059805e-06, |
| "loss": 0.3598, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.4271161005454388, |
| "grad_norm": 7.757366621212017, |
| "learning_rate": 5.729954395889857e-06, |
| "loss": 0.365, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.4276932844650948, |
| "grad_norm": 3.6863947123217438, |
| "learning_rate": 5.72418172371991e-06, |
| "loss": 0.3605, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.4282704683847508, |
| "grad_norm": 2.713386138832286, |
| "learning_rate": 5.7184090515499625e-06, |
| "loss": 0.3692, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.4288476523044068, |
| "grad_norm": 4.061486235134526, |
| "learning_rate": 5.7126363793800155e-06, |
| "loss": 0.3615, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.4294248362240628, |
| "grad_norm": 3.171386095616653, |
| "learning_rate": 5.7068637072100685e-06, |
| "loss": 0.3742, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.4300020201437188, |
| "grad_norm": 3.0632786743675173, |
| "learning_rate": 5.701091035040121e-06, |
| "loss": 0.3669, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.4305792040633748, |
| "grad_norm": 5.9823682538619884, |
| "learning_rate": 5.695318362870174e-06, |
| "loss": 0.3625, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.4311563879830308, |
| "grad_norm": 2.587936253733615, |
| "learning_rate": 5.689545690700226e-06, |
| "loss": 0.3654, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.4317335719026868, |
| "grad_norm": 3.3311960193145507, |
| "learning_rate": 5.683773018530279e-06, |
| "loss": 0.3849, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.4323107558223428, |
| "grad_norm": 7.2504266943512885, |
| "learning_rate": 5.67800034636033e-06, |
| "loss": 0.383, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.4328879397419988, |
| "grad_norm": 2.565927845188403, |
| "learning_rate": 5.672227674190383e-06, |
| "loss": 0.3689, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.4334651236616548, |
| "grad_norm": 2.596436540237007, |
| "learning_rate": 5.6664550020204354e-06, |
| "loss": 0.3783, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.4340423075813108, |
| "grad_norm": 3.2568921956880397, |
| "learning_rate": 5.6606823298504884e-06, |
| "loss": 0.3622, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.4346194915009668, |
| "grad_norm": 3.534180092969136, |
| "learning_rate": 5.654909657680541e-06, |
| "loss": 0.374, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.4351966754206228, |
| "grad_norm": 2.320956209280894, |
| "learning_rate": 5.649136985510594e-06, |
| "loss": 0.3618, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.4357738593402788, |
| "grad_norm": 2.7986565358175732, |
| "learning_rate": 5.643364313340646e-06, |
| "loss": 0.376, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.4363510432599348, |
| "grad_norm": 4.3640055595975955, |
| "learning_rate": 5.637591641170699e-06, |
| "loss": 0.359, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.4369282271795908, |
| "grad_norm": 2.4295878318318893, |
| "learning_rate": 5.631818969000751e-06, |
| "loss": 0.3837, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.4375054110992468, |
| "grad_norm": 2.5358015892610304, |
| "learning_rate": 5.626046296830804e-06, |
| "loss": 0.3824, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.4380825950189028, |
| "grad_norm": 2.732560193932699, |
| "learning_rate": 5.620273624660856e-06, |
| "loss": 0.3657, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.4386597789385588, |
| "grad_norm": 4.150107259821488, |
| "learning_rate": 5.614500952490909e-06, |
| "loss": 0.3805, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.4392369628582148, |
| "grad_norm": 6.027002919837396, |
| "learning_rate": 5.6087282803209605e-06, |
| "loss": 0.3733, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.4398141467778708, |
| "grad_norm": 4.383047686001244, |
| "learning_rate": 5.6029556081510136e-06, |
| "loss": 0.3798, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.4403913306975268, |
| "grad_norm": 3.183548428631444, |
| "learning_rate": 5.597182935981066e-06, |
| "loss": 0.3704, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.4409685146171828, |
| "grad_norm": 3.2995502847867364, |
| "learning_rate": 5.591410263811119e-06, |
| "loss": 0.3868, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.4415456985368388, |
| "grad_norm": 2.9302522543070384, |
| "learning_rate": 5.585637591641171e-06, |
| "loss": 0.3719, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.4421228824564948, |
| "grad_norm": 3.699827330713927, |
| "learning_rate": 5.579864919471224e-06, |
| "loss": 0.3843, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.4427000663761508, |
| "grad_norm": 10.131740140492866, |
| "learning_rate": 5.574092247301276e-06, |
| "loss": 0.3741, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.4432772502958068, |
| "grad_norm": 2.53184828015941, |
| "learning_rate": 5.568319575131329e-06, |
| "loss": 0.3679, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.4438544342154628, |
| "grad_norm": 2.735336367396379, |
| "learning_rate": 5.562546902961381e-06, |
| "loss": 0.3794, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.4444316181351188, |
| "grad_norm": 3.118950089045635, |
| "learning_rate": 5.556774230791434e-06, |
| "loss": 0.3749, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.4450088020547748, |
| "grad_norm": 2.0345152708541736, |
| "learning_rate": 5.551001558621486e-06, |
| "loss": 0.3797, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.4455859859744308, |
| "grad_norm": 2.216729946357023, |
| "learning_rate": 5.5452288864515395e-06, |
| "loss": 0.3704, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.4461631698940868, |
| "grad_norm": 11.605317744790039, |
| "learning_rate": 5.539456214281591e-06, |
| "loss": 0.3826, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.4467403538137428, |
| "grad_norm": 5.555060033849291, |
| "learning_rate": 5.533683542111644e-06, |
| "loss": 0.3768, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.4473175377333988, |
| "grad_norm": 2.5005756710793507, |
| "learning_rate": 5.527910869941696e-06, |
| "loss": 0.3667, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.4478947216530548, |
| "grad_norm": 4.5009505264076655, |
| "learning_rate": 5.522138197771749e-06, |
| "loss": 0.3773, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.44847190557271077, |
| "grad_norm": 2.6271424623008945, |
| "learning_rate": 5.516365525601801e-06, |
| "loss": 0.3737, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.4490490894923667, |
| "grad_norm": 12.12839502479119, |
| "learning_rate": 5.510592853431854e-06, |
| "loss": 0.3573, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.4496262734120227, |
| "grad_norm": 4.194839597547066, |
| "learning_rate": 5.5048201812619064e-06, |
| "loss": 0.3774, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.4502034573316787, |
| "grad_norm": 2.1887367164733016, |
| "learning_rate": 5.4990475090919595e-06, |
| "loss": 0.381, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.4507806412513347, |
| "grad_norm": 3.1886528624855925, |
| "learning_rate": 5.493274836922012e-06, |
| "loss": 0.37, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.4513578251709907, |
| "grad_norm": 2.4110545743480527, |
| "learning_rate": 5.487502164752065e-06, |
| "loss": 0.3604, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.4519350090906467, |
| "grad_norm": 2.9847050808092166, |
| "learning_rate": 5.481729492582116e-06, |
| "loss": 0.3675, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.4525121930103027, |
| "grad_norm": 5.885118240316819, |
| "learning_rate": 5.475956820412169e-06, |
| "loss": 0.3856, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.4530893769299587, |
| "grad_norm": 5.303575867358966, |
| "learning_rate": 5.470184148242221e-06, |
| "loss": 0.3607, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.4536665608496147, |
| "grad_norm": 7.1549036006295035, |
| "learning_rate": 5.464411476072274e-06, |
| "loss": 0.3699, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.4542437447692707, |
| "grad_norm": 1.8926865310221554, |
| "learning_rate": 5.458638803902326e-06, |
| "loss": 0.3676, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.4548209286889267, |
| "grad_norm": 4.130403133399794, |
| "learning_rate": 5.452866131732379e-06, |
| "loss": 0.3624, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.4553981126085827, |
| "grad_norm": 3.4241484954051677, |
| "learning_rate": 5.4470934595624316e-06, |
| "loss": 0.3652, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.4559752965282387, |
| "grad_norm": 7.8305369558715725, |
| "learning_rate": 5.441320787392485e-06, |
| "loss": 0.35, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.4565524804478947, |
| "grad_norm": 7.1372161394964575, |
| "learning_rate": 5.435548115222537e-06, |
| "loss": 0.3742, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.4571296643675507, |
| "grad_norm": 20.273585832785447, |
| "learning_rate": 5.42977544305259e-06, |
| "loss": 0.3769, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.4577068482872067, |
| "grad_norm": 2.794498898766565, |
| "learning_rate": 5.424002770882641e-06, |
| "loss": 0.384, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.4582840322068627, |
| "grad_norm": 7.066469891261649, |
| "learning_rate": 5.418230098712695e-06, |
| "loss": 0.3795, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.4588612161265187, |
| "grad_norm": 2.8353456413911737, |
| "learning_rate": 5.412457426542746e-06, |
| "loss": 0.3643, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.4594384000461747, |
| "grad_norm": 3.0383570429357345, |
| "learning_rate": 5.406684754372799e-06, |
| "loss": 0.3718, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.4600155839658307, |
| "grad_norm": 3.6164323938018734, |
| "learning_rate": 5.400912082202852e-06, |
| "loss": 0.3849, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.4605927678854867, |
| "grad_norm": 2.7123845726783262, |
| "learning_rate": 5.3951394100329045e-06, |
| "loss": 0.3683, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.4611699518051427, |
| "grad_norm": 2.312599361300853, |
| "learning_rate": 5.3893667378629575e-06, |
| "loss": 0.3798, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.4617471357247987, |
| "grad_norm": 3.251330933463336, |
| "learning_rate": 5.38359406569301e-06, |
| "loss": 0.3587, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.4623243196444547, |
| "grad_norm": 6.596375932856641, |
| "learning_rate": 5.377821393523063e-06, |
| "loss": 0.3802, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.4629015035641107, |
| "grad_norm": 1.8050467998180781, |
| "learning_rate": 5.372048721353115e-06, |
| "loss": 0.3712, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.4634786874837667, |
| "grad_norm": 5.472845808317412, |
| "learning_rate": 5.366276049183168e-06, |
| "loss": 0.377, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.4640558714034227, |
| "grad_norm": 21.192833022577837, |
| "learning_rate": 5.36050337701322e-06, |
| "loss": 0.3745, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.4646330553230787, |
| "grad_norm": 2.3954727021255677, |
| "learning_rate": 5.354730704843273e-06, |
| "loss": 0.368, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.4652102392427347, |
| "grad_norm": 3.6751726559767652, |
| "learning_rate": 5.3489580326733245e-06, |
| "loss": 0.3803, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.4657874231623907, |
| "grad_norm": 2.6299220528922316, |
| "learning_rate": 5.3431853605033775e-06, |
| "loss": 0.3655, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.4663646070820467, |
| "grad_norm": 4.147182980327485, |
| "learning_rate": 5.33741268833343e-06, |
| "loss": 0.376, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.4669417910017027, |
| "grad_norm": 2.3318408925884526, |
| "learning_rate": 5.331640016163483e-06, |
| "loss": 0.3793, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.4675189749213587, |
| "grad_norm": 2.5630808791681106, |
| "learning_rate": 5.325867343993535e-06, |
| "loss": 0.3841, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.4680961588410147, |
| "grad_norm": 16.561602524939726, |
| "learning_rate": 5.320094671823588e-06, |
| "loss": 0.3628, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.4686733427606707, |
| "grad_norm": 2.655435817667697, |
| "learning_rate": 5.31432199965364e-06, |
| "loss": 0.3827, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.4692505266803267, |
| "grad_norm": 2.082608101455672, |
| "learning_rate": 5.308549327483693e-06, |
| "loss": 0.3726, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.4698277105999827, |
| "grad_norm": 7.450725164048278, |
| "learning_rate": 5.302776655313745e-06, |
| "loss": 0.3607, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.4704048945196387, |
| "grad_norm": 3.727260702005544, |
| "learning_rate": 5.297003983143798e-06, |
| "loss": 0.3666, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.4709820784392947, |
| "grad_norm": 2.9372607445086816, |
| "learning_rate": 5.29123131097385e-06, |
| "loss": 0.3638, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.4715592623589507, |
| "grad_norm": 3.011083825137573, |
| "learning_rate": 5.2854586388039034e-06, |
| "loss": 0.3711, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.4721364462786067, |
| "grad_norm": 5.58881805540413, |
| "learning_rate": 5.279685966633955e-06, |
| "loss": 0.3737, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.4727136301982627, |
| "grad_norm": 1.9307643066917193, |
| "learning_rate": 5.273913294464008e-06, |
| "loss": 0.3633, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.4732908141179187, |
| "grad_norm": 2.246158235550298, |
| "learning_rate": 5.26814062229406e-06, |
| "loss": 0.3811, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.4738679980375747, |
| "grad_norm": 5.378095626076901, |
| "learning_rate": 5.262367950124113e-06, |
| "loss": 0.3772, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.4744451819572307, |
| "grad_norm": 3.594146830036725, |
| "learning_rate": 5.256595277954165e-06, |
| "loss": 0.3689, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.4750223658768867, |
| "grad_norm": 1.880161955339062, |
| "learning_rate": 5.250822605784218e-06, |
| "loss": 0.3682, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.4755995497965427, |
| "grad_norm": 3.655512057019781, |
| "learning_rate": 5.24504993361427e-06, |
| "loss": 0.3601, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.4761767337161987, |
| "grad_norm": 4.5822643890603345, |
| "learning_rate": 5.239277261444323e-06, |
| "loss": 0.3799, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.4767539176358547, |
| "grad_norm": 10.062266012222976, |
| "learning_rate": 5.2335045892743755e-06, |
| "loss": 0.3651, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.4773311015555107, |
| "grad_norm": 7.820363388846345, |
| "learning_rate": 5.2277319171044286e-06, |
| "loss": 0.3629, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.47790828547516667, |
| "grad_norm": 3.04998159803599, |
| "learning_rate": 5.22195924493448e-06, |
| "loss": 0.3693, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.47848546939482267, |
| "grad_norm": 4.5635676614995475, |
| "learning_rate": 5.216186572764533e-06, |
| "loss": 0.384, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.47906265331447867, |
| "grad_norm": 2.1736842754902708, |
| "learning_rate": 5.210413900594585e-06, |
| "loss": 0.363, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.47963983723413467, |
| "grad_norm": 2.5418326594021887, |
| "learning_rate": 5.204641228424638e-06, |
| "loss": 0.3697, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.48021702115379067, |
| "grad_norm": 2.692093940326692, |
| "learning_rate": 5.19886855625469e-06, |
| "loss": 0.3691, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.48079420507344667, |
| "grad_norm": 2.4052933532982816, |
| "learning_rate": 5.193095884084743e-06, |
| "loss": 0.3506, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.48137138899310267, |
| "grad_norm": 6.620567354797733, |
| "learning_rate": 5.1873232119147955e-06, |
| "loss": 0.3838, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.48194857291275867, |
| "grad_norm": 2.9255268074292555, |
| "learning_rate": 5.1815505397448485e-06, |
| "loss": 0.3694, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.48252575683241467, |
| "grad_norm": 13.37602989555681, |
| "learning_rate": 5.175777867574901e-06, |
| "loss": 0.3752, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.48310294075207066, |
| "grad_norm": 3.511794563236054, |
| "learning_rate": 5.170005195404954e-06, |
| "loss": 0.3741, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.48368012467172666, |
| "grad_norm": 4.7758963928181375, |
| "learning_rate": 5.164232523235006e-06, |
| "loss": 0.369, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.48425730859138266, |
| "grad_norm": 3.8240515141083002, |
| "learning_rate": 5.158459851065059e-06, |
| "loss": 0.374, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.48483449251103866, |
| "grad_norm": 8.100590461673363, |
| "learning_rate": 5.15268717889511e-06, |
| "loss": 0.3563, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.48541167643069466, |
| "grad_norm": 4.117224392256427, |
| "learning_rate": 5.146914506725163e-06, |
| "loss": 0.3626, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.48598886035035066, |
| "grad_norm": 3.4662871354919904, |
| "learning_rate": 5.141141834555215e-06, |
| "loss": 0.3848, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.48656604427000666, |
| "grad_norm": 4.959524985987204, |
| "learning_rate": 5.1353691623852684e-06, |
| "loss": 0.3649, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.48714322818966266, |
| "grad_norm": 5.172300477902163, |
| "learning_rate": 5.129596490215321e-06, |
| "loss": 0.3732, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.48772041210931866, |
| "grad_norm": 2.996587870660032, |
| "learning_rate": 5.123823818045374e-06, |
| "loss": 0.3544, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.48829759602897466, |
| "grad_norm": 5.228719957469869, |
| "learning_rate": 5.118051145875426e-06, |
| "loss": 0.3623, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.48887477994863066, |
| "grad_norm": 7.078111368668544, |
| "learning_rate": 5.112278473705479e-06, |
| "loss": 0.3521, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.48945196386828665, |
| "grad_norm": 4.585249570356133, |
| "learning_rate": 5.106505801535531e-06, |
| "loss": 0.3602, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.49002914778794265, |
| "grad_norm": 3.769909535272591, |
| "learning_rate": 5.100733129365584e-06, |
| "loss": 0.3642, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.49060633170759865, |
| "grad_norm": 5.843171007267111, |
| "learning_rate": 5.094960457195637e-06, |
| "loss": 0.3844, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.49118351562725465, |
| "grad_norm": 5.011620359523228, |
| "learning_rate": 5.089187785025688e-06, |
| "loss": 0.3748, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.4917606995469106, |
| "grad_norm": 3.1629027771513667, |
| "learning_rate": 5.083415112855742e-06, |
| "loss": 0.3756, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.4923378834665666, |
| "grad_norm": 3.4287778879420583, |
| "learning_rate": 5.0776424406857936e-06, |
| "loss": 0.3654, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.4929150673862226, |
| "grad_norm": 2.3995913819961077, |
| "learning_rate": 5.0718697685158466e-06, |
| "loss": 0.3497, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.4934922513058786, |
| "grad_norm": 7.033069082305426, |
| "learning_rate": 5.066097096345899e-06, |
| "loss": 0.3679, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.4940694352255346, |
| "grad_norm": 9.94984806585782, |
| "learning_rate": 5.060324424175952e-06, |
| "loss": 0.3528, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.4946466191451906, |
| "grad_norm": 3.544058073278801, |
| "learning_rate": 5.054551752006004e-06, |
| "loss": 0.3578, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.4952238030648466, |
| "grad_norm": 2.5120291222524176, |
| "learning_rate": 5.048779079836057e-06, |
| "loss": 0.3548, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.4958009869845026, |
| "grad_norm": 3.3522580875814887, |
| "learning_rate": 5.043006407666109e-06, |
| "loss": 0.3606, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.4963781709041586, |
| "grad_norm": 2.412854685811211, |
| "learning_rate": 5.037233735496162e-06, |
| "loss": 0.3605, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.4969553548238146, |
| "grad_norm": 2.9209835829832613, |
| "learning_rate": 5.031461063326214e-06, |
| "loss": 0.3566, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.4975325387434706, |
| "grad_norm": 2.6580624391224568, |
| "learning_rate": 5.025688391156267e-06, |
| "loss": 0.3806, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.4981097226631266, |
| "grad_norm": 2.9252114550534567, |
| "learning_rate": 5.019915718986319e-06, |
| "loss": 0.3596, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.4986869065827826, |
| "grad_norm": 2.432042802537537, |
| "learning_rate": 5.014143046816372e-06, |
| "loss": 0.3849, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.4992640905024386, |
| "grad_norm": 14.195133545125286, |
| "learning_rate": 5.008370374646424e-06, |
| "loss": 0.3602, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.4998412744220946, |
| "grad_norm": 88.84290269765289, |
| "learning_rate": 5.002597702476477e-06, |
| "loss": 0.3642, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.5004184583417506, |
| "grad_norm": 2.935701463863382, |
| "learning_rate": 4.996825030306529e-06, |
| "loss": 0.3792, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.5009956422614066, |
| "grad_norm": 3.1279829631060396, |
| "learning_rate": 4.991052358136581e-06, |
| "loss": 0.3693, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.5015728261810626, |
| "grad_norm": 10.193114728366995, |
| "learning_rate": 4.985279685966634e-06, |
| "loss": 0.3597, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.5021500101007186, |
| "grad_norm": 2.4687234809350107, |
| "learning_rate": 4.9795070137966864e-06, |
| "loss": 0.3574, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.5027271940203746, |
| "grad_norm": 5.072249220362736, |
| "learning_rate": 4.9737343416267395e-06, |
| "loss": 0.3494, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.5033043779400306, |
| "grad_norm": 2.491352620261712, |
| "learning_rate": 4.967961669456792e-06, |
| "loss": 0.3567, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.5038815618596866, |
| "grad_norm": 4.883169609666169, |
| "learning_rate": 4.962188997286845e-06, |
| "loss": 0.3563, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.5044587457793426, |
| "grad_norm": 3.9469009793644623, |
| "learning_rate": 4.956416325116897e-06, |
| "loss": 0.3669, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.5050359296989986, |
| "grad_norm": 5.325339903718785, |
| "learning_rate": 4.95064365294695e-06, |
| "loss": 0.3479, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.5056131136186546, |
| "grad_norm": 2.924190713741744, |
| "learning_rate": 4.944870980777002e-06, |
| "loss": 0.3596, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.5061902975383106, |
| "grad_norm": 29.94785140992617, |
| "learning_rate": 4.939098308607055e-06, |
| "loss": 0.3482, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.5067674814579666, |
| "grad_norm": 4.600461351076627, |
| "learning_rate": 4.933325636437107e-06, |
| "loss": 0.3676, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.5073446653776226, |
| "grad_norm": 3.298079688278539, |
| "learning_rate": 4.92755296426716e-06, |
| "loss": 0.3849, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.5079218492972786, |
| "grad_norm": 3.3537368346024903, |
| "learning_rate": 4.921780292097212e-06, |
| "loss": 0.354, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.5084990332169346, |
| "grad_norm": 4.562710088882002, |
| "learning_rate": 4.9160076199272646e-06, |
| "loss": 0.3762, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.5090762171365906, |
| "grad_norm": 3.600922103480094, |
| "learning_rate": 4.910234947757318e-06, |
| "loss": 0.3709, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.5096534010562466, |
| "grad_norm": 5.507110655352795, |
| "learning_rate": 4.90446227558737e-06, |
| "loss": 0.35, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.5102305849759026, |
| "grad_norm": 14.876481920624023, |
| "learning_rate": 4.898689603417423e-06, |
| "loss": 0.3655, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.5108077688955586, |
| "grad_norm": 6.040943909655418, |
| "learning_rate": 4.892916931247475e-06, |
| "loss": 0.3537, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.5113849528152146, |
| "grad_norm": 4.784882455474531, |
| "learning_rate": 4.887144259077527e-06, |
| "loss": 0.365, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.5119621367348706, |
| "grad_norm": 3.5957944832471864, |
| "learning_rate": 4.88137158690758e-06, |
| "loss": 0.3649, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.5125393206545266, |
| "grad_norm": 7.656707006499249, |
| "learning_rate": 4.875598914737632e-06, |
| "loss": 0.3678, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.5131165045741826, |
| "grad_norm": 3.6610216347500666, |
| "learning_rate": 4.869826242567685e-06, |
| "loss": 0.3659, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.5136936884938386, |
| "grad_norm": 4.181649664719206, |
| "learning_rate": 4.8640535703977375e-06, |
| "loss": 0.358, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.5142708724134946, |
| "grad_norm": 1.8612925513884986, |
| "learning_rate": 4.8582808982277905e-06, |
| "loss": 0.3508, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.5148480563331506, |
| "grad_norm": 5.0292268546846195, |
| "learning_rate": 4.852508226057843e-06, |
| "loss": 0.3567, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.5154252402528066, |
| "grad_norm": 5.77083593828813, |
| "learning_rate": 4.846735553887895e-06, |
| "loss": 0.3706, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.5160024241724626, |
| "grad_norm": 3.1575366683166264, |
| "learning_rate": 4.840962881717948e-06, |
| "loss": 0.3603, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.5165796080921186, |
| "grad_norm": 3.0092615460602357, |
| "learning_rate": 4.835190209548e-06, |
| "loss": 0.3567, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.5171567920117746, |
| "grad_norm": 5.424773899652038, |
| "learning_rate": 4.829417537378053e-06, |
| "loss": 0.3569, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.5177339759314306, |
| "grad_norm": 4.0199024195308075, |
| "learning_rate": 4.823644865208105e-06, |
| "loss": 0.376, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.5183111598510866, |
| "grad_norm": 8.92239001104145, |
| "learning_rate": 4.8178721930381575e-06, |
| "loss": 0.3572, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.5188883437707426, |
| "grad_norm": 10.702196683213096, |
| "learning_rate": 4.8120995208682105e-06, |
| "loss": 0.3651, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.5194655276903986, |
| "grad_norm": 4.470162551927128, |
| "learning_rate": 4.806326848698263e-06, |
| "loss": 0.3654, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.5200427116100546, |
| "grad_norm": 2.1821708334970737, |
| "learning_rate": 4.800554176528316e-06, |
| "loss": 0.3516, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.5206198955297106, |
| "grad_norm": 5.347973377285275, |
| "learning_rate": 4.794781504358368e-06, |
| "loss": 0.3623, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.5211970794493666, |
| "grad_norm": 4.623834142826691, |
| "learning_rate": 4.78900883218842e-06, |
| "loss": 0.3727, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.5217742633690226, |
| "grad_norm": 5.255808460960779, |
| "learning_rate": 4.783236160018473e-06, |
| "loss": 0.3729, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.5223514472886785, |
| "grad_norm": 3.1254534476864215, |
| "learning_rate": 4.777463487848525e-06, |
| "loss": 0.3679, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.5229286312083345, |
| "grad_norm": 9.646051621259671, |
| "learning_rate": 4.771690815678578e-06, |
| "loss": 0.3834, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.5235058151279905, |
| "grad_norm": 4.490473334084667, |
| "learning_rate": 4.76591814350863e-06, |
| "loss": 0.3656, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.5240829990476465, |
| "grad_norm": 2.607385547296513, |
| "learning_rate": 4.760145471338683e-06, |
| "loss": 0.3861, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.5246601829673025, |
| "grad_norm": 3.7754579602738136, |
| "learning_rate": 4.754372799168736e-06, |
| "loss": 0.3506, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.5252373668869585, |
| "grad_norm": 2.8550942295775896, |
| "learning_rate": 4.748600126998788e-06, |
| "loss": 0.3567, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.5258145508066145, |
| "grad_norm": 1.7112590538881849, |
| "learning_rate": 4.742827454828841e-06, |
| "loss": 0.3604, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.5263917347262705, |
| "grad_norm": 2.7066239753400585, |
| "learning_rate": 4.737054782658893e-06, |
| "loss": 0.36, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.5269689186459265, |
| "grad_norm": 5.758530747558061, |
| "learning_rate": 4.731282110488946e-06, |
| "loss": 0.3634, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.5275461025655825, |
| "grad_norm": 210.77018196053547, |
| "learning_rate": 4.725509438318998e-06, |
| "loss": 0.3722, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.5281232864852385, |
| "grad_norm": 2.300368854843036, |
| "learning_rate": 4.71973676614905e-06, |
| "loss": 0.3729, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.5287004704048945, |
| "grad_norm": 2.210540304841504, |
| "learning_rate": 4.713964093979103e-06, |
| "loss": 0.3796, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.5292776543245505, |
| "grad_norm": 2.2634685628485145, |
| "learning_rate": 4.7081914218091555e-06, |
| "loss": 0.356, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.5298548382442065, |
| "grad_norm": 3.1055746156661614, |
| "learning_rate": 4.7024187496392085e-06, |
| "loss": 0.3582, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.5304320221638625, |
| "grad_norm": 3.654168505440511, |
| "learning_rate": 4.696646077469261e-06, |
| "loss": 0.3607, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.5310092060835185, |
| "grad_norm": 3.0537549885812347, |
| "learning_rate": 4.690873405299313e-06, |
| "loss": 0.3767, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.5315863900031745, |
| "grad_norm": 1.9675795559729068, |
| "learning_rate": 4.685100733129366e-06, |
| "loss": 0.3657, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.5321635739228305, |
| "grad_norm": 2.0115039115011606, |
| "learning_rate": 4.679328060959418e-06, |
| "loss": 0.3601, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.5327407578424865, |
| "grad_norm": 5.152045183089219, |
| "learning_rate": 4.673555388789471e-06, |
| "loss": 0.3874, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.5333179417621425, |
| "grad_norm": 3.394195501156254, |
| "learning_rate": 4.667782716619523e-06, |
| "loss": 0.3687, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.5338951256817985, |
| "grad_norm": 3.733950998673983, |
| "learning_rate": 4.6620100444495755e-06, |
| "loss": 0.3812, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.5344723096014545, |
| "grad_norm": 2.172335452644072, |
| "learning_rate": 4.6562373722796285e-06, |
| "loss": 0.3719, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.5350494935211105, |
| "grad_norm": 1.8466321499683245, |
| "learning_rate": 4.650464700109681e-06, |
| "loss": 0.3563, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.5356266774407665, |
| "grad_norm": 2.411259261337378, |
| "learning_rate": 4.644692027939734e-06, |
| "loss": 0.3652, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.5362038613604225, |
| "grad_norm": 2.525799052658632, |
| "learning_rate": 4.638919355769787e-06, |
| "loss": 0.357, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.5367810452800785, |
| "grad_norm": 4.322100164387683, |
| "learning_rate": 4.633146683599839e-06, |
| "loss": 0.344, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.5373582291997345, |
| "grad_norm": 2.9720413934133267, |
| "learning_rate": 4.627374011429892e-06, |
| "loss": 0.3591, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.5379354131193905, |
| "grad_norm": 3.236889177938413, |
| "learning_rate": 4.621601339259944e-06, |
| "loss": 0.3698, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.5385125970390465, |
| "grad_norm": 4.173966589558208, |
| "learning_rate": 4.615828667089996e-06, |
| "loss": 0.3687, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.5390897809587025, |
| "grad_norm": 3.3050072649704387, |
| "learning_rate": 4.610055994920049e-06, |
| "loss": 0.3559, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.5396669648783585, |
| "grad_norm": 2.2463012820732904, |
| "learning_rate": 4.6042833227501014e-06, |
| "loss": 0.3585, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.5402441487980145, |
| "grad_norm": 2.3061353512132357, |
| "learning_rate": 4.5985106505801544e-06, |
| "loss": 0.3666, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.5408213327176705, |
| "grad_norm": 2.8767755309606393, |
| "learning_rate": 4.592737978410207e-06, |
| "loss": 0.3656, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.5413985166373265, |
| "grad_norm": 3.5885590608959603, |
| "learning_rate": 4.586965306240259e-06, |
| "loss": 0.3619, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.5419757005569825, |
| "grad_norm": 3.012510637436092, |
| "learning_rate": 4.581192634070312e-06, |
| "loss": 0.3679, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.5425528844766385, |
| "grad_norm": 2.074304355176205, |
| "learning_rate": 4.575419961900364e-06, |
| "loss": 0.3558, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.5431300683962945, |
| "grad_norm": 2.410649696384616, |
| "learning_rate": 4.569647289730417e-06, |
| "loss": 0.3667, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.5437072523159505, |
| "grad_norm": 6.990964309593162, |
| "learning_rate": 4.563874617560469e-06, |
| "loss": 0.3544, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.5442844362356065, |
| "grad_norm": 1.6679890672242221, |
| "learning_rate": 4.558101945390521e-06, |
| "loss": 0.3635, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.5448616201552625, |
| "grad_norm": 3.5900096135866177, |
| "learning_rate": 4.552329273220574e-06, |
| "loss": 0.3548, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.5454388040749185, |
| "grad_norm": 2.8054899052225655, |
| "learning_rate": 4.5465566010506266e-06, |
| "loss": 0.3573, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.5460159879945745, |
| "grad_norm": 2.7792687957236315, |
| "learning_rate": 4.5407839288806796e-06, |
| "loss": 0.3677, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.5465931719142305, |
| "grad_norm": 2.2614901204437636, |
| "learning_rate": 4.535011256710732e-06, |
| "loss": 0.3643, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.5471703558338865, |
| "grad_norm": 2.062841207589413, |
| "learning_rate": 4.529238584540785e-06, |
| "loss": 0.3592, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.5477475397535425, |
| "grad_norm": 8.866919074207425, |
| "learning_rate": 4.523465912370837e-06, |
| "loss": 0.3552, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.5483247236731985, |
| "grad_norm": 2.5645835670129618, |
| "learning_rate": 4.517693240200889e-06, |
| "loss": 0.3498, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.5489019075928545, |
| "grad_norm": 3.1741970680335716, |
| "learning_rate": 4.511920568030942e-06, |
| "loss": 0.3682, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.5494790915125105, |
| "grad_norm": 2.0747984580427765, |
| "learning_rate": 4.506147895860994e-06, |
| "loss": 0.3646, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.5500562754321665, |
| "grad_norm": 2.3586125635742654, |
| "learning_rate": 4.500375223691047e-06, |
| "loss": 0.355, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.5506334593518225, |
| "grad_norm": 3.8182790115085927, |
| "learning_rate": 4.4946025515210995e-06, |
| "loss": 0.3528, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.5512106432714785, |
| "grad_norm": 2.623243257421812, |
| "learning_rate": 4.488829879351152e-06, |
| "loss": 0.3551, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.5517878271911345, |
| "grad_norm": 2.775469371839904, |
| "learning_rate": 4.483057207181205e-06, |
| "loss": 0.3556, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.5523650111107905, |
| "grad_norm": 2.345529859871698, |
| "learning_rate": 4.477284535011257e-06, |
| "loss": 0.3702, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.5529421950304465, |
| "grad_norm": 5.265133567547254, |
| "learning_rate": 4.47151186284131e-06, |
| "loss": 0.3617, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.5535193789501025, |
| "grad_norm": 3.1602517841252897, |
| "learning_rate": 4.465739190671362e-06, |
| "loss": 0.3524, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.5540965628697585, |
| "grad_norm": 1.8906279954560556, |
| "learning_rate": 4.459966518501414e-06, |
| "loss": 0.3584, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.5546737467894145, |
| "grad_norm": 3.4743194872868117, |
| "learning_rate": 4.454193846331467e-06, |
| "loss": 0.3531, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.5552509307090705, |
| "grad_norm": 2.9951619722989578, |
| "learning_rate": 4.4484211741615194e-06, |
| "loss": 0.3525, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.5558281146287264, |
| "grad_norm": 5.694532155563467, |
| "learning_rate": 4.4426485019915725e-06, |
| "loss": 0.3568, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.5564052985483824, |
| "grad_norm": 2.0095159465987398, |
| "learning_rate": 4.436875829821625e-06, |
| "loss": 0.3586, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.5569824824680384, |
| "grad_norm": 2.703106746299796, |
| "learning_rate": 4.431103157651677e-06, |
| "loss": 0.3586, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.5575596663876944, |
| "grad_norm": 4.366070322479832, |
| "learning_rate": 4.42533048548173e-06, |
| "loss": 0.3628, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.5581368503073504, |
| "grad_norm": 5.959735845454689, |
| "learning_rate": 4.419557813311782e-06, |
| "loss": 0.3453, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.5587140342270064, |
| "grad_norm": 3.887262505871754, |
| "learning_rate": 4.413785141141835e-06, |
| "loss": 0.3579, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.5592912181466624, |
| "grad_norm": 3.5847585820260273, |
| "learning_rate": 4.408012468971887e-06, |
| "loss": 0.353, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.5598684020663184, |
| "grad_norm": 11.183076093226834, |
| "learning_rate": 4.40223979680194e-06, |
| "loss": 0.354, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.5604455859859744, |
| "grad_norm": 8.751616032322445, |
| "learning_rate": 4.396467124631992e-06, |
| "loss": 0.358, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.5610227699056304, |
| "grad_norm": 8.759188394381018, |
| "learning_rate": 4.3906944524620446e-06, |
| "loss": 0.3477, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.5615999538252864, |
| "grad_norm": 3.185742989121124, |
| "learning_rate": 4.3849217802920976e-06, |
| "loss": 0.3527, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.5621771377449424, |
| "grad_norm": 2.6034839783878847, |
| "learning_rate": 4.37914910812215e-06, |
| "loss": 0.3614, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.5627543216645984, |
| "grad_norm": 3.8176039303217943, |
| "learning_rate": 4.373376435952203e-06, |
| "loss": 0.3629, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.5633315055842544, |
| "grad_norm": 2.892391389713988, |
| "learning_rate": 4.367603763782255e-06, |
| "loss": 0.3543, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.5639086895039104, |
| "grad_norm": 3.756297203577958, |
| "learning_rate": 4.361831091612307e-06, |
| "loss": 0.3618, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.5644858734235664, |
| "grad_norm": 5.678261576873807, |
| "learning_rate": 4.35605841944236e-06, |
| "loss": 0.3568, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.5650630573432224, |
| "grad_norm": 8.441638312518547, |
| "learning_rate": 4.350285747272412e-06, |
| "loss": 0.3729, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.5656402412628784, |
| "grad_norm": 11.963283236920255, |
| "learning_rate": 4.344513075102465e-06, |
| "loss": 0.3515, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.5662174251825344, |
| "grad_norm": 5.191809552146641, |
| "learning_rate": 4.338740402932518e-06, |
| "loss": 0.3555, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.5667946091021904, |
| "grad_norm": 4.402544511071244, |
| "learning_rate": 4.3329677307625705e-06, |
| "loss": 0.3593, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.5673717930218464, |
| "grad_norm": 3.877667010250195, |
| "learning_rate": 4.327195058592623e-06, |
| "loss": 0.3671, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.5679489769415024, |
| "grad_norm": 3.6190457236390907, |
| "learning_rate": 4.321422386422676e-06, |
| "loss": 0.3821, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.5685261608611584, |
| "grad_norm": 6.877296061159591, |
| "learning_rate": 4.315649714252728e-06, |
| "loss": 0.3603, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.5691033447808144, |
| "grad_norm": 4.590765318427339, |
| "learning_rate": 4.309877042082781e-06, |
| "loss": 0.3713, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.5696805287004704, |
| "grad_norm": 4.474831184892119, |
| "learning_rate": 4.304104369912833e-06, |
| "loss": 0.3475, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.5702577126201264, |
| "grad_norm": 6.348172468881283, |
| "learning_rate": 4.298331697742886e-06, |
| "loss": 0.3498, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.5708348965397824, |
| "grad_norm": 3.2161248922526906, |
| "learning_rate": 4.292559025572938e-06, |
| "loss": 0.3548, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.5714120804594384, |
| "grad_norm": 3.6712025528595476, |
| "learning_rate": 4.2867863534029905e-06, |
| "loss": 0.3698, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.5719892643790944, |
| "grad_norm": 6.372020331564792, |
| "learning_rate": 4.2810136812330435e-06, |
| "loss": 0.3774, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.5725664482987504, |
| "grad_norm": 7.0760072190976055, |
| "learning_rate": 4.275241009063096e-06, |
| "loss": 0.366, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.5731436322184064, |
| "grad_norm": 4.330191932801956, |
| "learning_rate": 4.269468336893149e-06, |
| "loss": 0.3659, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.5737208161380624, |
| "grad_norm": 3.7388442828506183, |
| "learning_rate": 4.263695664723201e-06, |
| "loss": 0.3585, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.5742980000577184, |
| "grad_norm": 3.788666515258982, |
| "learning_rate": 4.257922992553253e-06, |
| "loss": 0.3624, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.5748751839773744, |
| "grad_norm": 3.882574363808373, |
| "learning_rate": 4.252150320383306e-06, |
| "loss": 0.359, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.5754523678970304, |
| "grad_norm": 3.3860959596594764, |
| "learning_rate": 4.246377648213358e-06, |
| "loss": 0.3753, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.5760295518166864, |
| "grad_norm": 3.0843751033026936, |
| "learning_rate": 4.240604976043411e-06, |
| "loss": 0.3623, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.5766067357363424, |
| "grad_norm": 4.2670578544960165, |
| "learning_rate": 4.234832303873463e-06, |
| "loss": 0.3583, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.5771839196559984, |
| "grad_norm": 4.252817625823887, |
| "learning_rate": 4.229059631703516e-06, |
| "loss": 0.3576, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.5777611035756544, |
| "grad_norm": 3.407625305823021, |
| "learning_rate": 4.223286959533569e-06, |
| "loss": 0.3552, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.5783382874953104, |
| "grad_norm": 3.1679099104052484, |
| "learning_rate": 4.217514287363621e-06, |
| "loss": 0.3738, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.5789154714149664, |
| "grad_norm": 3.6787102703539443, |
| "learning_rate": 4.211741615193674e-06, |
| "loss": 0.3578, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.5794926553346224, |
| "grad_norm": 5.851925555703329, |
| "learning_rate": 4.205968943023726e-06, |
| "loss": 0.3419, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.5800698392542784, |
| "grad_norm": 4.329132073061233, |
| "learning_rate": 4.200196270853779e-06, |
| "loss": 0.3643, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.5806470231739344, |
| "grad_norm": 5.997643376925449, |
| "learning_rate": 4.194423598683831e-06, |
| "loss": 0.377, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.5812242070935904, |
| "grad_norm": 2.847934197964713, |
| "learning_rate": 4.188650926513883e-06, |
| "loss": 0.3637, |
| "step": 10070 |
| }, |
| { |
| "epoch": 0.5818013910132463, |
| "grad_norm": 5.914457578502053, |
| "learning_rate": 4.182878254343936e-06, |
| "loss": 0.3674, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.5823785749329023, |
| "grad_norm": 2.717688952876076, |
| "learning_rate": 4.1771055821739885e-06, |
| "loss": 0.3445, |
| "step": 10090 |
| }, |
| { |
| "epoch": 0.5829557588525583, |
| "grad_norm": 4.506246357458738, |
| "learning_rate": 4.1713329100040415e-06, |
| "loss": 0.3596, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.5835329427722143, |
| "grad_norm": 4.390097302175211, |
| "learning_rate": 4.165560237834094e-06, |
| "loss": 0.3502, |
| "step": 10110 |
| }, |
| { |
| "epoch": 0.5841101266918703, |
| "grad_norm": 61.470288646964526, |
| "learning_rate": 4.159787565664146e-06, |
| "loss": 0.364, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.5846873106115263, |
| "grad_norm": 4.249154244474576, |
| "learning_rate": 4.154014893494199e-06, |
| "loss": 0.3494, |
| "step": 10130 |
| }, |
| { |
| "epoch": 0.5852644945311823, |
| "grad_norm": 2.6108964866695956, |
| "learning_rate": 4.148242221324251e-06, |
| "loss": 0.3417, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.5858416784508383, |
| "grad_norm": 3.655089863468255, |
| "learning_rate": 4.142469549154304e-06, |
| "loss": 0.3576, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.5864188623704943, |
| "grad_norm": 19.653637032520724, |
| "learning_rate": 4.136696876984356e-06, |
| "loss": 0.3603, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.5869960462901503, |
| "grad_norm": 4.623567506823282, |
| "learning_rate": 4.1309242048144085e-06, |
| "loss": 0.3486, |
| "step": 10170 |
| }, |
| { |
| "epoch": 0.5875732302098063, |
| "grad_norm": 5.14547645262892, |
| "learning_rate": 4.1251515326444615e-06, |
| "loss": 0.3591, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.5881504141294623, |
| "grad_norm": 20.59966690800242, |
| "learning_rate": 4.119378860474514e-06, |
| "loss": 0.3473, |
| "step": 10190 |
| }, |
| { |
| "epoch": 0.5887275980491183, |
| "grad_norm": 3.3637726076066796, |
| "learning_rate": 4.113606188304567e-06, |
| "loss": 0.3706, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.5893047819687743, |
| "grad_norm": 4.061688985881421, |
| "learning_rate": 4.107833516134619e-06, |
| "loss": 0.3694, |
| "step": 10210 |
| }, |
| { |
| "epoch": 0.5898819658884303, |
| "grad_norm": 4.941411464322626, |
| "learning_rate": 4.102060843964671e-06, |
| "loss": 0.3551, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.5904591498080863, |
| "grad_norm": 4.631050291252514, |
| "learning_rate": 4.096288171794724e-06, |
| "loss": 0.3669, |
| "step": 10230 |
| }, |
| { |
| "epoch": 0.5910363337277423, |
| "grad_norm": 5.833635533863073, |
| "learning_rate": 4.090515499624776e-06, |
| "loss": 0.354, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.5916135176473983, |
| "grad_norm": 17.060908158433886, |
| "learning_rate": 4.084742827454829e-06, |
| "loss": 0.355, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.5921907015670543, |
| "grad_norm": 4.438473043902829, |
| "learning_rate": 4.078970155284881e-06, |
| "loss": 0.3559, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.5927678854867103, |
| "grad_norm": 4.029441903808598, |
| "learning_rate": 4.0731974831149344e-06, |
| "loss": 0.3456, |
| "step": 10270 |
| }, |
| { |
| "epoch": 0.5933450694063663, |
| "grad_norm": 4.894018752470052, |
| "learning_rate": 4.067424810944987e-06, |
| "loss": 0.342, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.5939222533260223, |
| "grad_norm": 2.403763740670601, |
| "learning_rate": 4.061652138775039e-06, |
| "loss": 0.3653, |
| "step": 10290 |
| }, |
| { |
| "epoch": 0.5944994372456783, |
| "grad_norm": 4.283731997785048, |
| "learning_rate": 4.055879466605092e-06, |
| "loss": 0.3668, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.5950766211653343, |
| "grad_norm": 3.021670467218982, |
| "learning_rate": 4.050106794435144e-06, |
| "loss": 0.3554, |
| "step": 10310 |
| }, |
| { |
| "epoch": 0.5956538050849903, |
| "grad_norm": 39.77509257463927, |
| "learning_rate": 4.044334122265197e-06, |
| "loss": 0.373, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.5962309890046463, |
| "grad_norm": 7.734517576459877, |
| "learning_rate": 4.03856145009525e-06, |
| "loss": 0.3545, |
| "step": 10330 |
| }, |
| { |
| "epoch": 0.5968081729243023, |
| "grad_norm": 12.164334264143207, |
| "learning_rate": 4.032788777925302e-06, |
| "loss": 0.3593, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.5973853568439583, |
| "grad_norm": 6.962481288991228, |
| "learning_rate": 4.027016105755354e-06, |
| "loss": 0.3424, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.5979625407636143, |
| "grad_norm": 3.265508104782265, |
| "learning_rate": 4.021243433585407e-06, |
| "loss": 0.3401, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.5985397246832703, |
| "grad_norm": 3.169522843364254, |
| "learning_rate": 4.0154707614154596e-06, |
| "loss": 0.3534, |
| "step": 10370 |
| }, |
| { |
| "epoch": 0.5991169086029263, |
| "grad_norm": 3.2365860904520662, |
| "learning_rate": 4.0096980892455126e-06, |
| "loss": 0.3476, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.5996940925225823, |
| "grad_norm": 22.34179305900222, |
| "learning_rate": 4.003925417075565e-06, |
| "loss": 0.3615, |
| "step": 10390 |
| }, |
| { |
| "epoch": 0.6002712764422383, |
| "grad_norm": 3.6824711768745235, |
| "learning_rate": 3.998152744905617e-06, |
| "loss": 0.3598, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.6008484603618943, |
| "grad_norm": 2.244270658998301, |
| "learning_rate": 3.99238007273567e-06, |
| "loss": 0.3602, |
| "step": 10410 |
| }, |
| { |
| "epoch": 0.6014256442815503, |
| "grad_norm": 2.944264669766013, |
| "learning_rate": 3.986607400565722e-06, |
| "loss": 0.3557, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.6020028282012063, |
| "grad_norm": 3.6813564246612893, |
| "learning_rate": 3.980834728395775e-06, |
| "loss": 0.3426, |
| "step": 10430 |
| }, |
| { |
| "epoch": 0.6025800121208623, |
| "grad_norm": 2.4642472523118193, |
| "learning_rate": 3.975062056225827e-06, |
| "loss": 0.3498, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.6031571960405183, |
| "grad_norm": 2.612111770933025, |
| "learning_rate": 3.96928938405588e-06, |
| "loss": 0.3738, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.6037343799601743, |
| "grad_norm": 4.989290119921459, |
| "learning_rate": 3.9635167118859325e-06, |
| "loss": 0.3437, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.6043115638798303, |
| "grad_norm": 3.191111659552641, |
| "learning_rate": 3.957744039715985e-06, |
| "loss": 0.3513, |
| "step": 10470 |
| }, |
| { |
| "epoch": 0.6048887477994863, |
| "grad_norm": 6.289514020679802, |
| "learning_rate": 3.951971367546038e-06, |
| "loss": 0.3617, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.6054659317191423, |
| "grad_norm": 4.369159045847553, |
| "learning_rate": 3.94619869537609e-06, |
| "loss": 0.3622, |
| "step": 10490 |
| }, |
| { |
| "epoch": 0.6060431156387983, |
| "grad_norm": 4.272747590300094, |
| "learning_rate": 3.940426023206143e-06, |
| "loss": 0.3679, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.6066202995584543, |
| "grad_norm": 6.298266009612924, |
| "learning_rate": 3.934653351036195e-06, |
| "loss": 0.3605, |
| "step": 10510 |
| }, |
| { |
| "epoch": 0.6071974834781103, |
| "grad_norm": 2.853912711053667, |
| "learning_rate": 3.928880678866247e-06, |
| "loss": 0.3643, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.6077746673977663, |
| "grad_norm": 4.905191894605176, |
| "learning_rate": 3.9231080066963e-06, |
| "loss": 0.3566, |
| "step": 10530 |
| }, |
| { |
| "epoch": 0.6083518513174223, |
| "grad_norm": 3.7179571361092307, |
| "learning_rate": 3.9173353345263524e-06, |
| "loss": 0.3534, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.6089290352370783, |
| "grad_norm": 4.720816418264325, |
| "learning_rate": 3.9115626623564055e-06, |
| "loss": 0.3445, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.6095062191567343, |
| "grad_norm": 6.0994643547541045, |
| "learning_rate": 3.905789990186458e-06, |
| "loss": 0.3447, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.6100834030763903, |
| "grad_norm": 7.143461522640564, |
| "learning_rate": 3.90001731801651e-06, |
| "loss": 0.3486, |
| "step": 10570 |
| }, |
| { |
| "epoch": 0.6106605869960463, |
| "grad_norm": 3.5865672738484515, |
| "learning_rate": 3.894244645846563e-06, |
| "loss": 0.3543, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.6112377709157023, |
| "grad_norm": 3.44671753994167, |
| "learning_rate": 3.888471973676615e-06, |
| "loss": 0.339, |
| "step": 10590 |
| }, |
| { |
| "epoch": 0.6118149548353583, |
| "grad_norm": 4.037111129069171, |
| "learning_rate": 3.882699301506668e-06, |
| "loss": 0.3542, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.6123921387550143, |
| "grad_norm": 2.5068462700876752, |
| "learning_rate": 3.87692662933672e-06, |
| "loss": 0.3612, |
| "step": 10610 |
| }, |
| { |
| "epoch": 0.6129693226746703, |
| "grad_norm": 2.69916896955261, |
| "learning_rate": 3.871153957166772e-06, |
| "loss": 0.3552, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.6135465065943263, |
| "grad_norm": 2.12828690128291, |
| "learning_rate": 3.865381284996825e-06, |
| "loss": 0.3464, |
| "step": 10630 |
| }, |
| { |
| "epoch": 0.6141236905139823, |
| "grad_norm": 2.4651478648163754, |
| "learning_rate": 3.8596086128268776e-06, |
| "loss": 0.3714, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.6147008744336383, |
| "grad_norm": 4.709377859928187, |
| "learning_rate": 3.853835940656931e-06, |
| "loss": 0.3462, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.6152780583532943, |
| "grad_norm": 14.878118519317356, |
| "learning_rate": 3.848063268486983e-06, |
| "loss": 0.355, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.6158552422729503, |
| "grad_norm": 7.0013585156765314, |
| "learning_rate": 3.842290596317036e-06, |
| "loss": 0.3548, |
| "step": 10670 |
| }, |
| { |
| "epoch": 0.6164324261926063, |
| "grad_norm": 4.781645383408167, |
| "learning_rate": 3.836517924147088e-06, |
| "loss": 0.3544, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.6170096101122623, |
| "grad_norm": 2.5404756093298695, |
| "learning_rate": 3.83074525197714e-06, |
| "loss": 0.3534, |
| "step": 10690 |
| }, |
| { |
| "epoch": 0.6175867940319183, |
| "grad_norm": 8.95780042415011, |
| "learning_rate": 3.824972579807193e-06, |
| "loss": 0.3636, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.6181639779515743, |
| "grad_norm": 4.989641662422552, |
| "learning_rate": 3.819199907637245e-06, |
| "loss": 0.3673, |
| "step": 10710 |
| }, |
| { |
| "epoch": 0.6187411618712303, |
| "grad_norm": 3.0860360298250096, |
| "learning_rate": 3.813427235467298e-06, |
| "loss": 0.3454, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.6193183457908863, |
| "grad_norm": 5.75583484430528, |
| "learning_rate": 3.8076545632973505e-06, |
| "loss": 0.3598, |
| "step": 10730 |
| }, |
| { |
| "epoch": 0.6198955297105423, |
| "grad_norm": 2.949832246732552, |
| "learning_rate": 3.801881891127403e-06, |
| "loss": 0.3553, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.6204727136301983, |
| "grad_norm": 5.670166660495844, |
| "learning_rate": 3.7961092189574557e-06, |
| "loss": 0.3626, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.6210498975498543, |
| "grad_norm": 3.972108943307402, |
| "learning_rate": 3.790336546787508e-06, |
| "loss": 0.3473, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.6216270814695103, |
| "grad_norm": 6.588272622863319, |
| "learning_rate": 3.7845638746175605e-06, |
| "loss": 0.3504, |
| "step": 10770 |
| }, |
| { |
| "epoch": 0.6222042653891663, |
| "grad_norm": 10.062313562042537, |
| "learning_rate": 3.778791202447613e-06, |
| "loss": 0.348, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.6227814493088223, |
| "grad_norm": 2.393822027910724, |
| "learning_rate": 3.7730185302776657e-06, |
| "loss": 0.332, |
| "step": 10790 |
| }, |
| { |
| "epoch": 0.6233586332284783, |
| "grad_norm": 3.726451963447983, |
| "learning_rate": 3.7672458581077183e-06, |
| "loss": 0.3544, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.6239358171481343, |
| "grad_norm": 3.0713208436951405, |
| "learning_rate": 3.761473185937771e-06, |
| "loss": 0.3569, |
| "step": 10810 |
| }, |
| { |
| "epoch": 0.6245130010677903, |
| "grad_norm": 8.506793083475245, |
| "learning_rate": 3.755700513767823e-06, |
| "loss": 0.3498, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.6250901849874463, |
| "grad_norm": 5.0436471202946205, |
| "learning_rate": 3.7499278415978756e-06, |
| "loss": 0.3539, |
| "step": 10830 |
| }, |
| { |
| "epoch": 0.6256673689071023, |
| "grad_norm": 3.0241635545445957, |
| "learning_rate": 3.7441551694279282e-06, |
| "loss": 0.3623, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.6262445528267583, |
| "grad_norm": 2.4939286036772703, |
| "learning_rate": 3.7383824972579812e-06, |
| "loss": 0.3578, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.6268217367464143, |
| "grad_norm": 3.3678230813106373, |
| "learning_rate": 3.732609825088034e-06, |
| "loss": 0.3568, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.6273989206660703, |
| "grad_norm": 3.1630251340425795, |
| "learning_rate": 3.7268371529180864e-06, |
| "loss": 0.3488, |
| "step": 10870 |
| }, |
| { |
| "epoch": 0.6279761045857263, |
| "grad_norm": 4.715668356612278, |
| "learning_rate": 3.721064480748139e-06, |
| "loss": 0.3487, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.6285532885053823, |
| "grad_norm": 3.430089287433023, |
| "learning_rate": 3.7152918085781912e-06, |
| "loss": 0.3676, |
| "step": 10890 |
| }, |
| { |
| "epoch": 0.6291304724250383, |
| "grad_norm": 2.9339747157831546, |
| "learning_rate": 3.709519136408244e-06, |
| "loss": 0.3507, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.6297076563446943, |
| "grad_norm": 4.2204605393920485, |
| "learning_rate": 3.7037464642382964e-06, |
| "loss": 0.3446, |
| "step": 10910 |
| }, |
| { |
| "epoch": 0.6302848402643503, |
| "grad_norm": 2.5323029057405764, |
| "learning_rate": 3.697973792068349e-06, |
| "loss": 0.3507, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.6308620241840063, |
| "grad_norm": 1.959685524861653, |
| "learning_rate": 3.6922011198984016e-06, |
| "loss": 0.3551, |
| "step": 10930 |
| }, |
| { |
| "epoch": 0.6314392081036623, |
| "grad_norm": 2.2201928530131085, |
| "learning_rate": 3.6864284477284538e-06, |
| "loss": 0.3476, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.6320163920233183, |
| "grad_norm": 2.5294366254069645, |
| "learning_rate": 3.6806557755585064e-06, |
| "loss": 0.3502, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.6325935759429743, |
| "grad_norm": 2.5929823326561103, |
| "learning_rate": 3.674883103388559e-06, |
| "loss": 0.3477, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.6331707598626303, |
| "grad_norm": 3.0643397308226903, |
| "learning_rate": 3.6691104312186116e-06, |
| "loss": 0.3511, |
| "step": 10970 |
| }, |
| { |
| "epoch": 0.6337479437822863, |
| "grad_norm": 3.725143554468828, |
| "learning_rate": 3.663337759048664e-06, |
| "loss": 0.3597, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.6343251277019423, |
| "grad_norm": 2.332988363561149, |
| "learning_rate": 3.6575650868787168e-06, |
| "loss": 0.3547, |
| "step": 10990 |
| }, |
| { |
| "epoch": 0.6349023116215983, |
| "grad_norm": 4.338506151135665, |
| "learning_rate": 3.651792414708769e-06, |
| "loss": 0.3621, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.6354794955412543, |
| "grad_norm": 5.853920773449472, |
| "learning_rate": 3.6460197425388215e-06, |
| "loss": 0.3467, |
| "step": 11010 |
| }, |
| { |
| "epoch": 0.6360566794609103, |
| "grad_norm": 2.9801395957721217, |
| "learning_rate": 3.640247070368874e-06, |
| "loss": 0.3533, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.6366338633805663, |
| "grad_norm": 5.428993197624115, |
| "learning_rate": 3.6344743981989267e-06, |
| "loss": 0.3477, |
| "step": 11030 |
| }, |
| { |
| "epoch": 0.6372110473002223, |
| "grad_norm": 2.1575911965605914, |
| "learning_rate": 3.6287017260289793e-06, |
| "loss": 0.3463, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.6377882312198783, |
| "grad_norm": 3.3210877709918982, |
| "learning_rate": 3.622929053859032e-06, |
| "loss": 0.3546, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.6383654151395343, |
| "grad_norm": 2.686843207231148, |
| "learning_rate": 3.617156381689084e-06, |
| "loss": 0.3518, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.6389425990591903, |
| "grad_norm": 5.280345153851947, |
| "learning_rate": 3.6113837095191367e-06, |
| "loss": 0.3579, |
| "step": 11070 |
| }, |
| { |
| "epoch": 0.6395197829788463, |
| "grad_norm": 5.403871542937742, |
| "learning_rate": 3.6056110373491893e-06, |
| "loss": 0.3489, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.6400969668985023, |
| "grad_norm": 2.9735701887326833, |
| "learning_rate": 3.599838365179242e-06, |
| "loss": 0.3502, |
| "step": 11090 |
| }, |
| { |
| "epoch": 0.6406741508181583, |
| "grad_norm": 2.6891252705595368, |
| "learning_rate": 3.5940656930092945e-06, |
| "loss": 0.3612, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.6412513347378141, |
| "grad_norm": 4.660072834904341, |
| "learning_rate": 3.5882930208393467e-06, |
| "loss": 0.3408, |
| "step": 11110 |
| }, |
| { |
| "epoch": 0.6418285186574701, |
| "grad_norm": 8.181225664129359, |
| "learning_rate": 3.5825203486693993e-06, |
| "loss": 0.3466, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.6424057025771261, |
| "grad_norm": 3.8250794191372943, |
| "learning_rate": 3.576747676499452e-06, |
| "loss": 0.3428, |
| "step": 11130 |
| }, |
| { |
| "epoch": 0.6429828864967821, |
| "grad_norm": 2.5770691997974975, |
| "learning_rate": 3.5709750043295044e-06, |
| "loss": 0.3617, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.6435600704164381, |
| "grad_norm": 3.518147076569533, |
| "learning_rate": 3.565202332159557e-06, |
| "loss": 0.3598, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.6441372543360941, |
| "grad_norm": 2.3978173360258332, |
| "learning_rate": 3.5594296599896096e-06, |
| "loss": 0.3486, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.6447144382557501, |
| "grad_norm": 4.963521259349147, |
| "learning_rate": 3.553656987819662e-06, |
| "loss": 0.3465, |
| "step": 11170 |
| }, |
| { |
| "epoch": 0.6452916221754061, |
| "grad_norm": 3.7768523495827, |
| "learning_rate": 3.5478843156497144e-06, |
| "loss": 0.3507, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.6458688060950621, |
| "grad_norm": 1.9990854220435814, |
| "learning_rate": 3.542111643479767e-06, |
| "loss": 0.3485, |
| "step": 11190 |
| }, |
| { |
| "epoch": 0.6464459900147181, |
| "grad_norm": 2.434785484655442, |
| "learning_rate": 3.5363389713098196e-06, |
| "loss": 0.3414, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.6470231739343741, |
| "grad_norm": 3.764273559187499, |
| "learning_rate": 3.530566299139872e-06, |
| "loss": 0.3475, |
| "step": 11210 |
| }, |
| { |
| "epoch": 0.6476003578540301, |
| "grad_norm": 3.063611287505477, |
| "learning_rate": 3.5247936269699244e-06, |
| "loss": 0.3487, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.6481775417736861, |
| "grad_norm": 2.7961532657594357, |
| "learning_rate": 3.519020954799977e-06, |
| "loss": 0.3591, |
| "step": 11230 |
| }, |
| { |
| "epoch": 0.6487547256933421, |
| "grad_norm": 5.675631273424128, |
| "learning_rate": 3.5132482826300296e-06, |
| "loss": 0.3387, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.6493319096129981, |
| "grad_norm": 4.038281786465871, |
| "learning_rate": 3.507475610460082e-06, |
| "loss": 0.3544, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.6499090935326541, |
| "grad_norm": 19.461560362822837, |
| "learning_rate": 3.5017029382901348e-06, |
| "loss": 0.3612, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.6504862774523101, |
| "grad_norm": 2.9170938007747838, |
| "learning_rate": 3.4959302661201874e-06, |
| "loss": 0.3633, |
| "step": 11270 |
| }, |
| { |
| "epoch": 0.6510634613719661, |
| "grad_norm": 2.100392402638713, |
| "learning_rate": 3.4901575939502395e-06, |
| "loss": 0.3628, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.6516406452916221, |
| "grad_norm": 7.466900360838518, |
| "learning_rate": 3.484384921780292e-06, |
| "loss": 0.3544, |
| "step": 11290 |
| }, |
| { |
| "epoch": 0.6522178292112781, |
| "grad_norm": 2.3522582138412984, |
| "learning_rate": 3.4786122496103447e-06, |
| "loss": 0.3768, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.6527950131309341, |
| "grad_norm": 2.0677131586462556, |
| "learning_rate": 3.4728395774403973e-06, |
| "loss": 0.3514, |
| "step": 11310 |
| }, |
| { |
| "epoch": 0.6533721970505901, |
| "grad_norm": 3.1804108497752943, |
| "learning_rate": 3.46706690527045e-06, |
| "loss": 0.3506, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.6539493809702461, |
| "grad_norm": 5.396390003664786, |
| "learning_rate": 3.461294233100502e-06, |
| "loss": 0.3539, |
| "step": 11330 |
| }, |
| { |
| "epoch": 0.6545265648899021, |
| "grad_norm": 5.1304776342645235, |
| "learning_rate": 3.4555215609305547e-06, |
| "loss": 0.3578, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.6551037488095581, |
| "grad_norm": 2.625555244563686, |
| "learning_rate": 3.4497488887606073e-06, |
| "loss": 0.3344, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.6556809327292141, |
| "grad_norm": 2.765166389820208, |
| "learning_rate": 3.44397621659066e-06, |
| "loss": 0.3467, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.6562581166488701, |
| "grad_norm": 3.1348291760774556, |
| "learning_rate": 3.4382035444207125e-06, |
| "loss": 0.348, |
| "step": 11370 |
| }, |
| { |
| "epoch": 0.6568353005685261, |
| "grad_norm": 2.6149817919704486, |
| "learning_rate": 3.4324308722507655e-06, |
| "loss": 0.3581, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.6574124844881821, |
| "grad_norm": 2.4071247325320084, |
| "learning_rate": 3.426658200080818e-06, |
| "loss": 0.3425, |
| "step": 11390 |
| }, |
| { |
| "epoch": 0.6579896684078381, |
| "grad_norm": 6.966272376463285, |
| "learning_rate": 3.4208855279108703e-06, |
| "loss": 0.3648, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.6585668523274941, |
| "grad_norm": 2.410341785080001, |
| "learning_rate": 3.415112855740923e-06, |
| "loss": 0.3518, |
| "step": 11410 |
| }, |
| { |
| "epoch": 0.6591440362471501, |
| "grad_norm": 4.052333811947672, |
| "learning_rate": 3.4093401835709755e-06, |
| "loss": 0.3538, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.6597212201668061, |
| "grad_norm": 5.112675962153542, |
| "learning_rate": 3.403567511401028e-06, |
| "loss": 0.3462, |
| "step": 11430 |
| }, |
| { |
| "epoch": 0.6602984040864621, |
| "grad_norm": 2.0711228360250873, |
| "learning_rate": 3.3977948392310807e-06, |
| "loss": 0.3523, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.6608755880061181, |
| "grad_norm": 3.0723705312379677, |
| "learning_rate": 3.3920221670611333e-06, |
| "loss": 0.3538, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.6614527719257741, |
| "grad_norm": 7.22672591716136, |
| "learning_rate": 3.3862494948911854e-06, |
| "loss": 0.3508, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.6620299558454301, |
| "grad_norm": 3.4674616674239447, |
| "learning_rate": 3.380476822721238e-06, |
| "loss": 0.3488, |
| "step": 11470 |
| }, |
| { |
| "epoch": 0.6626071397650861, |
| "grad_norm": 3.0526763692239602, |
| "learning_rate": 3.3747041505512906e-06, |
| "loss": 0.3413, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.6631843236847421, |
| "grad_norm": 8.92552529404141, |
| "learning_rate": 3.3689314783813432e-06, |
| "loss": 0.3559, |
| "step": 11490 |
| }, |
| { |
| "epoch": 0.6637615076043981, |
| "grad_norm": 7.126184068548845, |
| "learning_rate": 3.363158806211396e-06, |
| "loss": 0.3431, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.6643386915240541, |
| "grad_norm": 3.6795312021204993, |
| "learning_rate": 3.357386134041448e-06, |
| "loss": 0.3644, |
| "step": 11510 |
| }, |
| { |
| "epoch": 0.6649158754437101, |
| "grad_norm": 3.1394487426454765, |
| "learning_rate": 3.3516134618715006e-06, |
| "loss": 0.355, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.6654930593633661, |
| "grad_norm": 3.115800444710574, |
| "learning_rate": 3.345840789701553e-06, |
| "loss": 0.3595, |
| "step": 11530 |
| }, |
| { |
| "epoch": 0.6660702432830221, |
| "grad_norm": 3.04464377321414, |
| "learning_rate": 3.3400681175316058e-06, |
| "loss": 0.3474, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.6666474272026781, |
| "grad_norm": 11.414003410738056, |
| "learning_rate": 3.3342954453616584e-06, |
| "loss": 0.344, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.6672246111223341, |
| "grad_norm": 7.794881460371124, |
| "learning_rate": 3.328522773191711e-06, |
| "loss": 0.3489, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.6678017950419901, |
| "grad_norm": 3.606879802040075, |
| "learning_rate": 3.322750101021763e-06, |
| "loss": 0.3542, |
| "step": 11570 |
| }, |
| { |
| "epoch": 0.6683789789616461, |
| "grad_norm": 3.271233323948874, |
| "learning_rate": 3.3169774288518158e-06, |
| "loss": 0.3572, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.6689561628813021, |
| "grad_norm": 5.312528784803595, |
| "learning_rate": 3.3112047566818683e-06, |
| "loss": 0.344, |
| "step": 11590 |
| }, |
| { |
| "epoch": 0.6695333468009581, |
| "grad_norm": 4.414037045732359, |
| "learning_rate": 3.305432084511921e-06, |
| "loss": 0.3757, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.6701105307206141, |
| "grad_norm": 4.420990727422642, |
| "learning_rate": 3.2996594123419735e-06, |
| "loss": 0.3361, |
| "step": 11610 |
| }, |
| { |
| "epoch": 0.6706877146402701, |
| "grad_norm": 28.79897728988663, |
| "learning_rate": 3.2938867401720257e-06, |
| "loss": 0.3549, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.6712648985599261, |
| "grad_norm": 5.938341510395738, |
| "learning_rate": 3.2881140680020783e-06, |
| "loss": 0.3528, |
| "step": 11630 |
| }, |
| { |
| "epoch": 0.6718420824795821, |
| "grad_norm": 7.625014010774144, |
| "learning_rate": 3.282341395832131e-06, |
| "loss": 0.3569, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.6724192663992381, |
| "grad_norm": 2.627772129382934, |
| "learning_rate": 3.2765687236621835e-06, |
| "loss": 0.341, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.6729964503188941, |
| "grad_norm": 6.6462109495436765, |
| "learning_rate": 3.270796051492236e-06, |
| "loss": 0.3432, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.6735736342385501, |
| "grad_norm": 4.140894146799749, |
| "learning_rate": 3.2650233793222887e-06, |
| "loss": 0.35, |
| "step": 11670 |
| }, |
| { |
| "epoch": 0.6741508181582061, |
| "grad_norm": 7.023320528238819, |
| "learning_rate": 3.259250707152341e-06, |
| "loss": 0.3483, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.6747280020778621, |
| "grad_norm": 3.56371765942958, |
| "learning_rate": 3.2534780349823935e-06, |
| "loss": 0.3456, |
| "step": 11690 |
| }, |
| { |
| "epoch": 0.6753051859975181, |
| "grad_norm": 4.693701665628699, |
| "learning_rate": 3.247705362812446e-06, |
| "loss": 0.3556, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.6758823699171741, |
| "grad_norm": 4.8769232133317955, |
| "learning_rate": 3.2419326906424987e-06, |
| "loss": 0.3406, |
| "step": 11710 |
| }, |
| { |
| "epoch": 0.6764595538368301, |
| "grad_norm": 14.213756604351863, |
| "learning_rate": 3.2361600184725513e-06, |
| "loss": 0.3422, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.6770367377564861, |
| "grad_norm": 4.009649954155962, |
| "learning_rate": 3.2303873463026034e-06, |
| "loss": 0.3449, |
| "step": 11730 |
| }, |
| { |
| "epoch": 0.6776139216761421, |
| "grad_norm": 3.538273145119479, |
| "learning_rate": 3.224614674132656e-06, |
| "loss": 0.3457, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.6781911055957981, |
| "grad_norm": 3.0329790960952026, |
| "learning_rate": 3.2188420019627086e-06, |
| "loss": 0.3479, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.6787682895154541, |
| "grad_norm": 8.21491132526687, |
| "learning_rate": 3.2130693297927612e-06, |
| "loss": 0.3577, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.6793454734351101, |
| "grad_norm": 4.981836103874383, |
| "learning_rate": 3.207296657622814e-06, |
| "loss": 0.355, |
| "step": 11770 |
| }, |
| { |
| "epoch": 0.6799226573547661, |
| "grad_norm": 5.1547852515451975, |
| "learning_rate": 3.2015239854528664e-06, |
| "loss": 0.3392, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.6804998412744221, |
| "grad_norm": 9.811331834930291, |
| "learning_rate": 3.1957513132829186e-06, |
| "loss": 0.3545, |
| "step": 11790 |
| }, |
| { |
| "epoch": 0.6810770251940781, |
| "grad_norm": 4.083480395202693, |
| "learning_rate": 3.189978641112971e-06, |
| "loss": 0.3575, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.6816542091137341, |
| "grad_norm": 4.764988431769556, |
| "learning_rate": 3.184205968943024e-06, |
| "loss": 0.3372, |
| "step": 11810 |
| }, |
| { |
| "epoch": 0.6822313930333901, |
| "grad_norm": 6.794798971465098, |
| "learning_rate": 3.1784332967730764e-06, |
| "loss": 0.3539, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.6828085769530461, |
| "grad_norm": 7.111672589883507, |
| "learning_rate": 3.172660624603129e-06, |
| "loss": 0.3544, |
| "step": 11830 |
| }, |
| { |
| "epoch": 0.6833857608727021, |
| "grad_norm": 24.124344459801147, |
| "learning_rate": 3.166887952433181e-06, |
| "loss": 0.3602, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.6839629447923581, |
| "grad_norm": 12.424128663469016, |
| "learning_rate": 3.1611152802632338e-06, |
| "loss": 0.3441, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.6845401287120141, |
| "grad_norm": 3.903963843774075, |
| "learning_rate": 3.1553426080932864e-06, |
| "loss": 0.3572, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.6851173126316701, |
| "grad_norm": 7.5954443349430525, |
| "learning_rate": 3.149569935923339e-06, |
| "loss": 0.3459, |
| "step": 11870 |
| }, |
| { |
| "epoch": 0.6856944965513261, |
| "grad_norm": 5.147769651660261, |
| "learning_rate": 3.1437972637533915e-06, |
| "loss": 0.3443, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.6862716804709821, |
| "grad_norm": 8.531813464808828, |
| "learning_rate": 3.138024591583444e-06, |
| "loss": 0.3427, |
| "step": 11890 |
| }, |
| { |
| "epoch": 0.6868488643906381, |
| "grad_norm": 9.203737344622347, |
| "learning_rate": 3.1322519194134963e-06, |
| "loss": 0.3466, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.6874260483102941, |
| "grad_norm": 4.1913039016792055, |
| "learning_rate": 3.1264792472435498e-06, |
| "loss": 0.3667, |
| "step": 11910 |
| }, |
| { |
| "epoch": 0.6880032322299501, |
| "grad_norm": 4.984073528450747, |
| "learning_rate": 3.120706575073602e-06, |
| "loss": 0.3417, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.6885804161496061, |
| "grad_norm": 4.3895825229927725, |
| "learning_rate": 3.1149339029036545e-06, |
| "loss": 0.3341, |
| "step": 11930 |
| }, |
| { |
| "epoch": 0.6891576000692621, |
| "grad_norm": 5.323675472371107, |
| "learning_rate": 3.109161230733707e-06, |
| "loss": 0.3341, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.6897347839889181, |
| "grad_norm": 4.263152619263457, |
| "learning_rate": 3.1033885585637597e-06, |
| "loss": 0.3463, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.6903119679085741, |
| "grad_norm": 6.116151443830828, |
| "learning_rate": 3.0976158863938123e-06, |
| "loss": 0.3603, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.6908891518282301, |
| "grad_norm": 10.96676225065857, |
| "learning_rate": 3.0918432142238645e-06, |
| "loss": 0.3317, |
| "step": 11970 |
| }, |
| { |
| "epoch": 0.6914663357478861, |
| "grad_norm": 5.204338558399774, |
| "learning_rate": 3.086070542053917e-06, |
| "loss": 0.3432, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.6920435196675421, |
| "grad_norm": 3.481643749818502, |
| "learning_rate": 3.0802978698839697e-06, |
| "loss": 0.3343, |
| "step": 11990 |
| }, |
| { |
| "epoch": 0.6926207035871981, |
| "grad_norm": 5.422983882620972, |
| "learning_rate": 3.0745251977140223e-06, |
| "loss": 0.3444, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.6931978875068541, |
| "grad_norm": 3.293726670681602, |
| "learning_rate": 3.068752525544075e-06, |
| "loss": 0.3536, |
| "step": 12010 |
| }, |
| { |
| "epoch": 0.6937750714265101, |
| "grad_norm": 11.162281611641948, |
| "learning_rate": 3.0629798533741275e-06, |
| "loss": 0.3461, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.6943522553461661, |
| "grad_norm": 4.538803769431588, |
| "learning_rate": 3.0572071812041797e-06, |
| "loss": 0.3404, |
| "step": 12030 |
| }, |
| { |
| "epoch": 0.6949294392658221, |
| "grad_norm": 7.297266576912264, |
| "learning_rate": 3.0514345090342323e-06, |
| "loss": 0.3399, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.6955066231854781, |
| "grad_norm": 4.498082885030529, |
| "learning_rate": 3.045661836864285e-06, |
| "loss": 0.3572, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.6960838071051341, |
| "grad_norm": 6.71445000313715, |
| "learning_rate": 3.0398891646943374e-06, |
| "loss": 0.3456, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.6966609910247901, |
| "grad_norm": 4.130838744263147, |
| "learning_rate": 3.03411649252439e-06, |
| "loss": 0.3382, |
| "step": 12070 |
| }, |
| { |
| "epoch": 0.6972381749444461, |
| "grad_norm": 2.959998512168581, |
| "learning_rate": 3.0283438203544422e-06, |
| "loss": 0.3441, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.6978153588641021, |
| "grad_norm": 8.68519096842326, |
| "learning_rate": 3.022571148184495e-06, |
| "loss": 0.3536, |
| "step": 12090 |
| }, |
| { |
| "epoch": 0.6983925427837581, |
| "grad_norm": 6.068123748807202, |
| "learning_rate": 3.0167984760145474e-06, |
| "loss": 0.3336, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.6989697267034141, |
| "grad_norm": 13.720643945389472, |
| "learning_rate": 3.0110258038446e-06, |
| "loss": 0.3453, |
| "step": 12110 |
| }, |
| { |
| "epoch": 0.6995469106230701, |
| "grad_norm": 5.2345993949285115, |
| "learning_rate": 3.0052531316746526e-06, |
| "loss": 0.3514, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.7001240945427261, |
| "grad_norm": 5.995140869193482, |
| "learning_rate": 2.999480459504705e-06, |
| "loss": 0.3465, |
| "step": 12130 |
| }, |
| { |
| "epoch": 0.7007012784623821, |
| "grad_norm": 5.534508344244959, |
| "learning_rate": 2.9937077873347574e-06, |
| "loss": 0.3436, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.7012784623820381, |
| "grad_norm": 3.837652212059965, |
| "learning_rate": 2.98793511516481e-06, |
| "loss": 0.347, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.7018556463016941, |
| "grad_norm": 32.35178311186503, |
| "learning_rate": 2.9821624429948626e-06, |
| "loss": 0.3543, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.7024328302213501, |
| "grad_norm": 4.1065566840261125, |
| "learning_rate": 2.976389770824915e-06, |
| "loss": 0.3421, |
| "step": 12170 |
| }, |
| { |
| "epoch": 0.7030100141410061, |
| "grad_norm": 6.144592670774153, |
| "learning_rate": 2.9706170986549678e-06, |
| "loss": 0.3466, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.7035871980606621, |
| "grad_norm": 5.684740566371751, |
| "learning_rate": 2.96484442648502e-06, |
| "loss": 0.349, |
| "step": 12190 |
| }, |
| { |
| "epoch": 0.704164381980318, |
| "grad_norm": 5.234589933221641, |
| "learning_rate": 2.9590717543150725e-06, |
| "loss": 0.3221, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.704741565899974, |
| "grad_norm": 5.9090591405034205, |
| "learning_rate": 2.953299082145125e-06, |
| "loss": 0.3461, |
| "step": 12210 |
| }, |
| { |
| "epoch": 0.70531874981963, |
| "grad_norm": 5.161621785333446, |
| "learning_rate": 2.9475264099751777e-06, |
| "loss": 0.3343, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.705895933739286, |
| "grad_norm": 7.454875045097898, |
| "learning_rate": 2.9417537378052303e-06, |
| "loss": 0.339, |
| "step": 12230 |
| }, |
| { |
| "epoch": 0.706473117658942, |
| "grad_norm": 3.3533355785311936, |
| "learning_rate": 2.935981065635283e-06, |
| "loss": 0.3226, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.707050301578598, |
| "grad_norm": 3.3526244501016507, |
| "learning_rate": 2.930208393465335e-06, |
| "loss": 0.3525, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.707627485498254, |
| "grad_norm": 46.63609588889749, |
| "learning_rate": 2.9244357212953877e-06, |
| "loss": 0.3388, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.70820466941791, |
| "grad_norm": 6.343222491694745, |
| "learning_rate": 2.9186630491254403e-06, |
| "loss": 0.3458, |
| "step": 12270 |
| }, |
| { |
| "epoch": 0.708781853337566, |
| "grad_norm": 7.406012410848603, |
| "learning_rate": 2.912890376955493e-06, |
| "loss": 0.3523, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.709359037257222, |
| "grad_norm": 4.391956311756113, |
| "learning_rate": 2.9071177047855455e-06, |
| "loss": 0.3462, |
| "step": 12290 |
| }, |
| { |
| "epoch": 0.709936221176878, |
| "grad_norm": 7.365023031476813, |
| "learning_rate": 2.9013450326155977e-06, |
| "loss": 0.3507, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.710513405096534, |
| "grad_norm": 8.355149496371373, |
| "learning_rate": 2.8955723604456503e-06, |
| "loss": 0.3516, |
| "step": 12310 |
| }, |
| { |
| "epoch": 0.71109058901619, |
| "grad_norm": 4.12597154889129, |
| "learning_rate": 2.889799688275703e-06, |
| "loss": 0.3387, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.711667772935846, |
| "grad_norm": 22.783055812157006, |
| "learning_rate": 2.8840270161057555e-06, |
| "loss": 0.3505, |
| "step": 12330 |
| }, |
| { |
| "epoch": 0.712244956855502, |
| "grad_norm": 6.598877289364409, |
| "learning_rate": 2.878254343935808e-06, |
| "loss": 0.3511, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.712822140775158, |
| "grad_norm": 6.658331270547365, |
| "learning_rate": 2.8724816717658606e-06, |
| "loss": 0.3472, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.713399324694814, |
| "grad_norm": 4.249593904236529, |
| "learning_rate": 2.866708999595913e-06, |
| "loss": 0.3462, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.71397650861447, |
| "grad_norm": 4.820541412534286, |
| "learning_rate": 2.8609363274259654e-06, |
| "loss": 0.3327, |
| "step": 12370 |
| }, |
| { |
| "epoch": 0.714553692534126, |
| "grad_norm": 29.19756399575411, |
| "learning_rate": 2.855163655256018e-06, |
| "loss": 0.3423, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.715130876453782, |
| "grad_norm": 2.5170693039148695, |
| "learning_rate": 2.8493909830860706e-06, |
| "loss": 0.3507, |
| "step": 12390 |
| }, |
| { |
| "epoch": 0.715708060373438, |
| "grad_norm": 4.848573397975011, |
| "learning_rate": 2.843618310916123e-06, |
| "loss": 0.3614, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.716285244293094, |
| "grad_norm": 8.083798553592858, |
| "learning_rate": 2.8378456387461754e-06, |
| "loss": 0.3444, |
| "step": 12410 |
| }, |
| { |
| "epoch": 0.71686242821275, |
| "grad_norm": 6.013516986737268, |
| "learning_rate": 2.832072966576228e-06, |
| "loss": 0.3411, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.717439612132406, |
| "grad_norm": 58.40825846546123, |
| "learning_rate": 2.826300294406281e-06, |
| "loss": 0.3639, |
| "step": 12430 |
| }, |
| { |
| "epoch": 0.718016796052062, |
| "grad_norm": 3.486306193338465, |
| "learning_rate": 2.8205276222363336e-06, |
| "loss": 0.3508, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.718593979971718, |
| "grad_norm": 11.117203715725822, |
| "learning_rate": 2.814754950066386e-06, |
| "loss": 0.3441, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.719171163891374, |
| "grad_norm": 6.959568782940119, |
| "learning_rate": 2.808982277896439e-06, |
| "loss": 0.355, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.71974834781103, |
| "grad_norm": 4.77575740578489, |
| "learning_rate": 2.8032096057264914e-06, |
| "loss": 0.3634, |
| "step": 12470 |
| }, |
| { |
| "epoch": 0.720325531730686, |
| "grad_norm": 12.450518772632652, |
| "learning_rate": 2.797436933556544e-06, |
| "loss": 0.3454, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.720902715650342, |
| "grad_norm": 4.606989122142115, |
| "learning_rate": 2.791664261386596e-06, |
| "loss": 0.338, |
| "step": 12490 |
| }, |
| { |
| "epoch": 0.721479899569998, |
| "grad_norm": 4.456459932783003, |
| "learning_rate": 2.7858915892166488e-06, |
| "loss": 0.3474, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.722057083489654, |
| "grad_norm": 4.348978440263518, |
| "learning_rate": 2.7801189170467014e-06, |
| "loss": 0.3488, |
| "step": 12510 |
| }, |
| { |
| "epoch": 0.72263426740931, |
| "grad_norm": 3.220553815408838, |
| "learning_rate": 2.774346244876754e-06, |
| "loss": 0.3372, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.723211451328966, |
| "grad_norm": 6.662149536726665, |
| "learning_rate": 2.7685735727068065e-06, |
| "loss": 0.3487, |
| "step": 12530 |
| }, |
| { |
| "epoch": 0.723788635248622, |
| "grad_norm": 4.489888078931735, |
| "learning_rate": 2.7628009005368587e-06, |
| "loss": 0.3589, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.724365819168278, |
| "grad_norm": 3.911221624507832, |
| "learning_rate": 2.7570282283669113e-06, |
| "loss": 0.3364, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.724943003087934, |
| "grad_norm": 4.397535770990173, |
| "learning_rate": 2.751255556196964e-06, |
| "loss": 0.3468, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.72552018700759, |
| "grad_norm": 4.138162758687126, |
| "learning_rate": 2.7454828840270165e-06, |
| "loss": 0.351, |
| "step": 12570 |
| }, |
| { |
| "epoch": 0.726097370927246, |
| "grad_norm": 3.6393806674007396, |
| "learning_rate": 2.739710211857069e-06, |
| "loss": 0.3439, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.7266745548469019, |
| "grad_norm": 10.959014317841367, |
| "learning_rate": 2.7339375396871217e-06, |
| "loss": 0.3466, |
| "step": 12590 |
| }, |
| { |
| "epoch": 0.7272517387665579, |
| "grad_norm": 8.5956598139555, |
| "learning_rate": 2.728164867517174e-06, |
| "loss": 0.3474, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.7278289226862139, |
| "grad_norm": 8.199922135307672, |
| "learning_rate": 2.7223921953472265e-06, |
| "loss": 0.3334, |
| "step": 12610 |
| }, |
| { |
| "epoch": 0.7284061066058699, |
| "grad_norm": 3.4555204304735563, |
| "learning_rate": 2.716619523177279e-06, |
| "loss": 0.3437, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.7289832905255259, |
| "grad_norm": 4.838169983017387, |
| "learning_rate": 2.7108468510073317e-06, |
| "loss": 0.3444, |
| "step": 12630 |
| }, |
| { |
| "epoch": 0.7295604744451819, |
| "grad_norm": 3.294894158340646, |
| "learning_rate": 2.7050741788373843e-06, |
| "loss": 0.345, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.7301376583648379, |
| "grad_norm": 3.886142779724859, |
| "learning_rate": 2.6993015066674364e-06, |
| "loss": 0.34, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.7307148422844939, |
| "grad_norm": 3.1827555908179814, |
| "learning_rate": 2.693528834497489e-06, |
| "loss": 0.3383, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.7312920262041499, |
| "grad_norm": 20.97526772421675, |
| "learning_rate": 2.6877561623275416e-06, |
| "loss": 0.3416, |
| "step": 12670 |
| }, |
| { |
| "epoch": 0.7318692101238059, |
| "grad_norm": 3.474528368008189, |
| "learning_rate": 2.6819834901575942e-06, |
| "loss": 0.3359, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.7324463940434619, |
| "grad_norm": 11.34967364860296, |
| "learning_rate": 2.676210817987647e-06, |
| "loss": 0.3581, |
| "step": 12690 |
| }, |
| { |
| "epoch": 0.7330235779631179, |
| "grad_norm": 3.495369176137086, |
| "learning_rate": 2.6704381458176994e-06, |
| "loss": 0.3369, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.7336007618827739, |
| "grad_norm": 4.744362874719428, |
| "learning_rate": 2.6646654736477516e-06, |
| "loss": 0.3502, |
| "step": 12710 |
| }, |
| { |
| "epoch": 0.7341779458024299, |
| "grad_norm": 2.554299735229823, |
| "learning_rate": 2.658892801477804e-06, |
| "loss": 0.3547, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.7347551297220859, |
| "grad_norm": 4.114324789692039, |
| "learning_rate": 2.653120129307857e-06, |
| "loss": 0.3406, |
| "step": 12730 |
| }, |
| { |
| "epoch": 0.7353323136417419, |
| "grad_norm": 5.99465461001967, |
| "learning_rate": 2.6473474571379094e-06, |
| "loss": 0.3568, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.7359094975613979, |
| "grad_norm": 5.08850145360636, |
| "learning_rate": 2.641574784967962e-06, |
| "loss": 0.3437, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.7364866814810539, |
| "grad_norm": 2.901320860603511, |
| "learning_rate": 2.635802112798014e-06, |
| "loss": 0.334, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.7370638654007099, |
| "grad_norm": 7.740697658957906, |
| "learning_rate": 2.6300294406280668e-06, |
| "loss": 0.3361, |
| "step": 12770 |
| }, |
| { |
| "epoch": 0.7376410493203659, |
| "grad_norm": 6.245600892762995, |
| "learning_rate": 2.6242567684581194e-06, |
| "loss": 0.3308, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.7382182332400219, |
| "grad_norm": 5.308126429822944, |
| "learning_rate": 2.618484096288172e-06, |
| "loss": 0.3527, |
| "step": 12790 |
| }, |
| { |
| "epoch": 0.7387954171596779, |
| "grad_norm": 4.212291401570202, |
| "learning_rate": 2.6127114241182245e-06, |
| "loss": 0.3409, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.7393726010793339, |
| "grad_norm": 3.7060833092802556, |
| "learning_rate": 2.606938751948277e-06, |
| "loss": 0.337, |
| "step": 12810 |
| }, |
| { |
| "epoch": 0.7399497849989899, |
| "grad_norm": 13.303775807909831, |
| "learning_rate": 2.6011660797783293e-06, |
| "loss": 0.3521, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.7405269689186459, |
| "grad_norm": 5.855236173414753, |
| "learning_rate": 2.595393407608382e-06, |
| "loss": 0.344, |
| "step": 12830 |
| }, |
| { |
| "epoch": 0.7411041528383019, |
| "grad_norm": 2.862607162292994, |
| "learning_rate": 2.5896207354384345e-06, |
| "loss": 0.3386, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.7416813367579579, |
| "grad_norm": 3.190707127525178, |
| "learning_rate": 2.583848063268487e-06, |
| "loss": 0.3451, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.7422585206776139, |
| "grad_norm": 7.479465788278306, |
| "learning_rate": 2.5780753910985397e-06, |
| "loss": 0.3418, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.7428357045972699, |
| "grad_norm": 3.9314452580240795, |
| "learning_rate": 2.572302718928592e-06, |
| "loss": 0.3558, |
| "step": 12870 |
| }, |
| { |
| "epoch": 0.7434128885169259, |
| "grad_norm": 2.586918485101635, |
| "learning_rate": 2.5665300467586445e-06, |
| "loss": 0.3521, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.7439900724365819, |
| "grad_norm": 5.285658124727487, |
| "learning_rate": 2.560757374588697e-06, |
| "loss": 0.3467, |
| "step": 12890 |
| }, |
| { |
| "epoch": 0.7445672563562379, |
| "grad_norm": 16.29975276837285, |
| "learning_rate": 2.5549847024187497e-06, |
| "loss": 0.338, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.7451444402758939, |
| "grad_norm": 5.18800369782506, |
| "learning_rate": 2.5492120302488023e-06, |
| "loss": 0.3423, |
| "step": 12910 |
| }, |
| { |
| "epoch": 0.7457216241955499, |
| "grad_norm": 2.7621346083831737, |
| "learning_rate": 2.543439358078855e-06, |
| "loss": 0.3474, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.7462988081152059, |
| "grad_norm": 9.136730427994907, |
| "learning_rate": 2.537666685908907e-06, |
| "loss": 0.3599, |
| "step": 12930 |
| }, |
| { |
| "epoch": 0.7468759920348619, |
| "grad_norm": 4.494380679642752, |
| "learning_rate": 2.5318940137389596e-06, |
| "loss": 0.3509, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.7474531759545179, |
| "grad_norm": 3.8278742483492554, |
| "learning_rate": 2.5261213415690122e-06, |
| "loss": 0.3419, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.7480303598741739, |
| "grad_norm": 13.52933963754357, |
| "learning_rate": 2.5203486693990653e-06, |
| "loss": 0.3255, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.7486075437938299, |
| "grad_norm": 4.13788172504123, |
| "learning_rate": 2.514575997229118e-06, |
| "loss": 0.3446, |
| "step": 12970 |
| }, |
| { |
| "epoch": 0.7491847277134859, |
| "grad_norm": 6.859750860335564, |
| "learning_rate": 2.5088033250591704e-06, |
| "loss": 0.3586, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.7497619116331419, |
| "grad_norm": 9.235304443739427, |
| "learning_rate": 2.503030652889223e-06, |
| "loss": 0.3393, |
| "step": 12990 |
| }, |
| { |
| "epoch": 0.7503390955527979, |
| "grad_norm": 5.829087177972612, |
| "learning_rate": 2.4972579807192752e-06, |
| "loss": 0.3518, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.7509162794724539, |
| "grad_norm": 4.209497926424704, |
| "learning_rate": 2.491485308549328e-06, |
| "loss": 0.3392, |
| "step": 13010 |
| }, |
| { |
| "epoch": 0.7514934633921099, |
| "grad_norm": 7.292629657573057, |
| "learning_rate": 2.48571263637938e-06, |
| "loss": 0.3253, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.7520706473117659, |
| "grad_norm": 2.647834968655611, |
| "learning_rate": 2.4799399642094326e-06, |
| "loss": 0.3304, |
| "step": 13030 |
| }, |
| { |
| "epoch": 0.7526478312314219, |
| "grad_norm": 3.2625519939417638, |
| "learning_rate": 2.474167292039485e-06, |
| "loss": 0.3389, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.7532250151510779, |
| "grad_norm": 11.316624355429083, |
| "learning_rate": 2.4683946198695378e-06, |
| "loss": 0.3369, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.7538021990707339, |
| "grad_norm": 9.673257149513491, |
| "learning_rate": 2.4626219476995904e-06, |
| "loss": 0.3334, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.7543793829903899, |
| "grad_norm": 4.342384289505803, |
| "learning_rate": 2.4568492755296426e-06, |
| "loss": 0.3464, |
| "step": 13070 |
| }, |
| { |
| "epoch": 0.7549565669100459, |
| "grad_norm": 11.18476020427808, |
| "learning_rate": 2.451076603359695e-06, |
| "loss": 0.3427, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.7555337508297019, |
| "grad_norm": 3.7726393590227274, |
| "learning_rate": 2.445303931189748e-06, |
| "loss": 0.334, |
| "step": 13090 |
| }, |
| { |
| "epoch": 0.7561109347493579, |
| "grad_norm": 12.714136611436194, |
| "learning_rate": 2.4395312590198008e-06, |
| "loss": 0.3268, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.7566881186690139, |
| "grad_norm": 2.1269070192971307, |
| "learning_rate": 2.433758586849853e-06, |
| "loss": 0.3441, |
| "step": 13110 |
| }, |
| { |
| "epoch": 0.7572653025886699, |
| "grad_norm": 6.054989937046523, |
| "learning_rate": 2.4279859146799055e-06, |
| "loss": 0.3428, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.7578424865083259, |
| "grad_norm": 4.270071372155915, |
| "learning_rate": 2.422213242509958e-06, |
| "loss": 0.3539, |
| "step": 13130 |
| }, |
| { |
| "epoch": 0.7584196704279819, |
| "grad_norm": 3.1183499805433814, |
| "learning_rate": 2.4164405703400107e-06, |
| "loss": 0.3347, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.7589968543476379, |
| "grad_norm": 8.120134607992474, |
| "learning_rate": 2.4106678981700633e-06, |
| "loss": 0.3513, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.7595740382672939, |
| "grad_norm": 4.275672737070938, |
| "learning_rate": 2.4048952260001155e-06, |
| "loss": 0.3333, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.7601512221869499, |
| "grad_norm": 4.309409121092216, |
| "learning_rate": 2.399122553830168e-06, |
| "loss": 0.342, |
| "step": 13170 |
| }, |
| { |
| "epoch": 0.7607284061066059, |
| "grad_norm": 14.975672326288917, |
| "learning_rate": 2.3933498816602207e-06, |
| "loss": 0.3491, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.7613055900262619, |
| "grad_norm": 3.3391093670284984, |
| "learning_rate": 2.3875772094902733e-06, |
| "loss": 0.3377, |
| "step": 13190 |
| }, |
| { |
| "epoch": 0.7618827739459179, |
| "grad_norm": 9.934238745041949, |
| "learning_rate": 2.381804537320326e-06, |
| "loss": 0.3316, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.7624599578655739, |
| "grad_norm": 3.323343149796346, |
| "learning_rate": 2.3760318651503785e-06, |
| "loss": 0.3217, |
| "step": 13210 |
| }, |
| { |
| "epoch": 0.7630371417852299, |
| "grad_norm": 5.084030929299541, |
| "learning_rate": 2.3702591929804307e-06, |
| "loss": 0.3582, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.7636143257048859, |
| "grad_norm": 7.012741879467996, |
| "learning_rate": 2.3644865208104833e-06, |
| "loss": 0.3433, |
| "step": 13230 |
| }, |
| { |
| "epoch": 0.7641915096245419, |
| "grad_norm": 4.5407340579502025, |
| "learning_rate": 2.358713848640536e-06, |
| "loss": 0.3388, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.7647686935441979, |
| "grad_norm": 4.194705290127997, |
| "learning_rate": 2.3529411764705885e-06, |
| "loss": 0.3312, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.7653458774638539, |
| "grad_norm": 3.7140413786909203, |
| "learning_rate": 2.347168504300641e-06, |
| "loss": 0.3496, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.7659230613835099, |
| "grad_norm": 3.257080159852974, |
| "learning_rate": 2.3413958321306936e-06, |
| "loss": 0.3366, |
| "step": 13270 |
| }, |
| { |
| "epoch": 0.7665002453031659, |
| "grad_norm": 5.234732296881502, |
| "learning_rate": 2.335623159960746e-06, |
| "loss": 0.3427, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.7670774292228218, |
| "grad_norm": 6.870037441960529, |
| "learning_rate": 2.3298504877907984e-06, |
| "loss": 0.3354, |
| "step": 13290 |
| }, |
| { |
| "epoch": 0.7676546131424778, |
| "grad_norm": 9.876776601961089, |
| "learning_rate": 2.324077815620851e-06, |
| "loss": 0.3315, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.7682317970621338, |
| "grad_norm": 5.912982961911957, |
| "learning_rate": 2.3183051434509036e-06, |
| "loss": 0.3485, |
| "step": 13310 |
| }, |
| { |
| "epoch": 0.7688089809817898, |
| "grad_norm": 4.832716437104076, |
| "learning_rate": 2.312532471280956e-06, |
| "loss": 0.3348, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.7693861649014458, |
| "grad_norm": 5.098375721379664, |
| "learning_rate": 2.3067597991110084e-06, |
| "loss": 0.3484, |
| "step": 13330 |
| }, |
| { |
| "epoch": 0.7699633488211018, |
| "grad_norm": 4.63656346157866, |
| "learning_rate": 2.300987126941061e-06, |
| "loss": 0.3366, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.7705405327407578, |
| "grad_norm": 6.588466951994697, |
| "learning_rate": 2.2952144547711136e-06, |
| "loss": 0.3457, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.7711177166604138, |
| "grad_norm": 4.824094255832894, |
| "learning_rate": 2.2894417826011666e-06, |
| "loss": 0.341, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.7716949005800698, |
| "grad_norm": 4.637581573877111, |
| "learning_rate": 2.2836691104312188e-06, |
| "loss": 0.3554, |
| "step": 13370 |
| }, |
| { |
| "epoch": 0.7722720844997258, |
| "grad_norm": 6.056831078560241, |
| "learning_rate": 2.2778964382612714e-06, |
| "loss": 0.3186, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.7728492684193818, |
| "grad_norm": 45.47481090136826, |
| "learning_rate": 2.272123766091324e-06, |
| "loss": 0.3299, |
| "step": 13390 |
| }, |
| { |
| "epoch": 0.7734264523390378, |
| "grad_norm": 22.884051964592864, |
| "learning_rate": 2.2663510939213766e-06, |
| "loss": 0.3392, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.7740036362586938, |
| "grad_norm": 6.492791637464672, |
| "learning_rate": 2.260578421751429e-06, |
| "loss": 0.3436, |
| "step": 13410 |
| }, |
| { |
| "epoch": 0.7745808201783498, |
| "grad_norm": 5.820865415832969, |
| "learning_rate": 2.2548057495814813e-06, |
| "loss": 0.3347, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.7751580040980058, |
| "grad_norm": 3.6093618399609406, |
| "learning_rate": 2.249033077411534e-06, |
| "loss": 0.3457, |
| "step": 13430 |
| }, |
| { |
| "epoch": 0.7757351880176618, |
| "grad_norm": 3.8630308240200977, |
| "learning_rate": 2.2432604052415865e-06, |
| "loss": 0.345, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.7763123719373178, |
| "grad_norm": 5.534539731109275, |
| "learning_rate": 2.237487733071639e-06, |
| "loss": 0.3383, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.7768895558569738, |
| "grad_norm": 9.50423287415909, |
| "learning_rate": 2.2317150609016917e-06, |
| "loss": 0.3528, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.7774667397766298, |
| "grad_norm": 11.597704923923128, |
| "learning_rate": 2.2259423887317443e-06, |
| "loss": 0.3482, |
| "step": 13470 |
| }, |
| { |
| "epoch": 0.7780439236962858, |
| "grad_norm": 4.724944252593318, |
| "learning_rate": 2.2201697165617965e-06, |
| "loss": 0.341, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.7786211076159418, |
| "grad_norm": 4.741588520697521, |
| "learning_rate": 2.214397044391849e-06, |
| "loss": 0.3443, |
| "step": 13490 |
| }, |
| { |
| "epoch": 0.7791982915355978, |
| "grad_norm": 4.723745867093743, |
| "learning_rate": 2.2086243722219017e-06, |
| "loss": 0.3468, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.7797754754552538, |
| "grad_norm": 4.302802386626385, |
| "learning_rate": 2.2028517000519543e-06, |
| "loss": 0.3534, |
| "step": 13510 |
| }, |
| { |
| "epoch": 0.7803526593749098, |
| "grad_norm": 3.6957459272751385, |
| "learning_rate": 2.197079027882007e-06, |
| "loss": 0.3438, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.7809298432945658, |
| "grad_norm": 9.02964602805584, |
| "learning_rate": 2.191306355712059e-06, |
| "loss": 0.3507, |
| "step": 13530 |
| }, |
| { |
| "epoch": 0.7815070272142218, |
| "grad_norm": 4.584120438677978, |
| "learning_rate": 2.1855336835421117e-06, |
| "loss": 0.3392, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.7820842111338778, |
| "grad_norm": 3.634370316749477, |
| "learning_rate": 2.1797610113721642e-06, |
| "loss": 0.3434, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.7826613950535338, |
| "grad_norm": 7.974537610574205, |
| "learning_rate": 2.173988339202217e-06, |
| "loss": 0.3488, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.7832385789731898, |
| "grad_norm": 15.500270051358006, |
| "learning_rate": 2.1682156670322694e-06, |
| "loss": 0.3319, |
| "step": 13570 |
| }, |
| { |
| "epoch": 0.7838157628928458, |
| "grad_norm": 3.6214865336598288, |
| "learning_rate": 2.162442994862322e-06, |
| "loss": 0.3431, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.7843929468125018, |
| "grad_norm": 4.251663962722991, |
| "learning_rate": 2.1566703226923742e-06, |
| "loss": 0.339, |
| "step": 13590 |
| }, |
| { |
| "epoch": 0.7849701307321578, |
| "grad_norm": 5.8477391728665875, |
| "learning_rate": 2.150897650522427e-06, |
| "loss": 0.3425, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.7855473146518138, |
| "grad_norm": 12.661348588140084, |
| "learning_rate": 2.1451249783524794e-06, |
| "loss": 0.3385, |
| "step": 13610 |
| }, |
| { |
| "epoch": 0.7861244985714698, |
| "grad_norm": 3.153875030872274, |
| "learning_rate": 2.139352306182532e-06, |
| "loss": 0.3429, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.7867016824911258, |
| "grad_norm": 7.53800643811605, |
| "learning_rate": 2.1335796340125846e-06, |
| "loss": 0.3492, |
| "step": 13630 |
| }, |
| { |
| "epoch": 0.7872788664107818, |
| "grad_norm": 4.143985301762935, |
| "learning_rate": 2.127806961842637e-06, |
| "loss": 0.3361, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.7878560503304378, |
| "grad_norm": 3.005269949918135, |
| "learning_rate": 2.12203428967269e-06, |
| "loss": 0.3405, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.7884332342500938, |
| "grad_norm": 3.018587955955484, |
| "learning_rate": 2.1162616175027424e-06, |
| "loss": 0.3447, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.7890104181697498, |
| "grad_norm": 3.002589774112856, |
| "learning_rate": 2.110488945332795e-06, |
| "loss": 0.3253, |
| "step": 13670 |
| }, |
| { |
| "epoch": 0.7895876020894058, |
| "grad_norm": 3.85103590680487, |
| "learning_rate": 2.104716273162847e-06, |
| "loss": 0.3395, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.7901647860090618, |
| "grad_norm": 4.040110821545416, |
| "learning_rate": 2.0989436009928998e-06, |
| "loss": 0.3439, |
| "step": 13690 |
| }, |
| { |
| "epoch": 0.7907419699287178, |
| "grad_norm": 8.395833969271274, |
| "learning_rate": 2.0931709288229524e-06, |
| "loss": 0.3334, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.7913191538483738, |
| "grad_norm": 8.808629294549078, |
| "learning_rate": 2.087398256653005e-06, |
| "loss": 0.3516, |
| "step": 13710 |
| }, |
| { |
| "epoch": 0.7918963377680298, |
| "grad_norm": 5.183013748264493, |
| "learning_rate": 2.0816255844830576e-06, |
| "loss": 0.3468, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.7924735216876858, |
| "grad_norm": 4.651858039147579, |
| "learning_rate": 2.0758529123131097e-06, |
| "loss": 0.3383, |
| "step": 13730 |
| }, |
| { |
| "epoch": 0.7930507056073418, |
| "grad_norm": 5.147274649825693, |
| "learning_rate": 2.0700802401431623e-06, |
| "loss": 0.3435, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.7936278895269978, |
| "grad_norm": 3.3157064357282646, |
| "learning_rate": 2.064307567973215e-06, |
| "loss": 0.3374, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.7942050734466538, |
| "grad_norm": 5.359738899671234, |
| "learning_rate": 2.0585348958032675e-06, |
| "loss": 0.3519, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.7947822573663098, |
| "grad_norm": 3.232416306077255, |
| "learning_rate": 2.05276222363332e-06, |
| "loss": 0.3466, |
| "step": 13770 |
| }, |
| { |
| "epoch": 0.7953594412859658, |
| "grad_norm": 5.634484175482586, |
| "learning_rate": 2.0469895514633727e-06, |
| "loss": 0.3435, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.7959366252056218, |
| "grad_norm": 4.812177943413768, |
| "learning_rate": 2.041216879293425e-06, |
| "loss": 0.3394, |
| "step": 13790 |
| }, |
| { |
| "epoch": 0.7965138091252778, |
| "grad_norm": 4.18690284048581, |
| "learning_rate": 2.0354442071234775e-06, |
| "loss": 0.339, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.7970909930449338, |
| "grad_norm": 2.6302278323472024, |
| "learning_rate": 2.02967153495353e-06, |
| "loss": 0.3211, |
| "step": 13810 |
| }, |
| { |
| "epoch": 0.7976681769645898, |
| "grad_norm": 3.7677609162356567, |
| "learning_rate": 2.0238988627835827e-06, |
| "loss": 0.3459, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.7982453608842458, |
| "grad_norm": 3.877790721238228, |
| "learning_rate": 2.0181261906136353e-06, |
| "loss": 0.3527, |
| "step": 13830 |
| }, |
| { |
| "epoch": 0.7988225448039018, |
| "grad_norm": 3.3882391753576897, |
| "learning_rate": 2.0123535184436874e-06, |
| "loss": 0.3418, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.7993997287235578, |
| "grad_norm": 8.275704148522843, |
| "learning_rate": 2.00658084627374e-06, |
| "loss": 0.3584, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.7999769126432138, |
| "grad_norm": 4.07120977222577, |
| "learning_rate": 2.0008081741037926e-06, |
| "loss": 0.3369, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.8005540965628698, |
| "grad_norm": 6.796016189984939, |
| "learning_rate": 1.9950355019338452e-06, |
| "loss": 0.3359, |
| "step": 13870 |
| }, |
| { |
| "epoch": 0.8011312804825258, |
| "grad_norm": 4.70618096154267, |
| "learning_rate": 1.989262829763898e-06, |
| "loss": 0.3383, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.8017084644021818, |
| "grad_norm": 9.622472004428674, |
| "learning_rate": 1.9834901575939504e-06, |
| "loss": 0.3492, |
| "step": 13890 |
| }, |
| { |
| "epoch": 0.8022856483218378, |
| "grad_norm": 20.930166870041855, |
| "learning_rate": 1.977717485424003e-06, |
| "loss": 0.3343, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.8028628322414938, |
| "grad_norm": 10.85418413095689, |
| "learning_rate": 1.9719448132540556e-06, |
| "loss": 0.3399, |
| "step": 13910 |
| }, |
| { |
| "epoch": 0.8034400161611498, |
| "grad_norm": 3.9293112562677353, |
| "learning_rate": 1.9661721410841082e-06, |
| "loss": 0.363, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.8040172000808058, |
| "grad_norm": 10.773909045381199, |
| "learning_rate": 1.960399468914161e-06, |
| "loss": 0.3427, |
| "step": 13930 |
| }, |
| { |
| "epoch": 0.8045943840004618, |
| "grad_norm": 4.932078250676689, |
| "learning_rate": 1.954626796744213e-06, |
| "loss": 0.3353, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.8051715679201178, |
| "grad_norm": 5.376321864962683, |
| "learning_rate": 1.9488541245742656e-06, |
| "loss": 0.3502, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.8057487518397738, |
| "grad_norm": 4.014977881261309, |
| "learning_rate": 1.943081452404318e-06, |
| "loss": 0.3169, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.8063259357594298, |
| "grad_norm": 8.651359182721874, |
| "learning_rate": 1.9373087802343708e-06, |
| "loss": 0.3349, |
| "step": 13970 |
| }, |
| { |
| "epoch": 0.8069031196790858, |
| "grad_norm": 4.683539873403375, |
| "learning_rate": 1.9315361080644234e-06, |
| "loss": 0.3366, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.8074803035987418, |
| "grad_norm": 7.755677266741849, |
| "learning_rate": 1.9257634358944756e-06, |
| "loss": 0.3377, |
| "step": 13990 |
| }, |
| { |
| "epoch": 0.8080574875183978, |
| "grad_norm": 14.533972050557356, |
| "learning_rate": 1.919990763724528e-06, |
| "loss": 0.3337, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.8086346714380538, |
| "grad_norm": 3.5287179981983052, |
| "learning_rate": 1.9142180915545807e-06, |
| "loss": 0.3371, |
| "step": 14010 |
| }, |
| { |
| "epoch": 0.8092118553577098, |
| "grad_norm": 3.6897961566375588, |
| "learning_rate": 1.9084454193846333e-06, |
| "loss": 0.3437, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.8097890392773658, |
| "grad_norm": 4.275916003879041, |
| "learning_rate": 1.9026727472146857e-06, |
| "loss": 0.3505, |
| "step": 14030 |
| }, |
| { |
| "epoch": 0.8103662231970218, |
| "grad_norm": 5.853581087992128, |
| "learning_rate": 1.8969000750447383e-06, |
| "loss": 0.3447, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.8109434071166778, |
| "grad_norm": 4.456219664367194, |
| "learning_rate": 1.891127402874791e-06, |
| "loss": 0.3405, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.8115205910363338, |
| "grad_norm": 3.609665854590679, |
| "learning_rate": 1.8853547307048433e-06, |
| "loss": 0.3539, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.8120977749559897, |
| "grad_norm": 8.81894657017933, |
| "learning_rate": 1.879582058534896e-06, |
| "loss": 0.3434, |
| "step": 14070 |
| }, |
| { |
| "epoch": 0.8126749588756457, |
| "grad_norm": 73.62257234022623, |
| "learning_rate": 1.8738093863649485e-06, |
| "loss": 0.3362, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.8132521427953017, |
| "grad_norm": 4.843976388543906, |
| "learning_rate": 1.8680367141950009e-06, |
| "loss": 0.3522, |
| "step": 14090 |
| }, |
| { |
| "epoch": 0.8138293267149577, |
| "grad_norm": 6.3647153836539925, |
| "learning_rate": 1.8622640420250535e-06, |
| "loss": 0.3351, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.8144065106346137, |
| "grad_norm": 15.067604508734842, |
| "learning_rate": 1.8564913698551059e-06, |
| "loss": 0.341, |
| "step": 14110 |
| }, |
| { |
| "epoch": 0.8149836945542697, |
| "grad_norm": 2.837118277081176, |
| "learning_rate": 1.8507186976851585e-06, |
| "loss": 0.3433, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.8155608784739257, |
| "grad_norm": 3.8243460158503337, |
| "learning_rate": 1.844946025515211e-06, |
| "loss": 0.3433, |
| "step": 14130 |
| }, |
| { |
| "epoch": 0.8161380623935817, |
| "grad_norm": 5.476403512789154, |
| "learning_rate": 1.8391733533452635e-06, |
| "loss": 0.3507, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.8167152463132377, |
| "grad_norm": 7.503101973214223, |
| "learning_rate": 1.8334006811753163e-06, |
| "loss": 0.3426, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.8172924302328937, |
| "grad_norm": 13.62010751923973, |
| "learning_rate": 1.8276280090053689e-06, |
| "loss": 0.3467, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.8178696141525497, |
| "grad_norm": 5.792511670187818, |
| "learning_rate": 1.8218553368354215e-06, |
| "loss": 0.3618, |
| "step": 14170 |
| }, |
| { |
| "epoch": 0.8184467980722057, |
| "grad_norm": 3.268214743983259, |
| "learning_rate": 1.8160826646654738e-06, |
| "loss": 0.3383, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.8190239819918617, |
| "grad_norm": 4.2120443389536, |
| "learning_rate": 1.8103099924955264e-06, |
| "loss": 0.3444, |
| "step": 14190 |
| }, |
| { |
| "epoch": 0.8196011659115177, |
| "grad_norm": 5.497732760033444, |
| "learning_rate": 1.804537320325579e-06, |
| "loss": 0.3321, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.8201783498311737, |
| "grad_norm": 3.752608960335712, |
| "learning_rate": 1.7987646481556314e-06, |
| "loss": 0.34, |
| "step": 14210 |
| }, |
| { |
| "epoch": 0.8207555337508297, |
| "grad_norm": 5.61044709148666, |
| "learning_rate": 1.792991975985684e-06, |
| "loss": 0.3477, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.8213327176704857, |
| "grad_norm": 3.2289347743222, |
| "learning_rate": 1.7872193038157364e-06, |
| "loss": 0.3354, |
| "step": 14230 |
| }, |
| { |
| "epoch": 0.8219099015901417, |
| "grad_norm": 4.132295269856051, |
| "learning_rate": 1.781446631645789e-06, |
| "loss": 0.3454, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.8224870855097977, |
| "grad_norm": 8.94256483462556, |
| "learning_rate": 1.7756739594758416e-06, |
| "loss": 0.3437, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.8230642694294537, |
| "grad_norm": 6.100086954024239, |
| "learning_rate": 1.769901287305894e-06, |
| "loss": 0.3358, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.8236414533491097, |
| "grad_norm": 4.7378243458253255, |
| "learning_rate": 1.7641286151359466e-06, |
| "loss": 0.3357, |
| "step": 14270 |
| }, |
| { |
| "epoch": 0.8242186372687657, |
| "grad_norm": 4.402684079414389, |
| "learning_rate": 1.7583559429659992e-06, |
| "loss": 0.346, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.8247958211884217, |
| "grad_norm": 7.94048495060501, |
| "learning_rate": 1.7525832707960516e-06, |
| "loss": 0.3334, |
| "step": 14290 |
| }, |
| { |
| "epoch": 0.8253730051080777, |
| "grad_norm": 4.248624676291536, |
| "learning_rate": 1.7468105986261042e-06, |
| "loss": 0.3348, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.8259501890277336, |
| "grad_norm": 5.863500294171999, |
| "learning_rate": 1.7410379264561568e-06, |
| "loss": 0.3545, |
| "step": 14310 |
| }, |
| { |
| "epoch": 0.8265273729473896, |
| "grad_norm": 5.462151225452732, |
| "learning_rate": 1.7352652542862091e-06, |
| "loss": 0.3324, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.8271045568670456, |
| "grad_norm": 2.757724626485063, |
| "learning_rate": 1.7294925821162617e-06, |
| "loss": 0.3411, |
| "step": 14330 |
| }, |
| { |
| "epoch": 0.8276817407867016, |
| "grad_norm": 2.5548734181876456, |
| "learning_rate": 1.7237199099463141e-06, |
| "loss": 0.3278, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.8282589247063576, |
| "grad_norm": 4.871936623866052, |
| "learning_rate": 1.7179472377763667e-06, |
| "loss": 0.3306, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.8288361086260136, |
| "grad_norm": 3.10101382610628, |
| "learning_rate": 1.7121745656064193e-06, |
| "loss": 0.3387, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.8294132925456696, |
| "grad_norm": 10.575051742858964, |
| "learning_rate": 1.7064018934364717e-06, |
| "loss": 0.3428, |
| "step": 14370 |
| }, |
| { |
| "epoch": 0.8299904764653256, |
| "grad_norm": 4.367916255458309, |
| "learning_rate": 1.7006292212665243e-06, |
| "loss": 0.3397, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.8305676603849816, |
| "grad_norm": 6.616840660969775, |
| "learning_rate": 1.694856549096577e-06, |
| "loss": 0.331, |
| "step": 14390 |
| }, |
| { |
| "epoch": 0.8311448443046376, |
| "grad_norm": 12.71189460753895, |
| "learning_rate": 1.6890838769266293e-06, |
| "loss": 0.3352, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.8317220282242936, |
| "grad_norm": 8.817998813310439, |
| "learning_rate": 1.683311204756682e-06, |
| "loss": 0.3425, |
| "step": 14410 |
| }, |
| { |
| "epoch": 0.8322992121439496, |
| "grad_norm": 4.766835462814113, |
| "learning_rate": 1.6775385325867347e-06, |
| "loss": 0.3398, |
| "step": 14420 |
| }, |
| { |
| "epoch": 0.8328763960636056, |
| "grad_norm": 4.169057753174133, |
| "learning_rate": 1.6717658604167873e-06, |
| "loss": 0.3294, |
| "step": 14430 |
| }, |
| { |
| "epoch": 0.8334535799832616, |
| "grad_norm": 5.150260088289917, |
| "learning_rate": 1.6659931882468397e-06, |
| "loss": 0.3273, |
| "step": 14440 |
| }, |
| { |
| "epoch": 0.8340307639029176, |
| "grad_norm": 5.053684619580007, |
| "learning_rate": 1.6602205160768923e-06, |
| "loss": 0.3642, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.8346079478225736, |
| "grad_norm": 4.068134996065944, |
| "learning_rate": 1.6544478439069447e-06, |
| "loss": 0.3441, |
| "step": 14460 |
| }, |
| { |
| "epoch": 0.8351851317422296, |
| "grad_norm": 4.004964659229183, |
| "learning_rate": 1.6486751717369972e-06, |
| "loss": 0.3408, |
| "step": 14470 |
| }, |
| { |
| "epoch": 0.8357623156618856, |
| "grad_norm": 4.02559591240971, |
| "learning_rate": 1.6429024995670498e-06, |
| "loss": 0.3385, |
| "step": 14480 |
| }, |
| { |
| "epoch": 0.8363394995815416, |
| "grad_norm": 5.168203382157519, |
| "learning_rate": 1.6371298273971022e-06, |
| "loss": 0.3424, |
| "step": 14490 |
| }, |
| { |
| "epoch": 0.8369166835011976, |
| "grad_norm": 6.3562693056901285, |
| "learning_rate": 1.6313571552271548e-06, |
| "loss": 0.3432, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.8374938674208536, |
| "grad_norm": 7.105132387517162, |
| "learning_rate": 1.6255844830572074e-06, |
| "loss": 0.3385, |
| "step": 14510 |
| }, |
| { |
| "epoch": 0.8380710513405096, |
| "grad_norm": 5.167657366774912, |
| "learning_rate": 1.6198118108872598e-06, |
| "loss": 0.3357, |
| "step": 14520 |
| }, |
| { |
| "epoch": 0.8386482352601656, |
| "grad_norm": 5.322779082569645, |
| "learning_rate": 1.6140391387173124e-06, |
| "loss": 0.351, |
| "step": 14530 |
| }, |
| { |
| "epoch": 0.8392254191798216, |
| "grad_norm": 5.555960049234855, |
| "learning_rate": 1.608266466547365e-06, |
| "loss": 0.3427, |
| "step": 14540 |
| }, |
| { |
| "epoch": 0.8398026030994776, |
| "grad_norm": 14.30097637805443, |
| "learning_rate": 1.6024937943774174e-06, |
| "loss": 0.3268, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.8403797870191336, |
| "grad_norm": 5.930941981679822, |
| "learning_rate": 1.59672112220747e-06, |
| "loss": 0.3378, |
| "step": 14560 |
| }, |
| { |
| "epoch": 0.8409569709387896, |
| "grad_norm": 6.41989158034055, |
| "learning_rate": 1.5909484500375224e-06, |
| "loss": 0.3425, |
| "step": 14570 |
| }, |
| { |
| "epoch": 0.8415341548584456, |
| "grad_norm": 5.309996017042821, |
| "learning_rate": 1.585175777867575e-06, |
| "loss": 0.347, |
| "step": 14580 |
| }, |
| { |
| "epoch": 0.8421113387781016, |
| "grad_norm": 5.483533161522683, |
| "learning_rate": 1.5794031056976276e-06, |
| "loss": 0.3292, |
| "step": 14590 |
| }, |
| { |
| "epoch": 0.8426885226977576, |
| "grad_norm": 5.692527937195376, |
| "learning_rate": 1.57363043352768e-06, |
| "loss": 0.3377, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.8432657066174136, |
| "grad_norm": 4.521114906145438, |
| "learning_rate": 1.5678577613577325e-06, |
| "loss": 0.33, |
| "step": 14610 |
| }, |
| { |
| "epoch": 0.8438428905370696, |
| "grad_norm": 5.387502803416387, |
| "learning_rate": 1.5620850891877851e-06, |
| "loss": 0.3418, |
| "step": 14620 |
| }, |
| { |
| "epoch": 0.8444200744567256, |
| "grad_norm": 3.5934061953432783, |
| "learning_rate": 1.5563124170178375e-06, |
| "loss": 0.3386, |
| "step": 14630 |
| }, |
| { |
| "epoch": 0.8449972583763816, |
| "grad_norm": 11.194143114734654, |
| "learning_rate": 1.5505397448478901e-06, |
| "loss": 0.345, |
| "step": 14640 |
| }, |
| { |
| "epoch": 0.8455744422960376, |
| "grad_norm": 10.094088344395155, |
| "learning_rate": 1.5447670726779427e-06, |
| "loss": 0.3526, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.8461516262156936, |
| "grad_norm": 3.4477998903369538, |
| "learning_rate": 1.5389944005079951e-06, |
| "loss": 0.3373, |
| "step": 14660 |
| }, |
| { |
| "epoch": 0.8467288101353496, |
| "grad_norm": 7.047707090908251, |
| "learning_rate": 1.533221728338048e-06, |
| "loss": 0.3403, |
| "step": 14670 |
| }, |
| { |
| "epoch": 0.8473059940550056, |
| "grad_norm": 2.8576436370742893, |
| "learning_rate": 1.5274490561681005e-06, |
| "loss": 0.3453, |
| "step": 14680 |
| }, |
| { |
| "epoch": 0.8478831779746616, |
| "grad_norm": 12.938468126614902, |
| "learning_rate": 1.521676383998153e-06, |
| "loss": 0.336, |
| "step": 14690 |
| }, |
| { |
| "epoch": 0.8484603618943176, |
| "grad_norm": 5.011512190053106, |
| "learning_rate": 1.5159037118282055e-06, |
| "loss": 0.3424, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.8490375458139736, |
| "grad_norm": 5.063829942230378, |
| "learning_rate": 1.510131039658258e-06, |
| "loss": 0.346, |
| "step": 14710 |
| }, |
| { |
| "epoch": 0.8496147297336296, |
| "grad_norm": 6.679043646426254, |
| "learning_rate": 1.5043583674883105e-06, |
| "loss": 0.3422, |
| "step": 14720 |
| }, |
| { |
| "epoch": 0.8501919136532856, |
| "grad_norm": 5.800788583586723, |
| "learning_rate": 1.498585695318363e-06, |
| "loss": 0.3421, |
| "step": 14730 |
| }, |
| { |
| "epoch": 0.8507690975729416, |
| "grad_norm": 3.968739599812534, |
| "learning_rate": 1.4928130231484157e-06, |
| "loss": 0.3395, |
| "step": 14740 |
| }, |
| { |
| "epoch": 0.8513462814925976, |
| "grad_norm": 7.880056504602316, |
| "learning_rate": 1.487040350978468e-06, |
| "loss": 0.3326, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.8519234654122536, |
| "grad_norm": 5.685089944033708, |
| "learning_rate": 1.4812676788085207e-06, |
| "loss": 0.3356, |
| "step": 14760 |
| }, |
| { |
| "epoch": 0.8525006493319096, |
| "grad_norm": 4.795808126193561, |
| "learning_rate": 1.475495006638573e-06, |
| "loss": 0.316, |
| "step": 14770 |
| }, |
| { |
| "epoch": 0.8530778332515656, |
| "grad_norm": 6.693555623774071, |
| "learning_rate": 1.4697223344686256e-06, |
| "loss": 0.3416, |
| "step": 14780 |
| }, |
| { |
| "epoch": 0.8536550171712216, |
| "grad_norm": 5.752237694272574, |
| "learning_rate": 1.4639496622986782e-06, |
| "loss": 0.3348, |
| "step": 14790 |
| }, |
| { |
| "epoch": 0.8542322010908776, |
| "grad_norm": 7.135726397784308, |
| "learning_rate": 1.4581769901287306e-06, |
| "loss": 0.3378, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.8548093850105336, |
| "grad_norm": 3.2833774570611234, |
| "learning_rate": 1.4524043179587832e-06, |
| "loss": 0.3504, |
| "step": 14810 |
| }, |
| { |
| "epoch": 0.8553865689301896, |
| "grad_norm": 6.256462721047408, |
| "learning_rate": 1.4466316457888358e-06, |
| "loss": 0.349, |
| "step": 14820 |
| }, |
| { |
| "epoch": 0.8559637528498456, |
| "grad_norm": 6.040295806596955, |
| "learning_rate": 1.4408589736188882e-06, |
| "loss": 0.3208, |
| "step": 14830 |
| }, |
| { |
| "epoch": 0.8565409367695016, |
| "grad_norm": 4.729301211824621, |
| "learning_rate": 1.4350863014489408e-06, |
| "loss": 0.3277, |
| "step": 14840 |
| }, |
| { |
| "epoch": 0.8571181206891576, |
| "grad_norm": 4.313779706679082, |
| "learning_rate": 1.4293136292789934e-06, |
| "loss": 0.3306, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.8576953046088136, |
| "grad_norm": 11.790043476255672, |
| "learning_rate": 1.4235409571090458e-06, |
| "loss": 0.3391, |
| "step": 14860 |
| }, |
| { |
| "epoch": 0.8582724885284696, |
| "grad_norm": 7.642639050872643, |
| "learning_rate": 1.4177682849390984e-06, |
| "loss": 0.3388, |
| "step": 14870 |
| }, |
| { |
| "epoch": 0.8588496724481256, |
| "grad_norm": 7.260077362208394, |
| "learning_rate": 1.4119956127691508e-06, |
| "loss": 0.3422, |
| "step": 14880 |
| }, |
| { |
| "epoch": 0.8594268563677816, |
| "grad_norm": 4.754130590048299, |
| "learning_rate": 1.4062229405992034e-06, |
| "loss": 0.3436, |
| "step": 14890 |
| }, |
| { |
| "epoch": 0.8600040402874376, |
| "grad_norm": 6.225554657816755, |
| "learning_rate": 1.400450268429256e-06, |
| "loss": 0.3352, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.8605812242070936, |
| "grad_norm": 9.369073827925245, |
| "learning_rate": 1.3946775962593083e-06, |
| "loss": 0.3552, |
| "step": 14910 |
| }, |
| { |
| "epoch": 0.8611584081267496, |
| "grad_norm": 22.876915271022913, |
| "learning_rate": 1.388904924089361e-06, |
| "loss": 0.3492, |
| "step": 14920 |
| }, |
| { |
| "epoch": 0.8617355920464056, |
| "grad_norm": 3.2981136451706132, |
| "learning_rate": 1.3831322519194135e-06, |
| "loss": 0.3328, |
| "step": 14930 |
| }, |
| { |
| "epoch": 0.8623127759660616, |
| "grad_norm": 15.61626227774467, |
| "learning_rate": 1.3773595797494663e-06, |
| "loss": 0.337, |
| "step": 14940 |
| }, |
| { |
| "epoch": 0.8628899598857176, |
| "grad_norm": 3.605130100716397, |
| "learning_rate": 1.3715869075795187e-06, |
| "loss": 0.3385, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.8634671438053736, |
| "grad_norm": 12.284387392936685, |
| "learning_rate": 1.3658142354095713e-06, |
| "loss": 0.338, |
| "step": 14960 |
| }, |
| { |
| "epoch": 0.8640443277250296, |
| "grad_norm": 3.278014138295641, |
| "learning_rate": 1.360041563239624e-06, |
| "loss": 0.3337, |
| "step": 14970 |
| }, |
| { |
| "epoch": 0.8646215116446856, |
| "grad_norm": 6.104850658183799, |
| "learning_rate": 1.3542688910696763e-06, |
| "loss": 0.3276, |
| "step": 14980 |
| }, |
| { |
| "epoch": 0.8651986955643416, |
| "grad_norm": 5.279235884437097, |
| "learning_rate": 1.348496218899729e-06, |
| "loss": 0.3289, |
| "step": 14990 |
| }, |
| { |
| "epoch": 0.8657758794839976, |
| "grad_norm": 8.449467479597608, |
| "learning_rate": 1.3427235467297813e-06, |
| "loss": 0.3314, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.8663530634036536, |
| "grad_norm": 5.665683889458834, |
| "learning_rate": 1.3369508745598339e-06, |
| "loss": 0.3416, |
| "step": 15010 |
| }, |
| { |
| "epoch": 0.8669302473233096, |
| "grad_norm": 3.9335607560335735, |
| "learning_rate": 1.3311782023898865e-06, |
| "loss": 0.3525, |
| "step": 15020 |
| }, |
| { |
| "epoch": 0.8675074312429656, |
| "grad_norm": 2.894014459167942, |
| "learning_rate": 1.3254055302199389e-06, |
| "loss": 0.3328, |
| "step": 15030 |
| }, |
| { |
| "epoch": 0.8680846151626216, |
| "grad_norm": 65.75688988910291, |
| "learning_rate": 1.3196328580499915e-06, |
| "loss": 0.331, |
| "step": 15040 |
| }, |
| { |
| "epoch": 0.8686617990822776, |
| "grad_norm": 61.00701606633841, |
| "learning_rate": 1.313860185880044e-06, |
| "loss": 0.3407, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.8692389830019336, |
| "grad_norm": 4.364812051967769, |
| "learning_rate": 1.3080875137100965e-06, |
| "loss": 0.335, |
| "step": 15060 |
| }, |
| { |
| "epoch": 0.8698161669215896, |
| "grad_norm": 24.662511548298095, |
| "learning_rate": 1.302314841540149e-06, |
| "loss": 0.3348, |
| "step": 15070 |
| }, |
| { |
| "epoch": 0.8703933508412456, |
| "grad_norm": 4.160466497716753, |
| "learning_rate": 1.2965421693702016e-06, |
| "loss": 0.3252, |
| "step": 15080 |
| }, |
| { |
| "epoch": 0.8709705347609016, |
| "grad_norm": 6.134539334165056, |
| "learning_rate": 1.290769497200254e-06, |
| "loss": 0.343, |
| "step": 15090 |
| }, |
| { |
| "epoch": 0.8715477186805576, |
| "grad_norm": 7.228713295937482, |
| "learning_rate": 1.2849968250303066e-06, |
| "loss": 0.3338, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.8721249026002136, |
| "grad_norm": 4.918201123965152, |
| "learning_rate": 1.279224152860359e-06, |
| "loss": 0.3527, |
| "step": 15110 |
| }, |
| { |
| "epoch": 0.8727020865198696, |
| "grad_norm": 11.079488033226095, |
| "learning_rate": 1.2734514806904116e-06, |
| "loss": 0.3485, |
| "step": 15120 |
| }, |
| { |
| "epoch": 0.8732792704395256, |
| "grad_norm": 4.928609183215457, |
| "learning_rate": 1.2676788085204642e-06, |
| "loss": 0.3354, |
| "step": 15130 |
| }, |
| { |
| "epoch": 0.8738564543591816, |
| "grad_norm": 6.740385950730952, |
| "learning_rate": 1.2619061363505166e-06, |
| "loss": 0.333, |
| "step": 15140 |
| }, |
| { |
| "epoch": 0.8744336382788376, |
| "grad_norm": 4.67490770106929, |
| "learning_rate": 1.2561334641805692e-06, |
| "loss": 0.331, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.8750108221984936, |
| "grad_norm": 13.018881009854521, |
| "learning_rate": 1.2503607920106218e-06, |
| "loss": 0.3418, |
| "step": 15160 |
| }, |
| { |
| "epoch": 0.8755880061181496, |
| "grad_norm": 5.447550954730696, |
| "learning_rate": 1.2445881198406744e-06, |
| "loss": 0.3372, |
| "step": 15170 |
| }, |
| { |
| "epoch": 0.8761651900378056, |
| "grad_norm": 3.273410656701021, |
| "learning_rate": 1.238815447670727e-06, |
| "loss": 0.3199, |
| "step": 15180 |
| }, |
| { |
| "epoch": 0.8767423739574616, |
| "grad_norm": 4.277523732895546, |
| "learning_rate": 1.2330427755007794e-06, |
| "loss": 0.341, |
| "step": 15190 |
| }, |
| { |
| "epoch": 0.8773195578771176, |
| "grad_norm": 15.742782882783079, |
| "learning_rate": 1.227270103330832e-06, |
| "loss": 0.341, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.8778967417967736, |
| "grad_norm": 5.131013300289669, |
| "learning_rate": 1.2214974311608844e-06, |
| "loss": 0.3303, |
| "step": 15210 |
| }, |
| { |
| "epoch": 0.8784739257164296, |
| "grad_norm": 12.866336037106171, |
| "learning_rate": 1.215724758990937e-06, |
| "loss": 0.3442, |
| "step": 15220 |
| }, |
| { |
| "epoch": 0.8790511096360856, |
| "grad_norm": 5.127710193300859, |
| "learning_rate": 1.2099520868209895e-06, |
| "loss": 0.352, |
| "step": 15230 |
| }, |
| { |
| "epoch": 0.8796282935557416, |
| "grad_norm": 7.005612361267027, |
| "learning_rate": 1.204179414651042e-06, |
| "loss": 0.3376, |
| "step": 15240 |
| }, |
| { |
| "epoch": 0.8802054774753976, |
| "grad_norm": 19.994629507428854, |
| "learning_rate": 1.1984067424810945e-06, |
| "loss": 0.3366, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.8807826613950536, |
| "grad_norm": 19.289162587657003, |
| "learning_rate": 1.1926340703111471e-06, |
| "loss": 0.3339, |
| "step": 15260 |
| }, |
| { |
| "epoch": 0.8813598453147096, |
| "grad_norm": 5.780176197085242, |
| "learning_rate": 1.1868613981411997e-06, |
| "loss": 0.3328, |
| "step": 15270 |
| }, |
| { |
| "epoch": 0.8819370292343656, |
| "grad_norm": 5.322625710215796, |
| "learning_rate": 1.1810887259712523e-06, |
| "loss": 0.3364, |
| "step": 15280 |
| }, |
| { |
| "epoch": 0.8825142131540216, |
| "grad_norm": 4.160665784312188, |
| "learning_rate": 1.1753160538013047e-06, |
| "loss": 0.3298, |
| "step": 15290 |
| }, |
| { |
| "epoch": 0.8830913970736776, |
| "grad_norm": 39.76071118055878, |
| "learning_rate": 1.1695433816313573e-06, |
| "loss": 0.348, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.8836685809933336, |
| "grad_norm": 3.5659508030627958, |
| "learning_rate": 1.16377070946141e-06, |
| "loss": 0.3271, |
| "step": 15310 |
| }, |
| { |
| "epoch": 0.8842457649129896, |
| "grad_norm": 4.436695102429374, |
| "learning_rate": 1.1579980372914623e-06, |
| "loss": 0.3407, |
| "step": 15320 |
| }, |
| { |
| "epoch": 0.8848229488326456, |
| "grad_norm": 11.403167564876071, |
| "learning_rate": 1.1522253651215149e-06, |
| "loss": 0.3383, |
| "step": 15330 |
| }, |
| { |
| "epoch": 0.8854001327523016, |
| "grad_norm": 4.535641723893359, |
| "learning_rate": 1.1464526929515673e-06, |
| "loss": 0.3328, |
| "step": 15340 |
| }, |
| { |
| "epoch": 0.8859773166719576, |
| "grad_norm": 7.663265877666311, |
| "learning_rate": 1.1406800207816199e-06, |
| "loss": 0.3365, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.8865545005916136, |
| "grad_norm": 7.820505381715719, |
| "learning_rate": 1.1349073486116725e-06, |
| "loss": 0.3432, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.8871316845112696, |
| "grad_norm": 6.006599271526383, |
| "learning_rate": 1.1291346764417248e-06, |
| "loss": 0.3487, |
| "step": 15370 |
| }, |
| { |
| "epoch": 0.8877088684309256, |
| "grad_norm": 11.881435307105928, |
| "learning_rate": 1.1233620042717774e-06, |
| "loss": 0.3332, |
| "step": 15380 |
| }, |
| { |
| "epoch": 0.8882860523505816, |
| "grad_norm": 3.4819561819318103, |
| "learning_rate": 1.11758933210183e-06, |
| "loss": 0.335, |
| "step": 15390 |
| }, |
| { |
| "epoch": 0.8888632362702376, |
| "grad_norm": 6.893650513052578, |
| "learning_rate": 1.1118166599318826e-06, |
| "loss": 0.3493, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.8894404201898936, |
| "grad_norm": 6.925493159604081, |
| "learning_rate": 1.1060439877619352e-06, |
| "loss": 0.3448, |
| "step": 15410 |
| }, |
| { |
| "epoch": 0.8900176041095496, |
| "grad_norm": 5.5200268397262775, |
| "learning_rate": 1.1002713155919876e-06, |
| "loss": 0.3256, |
| "step": 15420 |
| }, |
| { |
| "epoch": 0.8905947880292056, |
| "grad_norm": 7.638890582405007, |
| "learning_rate": 1.0944986434220402e-06, |
| "loss": 0.3327, |
| "step": 15430 |
| }, |
| { |
| "epoch": 0.8911719719488616, |
| "grad_norm": 4.585636430659814, |
| "learning_rate": 1.0887259712520926e-06, |
| "loss": 0.3386, |
| "step": 15440 |
| }, |
| { |
| "epoch": 0.8917491558685176, |
| "grad_norm": 7.147252901738225, |
| "learning_rate": 1.0829532990821452e-06, |
| "loss": 0.3402, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.8923263397881736, |
| "grad_norm": 5.40597402568476, |
| "learning_rate": 1.0771806269121978e-06, |
| "loss": 0.3385, |
| "step": 15460 |
| }, |
| { |
| "epoch": 0.8929035237078295, |
| "grad_norm": 4.260919590930795, |
| "learning_rate": 1.0714079547422502e-06, |
| "loss": 0.3232, |
| "step": 15470 |
| }, |
| { |
| "epoch": 0.8934807076274855, |
| "grad_norm": 9.083532528696407, |
| "learning_rate": 1.0656352825723028e-06, |
| "loss": 0.354, |
| "step": 15480 |
| }, |
| { |
| "epoch": 0.8940578915471415, |
| "grad_norm": 4.996043459346209, |
| "learning_rate": 1.0598626104023554e-06, |
| "loss": 0.329, |
| "step": 15490 |
| }, |
| { |
| "epoch": 0.8946350754667975, |
| "grad_norm": 4.117500728065101, |
| "learning_rate": 1.0540899382324078e-06, |
| "loss": 0.3269, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.8952122593864535, |
| "grad_norm": 4.9248570764155435, |
| "learning_rate": 1.0483172660624604e-06, |
| "loss": 0.3321, |
| "step": 15510 |
| }, |
| { |
| "epoch": 0.8957894433061095, |
| "grad_norm": 4.696024737813827, |
| "learning_rate": 1.042544593892513e-06, |
| "loss": 0.3309, |
| "step": 15520 |
| }, |
| { |
| "epoch": 0.8963666272257655, |
| "grad_norm": 7.733578705149999, |
| "learning_rate": 1.0367719217225656e-06, |
| "loss": 0.3305, |
| "step": 15530 |
| }, |
| { |
| "epoch": 0.8969438111454215, |
| "grad_norm": 5.394346330597671, |
| "learning_rate": 1.0309992495526181e-06, |
| "loss": 0.3388, |
| "step": 15540 |
| }, |
| { |
| "epoch": 0.8975209950650774, |
| "grad_norm": 5.311667852016155, |
| "learning_rate": 1.0252265773826705e-06, |
| "loss": 0.3376, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.8980981789847334, |
| "grad_norm": 25.908831946398365, |
| "learning_rate": 1.0194539052127231e-06, |
| "loss": 0.3265, |
| "step": 15560 |
| }, |
| { |
| "epoch": 0.8986753629043894, |
| "grad_norm": 33.665974796607145, |
| "learning_rate": 1.0136812330427755e-06, |
| "loss": 0.3348, |
| "step": 15570 |
| }, |
| { |
| "epoch": 0.8992525468240454, |
| "grad_norm": 6.825259095726871, |
| "learning_rate": 1.0079085608728281e-06, |
| "loss": 0.3453, |
| "step": 15580 |
| }, |
| { |
| "epoch": 0.8998297307437014, |
| "grad_norm": 3.1735048623731092, |
| "learning_rate": 1.0021358887028807e-06, |
| "loss": 0.3218, |
| "step": 15590 |
| }, |
| { |
| "epoch": 0.9004069146633574, |
| "grad_norm": 9.114656814093948, |
| "learning_rate": 9.96363216532933e-07, |
| "loss": 0.3228, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.9009840985830134, |
| "grad_norm": 5.87155385500696, |
| "learning_rate": 9.905905443629857e-07, |
| "loss": 0.3309, |
| "step": 15610 |
| }, |
| { |
| "epoch": 0.9015612825026694, |
| "grad_norm": 14.717991716657272, |
| "learning_rate": 9.848178721930383e-07, |
| "loss": 0.3486, |
| "step": 15620 |
| }, |
| { |
| "epoch": 0.9021384664223254, |
| "grad_norm": 3.7767948462311067, |
| "learning_rate": 9.790452000230907e-07, |
| "loss": 0.3182, |
| "step": 15630 |
| }, |
| { |
| "epoch": 0.9027156503419814, |
| "grad_norm": 3.6787944993169006, |
| "learning_rate": 9.732725278531433e-07, |
| "loss": 0.3336, |
| "step": 15640 |
| }, |
| { |
| "epoch": 0.9032928342616374, |
| "grad_norm": 11.812395475228488, |
| "learning_rate": 9.674998556831959e-07, |
| "loss": 0.3286, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.9038700181812934, |
| "grad_norm": 5.186436534605756, |
| "learning_rate": 9.617271835132485e-07, |
| "loss": 0.3275, |
| "step": 15660 |
| }, |
| { |
| "epoch": 0.9044472021009494, |
| "grad_norm": 4.424154650673984, |
| "learning_rate": 9.559545113433009e-07, |
| "loss": 0.3185, |
| "step": 15670 |
| }, |
| { |
| "epoch": 0.9050243860206054, |
| "grad_norm": 6.3580022358578105, |
| "learning_rate": 9.501818391733534e-07, |
| "loss": 0.3226, |
| "step": 15680 |
| }, |
| { |
| "epoch": 0.9056015699402614, |
| "grad_norm": 6.310999676892304, |
| "learning_rate": 9.444091670034059e-07, |
| "loss": 0.3304, |
| "step": 15690 |
| }, |
| { |
| "epoch": 0.9061787538599174, |
| "grad_norm": 3.7913010315172633, |
| "learning_rate": 9.386364948334585e-07, |
| "loss": 0.3412, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.9067559377795734, |
| "grad_norm": 3.67407928861924, |
| "learning_rate": 9.32863822663511e-07, |
| "loss": 0.341, |
| "step": 15710 |
| }, |
| { |
| "epoch": 0.9073331216992294, |
| "grad_norm": 7.334570042771433, |
| "learning_rate": 9.270911504935635e-07, |
| "loss": 0.3265, |
| "step": 15720 |
| }, |
| { |
| "epoch": 0.9079103056188854, |
| "grad_norm": 6.456404235720165, |
| "learning_rate": 9.213184783236161e-07, |
| "loss": 0.3444, |
| "step": 15730 |
| }, |
| { |
| "epoch": 0.9084874895385414, |
| "grad_norm": 4.201612752804453, |
| "learning_rate": 9.155458061536686e-07, |
| "loss": 0.348, |
| "step": 15740 |
| }, |
| { |
| "epoch": 0.9090646734581974, |
| "grad_norm": 5.071365974662106, |
| "learning_rate": 9.097731339837211e-07, |
| "loss": 0.3153, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.9096418573778534, |
| "grad_norm": 5.035641882967374, |
| "learning_rate": 9.040004618137736e-07, |
| "loss": 0.3359, |
| "step": 15760 |
| }, |
| { |
| "epoch": 0.9102190412975094, |
| "grad_norm": 12.03037859002001, |
| "learning_rate": 8.982277896438262e-07, |
| "loss": 0.3242, |
| "step": 15770 |
| }, |
| { |
| "epoch": 0.9107962252171654, |
| "grad_norm": 7.140437657447056, |
| "learning_rate": 8.924551174738787e-07, |
| "loss": 0.3333, |
| "step": 15780 |
| }, |
| { |
| "epoch": 0.9113734091368214, |
| "grad_norm": 8.294728901327792, |
| "learning_rate": 8.866824453039313e-07, |
| "loss": 0.3267, |
| "step": 15790 |
| }, |
| { |
| "epoch": 0.9119505930564774, |
| "grad_norm": 5.258892430294796, |
| "learning_rate": 8.809097731339839e-07, |
| "loss": 0.3306, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.9125277769761334, |
| "grad_norm": 4.56809810586393, |
| "learning_rate": 8.751371009640364e-07, |
| "loss": 0.3354, |
| "step": 15810 |
| }, |
| { |
| "epoch": 0.9131049608957894, |
| "grad_norm": 8.754714473224658, |
| "learning_rate": 8.693644287940889e-07, |
| "loss": 0.3258, |
| "step": 15820 |
| }, |
| { |
| "epoch": 0.9136821448154454, |
| "grad_norm": 10.722318127648052, |
| "learning_rate": 8.635917566241415e-07, |
| "loss": 0.3251, |
| "step": 15830 |
| }, |
| { |
| "epoch": 0.9142593287351014, |
| "grad_norm": 17.100240147200765, |
| "learning_rate": 8.578190844541939e-07, |
| "loss": 0.3243, |
| "step": 15840 |
| }, |
| { |
| "epoch": 0.9148365126547574, |
| "grad_norm": 6.487613210408211, |
| "learning_rate": 8.520464122842464e-07, |
| "loss": 0.3299, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.9154136965744134, |
| "grad_norm": 3.047470063867609, |
| "learning_rate": 8.462737401142989e-07, |
| "loss": 0.3277, |
| "step": 15860 |
| }, |
| { |
| "epoch": 0.9159908804940694, |
| "grad_norm": 3.3099261534656823, |
| "learning_rate": 8.405010679443515e-07, |
| "loss": 0.3225, |
| "step": 15870 |
| }, |
| { |
| "epoch": 0.9165680644137254, |
| "grad_norm": 12.904829402744845, |
| "learning_rate": 8.34728395774404e-07, |
| "loss": 0.3456, |
| "step": 15880 |
| }, |
| { |
| "epoch": 0.9171452483333814, |
| "grad_norm": 3.874918781355711, |
| "learning_rate": 8.289557236044565e-07, |
| "loss": 0.324, |
| "step": 15890 |
| }, |
| { |
| "epoch": 0.9177224322530374, |
| "grad_norm": 8.856329412411933, |
| "learning_rate": 8.231830514345091e-07, |
| "loss": 0.3336, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.9182996161726934, |
| "grad_norm": 8.142861706815804, |
| "learning_rate": 8.174103792645616e-07, |
| "loss": 0.3346, |
| "step": 15910 |
| }, |
| { |
| "epoch": 0.9188768000923494, |
| "grad_norm": 6.512456297032329, |
| "learning_rate": 8.116377070946142e-07, |
| "loss": 0.3356, |
| "step": 15920 |
| }, |
| { |
| "epoch": 0.9194539840120054, |
| "grad_norm": 5.20827680094837, |
| "learning_rate": 8.058650349246668e-07, |
| "loss": 0.327, |
| "step": 15930 |
| }, |
| { |
| "epoch": 0.9200311679316614, |
| "grad_norm": 3.6566931406552166, |
| "learning_rate": 8.000923627547193e-07, |
| "loss": 0.3393, |
| "step": 15940 |
| }, |
| { |
| "epoch": 0.9206083518513174, |
| "grad_norm": 5.430549059652793, |
| "learning_rate": 7.943196905847718e-07, |
| "loss": 0.336, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.9211855357709734, |
| "grad_norm": 11.48641695737308, |
| "learning_rate": 7.885470184148243e-07, |
| "loss": 0.3239, |
| "step": 15960 |
| }, |
| { |
| "epoch": 0.9217627196906294, |
| "grad_norm": 5.74247518929047, |
| "learning_rate": 7.827743462448769e-07, |
| "loss": 0.3272, |
| "step": 15970 |
| }, |
| { |
| "epoch": 0.9223399036102854, |
| "grad_norm": 2.9972434937325954, |
| "learning_rate": 7.770016740749293e-07, |
| "loss": 0.3444, |
| "step": 15980 |
| }, |
| { |
| "epoch": 0.9229170875299414, |
| "grad_norm": 4.3485773330395405, |
| "learning_rate": 7.712290019049818e-07, |
| "loss": 0.3343, |
| "step": 15990 |
| }, |
| { |
| "epoch": 0.9234942714495974, |
| "grad_norm": 5.8420315281490725, |
| "learning_rate": 7.654563297350344e-07, |
| "loss": 0.3418, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.9240714553692534, |
| "grad_norm": 3.3727101894208924, |
| "learning_rate": 7.596836575650869e-07, |
| "loss": 0.3313, |
| "step": 16010 |
| }, |
| { |
| "epoch": 0.9246486392889094, |
| "grad_norm": 4.3271834892078305, |
| "learning_rate": 7.539109853951394e-07, |
| "loss": 0.3263, |
| "step": 16020 |
| }, |
| { |
| "epoch": 0.9252258232085654, |
| "grad_norm": 4.114539060448778, |
| "learning_rate": 7.481383132251919e-07, |
| "loss": 0.3494, |
| "step": 16030 |
| }, |
| { |
| "epoch": 0.9258030071282214, |
| "grad_norm": 3.6454496106451946, |
| "learning_rate": 7.423656410552445e-07, |
| "loss": 0.3287, |
| "step": 16040 |
| }, |
| { |
| "epoch": 0.9263801910478774, |
| "grad_norm": 3.738978996136776, |
| "learning_rate": 7.365929688852971e-07, |
| "loss": 0.3463, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.9269573749675334, |
| "grad_norm": 4.789282815458411, |
| "learning_rate": 7.308202967153497e-07, |
| "loss": 0.3237, |
| "step": 16060 |
| }, |
| { |
| "epoch": 0.9275345588871894, |
| "grad_norm": 9.761730219338645, |
| "learning_rate": 7.250476245454022e-07, |
| "loss": 0.3469, |
| "step": 16070 |
| }, |
| { |
| "epoch": 0.9281117428068454, |
| "grad_norm": 3.028208359321862, |
| "learning_rate": 7.192749523754547e-07, |
| "loss": 0.3355, |
| "step": 16080 |
| }, |
| { |
| "epoch": 0.9286889267265014, |
| "grad_norm": 18.45944225954803, |
| "learning_rate": 7.135022802055072e-07, |
| "loss": 0.3177, |
| "step": 16090 |
| }, |
| { |
| "epoch": 0.9292661106461574, |
| "grad_norm": 7.253195406338047, |
| "learning_rate": 7.077296080355598e-07, |
| "loss": 0.3439, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.9298432945658134, |
| "grad_norm": 4.739735623729803, |
| "learning_rate": 7.019569358656123e-07, |
| "loss": 0.3357, |
| "step": 16110 |
| }, |
| { |
| "epoch": 0.9304204784854694, |
| "grad_norm": 7.262658023729907, |
| "learning_rate": 6.961842636956648e-07, |
| "loss": 0.3307, |
| "step": 16120 |
| }, |
| { |
| "epoch": 0.9309976624051254, |
| "grad_norm": 4.3276336459159275, |
| "learning_rate": 6.904115915257172e-07, |
| "loss": 0.3397, |
| "step": 16130 |
| }, |
| { |
| "epoch": 0.9315748463247814, |
| "grad_norm": 4.773618213493451, |
| "learning_rate": 6.846389193557698e-07, |
| "loss": 0.3265, |
| "step": 16140 |
| }, |
| { |
| "epoch": 0.9321520302444374, |
| "grad_norm": 3.469713298213091, |
| "learning_rate": 6.788662471858223e-07, |
| "loss": 0.3356, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.9327292141640934, |
| "grad_norm": 2.680891067948385, |
| "learning_rate": 6.730935750158748e-07, |
| "loss": 0.3351, |
| "step": 16160 |
| }, |
| { |
| "epoch": 0.9333063980837494, |
| "grad_norm": 4.795647821005584, |
| "learning_rate": 6.673209028459274e-07, |
| "loss": 0.316, |
| "step": 16170 |
| }, |
| { |
| "epoch": 0.9338835820034054, |
| "grad_norm": 4.072329465945383, |
| "learning_rate": 6.615482306759799e-07, |
| "loss": 0.3323, |
| "step": 16180 |
| }, |
| { |
| "epoch": 0.9344607659230614, |
| "grad_norm": 6.139834777308556, |
| "learning_rate": 6.557755585060325e-07, |
| "loss": 0.3389, |
| "step": 16190 |
| }, |
| { |
| "epoch": 0.9350379498427174, |
| "grad_norm": 6.679020147433282, |
| "learning_rate": 6.500028863360851e-07, |
| "loss": 0.3213, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.9356151337623734, |
| "grad_norm": 3.931169778597499, |
| "learning_rate": 6.442302141661376e-07, |
| "loss": 0.3202, |
| "step": 16210 |
| }, |
| { |
| "epoch": 0.9361923176820294, |
| "grad_norm": 9.793848920416451, |
| "learning_rate": 6.384575419961901e-07, |
| "loss": 0.3312, |
| "step": 16220 |
| }, |
| { |
| "epoch": 0.9367695016016854, |
| "grad_norm": 3.165076810544466, |
| "learning_rate": 6.326848698262427e-07, |
| "loss": 0.3268, |
| "step": 16230 |
| }, |
| { |
| "epoch": 0.9373466855213414, |
| "grad_norm": 11.613653214904037, |
| "learning_rate": 6.269121976562952e-07, |
| "loss": 0.3395, |
| "step": 16240 |
| }, |
| { |
| "epoch": 0.9379238694409974, |
| "grad_norm": 8.287793663837986, |
| "learning_rate": 6.211395254863477e-07, |
| "loss": 0.337, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.9385010533606534, |
| "grad_norm": 4.919246593010951, |
| "learning_rate": 6.153668533164002e-07, |
| "loss": 0.3183, |
| "step": 16260 |
| }, |
| { |
| "epoch": 0.9390782372803094, |
| "grad_norm": 12.710584575769794, |
| "learning_rate": 6.095941811464528e-07, |
| "loss": 0.3371, |
| "step": 16270 |
| }, |
| { |
| "epoch": 0.9396554211999654, |
| "grad_norm": 13.642759112617313, |
| "learning_rate": 6.038215089765054e-07, |
| "loss": 0.3364, |
| "step": 16280 |
| }, |
| { |
| "epoch": 0.9402326051196214, |
| "grad_norm": 3.2163528904964567, |
| "learning_rate": 5.980488368065578e-07, |
| "loss": 0.3482, |
| "step": 16290 |
| }, |
| { |
| "epoch": 0.9408097890392774, |
| "grad_norm": 6.3234687340970845, |
| "learning_rate": 5.922761646366103e-07, |
| "loss": 0.3389, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.9413869729589334, |
| "grad_norm": 4.810430576924547, |
| "learning_rate": 5.865034924666628e-07, |
| "loss": 0.3273, |
| "step": 16310 |
| }, |
| { |
| "epoch": 0.9419641568785894, |
| "grad_norm": 16.667420292327467, |
| "learning_rate": 5.807308202967154e-07, |
| "loss": 0.3333, |
| "step": 16320 |
| }, |
| { |
| "epoch": 0.9425413407982454, |
| "grad_norm": 4.476842181947663, |
| "learning_rate": 5.749581481267679e-07, |
| "loss": 0.3319, |
| "step": 16330 |
| }, |
| { |
| "epoch": 0.9431185247179014, |
| "grad_norm": 2.753605309820116, |
| "learning_rate": 5.691854759568204e-07, |
| "loss": 0.324, |
| "step": 16340 |
| }, |
| { |
| "epoch": 0.9436957086375574, |
| "grad_norm": 7.258998836239923, |
| "learning_rate": 5.63412803786873e-07, |
| "loss": 0.3317, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.9442728925572134, |
| "grad_norm": 3.3810984203362513, |
| "learning_rate": 5.576401316169255e-07, |
| "loss": 0.3308, |
| "step": 16360 |
| }, |
| { |
| "epoch": 0.9448500764768694, |
| "grad_norm": 10.448881769543355, |
| "learning_rate": 5.518674594469781e-07, |
| "loss": 0.3389, |
| "step": 16370 |
| }, |
| { |
| "epoch": 0.9454272603965254, |
| "grad_norm": 7.485384669846898, |
| "learning_rate": 5.460947872770306e-07, |
| "loss": 0.3225, |
| "step": 16380 |
| }, |
| { |
| "epoch": 0.9460044443161814, |
| "grad_norm": 7.048976390316521, |
| "learning_rate": 5.403221151070831e-07, |
| "loss": 0.3316, |
| "step": 16390 |
| }, |
| { |
| "epoch": 0.9465816282358374, |
| "grad_norm": 5.343786757617583, |
| "learning_rate": 5.345494429371357e-07, |
| "loss": 0.324, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.9471588121554934, |
| "grad_norm": 8.606885533079547, |
| "learning_rate": 5.287767707671882e-07, |
| "loss": 0.339, |
| "step": 16410 |
| }, |
| { |
| "epoch": 0.9477359960751494, |
| "grad_norm": 5.1476348800105205, |
| "learning_rate": 5.230040985972408e-07, |
| "loss": 0.3316, |
| "step": 16420 |
| }, |
| { |
| "epoch": 0.9483131799948054, |
| "grad_norm": 3.9518622190419386, |
| "learning_rate": 5.172314264272933e-07, |
| "loss": 0.3257, |
| "step": 16430 |
| }, |
| { |
| "epoch": 0.9488903639144614, |
| "grad_norm": 4.759115152912856, |
| "learning_rate": 5.114587542573457e-07, |
| "loss": 0.3252, |
| "step": 16440 |
| }, |
| { |
| "epoch": 0.9494675478341174, |
| "grad_norm": 2.3577377378728936, |
| "learning_rate": 5.056860820873983e-07, |
| "loss": 0.3354, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.9500447317537734, |
| "grad_norm": 8.233491658698778, |
| "learning_rate": 4.999134099174508e-07, |
| "loss": 0.3269, |
| "step": 16460 |
| }, |
| { |
| "epoch": 0.9506219156734294, |
| "grad_norm": 5.830593517325124, |
| "learning_rate": 4.941407377475033e-07, |
| "loss": 0.3303, |
| "step": 16470 |
| }, |
| { |
| "epoch": 0.9511990995930854, |
| "grad_norm": 4.5976658225857205, |
| "learning_rate": 4.883680655775559e-07, |
| "loss": 0.3164, |
| "step": 16480 |
| }, |
| { |
| "epoch": 0.9517762835127414, |
| "grad_norm": 2.683143832655395, |
| "learning_rate": 4.825953934076084e-07, |
| "loss": 0.3389, |
| "step": 16490 |
| }, |
| { |
| "epoch": 0.9523534674323973, |
| "grad_norm": 6.256568384332184, |
| "learning_rate": 4.7682272123766096e-07, |
| "loss": 0.3379, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.9529306513520533, |
| "grad_norm": 5.947037852710701, |
| "learning_rate": 4.710500490677135e-07, |
| "loss": 0.3336, |
| "step": 16510 |
| }, |
| { |
| "epoch": 0.9535078352717093, |
| "grad_norm": 3.9206339656766183, |
| "learning_rate": 4.65277376897766e-07, |
| "loss": 0.3416, |
| "step": 16520 |
| }, |
| { |
| "epoch": 0.9540850191913653, |
| "grad_norm": 5.305934878449426, |
| "learning_rate": 4.5950470472781854e-07, |
| "loss": 0.343, |
| "step": 16530 |
| }, |
| { |
| "epoch": 0.9546622031110213, |
| "grad_norm": 4.905648955862364, |
| "learning_rate": 4.53732032557871e-07, |
| "loss": 0.3326, |
| "step": 16540 |
| }, |
| { |
| "epoch": 0.9552393870306773, |
| "grad_norm": 6.934144679851784, |
| "learning_rate": 4.479593603879236e-07, |
| "loss": 0.3315, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.9558165709503333, |
| "grad_norm": 6.121333752853476, |
| "learning_rate": 4.4218668821797617e-07, |
| "loss": 0.3337, |
| "step": 16560 |
| }, |
| { |
| "epoch": 0.9563937548699893, |
| "grad_norm": 4.161869077945622, |
| "learning_rate": 4.3641401604802866e-07, |
| "loss": 0.354, |
| "step": 16570 |
| }, |
| { |
| "epoch": 0.9569709387896453, |
| "grad_norm": 4.792938959925312, |
| "learning_rate": 4.306413438780812e-07, |
| "loss": 0.3385, |
| "step": 16580 |
| }, |
| { |
| "epoch": 0.9575481227093013, |
| "grad_norm": 13.85786954380734, |
| "learning_rate": 4.248686717081337e-07, |
| "loss": 0.3206, |
| "step": 16590 |
| }, |
| { |
| "epoch": 0.9581253066289573, |
| "grad_norm": 21.263443082950594, |
| "learning_rate": 4.1909599953818624e-07, |
| "loss": 0.3325, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.9587024905486133, |
| "grad_norm": 3.660403999109124, |
| "learning_rate": 4.1332332736823884e-07, |
| "loss": 0.3227, |
| "step": 16610 |
| }, |
| { |
| "epoch": 0.9592796744682693, |
| "grad_norm": 3.9235176913649994, |
| "learning_rate": 4.075506551982913e-07, |
| "loss": 0.3283, |
| "step": 16620 |
| }, |
| { |
| "epoch": 0.9598568583879253, |
| "grad_norm": 5.6449372673837965, |
| "learning_rate": 4.0177798302834387e-07, |
| "loss": 0.3427, |
| "step": 16630 |
| }, |
| { |
| "epoch": 0.9604340423075813, |
| "grad_norm": 5.248416354277083, |
| "learning_rate": 3.9600531085839636e-07, |
| "loss": 0.3288, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.9610112262272373, |
| "grad_norm": 8.246345220378487, |
| "learning_rate": 3.902326386884489e-07, |
| "loss": 0.3354, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.9615884101468933, |
| "grad_norm": 13.274950590494653, |
| "learning_rate": 3.8445996651850145e-07, |
| "loss": 0.3312, |
| "step": 16660 |
| }, |
| { |
| "epoch": 0.9621655940665493, |
| "grad_norm": 15.537361667999631, |
| "learning_rate": 3.7868729434855394e-07, |
| "loss": 0.3237, |
| "step": 16670 |
| }, |
| { |
| "epoch": 0.9627427779862053, |
| "grad_norm": 17.30099668558429, |
| "learning_rate": 3.7291462217860654e-07, |
| "loss": 0.3447, |
| "step": 16680 |
| }, |
| { |
| "epoch": 0.9633199619058613, |
| "grad_norm": 6.00682814280853, |
| "learning_rate": 3.671419500086591e-07, |
| "loss": 0.3285, |
| "step": 16690 |
| }, |
| { |
| "epoch": 0.9638971458255173, |
| "grad_norm": 4.337349097771177, |
| "learning_rate": 3.613692778387116e-07, |
| "loss": 0.3406, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.9644743297451733, |
| "grad_norm": 5.112346889090425, |
| "learning_rate": 3.555966056687641e-07, |
| "loss": 0.3245, |
| "step": 16710 |
| }, |
| { |
| "epoch": 0.9650515136648293, |
| "grad_norm": 3.333315383479396, |
| "learning_rate": 3.498239334988166e-07, |
| "loss": 0.323, |
| "step": 16720 |
| }, |
| { |
| "epoch": 0.9656286975844853, |
| "grad_norm": 6.363838641104665, |
| "learning_rate": 3.4405126132886915e-07, |
| "loss": 0.3335, |
| "step": 16730 |
| }, |
| { |
| "epoch": 0.9662058815041413, |
| "grad_norm": 4.4727853159969095, |
| "learning_rate": 3.3827858915892164e-07, |
| "loss": 0.3195, |
| "step": 16740 |
| }, |
| { |
| "epoch": 0.9667830654237973, |
| "grad_norm": 2.9245486184268525, |
| "learning_rate": 3.3250591698897424e-07, |
| "loss": 0.334, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.9673602493434533, |
| "grad_norm": 8.292114221205217, |
| "learning_rate": 3.267332448190268e-07, |
| "loss": 0.3417, |
| "step": 16760 |
| }, |
| { |
| "epoch": 0.9679374332631093, |
| "grad_norm": 4.467404105277962, |
| "learning_rate": 3.209605726490793e-07, |
| "loss": 0.3273, |
| "step": 16770 |
| }, |
| { |
| "epoch": 0.9685146171827653, |
| "grad_norm": 3.555136063724782, |
| "learning_rate": 3.151879004791318e-07, |
| "loss": 0.3319, |
| "step": 16780 |
| }, |
| { |
| "epoch": 0.9690918011024213, |
| "grad_norm": 3.5864439964386206, |
| "learning_rate": 3.0941522830918436e-07, |
| "loss": 0.3151, |
| "step": 16790 |
| }, |
| { |
| "epoch": 0.9696689850220773, |
| "grad_norm": 4.286014953806982, |
| "learning_rate": 3.036425561392369e-07, |
| "loss": 0.3348, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.9702461689417333, |
| "grad_norm": 4.937397445129751, |
| "learning_rate": 2.978698839692894e-07, |
| "loss": 0.3447, |
| "step": 16810 |
| }, |
| { |
| "epoch": 0.9708233528613893, |
| "grad_norm": 4.053983936820117, |
| "learning_rate": 2.9209721179934194e-07, |
| "loss": 0.3234, |
| "step": 16820 |
| }, |
| { |
| "epoch": 0.9714005367810453, |
| "grad_norm": 4.668613443745286, |
| "learning_rate": 2.863245396293945e-07, |
| "loss": 0.3318, |
| "step": 16830 |
| }, |
| { |
| "epoch": 0.9719777207007013, |
| "grad_norm": 4.726308017445137, |
| "learning_rate": 2.80551867459447e-07, |
| "loss": 0.3384, |
| "step": 16840 |
| }, |
| { |
| "epoch": 0.9725549046203573, |
| "grad_norm": 5.20234640635383, |
| "learning_rate": 2.747791952894995e-07, |
| "loss": 0.3414, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.9731320885400133, |
| "grad_norm": 5.471268202808402, |
| "learning_rate": 2.6900652311955207e-07, |
| "loss": 0.3158, |
| "step": 16860 |
| }, |
| { |
| "epoch": 0.9737092724596693, |
| "grad_norm": 4.041775287210815, |
| "learning_rate": 2.632338509496046e-07, |
| "loss": 0.3353, |
| "step": 16870 |
| }, |
| { |
| "epoch": 0.9742864563793253, |
| "grad_norm": 6.9340722075810515, |
| "learning_rate": 2.574611787796571e-07, |
| "loss": 0.3292, |
| "step": 16880 |
| }, |
| { |
| "epoch": 0.9748636402989813, |
| "grad_norm": 5.462231359128078, |
| "learning_rate": 2.5168850660970965e-07, |
| "loss": 0.3382, |
| "step": 16890 |
| }, |
| { |
| "epoch": 0.9754408242186373, |
| "grad_norm": 5.016835747194534, |
| "learning_rate": 2.459158344397622e-07, |
| "loss": 0.3264, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.9760180081382933, |
| "grad_norm": 6.59783102359862, |
| "learning_rate": 2.4014316226981474e-07, |
| "loss": 0.3303, |
| "step": 16910 |
| }, |
| { |
| "epoch": 0.9765951920579493, |
| "grad_norm": 11.129353025607179, |
| "learning_rate": 2.3437049009986723e-07, |
| "loss": 0.3196, |
| "step": 16920 |
| }, |
| { |
| "epoch": 0.9771723759776053, |
| "grad_norm": 5.828386789897742, |
| "learning_rate": 2.285978179299198e-07, |
| "loss": 0.3319, |
| "step": 16930 |
| }, |
| { |
| "epoch": 0.9777495598972613, |
| "grad_norm": 2.710691717608737, |
| "learning_rate": 2.2282514575997232e-07, |
| "loss": 0.3319, |
| "step": 16940 |
| }, |
| { |
| "epoch": 0.9783267438169173, |
| "grad_norm": 5.1520597373996715, |
| "learning_rate": 2.1705247359002483e-07, |
| "loss": 0.3314, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.9789039277365733, |
| "grad_norm": 3.185806720570308, |
| "learning_rate": 2.1127980142007738e-07, |
| "loss": 0.3296, |
| "step": 16960 |
| }, |
| { |
| "epoch": 0.9794811116562293, |
| "grad_norm": 6.515634970555692, |
| "learning_rate": 2.055071292501299e-07, |
| "loss": 0.3312, |
| "step": 16970 |
| }, |
| { |
| "epoch": 0.9800582955758853, |
| "grad_norm": 10.485461655446002, |
| "learning_rate": 1.997344570801824e-07, |
| "loss": 0.3422, |
| "step": 16980 |
| }, |
| { |
| "epoch": 0.9806354794955413, |
| "grad_norm": 3.8847690688300727, |
| "learning_rate": 1.9396178491023498e-07, |
| "loss": 0.3283, |
| "step": 16990 |
| }, |
| { |
| "epoch": 0.9812126634151973, |
| "grad_norm": 9.994920110996672, |
| "learning_rate": 1.881891127402875e-07, |
| "loss": 0.3327, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.9817898473348533, |
| "grad_norm": 5.025058096183087, |
| "learning_rate": 1.8241644057034002e-07, |
| "loss": 0.3207, |
| "step": 17010 |
| }, |
| { |
| "epoch": 0.9823670312545093, |
| "grad_norm": 11.136774459380124, |
| "learning_rate": 1.7664376840039256e-07, |
| "loss": 0.3233, |
| "step": 17020 |
| }, |
| { |
| "epoch": 0.9829442151741652, |
| "grad_norm": 2.906968928719714, |
| "learning_rate": 1.708710962304451e-07, |
| "loss": 0.335, |
| "step": 17030 |
| }, |
| { |
| "epoch": 0.9835213990938212, |
| "grad_norm": 47.22379036340986, |
| "learning_rate": 1.6509842406049762e-07, |
| "loss": 0.326, |
| "step": 17040 |
| }, |
| { |
| "epoch": 0.9840985830134772, |
| "grad_norm": 3.5068334227447537, |
| "learning_rate": 1.5932575189055014e-07, |
| "loss": 0.3308, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.9846757669331332, |
| "grad_norm": 6.058207244664307, |
| "learning_rate": 1.5355307972060266e-07, |
| "loss": 0.336, |
| "step": 17060 |
| }, |
| { |
| "epoch": 0.9852529508527892, |
| "grad_norm": 14.19694786309551, |
| "learning_rate": 1.477804075506552e-07, |
| "loss": 0.3482, |
| "step": 17070 |
| }, |
| { |
| "epoch": 0.9858301347724452, |
| "grad_norm": 4.351680537742745, |
| "learning_rate": 1.4200773538070775e-07, |
| "loss": 0.3533, |
| "step": 17080 |
| }, |
| { |
| "epoch": 0.9864073186921012, |
| "grad_norm": 11.029248449585278, |
| "learning_rate": 1.3623506321076027e-07, |
| "loss": 0.3196, |
| "step": 17090 |
| }, |
| { |
| "epoch": 0.9869845026117572, |
| "grad_norm": 5.081238129188481, |
| "learning_rate": 1.304623910408128e-07, |
| "loss": 0.333, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.9875616865314132, |
| "grad_norm": 23.011375672208313, |
| "learning_rate": 1.2468971887086533e-07, |
| "loss": 0.3353, |
| "step": 17110 |
| }, |
| { |
| "epoch": 0.9881388704510692, |
| "grad_norm": 3.615170936112003, |
| "learning_rate": 1.1891704670091786e-07, |
| "loss": 0.3293, |
| "step": 17120 |
| }, |
| { |
| "epoch": 0.9887160543707252, |
| "grad_norm": 6.0241909835288645, |
| "learning_rate": 1.131443745309704e-07, |
| "loss": 0.3305, |
| "step": 17130 |
| }, |
| { |
| "epoch": 0.9892932382903812, |
| "grad_norm": 6.4242440293309, |
| "learning_rate": 1.0737170236102292e-07, |
| "loss": 0.3229, |
| "step": 17140 |
| }, |
| { |
| "epoch": 0.9898704222100372, |
| "grad_norm": 4.8207248692315465, |
| "learning_rate": 1.0159903019107546e-07, |
| "loss": 0.3317, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.9904476061296932, |
| "grad_norm": 4.674342685671797, |
| "learning_rate": 9.5826358021128e-08, |
| "loss": 0.328, |
| "step": 17160 |
| }, |
| { |
| "epoch": 0.9910247900493492, |
| "grad_norm": 5.524320830604144, |
| "learning_rate": 9.005368585118051e-08, |
| "loss": 0.3297, |
| "step": 17170 |
| }, |
| { |
| "epoch": 0.9916019739690052, |
| "grad_norm": 6.1310872624369175, |
| "learning_rate": 8.428101368123306e-08, |
| "loss": 0.3342, |
| "step": 17180 |
| }, |
| { |
| "epoch": 0.9921791578886612, |
| "grad_norm": 4.736837397124582, |
| "learning_rate": 7.850834151128557e-08, |
| "loss": 0.3261, |
| "step": 17190 |
| }, |
| { |
| "epoch": 0.9927563418083172, |
| "grad_norm": 3.3135028507038498, |
| "learning_rate": 7.273566934133812e-08, |
| "loss": 0.3125, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.9933335257279732, |
| "grad_norm": 4.80914101916905, |
| "learning_rate": 6.696299717139064e-08, |
| "loss": 0.3233, |
| "step": 17210 |
| }, |
| { |
| "epoch": 0.9939107096476292, |
| "grad_norm": 4.178676432109751, |
| "learning_rate": 6.119032500144317e-08, |
| "loss": 0.316, |
| "step": 17220 |
| }, |
| { |
| "epoch": 0.9944878935672852, |
| "grad_norm": 4.648278510632473, |
| "learning_rate": 5.5417652831495705e-08, |
| "loss": 0.3247, |
| "step": 17230 |
| }, |
| { |
| "epoch": 0.9950650774869412, |
| "grad_norm": 6.4221153929916515, |
| "learning_rate": 4.9644980661548236e-08, |
| "loss": 0.3252, |
| "step": 17240 |
| }, |
| { |
| "epoch": 0.9956422614065972, |
| "grad_norm": 4.57910460292981, |
| "learning_rate": 4.3872308491600766e-08, |
| "loss": 0.346, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.9962194453262532, |
| "grad_norm": 6.013404774431674, |
| "learning_rate": 3.80996363216533e-08, |
| "loss": 0.3286, |
| "step": 17260 |
| }, |
| { |
| "epoch": 0.9967966292459092, |
| "grad_norm": 4.660240371593313, |
| "learning_rate": 3.232696415170583e-08, |
| "loss": 0.3238, |
| "step": 17270 |
| }, |
| { |
| "epoch": 0.9973738131655652, |
| "grad_norm": 3.0170050218995734, |
| "learning_rate": 2.6554291981758356e-08, |
| "loss": 0.3399, |
| "step": 17280 |
| }, |
| { |
| "epoch": 0.9979509970852212, |
| "grad_norm": 3.235155684245671, |
| "learning_rate": 2.078161981181089e-08, |
| "loss": 0.343, |
| "step": 17290 |
| }, |
| { |
| "epoch": 0.9985281810048772, |
| "grad_norm": 3.668276198786728, |
| "learning_rate": 1.500894764186342e-08, |
| "loss": 0.3299, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.9991053649245332, |
| "grad_norm": 5.359004024139474, |
| "learning_rate": 9.23627547191595e-09, |
| "loss": 0.3202, |
| "step": 17310 |
| }, |
| { |
| "epoch": 0.9996825488441892, |
| "grad_norm": 5.70686001442703, |
| "learning_rate": 3.4636033019684815e-09, |
| "loss": 0.3161, |
| "step": 17320 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 17325, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9935909437571072.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|