| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 2058, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0029154518950437317, |
| "grad_norm": 0.08236155658960342, |
| "learning_rate": 1.4563106796116505e-07, |
| "loss": 1.120621681213379, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0058309037900874635, |
| "grad_norm": 0.5269019603729248, |
| "learning_rate": 4.368932038834952e-07, |
| "loss": 1.9105433225631714, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.008746355685131196, |
| "grad_norm": 0.3449331521987915, |
| "learning_rate": 7.281553398058253e-07, |
| "loss": 1.8805404901504517, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.011661807580174927, |
| "grad_norm": 0.10853756964206696, |
| "learning_rate": 1.0194174757281554e-06, |
| "loss": 1.5699371099472046, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.014577259475218658, |
| "grad_norm": 1.1428029537200928, |
| "learning_rate": 1.3106796116504856e-06, |
| "loss": 1.4362584352493286, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01749271137026239, |
| "grad_norm": 0.5868045091629028, |
| "learning_rate": 1.6019417475728156e-06, |
| "loss": 2.0035324096679688, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02040816326530612, |
| "grad_norm": 0.08258485049009323, |
| "learning_rate": 1.8932038834951458e-06, |
| "loss": 1.5183849334716797, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.023323615160349854, |
| "grad_norm": 2.6764633655548096, |
| "learning_rate": 2.1844660194174755e-06, |
| "loss": 1.8052839040756226, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.026239067055393587, |
| "grad_norm": 0.342227965593338, |
| "learning_rate": 2.475728155339806e-06, |
| "loss": 1.8929893970489502, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.029154518950437316, |
| "grad_norm": 0.1563744992017746, |
| "learning_rate": 2.766990291262136e-06, |
| "loss": 1.7904902696609497, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03206997084548105, |
| "grad_norm": 0.1933348923921585, |
| "learning_rate": 3.058252427184466e-06, |
| "loss": 1.4513907432556152, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.03498542274052478, |
| "grad_norm": 0.5887855291366577, |
| "learning_rate": 3.3495145631067963e-06, |
| "loss": 2.2697947025299072, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.037900874635568516, |
| "grad_norm": 0.33995822072029114, |
| "learning_rate": 3.6407766990291263e-06, |
| "loss": 1.7317644357681274, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.04081632653061224, |
| "grad_norm": 0.6888100504875183, |
| "learning_rate": 3.932038834951457e-06, |
| "loss": 1.8117475509643555, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.043731778425655975, |
| "grad_norm": 0.7777397036552429, |
| "learning_rate": 4.223300970873786e-06, |
| "loss": 1.8055756092071533, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04664723032069971, |
| "grad_norm": 0.4058018624782562, |
| "learning_rate": 4.514563106796117e-06, |
| "loss": 1.9432220458984375, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.04956268221574344, |
| "grad_norm": 0.22737905383110046, |
| "learning_rate": 4.805825242718447e-06, |
| "loss": 1.6058305501937866, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.052478134110787174, |
| "grad_norm": 0.3183080852031708, |
| "learning_rate": 5.097087378640777e-06, |
| "loss": 1.8658274412155151, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.05539358600583091, |
| "grad_norm": 0.17585590481758118, |
| "learning_rate": 5.388349514563107e-06, |
| "loss": 2.2423486709594727, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.05830903790087463, |
| "grad_norm": 0.10230281203985214, |
| "learning_rate": 5.679611650485437e-06, |
| "loss": 1.5302915573120117, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.061224489795918366, |
| "grad_norm": 0.4932399392127991, |
| "learning_rate": 5.970873786407767e-06, |
| "loss": 1.813106656074524, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0641399416909621, |
| "grad_norm": 0.15170824527740479, |
| "learning_rate": 6.262135922330097e-06, |
| "loss": 1.6509969234466553, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.06705539358600583, |
| "grad_norm": 0.1539481282234192, |
| "learning_rate": 6.553398058252427e-06, |
| "loss": 1.7683537006378174, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.06997084548104957, |
| "grad_norm": 0.6599376201629639, |
| "learning_rate": 6.844660194174757e-06, |
| "loss": 2.1816630363464355, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0728862973760933, |
| "grad_norm": 0.24105864763259888, |
| "learning_rate": 7.135922330097088e-06, |
| "loss": 1.910886526107788, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07580174927113703, |
| "grad_norm": 0.09656477719545364, |
| "learning_rate": 7.427184466019417e-06, |
| "loss": 1.199069857597351, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.07871720116618076, |
| "grad_norm": 0.3129803240299225, |
| "learning_rate": 7.718446601941748e-06, |
| "loss": 1.7870614528656006, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.08163265306122448, |
| "grad_norm": 0.253489226102829, |
| "learning_rate": 8.009708737864077e-06, |
| "loss": 2.0801727771759033, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.08454810495626822, |
| "grad_norm": 0.12343698740005493, |
| "learning_rate": 8.300970873786407e-06, |
| "loss": 1.4909915924072266, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.08746355685131195, |
| "grad_norm": 0.19224074482917786, |
| "learning_rate": 8.592233009708738e-06, |
| "loss": 2.0119330883026123, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09037900874635568, |
| "grad_norm": 0.2891639471054077, |
| "learning_rate": 8.883495145631068e-06, |
| "loss": 1.9431190490722656, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.09329446064139942, |
| "grad_norm": 0.8908348083496094, |
| "learning_rate": 9.174757281553397e-06, |
| "loss": 1.8723704814910889, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.09620991253644315, |
| "grad_norm": 0.09907913953065872, |
| "learning_rate": 9.466019417475729e-06, |
| "loss": 1.556423306465149, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.09912536443148688, |
| "grad_norm": 0.18893972039222717, |
| "learning_rate": 9.75728155339806e-06, |
| "loss": 1.8031634092330933, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.10204081632653061, |
| "grad_norm": 0.3021998107433319, |
| "learning_rate": 1.004854368932039e-05, |
| "loss": 1.6836217641830444, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.10495626822157435, |
| "grad_norm": 0.19465358555316925, |
| "learning_rate": 1.0339805825242719e-05, |
| "loss": 1.3162983655929565, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.10787172011661808, |
| "grad_norm": 0.35194098949432373, |
| "learning_rate": 1.0631067961165048e-05, |
| "loss": 1.6223976612091064, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.11078717201166181, |
| "grad_norm": 0.11608141660690308, |
| "learning_rate": 1.092233009708738e-05, |
| "loss": 1.5001176595687866, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.11370262390670553, |
| "grad_norm": 0.17615102231502533, |
| "learning_rate": 1.121359223300971e-05, |
| "loss": 1.6835155487060547, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.11661807580174927, |
| "grad_norm": 0.10972107201814651, |
| "learning_rate": 1.1504854368932039e-05, |
| "loss": 1.0958292484283447, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.119533527696793, |
| "grad_norm": 0.2486797422170639, |
| "learning_rate": 1.1796116504854368e-05, |
| "loss": 1.5743815898895264, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.12244897959183673, |
| "grad_norm": 0.6029097437858582, |
| "learning_rate": 1.20873786407767e-05, |
| "loss": 1.4908254146575928, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.12536443148688048, |
| "grad_norm": 0.47159314155578613, |
| "learning_rate": 1.237864077669903e-05, |
| "loss": 1.3921440839767456, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1282798833819242, |
| "grad_norm": 0.23478780686855316, |
| "learning_rate": 1.2669902912621359e-05, |
| "loss": 1.60302734375, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.13119533527696792, |
| "grad_norm": 0.06909849494695663, |
| "learning_rate": 1.2961165048543688e-05, |
| "loss": 1.3646469116210938, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.13411078717201166, |
| "grad_norm": 0.12045982480049133, |
| "learning_rate": 1.3252427184466021e-05, |
| "loss": 1.3031418323516846, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.13702623906705538, |
| "grad_norm": 0.2616878151893616, |
| "learning_rate": 1.3543689320388351e-05, |
| "loss": 1.4213391542434692, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.13994169096209913, |
| "grad_norm": 0.19713328778743744, |
| "learning_rate": 1.383495145631068e-05, |
| "loss": 1.8326067924499512, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.42456164956092834, |
| "learning_rate": 1.412621359223301e-05, |
| "loss": 2.064007043838501, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.1457725947521866, |
| "grad_norm": 0.1171143651008606, |
| "learning_rate": 1.4417475728155341e-05, |
| "loss": 1.3881018161773682, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.14868804664723032, |
| "grad_norm": 0.5466513633728027, |
| "learning_rate": 1.470873786407767e-05, |
| "loss": 1.7975414991378784, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.15160349854227406, |
| "grad_norm": 0.2429724484682083, |
| "learning_rate": 1.5e-05, |
| "loss": 1.581913709640503, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.15451895043731778, |
| "grad_norm": 0.16082407534122467, |
| "learning_rate": 1.4999965139018001e-05, |
| "loss": 1.6313072443008423, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.15743440233236153, |
| "grad_norm": 0.20626085996627808, |
| "learning_rate": 1.4999860556432087e-05, |
| "loss": 1.4128293991088867, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.16034985422740525, |
| "grad_norm": 0.08978555351495743, |
| "learning_rate": 1.4999686253322514e-05, |
| "loss": 1.6325119733810425, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.16326530612244897, |
| "grad_norm": 0.17410112917423248, |
| "learning_rate": 1.4999442231489687e-05, |
| "loss": 1.6410691738128662, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1661807580174927, |
| "grad_norm": 0.11147186905145645, |
| "learning_rate": 1.4999128493454151e-05, |
| "loss": 1.3302874565124512, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.16909620991253643, |
| "grad_norm": 0.44229331612586975, |
| "learning_rate": 1.4998745042456563e-05, |
| "loss": 1.6997064352035522, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.17201166180758018, |
| "grad_norm": 0.1818253993988037, |
| "learning_rate": 1.499829188245766e-05, |
| "loss": 1.3123167753219604, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.1749271137026239, |
| "grad_norm": 0.15915799140930176, |
| "learning_rate": 1.4997769018138212e-05, |
| "loss": 1.6660683155059814, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.17784256559766765, |
| "grad_norm": 0.2367630898952484, |
| "learning_rate": 1.4997176454898977e-05, |
| "loss": 1.4073443412780762, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.18075801749271136, |
| "grad_norm": 0.653868556022644, |
| "learning_rate": 1.4996514198860649e-05, |
| "loss": 1.351149082183838, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.1836734693877551, |
| "grad_norm": 0.08681757003068924, |
| "learning_rate": 1.4995782256863785e-05, |
| "loss": 1.3422613143920898, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.18658892128279883, |
| "grad_norm": 0.06514488905668259, |
| "learning_rate": 1.4994980636468756e-05, |
| "loss": 1.3343521356582642, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.18950437317784258, |
| "grad_norm": 0.9027652740478516, |
| "learning_rate": 1.4994109345955632e-05, |
| "loss": 1.4679464101791382, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1924198250728863, |
| "grad_norm": 0.35018599033355713, |
| "learning_rate": 1.4993168394324137e-05, |
| "loss": 1.1963084936141968, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.19533527696793002, |
| "grad_norm": 0.13998304307460785, |
| "learning_rate": 1.4992157791293523e-05, |
| "loss": 1.333540678024292, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.19825072886297376, |
| "grad_norm": 0.11608795821666718, |
| "learning_rate": 1.4991077547302497e-05, |
| "loss": 1.5141417980194092, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.20116618075801748, |
| "grad_norm": 0.08046405762434006, |
| "learning_rate": 1.4989927673509089e-05, |
| "loss": 1.3266879320144653, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.20408163265306123, |
| "grad_norm": 0.1371355652809143, |
| "learning_rate": 1.4988708181790555e-05, |
| "loss": 1.2892866134643555, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.20699708454810495, |
| "grad_norm": 0.1368686705827713, |
| "learning_rate": 1.4987419084743244e-05, |
| "loss": 1.0467798709869385, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2099125364431487, |
| "grad_norm": 0.23302382230758667, |
| "learning_rate": 1.4986060395682469e-05, |
| "loss": 1.1930760145187378, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.21282798833819241, |
| "grad_norm": 1.9061791896820068, |
| "learning_rate": 1.4984632128642375e-05, |
| "loss": 1.4475537538528442, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.21574344023323616, |
| "grad_norm": 0.18942643702030182, |
| "learning_rate": 1.4983134298375787e-05, |
| "loss": 1.376928448677063, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.21865889212827988, |
| "grad_norm": 0.21135789155960083, |
| "learning_rate": 1.498156692035407e-05, |
| "loss": 1.5480635166168213, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.22157434402332363, |
| "grad_norm": 0.13644421100616455, |
| "learning_rate": 1.4979930010766947e-05, |
| "loss": 1.7161264419555664, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.22448979591836735, |
| "grad_norm": 0.12430273741483688, |
| "learning_rate": 1.4978223586522351e-05, |
| "loss": 1.242932677268982, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.22740524781341107, |
| "grad_norm": 0.7622217535972595, |
| "learning_rate": 1.4976447665246251e-05, |
| "loss": 0.5300056338310242, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.2303206997084548, |
| "grad_norm": 0.13458958268165588, |
| "learning_rate": 1.4974602265282451e-05, |
| "loss": 1.571650743484497, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.23323615160349853, |
| "grad_norm": 0.2972854673862457, |
| "learning_rate": 1.4972687405692425e-05, |
| "loss": 1.2033076286315918, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.23615160349854228, |
| "grad_norm": 0.29232847690582275, |
| "learning_rate": 1.4970703106255095e-05, |
| "loss": 1.4756550788879395, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.239067055393586, |
| "grad_norm": 0.07210766524076462, |
| "learning_rate": 1.4968649387466655e-05, |
| "loss": 1.3033177852630615, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.24198250728862974, |
| "grad_norm": 0.5424373745918274, |
| "learning_rate": 1.4966526270540327e-05, |
| "loss": 1.0460329055786133, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.24489795918367346, |
| "grad_norm": 0.28463321924209595, |
| "learning_rate": 1.4964333777406174e-05, |
| "loss": 1.250373363494873, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.2478134110787172, |
| "grad_norm": 0.3408065140247345, |
| "learning_rate": 1.496207193071085e-05, |
| "loss": 0.8593610525131226, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.25072886297376096, |
| "grad_norm": 0.14829058945178986, |
| "learning_rate": 1.4959740753817374e-05, |
| "loss": 1.304344892501831, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.2536443148688047, |
| "grad_norm": 0.8436731696128845, |
| "learning_rate": 1.4957340270804896e-05, |
| "loss": 1.2743805646896362, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.2565597667638484, |
| "grad_norm": 0.11323361843824387, |
| "learning_rate": 1.4954870506468434e-05, |
| "loss": 1.329984188079834, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.2594752186588921, |
| "grad_norm": 0.09321129322052002, |
| "learning_rate": 1.4952331486318626e-05, |
| "loss": 1.2258719205856323, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.26239067055393583, |
| "grad_norm": 0.37252843379974365, |
| "learning_rate": 1.4949723236581472e-05, |
| "loss": 1.0671582221984863, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2653061224489796, |
| "grad_norm": 0.3797838091850281, |
| "learning_rate": 1.4947045784198052e-05, |
| "loss": 1.2696138620376587, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.26822157434402333, |
| "grad_norm": 0.16805821657180786, |
| "learning_rate": 1.4944299156824251e-05, |
| "loss": 1.4738816022872925, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.27113702623906705, |
| "grad_norm": 0.2671731114387512, |
| "learning_rate": 1.4941483382830475e-05, |
| "loss": 1.3171305656433105, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.27405247813411077, |
| "grad_norm": 0.07962363958358765, |
| "learning_rate": 1.4938598491301369e-05, |
| "loss": 1.2901722192764282, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.27696793002915454, |
| "grad_norm": 0.280506432056427, |
| "learning_rate": 1.4935644512035486e-05, |
| "loss": 1.3184595108032227, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.27988338192419826, |
| "grad_norm": 0.13458193838596344, |
| "learning_rate": 1.4932621475545014e-05, |
| "loss": 1.1937448978424072, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.282798833819242, |
| "grad_norm": 0.7079519033432007, |
| "learning_rate": 1.4929529413055442e-05, |
| "loss": 1.1439327001571655, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.18462230265140533, |
| "learning_rate": 1.4926368356505236e-05, |
| "loss": 1.5497668981552124, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.2886297376093295, |
| "grad_norm": 0.16043758392333984, |
| "learning_rate": 1.492313833854552e-05, |
| "loss": 1.4568783044815063, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.2915451895043732, |
| "grad_norm": 0.42396068572998047, |
| "learning_rate": 1.491983939253973e-05, |
| "loss": 1.6005096435546875, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2944606413994169, |
| "grad_norm": 0.21155761182308197, |
| "learning_rate": 1.4916471552563272e-05, |
| "loss": 1.3397752046585083, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.29737609329446063, |
| "grad_norm": 0.17219677567481995, |
| "learning_rate": 1.4913034853403173e-05, |
| "loss": 1.3317774534225464, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.30029154518950435, |
| "grad_norm": 0.12617312371730804, |
| "learning_rate": 1.4909529330557714e-05, |
| "loss": 1.2119510173797607, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.3032069970845481, |
| "grad_norm": 0.14850527048110962, |
| "learning_rate": 1.4905955020236072e-05, |
| "loss": 1.385998010635376, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.30612244897959184, |
| "grad_norm": 0.1191219687461853, |
| "learning_rate": 1.490231195935794e-05, |
| "loss": 1.5534725189208984, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.30903790087463556, |
| "grad_norm": 0.06989572942256927, |
| "learning_rate": 1.4898600185553152e-05, |
| "loss": 1.4775235652923584, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.3119533527696793, |
| "grad_norm": 0.08547376841306686, |
| "learning_rate": 1.4894819737161285e-05, |
| "loss": 1.033743977546692, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.31486880466472306, |
| "grad_norm": 0.11992272734642029, |
| "learning_rate": 1.489097065323127e-05, |
| "loss": 1.0980379581451416, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3177842565597668, |
| "grad_norm": 0.30880632996559143, |
| "learning_rate": 1.488705297352099e-05, |
| "loss": 1.317891001701355, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.3206997084548105, |
| "grad_norm": 0.6510909795761108, |
| "learning_rate": 1.4883066738496858e-05, |
| "loss": 0.9413776993751526, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3236151603498542, |
| "grad_norm": 0.43388184905052185, |
| "learning_rate": 1.4879011989333418e-05, |
| "loss": 1.381697177886963, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.32653061224489793, |
| "grad_norm": 0.21984761953353882, |
| "learning_rate": 1.4874888767912902e-05, |
| "loss": 1.2626378536224365, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.3294460641399417, |
| "grad_norm": 0.2687482237815857, |
| "learning_rate": 1.48706971168248e-05, |
| "loss": 1.2034857273101807, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.3323615160349854, |
| "grad_norm": 0.08195902407169342, |
| "learning_rate": 1.4866437079365439e-05, |
| "loss": 1.2773680686950684, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.33527696793002915, |
| "grad_norm": 0.1009335145354271, |
| "learning_rate": 1.4862108699537504e-05, |
| "loss": 1.0853190422058105, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.33819241982507287, |
| "grad_norm": 0.3376968204975128, |
| "learning_rate": 1.4857712022049617e-05, |
| "loss": 1.5481150150299072, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.34110787172011664, |
| "grad_norm": 0.7441994547843933, |
| "learning_rate": 1.4853247092315843e-05, |
| "loss": 0.9510725140571594, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.34402332361516036, |
| "grad_norm": 0.04717664048075676, |
| "learning_rate": 1.484871395645525e-05, |
| "loss": 1.4734127521514893, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.3469387755102041, |
| "grad_norm": 0.7886844873428345, |
| "learning_rate": 1.4844112661291409e-05, |
| "loss": 1.3192212581634521, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.3498542274052478, |
| "grad_norm": 0.4841660261154175, |
| "learning_rate": 1.4839443254351925e-05, |
| "loss": 1.691177487373352, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.35276967930029157, |
| "grad_norm": 0.06492076069116592, |
| "learning_rate": 1.4834705783867948e-05, |
| "loss": 1.329490065574646, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.3556851311953353, |
| "grad_norm": 0.13113148510456085, |
| "learning_rate": 1.4829900298773655e-05, |
| "loss": 1.4308984279632568, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.358600583090379, |
| "grad_norm": 0.2137414813041687, |
| "learning_rate": 1.4825026848705774e-05, |
| "loss": 1.5191004276275635, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.36151603498542273, |
| "grad_norm": 0.1302558183670044, |
| "learning_rate": 1.482008548400304e-05, |
| "loss": 1.1112821102142334, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.36443148688046645, |
| "grad_norm": 0.24971581995487213, |
| "learning_rate": 1.4815076255705704e-05, |
| "loss": 1.2628142833709717, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3673469387755102, |
| "grad_norm": 0.06637357920408249, |
| "learning_rate": 1.4809999215554978e-05, |
| "loss": 1.0483888387680054, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.37026239067055394, |
| "grad_norm": 0.16134153306484222, |
| "learning_rate": 1.4804854415992531e-05, |
| "loss": 0.8284896612167358, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.37317784256559766, |
| "grad_norm": 0.22190812230110168, |
| "learning_rate": 1.479964191015992e-05, |
| "loss": 1.228007197380066, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.3760932944606414, |
| "grad_norm": 0.3965594172477722, |
| "learning_rate": 1.4794361751898052e-05, |
| "loss": 1.461411952972412, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.37900874635568516, |
| "grad_norm": 0.08565931022167206, |
| "learning_rate": 1.4789013995746636e-05, |
| "loss": 1.33036208152771, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3819241982507289, |
| "grad_norm": 0.11709296703338623, |
| "learning_rate": 1.4783598696943603e-05, |
| "loss": 1.1803240776062012, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.3848396501457726, |
| "grad_norm": 0.15489286184310913, |
| "learning_rate": 1.4778115911424552e-05, |
| "loss": 1.234659194946289, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.3877551020408163, |
| "grad_norm": 0.19184595346450806, |
| "learning_rate": 1.4772565695822158e-05, |
| "loss": 1.2707804441452026, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.39067055393586003, |
| "grad_norm": 0.1356089860200882, |
| "learning_rate": 1.4766948107465598e-05, |
| "loss": 1.192071557044983, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.3935860058309038, |
| "grad_norm": 0.11989542841911316, |
| "learning_rate": 1.476126320437995e-05, |
| "loss": 1.391566276550293, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.3965014577259475, |
| "grad_norm": 0.47645920515060425, |
| "learning_rate": 1.4755511045285605e-05, |
| "loss": 1.1564279794692993, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.39941690962099125, |
| "grad_norm": 0.4125911593437195, |
| "learning_rate": 1.4749691689597646e-05, |
| "loss": 1.536888599395752, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.40233236151603496, |
| "grad_norm": 0.08971330523490906, |
| "learning_rate": 1.4743805197425243e-05, |
| "loss": 1.2086325883865356, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.40524781341107874, |
| "grad_norm": 0.08347416669130325, |
| "learning_rate": 1.4737851629571035e-05, |
| "loss": 1.190657615661621, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.40816326530612246, |
| "grad_norm": 0.20587654411792755, |
| "learning_rate": 1.4731831047530493e-05, |
| "loss": 1.3656525611877441, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4110787172011662, |
| "grad_norm": 0.22432878613471985, |
| "learning_rate": 1.4725743513491294e-05, |
| "loss": 1.1042253971099854, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.4139941690962099, |
| "grad_norm": 0.26549288630485535, |
| "learning_rate": 1.471958909033267e-05, |
| "loss": 1.3797943592071533, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.41690962099125367, |
| "grad_norm": 0.15680500864982605, |
| "learning_rate": 1.4713367841624764e-05, |
| "loss": 1.3377087116241455, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.4198250728862974, |
| "grad_norm": 0.4737466275691986, |
| "learning_rate": 1.4707079831627975e-05, |
| "loss": 1.3034449815750122, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4227405247813411, |
| "grad_norm": 0.1271553486585617, |
| "learning_rate": 1.4700725125292288e-05, |
| "loss": 1.1474194526672363, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.42565597667638483, |
| "grad_norm": 0.06102332845330238, |
| "learning_rate": 1.469430378825661e-05, |
| "loss": 1.1918046474456787, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.15844929218292236, |
| "learning_rate": 1.4687815886848083e-05, |
| "loss": 1.206626296043396, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.4314868804664723, |
| "grad_norm": 0.24055972695350647, |
| "learning_rate": 1.4681261488081409e-05, |
| "loss": 1.5187625885009766, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.43440233236151604, |
| "grad_norm": 0.7840580344200134, |
| "learning_rate": 1.4674640659658149e-05, |
| "loss": 1.0932797193527222, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.43731778425655976, |
| "grad_norm": 0.10844213515520096, |
| "learning_rate": 1.4667953469966035e-05, |
| "loss": 1.1951229572296143, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4402332361516035, |
| "grad_norm": 0.11183289438486099, |
| "learning_rate": 1.466119998807825e-05, |
| "loss": 1.1717019081115723, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.44314868804664725, |
| "grad_norm": 0.30403003096580505, |
| "learning_rate": 1.4654380283752722e-05, |
| "loss": 1.4022222757339478, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.446064139941691, |
| "grad_norm": 0.13156169652938843, |
| "learning_rate": 1.4647494427431404e-05, |
| "loss": 1.4486730098724365, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.4489795918367347, |
| "grad_norm": 0.1186894103884697, |
| "learning_rate": 1.4640542490239546e-05, |
| "loss": 1.2088007926940918, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.4518950437317784, |
| "grad_norm": 0.3326444625854492, |
| "learning_rate": 1.4633524543984956e-05, |
| "loss": 1.3544650077819824, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.45481049562682213, |
| "grad_norm": 0.1379825323820114, |
| "learning_rate": 1.4626440661157263e-05, |
| "loss": 1.330404281616211, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.4577259475218659, |
| "grad_norm": 0.1476340889930725, |
| "learning_rate": 1.4619290914927168e-05, |
| "loss": 1.3507134914398193, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.4606413994169096, |
| "grad_norm": 0.1802261918783188, |
| "learning_rate": 1.4612075379145683e-05, |
| "loss": 1.2097649574279785, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.46355685131195334, |
| "grad_norm": 0.12077829986810684, |
| "learning_rate": 1.460479412834338e-05, |
| "loss": 1.3490198850631714, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.46647230320699706, |
| "grad_norm": 0.22901231050491333, |
| "learning_rate": 1.4597447237729602e-05, |
| "loss": 1.3041571378707886, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.46938775510204084, |
| "grad_norm": 0.1394783854484558, |
| "learning_rate": 1.4590034783191705e-05, |
| "loss": 1.3151127099990845, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.47230320699708456, |
| "grad_norm": 0.15815502405166626, |
| "learning_rate": 1.4582556841294272e-05, |
| "loss": 1.4624110460281372, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.4752186588921283, |
| "grad_norm": 0.2137562483549118, |
| "learning_rate": 1.45750134892783e-05, |
| "loss": 1.4430997371673584, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.478134110787172, |
| "grad_norm": 0.3299601376056671, |
| "learning_rate": 1.4567404805060432e-05, |
| "loss": 1.3537228107452393, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.48104956268221577, |
| "grad_norm": 0.21562345325946808, |
| "learning_rate": 1.4559730867232141e-05, |
| "loss": 1.169204592704773, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4839650145772595, |
| "grad_norm": 0.1736089438199997, |
| "learning_rate": 1.4551991755058902e-05, |
| "loss": 1.1071885824203491, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.4868804664723032, |
| "grad_norm": 0.1834300458431244, |
| "learning_rate": 1.45441875484794e-05, |
| "loss": 1.5676034688949585, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.4897959183673469, |
| "grad_norm": 0.0843748077750206, |
| "learning_rate": 1.4536318328104693e-05, |
| "loss": 1.2121503353118896, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.49271137026239065, |
| "grad_norm": 0.36758843064308167, |
| "learning_rate": 1.452838417521737e-05, |
| "loss": 1.1275235414505005, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.4956268221574344, |
| "grad_norm": 0.18445612490177155, |
| "learning_rate": 1.452038517177072e-05, |
| "loss": 1.3472223281860352, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.49854227405247814, |
| "grad_norm": 0.05781463533639908, |
| "learning_rate": 1.4512321400387896e-05, |
| "loss": 1.0872787237167358, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.5014577259475219, |
| "grad_norm": 0.19518744945526123, |
| "learning_rate": 1.4504192944361035e-05, |
| "loss": 1.1387406587600708, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.5043731778425656, |
| "grad_norm": 0.12471595406532288, |
| "learning_rate": 1.4495999887650425e-05, |
| "loss": 1.2551310062408447, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.5072886297376094, |
| "grad_norm": 0.21368560194969177, |
| "learning_rate": 1.4487742314883622e-05, |
| "loss": 1.4745806455612183, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.5102040816326531, |
| "grad_norm": 0.20728199183940887, |
| "learning_rate": 1.447942031135458e-05, |
| "loss": 1.3776572942733765, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5131195335276968, |
| "grad_norm": 0.3676038384437561, |
| "learning_rate": 1.447103396302277e-05, |
| "loss": 1.393446922302246, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5160349854227405, |
| "grad_norm": 0.4812930226325989, |
| "learning_rate": 1.4462583356512293e-05, |
| "loss": 1.6455305814743042, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.5189504373177842, |
| "grad_norm": 0.14569929242134094, |
| "learning_rate": 1.4454068579110982e-05, |
| "loss": 1.1214039325714111, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.521865889212828, |
| "grad_norm": 0.08566080778837204, |
| "learning_rate": 1.4445489718769505e-05, |
| "loss": 1.0862312316894531, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.5247813411078717, |
| "grad_norm": 0.1737866848707199, |
| "learning_rate": 1.4436846864100454e-05, |
| "loss": 1.4677766561508179, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5276967930029155, |
| "grad_norm": 0.24478068947792053, |
| "learning_rate": 1.4428140104377428e-05, |
| "loss": 1.4088914394378662, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.5306122448979592, |
| "grad_norm": 0.07167135179042816, |
| "learning_rate": 1.4419369529534117e-05, |
| "loss": 1.0589109659194946, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.5335276967930029, |
| "grad_norm": 0.4344414472579956, |
| "learning_rate": 1.4410535230163361e-05, |
| "loss": 1.0916839838027954, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.5364431486880467, |
| "grad_norm": 0.1588602066040039, |
| "learning_rate": 1.440163729751623e-05, |
| "loss": 1.2339898347854614, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.5393586005830904, |
| "grad_norm": 0.08355646580457687, |
| "learning_rate": 1.4392675823501075e-05, |
| "loss": 1.0559823513031006, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5422740524781341, |
| "grad_norm": 0.09950409084558487, |
| "learning_rate": 1.4383650900682563e-05, |
| "loss": 1.1664844751358032, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.5451895043731778, |
| "grad_norm": 0.21663829684257507, |
| "learning_rate": 1.4374562622280753e-05, |
| "loss": 1.2800816297531128, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.5481049562682215, |
| "grad_norm": 0.45721420645713806, |
| "learning_rate": 1.4365411082170105e-05, |
| "loss": 1.0968526601791382, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.5510204081632653, |
| "grad_norm": 0.34029263257980347, |
| "learning_rate": 1.435619637487852e-05, |
| "loss": 1.4795793294906616, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.5539358600583091, |
| "grad_norm": 0.07205039262771606, |
| "learning_rate": 1.4346918595586371e-05, |
| "loss": 0.8370588421821594, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5568513119533528, |
| "grad_norm": 0.12168021500110626, |
| "learning_rate": 1.4337577840125506e-05, |
| "loss": 1.2106021642684937, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.5597667638483965, |
| "grad_norm": 0.32209160923957825, |
| "learning_rate": 1.4328174204978268e-05, |
| "loss": 1.321066975593567, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.5626822157434402, |
| "grad_norm": 0.2250237762928009, |
| "learning_rate": 1.4318707787276499e-05, |
| "loss": 1.292655348777771, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.565597667638484, |
| "grad_norm": 0.2742823362350464, |
| "learning_rate": 1.4309178684800527e-05, |
| "loss": 1.2520337104797363, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.5685131195335277, |
| "grad_norm": 0.27688226103782654, |
| "learning_rate": 1.4299586995978166e-05, |
| "loss": 1.38676917552948, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.2949990928173065, |
| "learning_rate": 1.4289932819883696e-05, |
| "loss": 0.8451089262962341, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.5743440233236151, |
| "grad_norm": 0.1089571937918663, |
| "learning_rate": 1.4280216256236834e-05, |
| "loss": 1.2847154140472412, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.577259475218659, |
| "grad_norm": 0.19184090197086334, |
| "learning_rate": 1.427043740540172e-05, |
| "loss": 1.387587547302246, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.5801749271137027, |
| "grad_norm": 0.54814612865448, |
| "learning_rate": 1.4260596368385856e-05, |
| "loss": 1.3909755945205688, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.5830903790087464, |
| "grad_norm": 0.12275420129299164, |
| "learning_rate": 1.4250693246839092e-05, |
| "loss": 1.2625775337219238, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5860058309037901, |
| "grad_norm": 0.7932881712913513, |
| "learning_rate": 1.4240728143052544e-05, |
| "loss": 1.2152988910675049, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.5889212827988338, |
| "grad_norm": 0.37155717611312866, |
| "learning_rate": 1.4230701159957563e-05, |
| "loss": 1.3423740863800049, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.5918367346938775, |
| "grad_norm": 0.18500366806983948, |
| "learning_rate": 1.4220612401124663e-05, |
| "loss": 1.3449385166168213, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.5947521865889213, |
| "grad_norm": 0.11731770634651184, |
| "learning_rate": 1.4210461970762447e-05, |
| "loss": 1.1119245290756226, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.597667638483965, |
| "grad_norm": 0.10353056341409683, |
| "learning_rate": 1.4200249973716534e-05, |
| "loss": 1.263884425163269, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6005830903790087, |
| "grad_norm": 0.14419683814048767, |
| "learning_rate": 1.418997651546848e-05, |
| "loss": 1.307144284248352, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6034985422740525, |
| "grad_norm": 0.10403470695018768, |
| "learning_rate": 1.4179641702134683e-05, |
| "loss": 1.1156686544418335, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.6064139941690962, |
| "grad_norm": 0.14356708526611328, |
| "learning_rate": 1.4169245640465292e-05, |
| "loss": 1.1539418697357178, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.60932944606414, |
| "grad_norm": 0.20612405240535736, |
| "learning_rate": 1.415878843784309e-05, |
| "loss": 1.2595444917678833, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.6122448979591837, |
| "grad_norm": 0.11746654659509659, |
| "learning_rate": 1.414827020228241e-05, |
| "loss": 1.2829625606536865, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6151603498542274, |
| "grad_norm": 0.16831901669502258, |
| "learning_rate": 1.4137691042427996e-05, |
| "loss": 1.3437942266464233, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.6180758017492711, |
| "grad_norm": 0.35040462017059326, |
| "learning_rate": 1.4127051067553895e-05, |
| "loss": 1.4076067209243774, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.6209912536443148, |
| "grad_norm": 0.061461448669433594, |
| "learning_rate": 1.4116350387562316e-05, |
| "loss": 1.0884675979614258, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.6239067055393586, |
| "grad_norm": 0.15810243785381317, |
| "learning_rate": 1.4105589112982514e-05, |
| "loss": 1.2547569274902344, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.6268221574344023, |
| "grad_norm": 0.8622474074363708, |
| "learning_rate": 1.4094767354969625e-05, |
| "loss": 1.3274284601211548, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6297376093294461, |
| "grad_norm": 0.13593973219394684, |
| "learning_rate": 1.4083885225303535e-05, |
| "loss": 1.2320295572280884, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.6326530612244898, |
| "grad_norm": 0.07243333756923676, |
| "learning_rate": 1.407294283638772e-05, |
| "loss": 1.4667418003082275, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.6355685131195336, |
| "grad_norm": 0.07801775634288788, |
| "learning_rate": 1.406194030124808e-05, |
| "loss": 1.3038822412490845, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6384839650145773, |
| "grad_norm": 0.304385781288147, |
| "learning_rate": 1.4050877733531783e-05, |
| "loss": 1.3447275161743164, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.641399416909621, |
| "grad_norm": 0.10865950584411621, |
| "learning_rate": 1.4039755247506077e-05, |
| "loss": 0.6549509167671204, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6443148688046647, |
| "grad_norm": 0.28575700521469116, |
| "learning_rate": 1.4028572958057122e-05, |
| "loss": 1.1795369386672974, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.6472303206997084, |
| "grad_norm": 0.5246424078941345, |
| "learning_rate": 1.4017330980688798e-05, |
| "loss": 1.1711264848709106, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.6501457725947521, |
| "grad_norm": 0.18553860485553741, |
| "learning_rate": 1.400602943152151e-05, |
| "loss": 1.2232381105422974, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.6530612244897959, |
| "grad_norm": 0.12490701675415039, |
| "learning_rate": 1.3994668427290992e-05, |
| "loss": 1.3382079601287842, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.6559766763848397, |
| "grad_norm": 0.22397291660308838, |
| "learning_rate": 1.3983248085347099e-05, |
| "loss": 1.3612568378448486, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6588921282798834, |
| "grad_norm": 0.35306331515312195, |
| "learning_rate": 1.3971768523652598e-05, |
| "loss": 1.2464739084243774, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.6618075801749271, |
| "grad_norm": 0.2772669494152069, |
| "learning_rate": 1.3960229860781952e-05, |
| "loss": 1.2844020128250122, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.6647230320699709, |
| "grad_norm": 0.10081592947244644, |
| "learning_rate": 1.3948632215920074e-05, |
| "loss": 1.2844829559326172, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.6676384839650146, |
| "grad_norm": 0.4896067678928375, |
| "learning_rate": 1.3936975708861129e-05, |
| "loss": 1.2661151885986328, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.6705539358600583, |
| "grad_norm": 0.09726856648921967, |
| "learning_rate": 1.3925260460007276e-05, |
| "loss": 1.3103440999984741, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.673469387755102, |
| "grad_norm": 0.15830014646053314, |
| "learning_rate": 1.3913486590367426e-05, |
| "loss": 1.2458621263504028, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.6763848396501457, |
| "grad_norm": 0.3230348229408264, |
| "learning_rate": 1.3901654221555998e-05, |
| "loss": 1.534423589706421, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.6793002915451894, |
| "grad_norm": 0.1509629487991333, |
| "learning_rate": 1.3889763475791653e-05, |
| "loss": 1.2820494174957275, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.6822157434402333, |
| "grad_norm": 0.34530624747276306, |
| "learning_rate": 1.3877814475896049e-05, |
| "loss": 1.2601618766784668, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.685131195335277, |
| "grad_norm": 0.10481005907058716, |
| "learning_rate": 1.3865807345292548e-05, |
| "loss": 1.1044316291809082, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.6880466472303207, |
| "grad_norm": 0.07815049588680267, |
| "learning_rate": 1.3853742208004967e-05, |
| "loss": 0.741702139377594, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.6909620991253644, |
| "grad_norm": 0.22590938210487366, |
| "learning_rate": 1.3841619188656277e-05, |
| "loss": 1.2955025434494019, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.6938775510204082, |
| "grad_norm": 0.08640377968549728, |
| "learning_rate": 1.3829438412467324e-05, |
| "loss": 1.1016216278076172, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.6967930029154519, |
| "grad_norm": 0.09496122598648071, |
| "learning_rate": 1.3817200005255538e-05, |
| "loss": 1.1232506036758423, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.6997084548104956, |
| "grad_norm": 0.07495642453432083, |
| "learning_rate": 1.380490409343363e-05, |
| "loss": 1.2044416666030884, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7026239067055393, |
| "grad_norm": 0.339239239692688, |
| "learning_rate": 1.3792550804008275e-05, |
| "loss": 1.2485543489456177, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.7055393586005831, |
| "grad_norm": 0.17572255432605743, |
| "learning_rate": 1.3780140264578833e-05, |
| "loss": 1.2681964635849, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.7084548104956269, |
| "grad_norm": 0.16934579610824585, |
| "learning_rate": 1.3767672603335994e-05, |
| "loss": 1.4810711145401, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.7113702623906706, |
| "grad_norm": 0.04486797749996185, |
| "learning_rate": 1.375514794906047e-05, |
| "loss": 1.046045184135437, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.700762927532196, |
| "learning_rate": 1.374256643112167e-05, |
| "loss": 1.0363354682922363, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.717201166180758, |
| "grad_norm": 0.2569397985935211, |
| "learning_rate": 1.3729928179476355e-05, |
| "loss": 1.3074244260787964, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.7201166180758017, |
| "grad_norm": 0.20563913881778717, |
| "learning_rate": 1.3717233324667303e-05, |
| "loss": 1.1921494007110596, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.7230320699708455, |
| "grad_norm": 0.201784148812294, |
| "learning_rate": 1.3704481997821944e-05, |
| "loss": 1.3657381534576416, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.7259475218658892, |
| "grad_norm": 0.20627616345882416, |
| "learning_rate": 1.3691674330651038e-05, |
| "loss": 1.062203288078308, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.7288629737609329, |
| "grad_norm": 0.04925013706088066, |
| "learning_rate": 1.3678810455447272e-05, |
| "loss": 1.0565184354782104, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7317784256559767, |
| "grad_norm": 0.2994559407234192, |
| "learning_rate": 1.3665890505083932e-05, |
| "loss": 0.7342221140861511, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.7346938775510204, |
| "grad_norm": 0.2312147170305252, |
| "learning_rate": 1.365291461301351e-05, |
| "loss": 1.1462215185165405, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.7376093294460642, |
| "grad_norm": 0.1264645904302597, |
| "learning_rate": 1.3639882913266321e-05, |
| "loss": 1.2779966592788696, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.7405247813411079, |
| "grad_norm": 0.09908440709114075, |
| "learning_rate": 1.3626795540449146e-05, |
| "loss": 1.0050630569458008, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.7434402332361516, |
| "grad_norm": 0.0948040708899498, |
| "learning_rate": 1.3613652629743807e-05, |
| "loss": 0.9955649375915527, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7463556851311953, |
| "grad_norm": 0.442697137594223, |
| "learning_rate": 1.3600454316905794e-05, |
| "loss": 1.2189491987228394, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.749271137026239, |
| "grad_norm": 0.08219840377569199, |
| "learning_rate": 1.3587200738262852e-05, |
| "loss": 1.2169828414916992, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.7521865889212828, |
| "grad_norm": 0.39055153727531433, |
| "learning_rate": 1.3573892030713581e-05, |
| "loss": 1.1840598583221436, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.7551020408163265, |
| "grad_norm": 0.16979742050170898, |
| "learning_rate": 1.3560528331726012e-05, |
| "loss": 1.2608612775802612, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.7580174927113703, |
| "grad_norm": 0.18750780820846558, |
| "learning_rate": 1.3547109779336198e-05, |
| "loss": 1.0730546712875366, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.760932944606414, |
| "grad_norm": 0.16917291283607483, |
| "learning_rate": 1.3533636512146778e-05, |
| "loss": 0.8358052968978882, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.7638483965014577, |
| "grad_norm": 0.21615351736545563, |
| "learning_rate": 1.3520108669325555e-05, |
| "loss": 1.2778382301330566, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.7667638483965015, |
| "grad_norm": 0.2199150174856186, |
| "learning_rate": 1.350652639060405e-05, |
| "loss": 1.3584939241409302, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.7696793002915452, |
| "grad_norm": 0.12701602280139923, |
| "learning_rate": 1.3492889816276057e-05, |
| "loss": 1.2652432918548584, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.7725947521865889, |
| "grad_norm": 0.2043219953775406, |
| "learning_rate": 1.3479199087196211e-05, |
| "loss": 0.9363166093826294, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7755102040816326, |
| "grad_norm": 0.25679811835289, |
| "learning_rate": 1.3465454344778514e-05, |
| "loss": 1.30280601978302, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.7784256559766763, |
| "grad_norm": 0.1782459169626236, |
| "learning_rate": 1.3451655730994879e-05, |
| "loss": 0.8852262496948242, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.7813411078717201, |
| "grad_norm": 0.15585428476333618, |
| "learning_rate": 1.3437803388373673e-05, |
| "loss": 1.2652050256729126, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.7842565597667639, |
| "grad_norm": 0.28724268078804016, |
| "learning_rate": 1.3423897459998234e-05, |
| "loss": 1.5547116994857788, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.7871720116618076, |
| "grad_norm": 0.2500779628753662, |
| "learning_rate": 1.3409938089505396e-05, |
| "loss": 1.2525265216827393, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7900874635568513, |
| "grad_norm": 0.45470234751701355, |
| "learning_rate": 1.3395925421084008e-05, |
| "loss": 1.2771704196929932, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.793002915451895, |
| "grad_norm": 0.29030269384384155, |
| "learning_rate": 1.3381859599473444e-05, |
| "loss": 1.17940354347229, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.7959183673469388, |
| "grad_norm": 0.49152040481567383, |
| "learning_rate": 1.3367740769962097e-05, |
| "loss": 1.2586897611618042, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.7988338192419825, |
| "grad_norm": 0.6251534819602966, |
| "learning_rate": 1.335356907838591e-05, |
| "loss": 1.15794837474823, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.8017492711370262, |
| "grad_norm": 1.237188696861267, |
| "learning_rate": 1.3339344671126823e-05, |
| "loss": 1.2396069765090942, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8046647230320699, |
| "grad_norm": 0.18844130635261536, |
| "learning_rate": 1.3325067695111302e-05, |
| "loss": 1.3848127126693726, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.8075801749271136, |
| "grad_norm": 0.0720212385058403, |
| "learning_rate": 1.3310738297808797e-05, |
| "loss": 1.2827481031417847, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.8104956268221575, |
| "grad_norm": 0.30795788764953613, |
| "learning_rate": 1.3296356627230233e-05, |
| "loss": 1.2539678812026978, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.8134110787172012, |
| "grad_norm": 0.12987054884433746, |
| "learning_rate": 1.328192283192647e-05, |
| "loss": 1.1838477849960327, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.8163265306122449, |
| "grad_norm": 0.11866369843482971, |
| "learning_rate": 1.3267437060986776e-05, |
| "loss": 1.2138683795928955, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8192419825072886, |
| "grad_norm": 1.3589751720428467, |
| "learning_rate": 1.3252899464037285e-05, |
| "loss": 1.241382122039795, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.8221574344023324, |
| "grad_norm": 0.11315155029296875, |
| "learning_rate": 1.3238310191239449e-05, |
| "loss": 1.2092612981796265, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.8250728862973761, |
| "grad_norm": 0.16663309931755066, |
| "learning_rate": 1.3223669393288492e-05, |
| "loss": 1.3294919729232788, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.8279883381924198, |
| "grad_norm": 0.18580849468708038, |
| "learning_rate": 1.320897722141185e-05, |
| "loss": 1.165387749671936, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.8309037900874635, |
| "grad_norm": 0.14969834685325623, |
| "learning_rate": 1.3194233827367605e-05, |
| "loss": 1.1585993766784668, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8338192419825073, |
| "grad_norm": 0.18476836383342743, |
| "learning_rate": 1.317943936344293e-05, |
| "loss": 1.2080127000808716, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.8367346938775511, |
| "grad_norm": 0.19693532586097717, |
| "learning_rate": 1.3164593982452502e-05, |
| "loss": 1.4070855379104614, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.8396501457725948, |
| "grad_norm": 0.3612503111362457, |
| "learning_rate": 1.3149697837736932e-05, |
| "loss": 1.375995397567749, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.8425655976676385, |
| "grad_norm": 0.2689799964427948, |
| "learning_rate": 1.3134751083161177e-05, |
| "loss": 1.5882023572921753, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.8454810495626822, |
| "grad_norm": 0.45044106245040894, |
| "learning_rate": 1.3119753873112952e-05, |
| "loss": 1.530938744544983, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.8483965014577259, |
| "grad_norm": 0.15131127834320068, |
| "learning_rate": 1.3104706362501138e-05, |
| "loss": 1.1275839805603027, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.8513119533527697, |
| "grad_norm": 0.12577542662620544, |
| "learning_rate": 1.3089608706754179e-05, |
| "loss": 1.4129434823989868, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.8542274052478134, |
| "grad_norm": 0.2110750824213028, |
| "learning_rate": 1.3074461061818475e-05, |
| "loss": 1.1559196710586548, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.21649499237537384, |
| "learning_rate": 1.3059263584156778e-05, |
| "loss": 1.3160138130187988, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.8600583090379009, |
| "grad_norm": 0.24884088337421417, |
| "learning_rate": 1.3044016430746563e-05, |
| "loss": 1.362827181816101, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.8629737609329446, |
| "grad_norm": 0.13489077985286713, |
| "learning_rate": 1.3028719759078428e-05, |
| "loss": 0.9931049942970276, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.8658892128279884, |
| "grad_norm": 0.09495119750499725, |
| "learning_rate": 1.3013373727154437e-05, |
| "loss": 1.088317632675171, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.8688046647230321, |
| "grad_norm": 0.08689741790294647, |
| "learning_rate": 1.2997978493486516e-05, |
| "loss": 1.135114312171936, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.8717201166180758, |
| "grad_norm": 0.11740924417972565, |
| "learning_rate": 1.2982534217094805e-05, |
| "loss": 1.1683244705200195, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.8746355685131195, |
| "grad_norm": 0.19883382320404053, |
| "learning_rate": 1.2967041057506012e-05, |
| "loss": 1.200365662574768, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.8775510204081632, |
| "grad_norm": 0.1676117181777954, |
| "learning_rate": 1.2951499174751767e-05, |
| "loss": 1.17380952835083, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.880466472303207, |
| "grad_norm": 0.10896378010511398, |
| "learning_rate": 1.2935908729366975e-05, |
| "loss": 1.1691476106643677, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.8833819241982507, |
| "grad_norm": 0.48385846614837646, |
| "learning_rate": 1.2920269882388147e-05, |
| "loss": 1.2547780275344849, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.8862973760932945, |
| "grad_norm": 0.5236583352088928, |
| "learning_rate": 1.290458279535175e-05, |
| "loss": 0.9720197916030884, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.8892128279883382, |
| "grad_norm": 0.14302794635295868, |
| "learning_rate": 1.2888847630292523e-05, |
| "loss": 0.7114431858062744, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.892128279883382, |
| "grad_norm": 0.24016736447811127, |
| "learning_rate": 1.287306454974182e-05, |
| "loss": 1.1511893272399902, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.8950437317784257, |
| "grad_norm": 0.23368032276630402, |
| "learning_rate": 1.2857233716725915e-05, |
| "loss": 1.270735740661621, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.8979591836734694, |
| "grad_norm": 0.31318148970603943, |
| "learning_rate": 1.2841355294764332e-05, |
| "loss": 0.9339938163757324, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.9008746355685131, |
| "grad_norm": 0.14631935954093933, |
| "learning_rate": 1.2825429447868144e-05, |
| "loss": 1.0888878107070923, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.9037900874635568, |
| "grad_norm": 0.05644264817237854, |
| "learning_rate": 1.2809456340538295e-05, |
| "loss": 0.6944148540496826, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.9067055393586005, |
| "grad_norm": 0.5780438780784607, |
| "learning_rate": 1.2793436137763877e-05, |
| "loss": 1.4030423164367676, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.9096209912536443, |
| "grad_norm": 0.25053542852401733, |
| "learning_rate": 1.2777369005020443e-05, |
| "loss": 1.366930603981018, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.9125364431486881, |
| "grad_norm": 0.668838381767273, |
| "learning_rate": 1.2761255108268305e-05, |
| "loss": 1.4005160331726074, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.9154518950437318, |
| "grad_norm": 0.39348724484443665, |
| "learning_rate": 1.2745094613950798e-05, |
| "loss": 1.3920326232910156, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.9183673469387755, |
| "grad_norm": 0.21188022196292877, |
| "learning_rate": 1.2728887688992571e-05, |
| "loss": 1.2693376541137695, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.9212827988338192, |
| "grad_norm": 0.13943858444690704, |
| "learning_rate": 1.2712634500797868e-05, |
| "loss": 1.3852614164352417, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.924198250728863, |
| "grad_norm": 0.09973420947790146, |
| "learning_rate": 1.2696335217248797e-05, |
| "loss": 1.0728514194488525, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.9271137026239067, |
| "grad_norm": 0.0977744311094284, |
| "learning_rate": 1.2679990006703583e-05, |
| "loss": 1.1080187559127808, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.9300291545189504, |
| "grad_norm": 0.09669560194015503, |
| "learning_rate": 1.2663599037994848e-05, |
| "loss": 1.101372480392456, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.9329446064139941, |
| "grad_norm": 0.2537369430065155, |
| "learning_rate": 1.264716248042786e-05, |
| "loss": 1.2607650756835938, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.9358600583090378, |
| "grad_norm": 0.10567066818475723, |
| "learning_rate": 1.263068050377877e-05, |
| "loss": 1.176032304763794, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.9387755102040817, |
| "grad_norm": 0.23190894722938538, |
| "learning_rate": 1.2614153278292888e-05, |
| "loss": 1.569797158241272, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.9416909620991254, |
| "grad_norm": 0.11260157078504562, |
| "learning_rate": 1.259758097468289e-05, |
| "loss": 1.124619960784912, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.9446064139941691, |
| "grad_norm": 0.10838615894317627, |
| "learning_rate": 1.2580963764127086e-05, |
| "loss": 1.0758150815963745, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.9475218658892128, |
| "grad_norm": 0.862457275390625, |
| "learning_rate": 1.2564301818267634e-05, |
| "loss": 0.809301495552063, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.9504373177842566, |
| "grad_norm": 0.13666097819805145, |
| "learning_rate": 1.2547595309208762e-05, |
| "loss": 1.1373188495635986, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.9533527696793003, |
| "grad_norm": 0.14616422355175018, |
| "learning_rate": 1.2530844409515015e-05, |
| "loss": 1.0827115774154663, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.956268221574344, |
| "grad_norm": 0.10559694468975067, |
| "learning_rate": 1.2514049292209443e-05, |
| "loss": 0.9751679301261902, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.9591836734693877, |
| "grad_norm": 0.08088317513465881, |
| "learning_rate": 1.2497210130771838e-05, |
| "loss": 1.495046854019165, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.9620991253644315, |
| "grad_norm": 0.6228170990943909, |
| "learning_rate": 1.2480327099136921e-05, |
| "loss": 1.2217864990234375, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.9650145772594753, |
| "grad_norm": 0.29220765829086304, |
| "learning_rate": 1.2463400371692567e-05, |
| "loss": 1.3038297891616821, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.967930029154519, |
| "grad_norm": 0.1476386934518814, |
| "learning_rate": 1.2446430123277989e-05, |
| "loss": 1.0814988613128662, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.9708454810495627, |
| "grad_norm": 0.5601685643196106, |
| "learning_rate": 1.2429416529181928e-05, |
| "loss": 1.3198177814483643, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.9737609329446064, |
| "grad_norm": 0.11794130504131317, |
| "learning_rate": 1.2412359765140863e-05, |
| "loss": 1.2900370359420776, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.9766763848396501, |
| "grad_norm": 0.1333070546388626, |
| "learning_rate": 1.2395260007337178e-05, |
| "loss": 1.0969475507736206, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.9795918367346939, |
| "grad_norm": 0.2164296805858612, |
| "learning_rate": 1.2378117432397344e-05, |
| "loss": 1.3217947483062744, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.9825072886297376, |
| "grad_norm": 0.1207147017121315, |
| "learning_rate": 1.2360932217390101e-05, |
| "loss": 1.1721763610839844, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.9854227405247813, |
| "grad_norm": 0.19854536652565002, |
| "learning_rate": 1.2343704539824629e-05, |
| "loss": 0.8384242057800293, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.9883381924198251, |
| "grad_norm": 0.11634889990091324, |
| "learning_rate": 1.2326434577648703e-05, |
| "loss": 0.5937544107437134, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.9912536443148688, |
| "grad_norm": 0.21319809556007385, |
| "learning_rate": 1.2309122509246873e-05, |
| "loss": 1.211629033088684, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.9941690962099126, |
| "grad_norm": 0.0654364675283432, |
| "learning_rate": 1.2291768513438603e-05, |
| "loss": 1.155535340309143, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.9970845481049563, |
| "grad_norm": 0.25669339299201965, |
| "learning_rate": 1.2274372769476438e-05, |
| "loss": 1.164899230003357, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.12079296261072159, |
| "learning_rate": 1.2256935457044149e-05, |
| "loss": 1.3323872089385986, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.0029154518950438, |
| "grad_norm": 0.15898126363754272, |
| "learning_rate": 1.223945675625487e-05, |
| "loss": 0.9407209753990173, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.0058309037900874, |
| "grad_norm": 0.27969345450401306, |
| "learning_rate": 1.2221936847649244e-05, |
| "loss": 1.1378577947616577, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.0087463556851313, |
| "grad_norm": 0.25754043459892273, |
| "learning_rate": 1.220437591219356e-05, |
| "loss": 1.4397190809249878, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.0116618075801749, |
| "grad_norm": 0.10848913341760635, |
| "learning_rate": 1.2186774131277878e-05, |
| "loss": 1.1280958652496338, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.0145772594752187, |
| "grad_norm": 0.1306256800889969, |
| "learning_rate": 1.2169131686714156e-05, |
| "loss": 1.099426031112671, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.0174927113702623, |
| "grad_norm": 0.4202571511268616, |
| "learning_rate": 1.2151448760734381e-05, |
| "loss": 1.1389104127883911, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.0204081632653061, |
| "grad_norm": 0.24799339473247528, |
| "learning_rate": 1.2133725535988675e-05, |
| "loss": 1.1550320386886597, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.0233236151603498, |
| "grad_norm": 0.3226027190685272, |
| "learning_rate": 1.211596219554341e-05, |
| "loss": 1.3826884031295776, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.0262390670553936, |
| "grad_norm": 0.16781915724277496, |
| "learning_rate": 1.209815892287933e-05, |
| "loss": 1.2842170000076294, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.0291545189504374, |
| "grad_norm": 0.08502925932407379, |
| "learning_rate": 1.2080315901889638e-05, |
| "loss": 1.3487895727157593, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.032069970845481, |
| "grad_norm": 0.16372652351856232, |
| "learning_rate": 1.2062433316878107e-05, |
| "loss": 1.0846039056777954, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.0349854227405249, |
| "grad_norm": 0.2926742434501648, |
| "learning_rate": 1.204451135255717e-05, |
| "loss": 1.3418132066726685, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.0379008746355685, |
| "grad_norm": 0.13081398606300354, |
| "learning_rate": 1.2026550194046027e-05, |
| "loss": 1.2699744701385498, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.0408163265306123, |
| "grad_norm": 0.3602919578552246, |
| "learning_rate": 1.2008550026868707e-05, |
| "loss": 1.1103326082229614, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.043731778425656, |
| "grad_norm": 0.24668650329113007, |
| "learning_rate": 1.1990511036952182e-05, |
| "loss": 1.1811496019363403, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.0466472303206997, |
| "grad_norm": 0.2009333372116089, |
| "learning_rate": 1.1972433410624415e-05, |
| "loss": 1.3141359090805054, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.0495626822157433, |
| "grad_norm": 0.4131545126438141, |
| "learning_rate": 1.1954317334612466e-05, |
| "loss": 1.1311266422271729, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.0524781341107872, |
| "grad_norm": 0.26808369159698486, |
| "learning_rate": 1.193616299604054e-05, |
| "loss": 1.2641208171844482, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.055393586005831, |
| "grad_norm": 0.18929173052310944, |
| "learning_rate": 1.1917970582428065e-05, |
| "loss": 1.022256851196289, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.0583090379008746, |
| "grad_norm": 0.07950548082590103, |
| "learning_rate": 1.1899740281687752e-05, |
| "loss": 1.1594070196151733, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.0612244897959184, |
| "grad_norm": 0.3975690007209778, |
| "learning_rate": 1.1881472282123659e-05, |
| "loss": 1.09200918674469, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.064139941690962, |
| "grad_norm": 0.1322367936372757, |
| "learning_rate": 1.1863166772429237e-05, |
| "loss": 1.144595980644226, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.0670553935860059, |
| "grad_norm": 0.13084831833839417, |
| "learning_rate": 1.1844823941685388e-05, |
| "loss": 1.233044981956482, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.0699708454810495, |
| "grad_norm": 0.17538310587406158, |
| "learning_rate": 1.1826443979358511e-05, |
| "loss": 0.648325502872467, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.0728862973760933, |
| "grad_norm": 0.1613551825284958, |
| "learning_rate": 1.1808027075298542e-05, |
| "loss": 1.339321255683899, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.075801749271137, |
| "grad_norm": 0.062147416174411774, |
| "learning_rate": 1.1789573419736995e-05, |
| "loss": 1.0158833265304565, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.0787172011661808, |
| "grad_norm": 0.2725241184234619, |
| "learning_rate": 1.1771083203284994e-05, |
| "loss": 1.049664855003357, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.0816326530612246, |
| "grad_norm": 0.14118708670139313, |
| "learning_rate": 1.1752556616931319e-05, |
| "loss": 1.4558746814727783, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.0845481049562682, |
| "grad_norm": 0.12485146522521973, |
| "learning_rate": 1.17339938520404e-05, |
| "loss": 1.067897081375122, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.087463556851312, |
| "grad_norm": 0.14729249477386475, |
| "learning_rate": 1.1715395100350386e-05, |
| "loss": 1.2803950309753418, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.0903790087463556, |
| "grad_norm": 0.2967908978462219, |
| "learning_rate": 1.1696760553971122e-05, |
| "loss": 1.4100807905197144, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.0932944606413995, |
| "grad_norm": 0.18390890955924988, |
| "learning_rate": 1.1678090405382191e-05, |
| "loss": 1.0381572246551514, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.096209912536443, |
| "grad_norm": 0.08851258456707001, |
| "learning_rate": 1.1659384847430916e-05, |
| "loss": 1.2206934690475464, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.099125364431487, |
| "grad_norm": 0.1275774985551834, |
| "learning_rate": 1.1640644073330365e-05, |
| "loss": 1.258091688156128, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.1020408163265305, |
| "grad_norm": 0.3569571077823639, |
| "learning_rate": 1.1621868276657371e-05, |
| "loss": 1.2325845956802368, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.1049562682215743, |
| "grad_norm": 0.2721734642982483, |
| "learning_rate": 1.1603057651350508e-05, |
| "loss": 1.0642601251602173, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.1078717201166182, |
| "grad_norm": 0.2617255449295044, |
| "learning_rate": 1.158421239170811e-05, |
| "loss": 1.3023701906204224, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.1107871720116618, |
| "grad_norm": 0.1031145453453064, |
| "learning_rate": 1.156533269238626e-05, |
| "loss": 0.8144070506095886, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.1137026239067056, |
| "grad_norm": 0.1646541804075241, |
| "learning_rate": 1.1546418748396758e-05, |
| "loss": 1.0213180780410767, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.1166180758017492, |
| "grad_norm": 0.3250854015350342, |
| "learning_rate": 1.1527470755105138e-05, |
| "loss": 0.9498108625411987, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.119533527696793, |
| "grad_norm": 0.10029526799917221, |
| "learning_rate": 1.1508488908228629e-05, |
| "loss": 1.1771409511566162, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.1224489795918366, |
| "grad_norm": 0.09416939318180084, |
| "learning_rate": 1.1489473403834142e-05, |
| "loss": 0.5949094891548157, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.1253644314868805, |
| "grad_norm": 0.20775017142295837, |
| "learning_rate": 1.1470424438336244e-05, |
| "loss": 0.8676192760467529, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.128279883381924, |
| "grad_norm": 0.24049599468708038, |
| "learning_rate": 1.145134220849512e-05, |
| "loss": 1.1979655027389526, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.131195335276968, |
| "grad_norm": 0.320576548576355, |
| "learning_rate": 1.1432226911414561e-05, |
| "loss": 1.150422215461731, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.1341107871720117, |
| "grad_norm": 0.08741223067045212, |
| "learning_rate": 1.1413078744539906e-05, |
| "loss": 1.1655181646347046, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.1370262390670554, |
| "grad_norm": 0.13662189245224, |
| "learning_rate": 1.139389790565601e-05, |
| "loss": 1.1560207605361938, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.1399416909620992, |
| "grad_norm": 0.1589939296245575, |
| "learning_rate": 1.1374684592885214e-05, |
| "loss": 1.3467984199523926, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 0.29279693961143494, |
| "learning_rate": 1.1355439004685278e-05, |
| "loss": 1.0917768478393555, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.1457725947521866, |
| "grad_norm": 0.5396981835365295, |
| "learning_rate": 1.1336161339847343e-05, |
| "loss": 1.131831169128418, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.1486880466472302, |
| "grad_norm": 1.319527506828308, |
| "learning_rate": 1.1316851797493877e-05, |
| "loss": 1.287348747253418, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.151603498542274, |
| "grad_norm": 0.24090451002120972, |
| "learning_rate": 1.1297510577076617e-05, |
| "loss": 1.196481466293335, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.1545189504373177, |
| "grad_norm": 0.15632812678813934, |
| "learning_rate": 1.1278137878374507e-05, |
| "loss": 1.2842094898223877, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.1574344023323615, |
| "grad_norm": 0.1558282971382141, |
| "learning_rate": 1.1258733901491634e-05, |
| "loss": 1.160306453704834, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.1603498542274053, |
| "grad_norm": 0.0693809762597084, |
| "learning_rate": 1.1239298846855166e-05, |
| "loss": 1.3671103715896606, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.163265306122449, |
| "grad_norm": 0.11606906354427338, |
| "learning_rate": 1.121983291521328e-05, |
| "loss": 1.2540158033370972, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.1661807580174928, |
| "grad_norm": 0.5656346082687378, |
| "learning_rate": 1.1200336307633083e-05, |
| "loss": 1.095619797706604, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.1690962099125364, |
| "grad_norm": 0.3416520953178406, |
| "learning_rate": 1.1180809225498542e-05, |
| "loss": 1.33209228515625, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.1720116618075802, |
| "grad_norm": 0.14092491567134857, |
| "learning_rate": 1.11612518705084e-05, |
| "loss": 1.121877670288086, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.1749271137026238, |
| "grad_norm": 0.26185205578804016, |
| "learning_rate": 1.1141664444674091e-05, |
| "loss": 1.3565205335617065, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.1778425655976676, |
| "grad_norm": 0.15331599116325378, |
| "learning_rate": 1.1122047150317665e-05, |
| "loss": 0.7860437631607056, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.1807580174927113, |
| "grad_norm": 0.25274330377578735, |
| "learning_rate": 1.110240019006968e-05, |
| "loss": 0.7633789777755737, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.183673469387755, |
| "grad_norm": 0.1963554322719574, |
| "learning_rate": 1.1082723766867123e-05, |
| "loss": 1.133277177810669, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.186588921282799, |
| "grad_norm": 0.33926016092300415, |
| "learning_rate": 1.1063018083951309e-05, |
| "loss": 1.0211750268936157, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.1895043731778425, |
| "grad_norm": 0.23344306647777557, |
| "learning_rate": 1.1043283344865776e-05, |
| "loss": 1.1373283863067627, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.1924198250728864, |
| "grad_norm": 0.2557908594608307, |
| "learning_rate": 1.1023519753454203e-05, |
| "loss": 0.9404536485671997, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.19533527696793, |
| "grad_norm": 1.4168596267700195, |
| "learning_rate": 1.1003727513858268e-05, |
| "loss": 1.1765224933624268, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.1982507288629738, |
| "grad_norm": 0.13063687086105347, |
| "learning_rate": 1.0983906830515584e-05, |
| "loss": 1.222176432609558, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.2011661807580174, |
| "grad_norm": 0.07739931344985962, |
| "learning_rate": 1.0964057908157548e-05, |
| "loss": 1.151648998260498, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.2040816326530612, |
| "grad_norm": 0.07822076976299286, |
| "learning_rate": 1.094418095180725e-05, |
| "loss": 1.061394453048706, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.2069970845481048, |
| "grad_norm": 0.14568239450454712, |
| "learning_rate": 1.0924276166777349e-05, |
| "loss": 0.7191852927207947, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.2099125364431487, |
| "grad_norm": 0.30981016159057617, |
| "learning_rate": 1.090434375866795e-05, |
| "loss": 0.9558042287826538, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.2128279883381925, |
| "grad_norm": 0.2437950074672699, |
| "learning_rate": 1.0884383933364477e-05, |
| "loss": 1.1506716012954712, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.215743440233236, |
| "grad_norm": 0.24170175194740295, |
| "learning_rate": 1.0864396897035558e-05, |
| "loss": 1.1895190477371216, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.21865889212828, |
| "grad_norm": 0.1518929898738861, |
| "learning_rate": 1.0844382856130886e-05, |
| "loss": 1.2491060495376587, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.2215743440233235, |
| "grad_norm": 0.14055992662906647, |
| "learning_rate": 1.0824342017379089e-05, |
| "loss": 1.4196858406066895, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.2244897959183674, |
| "grad_norm": 0.18487177789211273, |
| "learning_rate": 1.0804274587785595e-05, |
| "loss": 1.0294526815414429, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.227405247813411, |
| "grad_norm": 0.6372827887535095, |
| "learning_rate": 1.0784180774630495e-05, |
| "loss": 0.26844465732574463, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.2303206997084548, |
| "grad_norm": 0.15034730732440948, |
| "learning_rate": 1.0764060785466391e-05, |
| "loss": 1.2424967288970947, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.2332361516034984, |
| "grad_norm": 0.16668657958507538, |
| "learning_rate": 1.0743914828116281e-05, |
| "loss": 1.0989577770233154, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.2361516034985423, |
| "grad_norm": 0.15799511969089508, |
| "learning_rate": 1.0723743110671378e-05, |
| "loss": 1.2244020700454712, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.239067055393586, |
| "grad_norm": 0.09745261073112488, |
| "learning_rate": 1.0703545841488974e-05, |
| "loss": 1.1401562690734863, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.2419825072886297, |
| "grad_norm": 0.5921195149421692, |
| "learning_rate": 1.06833232291903e-05, |
| "loss": 0.7718449234962463, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.2448979591836735, |
| "grad_norm": 0.08858446776866913, |
| "learning_rate": 1.0663075482658355e-05, |
| "loss": 1.074745774269104, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.2478134110787171, |
| "grad_norm": 0.22339816391468048, |
| "learning_rate": 1.0642802811035753e-05, |
| "loss": 0.6682339310646057, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.250728862973761, |
| "grad_norm": 0.22134488821029663, |
| "learning_rate": 1.0622505423722566e-05, |
| "loss": 1.1483386754989624, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.2536443148688048, |
| "grad_norm": 0.34351247549057007, |
| "learning_rate": 1.0602183530374159e-05, |
| "loss": 0.9953691959381104, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.2565597667638484, |
| "grad_norm": 0.1252131313085556, |
| "learning_rate": 1.0581837340899022e-05, |
| "loss": 1.152267575263977, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.259475218658892, |
| "grad_norm": 0.10258015990257263, |
| "learning_rate": 1.0561467065456607e-05, |
| "loss": 1.0798017978668213, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.2623906705539358, |
| "grad_norm": 0.3338652551174164, |
| "learning_rate": 1.0541072914455152e-05, |
| "loss": 0.6286276578903198, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.2653061224489797, |
| "grad_norm": 0.18449436128139496, |
| "learning_rate": 1.0520655098549508e-05, |
| "loss": 1.1572736501693726, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.2682215743440233, |
| "grad_norm": 0.1656051129102707, |
| "learning_rate": 1.0500213828638972e-05, |
| "loss": 1.2729966640472412, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.271137026239067, |
| "grad_norm": 0.1694529801607132, |
| "learning_rate": 1.0479749315865093e-05, |
| "loss": 1.1974416971206665, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.2740524781341107, |
| "grad_norm": 0.07350558042526245, |
| "learning_rate": 1.045926177160951e-05, |
| "loss": 1.127896785736084, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.2769679300291545, |
| "grad_norm": 0.1753559112548828, |
| "learning_rate": 1.0438751407491745e-05, |
| "loss": 1.1373307704925537, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.2798833819241984, |
| "grad_norm": 0.16192442178726196, |
| "learning_rate": 1.0418218435367043e-05, |
| "loss": 1.0873537063598633, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.282798833819242, |
| "grad_norm": 0.2647189497947693, |
| "learning_rate": 1.0397663067324163e-05, |
| "loss": 0.8994747400283813, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 0.16055135428905487, |
| "learning_rate": 1.03770855156832e-05, |
| "loss": 1.1629761457443237, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.2886297376093294, |
| "grad_norm": 0.1312457174062729, |
| "learning_rate": 1.0356485992993386e-05, |
| "loss": 1.2289665937423706, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.2915451895043732, |
| "grad_norm": 0.3237832486629486, |
| "learning_rate": 1.0335864712030895e-05, |
| "loss": 1.3477158546447754, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.2944606413994169, |
| "grad_norm": 0.11200102418661118, |
| "learning_rate": 1.0315221885796648e-05, |
| "loss": 1.1597537994384766, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.2973760932944607, |
| "grad_norm": 0.1582571268081665, |
| "learning_rate": 1.029455772751411e-05, |
| "loss": 1.0584282875061035, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.3002915451895043, |
| "grad_norm": 0.2713635563850403, |
| "learning_rate": 1.0273872450627086e-05, |
| "loss": 1.065276026725769, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.3032069970845481, |
| "grad_norm": 0.617933988571167, |
| "learning_rate": 1.025316626879752e-05, |
| "loss": 1.1870301961898804, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.306122448979592, |
| "grad_norm": 0.24628496170043945, |
| "learning_rate": 1.0232439395903295e-05, |
| "loss": 1.3716992139816284, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.3090379008746356, |
| "grad_norm": 0.07092081010341644, |
| "learning_rate": 1.0211692046036002e-05, |
| "loss": 1.2022879123687744, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.3119533527696792, |
| "grad_norm": 0.07380987703800201, |
| "learning_rate": 1.019092443349875e-05, |
| "loss": 0.9747592806816101, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.314868804664723, |
| "grad_norm": 0.07589751482009888, |
| "learning_rate": 1.0170136772803948e-05, |
| "loss": 1.033135175704956, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.3177842565597668, |
| "grad_norm": 0.12000124901533127, |
| "learning_rate": 1.0149329278671082e-05, |
| "loss": 1.1944102048873901, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.3206997084548104, |
| "grad_norm": 0.24365442991256714, |
| "learning_rate": 1.0128502166024497e-05, |
| "loss": 0.7611994743347168, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.3236151603498543, |
| "grad_norm": 0.5757351517677307, |
| "learning_rate": 1.0107655649991186e-05, |
| "loss": 1.0334023237228394, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.3265306122448979, |
| "grad_norm": 0.09015009552240372, |
| "learning_rate": 1.0086789945898568e-05, |
| "loss": 1.1387327909469604, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.3294460641399417, |
| "grad_norm": 0.6966755390167236, |
| "learning_rate": 1.0065905269272245e-05, |
| "loss": 1.0652743577957153, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.3323615160349855, |
| "grad_norm": 0.08158166706562042, |
| "learning_rate": 1.0045001835833804e-05, |
| "loss": 1.154505968093872, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.3352769679300291, |
| "grad_norm": 0.17343761026859283, |
| "learning_rate": 1.0024079861498566e-05, |
| "loss": 1.0197257995605469, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.3381924198250728, |
| "grad_norm": 0.3027811050415039, |
| "learning_rate": 1.0003139562373365e-05, |
| "loss": 1.3120397329330444, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.3411078717201166, |
| "grad_norm": 0.7201161980628967, |
| "learning_rate": 9.982181154754323e-06, |
| "loss": 0.6248821020126343, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.3440233236151604, |
| "grad_norm": 0.06654369831085205, |
| "learning_rate": 9.961204855124595e-06, |
| "loss": 1.3484827280044556, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.346938775510204, |
| "grad_norm": 0.3403482437133789, |
| "learning_rate": 9.940210880152157e-06, |
| "loss": 1.023748517036438, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.3498542274052479, |
| "grad_norm": 0.3134101629257202, |
| "learning_rate": 9.91919944668755e-06, |
| "loss": 1.462807536125183, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.3527696793002915, |
| "grad_norm": 0.12223192304372787, |
| "learning_rate": 9.89817077176165e-06, |
| "loss": 1.0908539295196533, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.3556851311953353, |
| "grad_norm": 0.14625874161720276, |
| "learning_rate": 9.877125072583421e-06, |
| "loss": 1.2502838373184204, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.3586005830903791, |
| "grad_norm": 0.2647968828678131, |
| "learning_rate": 9.856062566537677e-06, |
| "loss": 1.3731303215026855, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.3615160349854227, |
| "grad_norm": 0.14242695271968842, |
| "learning_rate": 9.834983471182831e-06, |
| "loss": 1.0232398509979248, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.3644314868804663, |
| "grad_norm": 0.22755105793476105, |
| "learning_rate": 9.813888004248648e-06, |
| "loss": 1.1105183362960815, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.3673469387755102, |
| "grad_norm": 0.10210377722978592, |
| "learning_rate": 9.792776383634002e-06, |
| "loss": 0.9822967648506165, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.370262390670554, |
| "grad_norm": 0.2081102728843689, |
| "learning_rate": 9.771648827404617e-06, |
| "loss": 0.6831743121147156, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.3731778425655976, |
| "grad_norm": 0.195752814412117, |
| "learning_rate": 9.750505553790823e-06, |
| "loss": 1.017356514930725, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.3760932944606414, |
| "grad_norm": 0.149446040391922, |
| "learning_rate": 9.729346781185295e-06, |
| "loss": 1.2844679355621338, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.379008746355685, |
| "grad_norm": 0.08231537789106369, |
| "learning_rate": 9.708172728140804e-06, |
| "loss": 1.2107067108154297, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.3819241982507289, |
| "grad_norm": 0.1436920166015625, |
| "learning_rate": 9.686983613367947e-06, |
| "loss": 0.9730831384658813, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.3848396501457727, |
| "grad_norm": 0.13865897059440613, |
| "learning_rate": 9.665779655732905e-06, |
| "loss": 1.134727954864502, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.3877551020408163, |
| "grad_norm": 0.1278238445520401, |
| "learning_rate": 9.644561074255168e-06, |
| "loss": 1.1596717834472656, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.39067055393586, |
| "grad_norm": 0.13528533279895782, |
| "learning_rate": 9.62332808810528e-06, |
| "loss": 1.0845617055892944, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.3935860058309038, |
| "grad_norm": 0.14649415016174316, |
| "learning_rate": 9.602080916602573e-06, |
| "loss": 1.223073124885559, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.3965014577259476, |
| "grad_norm": 0.1999201625585556, |
| "learning_rate": 9.580819779212905e-06, |
| "loss": 1.0572779178619385, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.3994169096209912, |
| "grad_norm": 0.42912936210632324, |
| "learning_rate": 9.559544895546393e-06, |
| "loss": 1.211446762084961, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.402332361516035, |
| "grad_norm": 0.3703382611274719, |
| "learning_rate": 9.538256485355125e-06, |
| "loss": 1.1024117469787598, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.4052478134110786, |
| "grad_norm": 0.09566738456487656, |
| "learning_rate": 9.516954768530924e-06, |
| "loss": 1.0713633298873901, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.4081632653061225, |
| "grad_norm": 0.13610726594924927, |
| "learning_rate": 9.49563996510306e-06, |
| "loss": 1.2085410356521606, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.4110787172011663, |
| "grad_norm": 0.19745762646198273, |
| "learning_rate": 9.47431229523596e-06, |
| "loss": 1.0144951343536377, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.41399416909621, |
| "grad_norm": 0.41680532693862915, |
| "learning_rate": 9.452971979226972e-06, |
| "loss": 1.0802420377731323, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.4169096209912537, |
| "grad_norm": 0.18726322054862976, |
| "learning_rate": 9.431619237504052e-06, |
| "loss": 1.2159126996994019, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.4198250728862973, |
| "grad_norm": 0.4570455551147461, |
| "learning_rate": 9.410254290623512e-06, |
| "loss": 1.1028673648834229, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.4227405247813412, |
| "grad_norm": 0.1720321923494339, |
| "learning_rate": 9.388877359267732e-06, |
| "loss": 1.053758978843689, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.4256559766763848, |
| "grad_norm": 0.7719082832336426, |
| "learning_rate": 9.367488664242878e-06, |
| "loss": 1.0918673276901245, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.11719834804534912, |
| "learning_rate": 9.346088426476627e-06, |
| "loss": 1.1107982397079468, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.4314868804664722, |
| "grad_norm": 0.26357176899909973, |
| "learning_rate": 9.32467686701589e-06, |
| "loss": 1.3265354633331299, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.434402332361516, |
| "grad_norm": 0.7194681167602539, |
| "learning_rate": 9.303254207024509e-06, |
| "loss": 0.6845600605010986, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.4373177842565599, |
| "grad_norm": 0.19328005611896515, |
| "learning_rate": 9.28182066778099e-06, |
| "loss": 1.1066367626190186, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.4402332361516035, |
| "grad_norm": 0.3166584372520447, |
| "learning_rate": 9.260376470676225e-06, |
| "loss": 1.0711687803268433, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.4431486880466473, |
| "grad_norm": 0.20059515535831451, |
| "learning_rate": 9.238921837211175e-06, |
| "loss": 1.2519899606704712, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.446064139941691, |
| "grad_norm": 0.15826623141765594, |
| "learning_rate": 9.217456988994608e-06, |
| "loss": 1.3235565423965454, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.4489795918367347, |
| "grad_norm": 0.19210676848888397, |
| "learning_rate": 9.1959821477408e-06, |
| "loss": 1.0224212408065796, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.4518950437317784, |
| "grad_norm": 0.26280826330184937, |
| "learning_rate": 9.174497535267257e-06, |
| "loss": 1.1540876626968384, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.4548104956268222, |
| "grad_norm": 0.09911534935235977, |
| "learning_rate": 9.153003373492395e-06, |
| "loss": 1.197079062461853, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.4577259475218658, |
| "grad_norm": 0.15191975235939026, |
| "learning_rate": 9.131499884433285e-06, |
| "loss": 1.2020612955093384, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.4606413994169096, |
| "grad_norm": 0.1272922158241272, |
| "learning_rate": 9.109987290203325e-06, |
| "loss": 1.1222330331802368, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.4635568513119535, |
| "grad_norm": 0.17026354372501373, |
| "learning_rate": 9.088465813009979e-06, |
| "loss": 1.2111908197402954, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.466472303206997, |
| "grad_norm": 0.1192101240158081, |
| "learning_rate": 9.06693567515245e-06, |
| "loss": 1.186848759651184, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.469387755102041, |
| "grad_norm": 0.5374306440353394, |
| "learning_rate": 9.045397099019405e-06, |
| "loss": 1.1735105514526367, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.4723032069970845, |
| "grad_norm": 0.14989781379699707, |
| "learning_rate": 9.02385030708667e-06, |
| "loss": 1.3269665241241455, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.4752186588921283, |
| "grad_norm": 0.23181524872779846, |
| "learning_rate": 9.002295521914934e-06, |
| "loss": 1.234397292137146, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.478134110787172, |
| "grad_norm": 0.8318726420402527, |
| "learning_rate": 8.980732966147451e-06, |
| "loss": 1.2126901149749756, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.4810495626822158, |
| "grad_norm": 0.2093929797410965, |
| "learning_rate": 8.959162862507738e-06, |
| "loss": 1.0737382173538208, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.4839650145772594, |
| "grad_norm": 0.2963290214538574, |
| "learning_rate": 8.937585433797273e-06, |
| "loss": 0.9138633012771606, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.4868804664723032, |
| "grad_norm": 0.2868603467941284, |
| "learning_rate": 8.916000902893199e-06, |
| "loss": 1.3595247268676758, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.489795918367347, |
| "grad_norm": 0.11513882875442505, |
| "learning_rate": 8.894409492746018e-06, |
| "loss": 1.0969007015228271, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.4927113702623906, |
| "grad_norm": 0.15273737907409668, |
| "learning_rate": 8.87281142637729e-06, |
| "loss": 1.0396068096160889, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.4956268221574345, |
| "grad_norm": 0.12743119895458221, |
| "learning_rate": 8.851206926877325e-06, |
| "loss": 1.21293306350708, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.498542274052478, |
| "grad_norm": 0.07293698191642761, |
| "learning_rate": 8.82959621740288e-06, |
| "loss": 0.8554050922393799, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.501457725947522, |
| "grad_norm": 0.1396367996931076, |
| "learning_rate": 8.807979521174866e-06, |
| "loss": 0.8444166779518127, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.5043731778425657, |
| "grad_norm": 0.34662795066833496, |
| "learning_rate": 8.786357061476029e-06, |
| "loss": 1.1405446529388428, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.5072886297376094, |
| "grad_norm": 0.2602401673793793, |
| "learning_rate": 8.764729061648632e-06, |
| "loss": 1.2988492250442505, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.510204081632653, |
| "grad_norm": 0.19908583164215088, |
| "learning_rate": 8.743095745092185e-06, |
| "loss": 1.2301197052001953, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.5131195335276968, |
| "grad_norm": 0.20294634997844696, |
| "learning_rate": 8.721457335261104e-06, |
| "loss": 0.9326356053352356, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.5160349854227406, |
| "grad_norm": 0.5687612295150757, |
| "learning_rate": 8.699814055662417e-06, |
| "loss": 1.187393069267273, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.5189504373177842, |
| "grad_norm": 0.27902352809906006, |
| "learning_rate": 8.678166129853442e-06, |
| "loss": 1.0565565824508667, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.5218658892128278, |
| "grad_norm": 0.06307139247655869, |
| "learning_rate": 8.656513781439512e-06, |
| "loss": 1.0471357107162476, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.5247813411078717, |
| "grad_norm": 0.3132034242153168, |
| "learning_rate": 8.634857234071619e-06, |
| "loss": 1.3265520334243774, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.5276967930029155, |
| "grad_norm": 0.25837764143943787, |
| "learning_rate": 8.613196711444138e-06, |
| "loss": 1.1429646015167236, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.5306122448979593, |
| "grad_norm": 0.08677840977907181, |
| "learning_rate": 8.591532437292502e-06, |
| "loss": 0.9910908937454224, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.533527696793003, |
| "grad_norm": 0.283247172832489, |
| "learning_rate": 8.5698646353909e-06, |
| "loss": 0.8875013589859009, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.5364431486880465, |
| "grad_norm": 0.16179129481315613, |
| "learning_rate": 8.548193529549947e-06, |
| "loss": 1.1073272228240967, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.5393586005830904, |
| "grad_norm": 0.12490551173686981, |
| "learning_rate": 8.526519343614398e-06, |
| "loss": 0.9769071340560913, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.5422740524781342, |
| "grad_norm": 0.25089073181152344, |
| "learning_rate": 8.504842301460815e-06, |
| "loss": 1.069384217262268, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.5451895043731778, |
| "grad_norm": 0.22324740886688232, |
| "learning_rate": 8.483162626995268e-06, |
| "loss": 1.0800434350967407, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.5481049562682214, |
| "grad_norm": 0.358711302280426, |
| "learning_rate": 8.461480544151012e-06, |
| "loss": 0.8311281204223633, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.5510204081632653, |
| "grad_norm": 0.35619816184043884, |
| "learning_rate": 8.439796276886177e-06, |
| "loss": 1.378959059715271, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.553935860058309, |
| "grad_norm": 0.07740774750709534, |
| "learning_rate": 8.418110049181464e-06, |
| "loss": 0.7135167121887207, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.556851311953353, |
| "grad_norm": 0.11709576100111008, |
| "learning_rate": 8.396422085037822e-06, |
| "loss": 1.1297550201416016, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.5597667638483965, |
| "grad_norm": 0.1865878850221634, |
| "learning_rate": 8.374732608474128e-06, |
| "loss": 1.1906490325927734, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.5626822157434401, |
| "grad_norm": 0.16431988775730133, |
| "learning_rate": 8.353041843524886e-06, |
| "loss": 1.1722774505615234, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.565597667638484, |
| "grad_norm": 0.36135971546173096, |
| "learning_rate": 8.331350014237912e-06, |
| "loss": 1.1067001819610596, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.5685131195335278, |
| "grad_norm": 0.3832073211669922, |
| "learning_rate": 8.30965734467201e-06, |
| "loss": 1.2439948320388794, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 0.2755753993988037, |
| "learning_rate": 8.28796405889466e-06, |
| "loss": 0.6848400831222534, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.574344023323615, |
| "grad_norm": 0.07128661125898361, |
| "learning_rate": 8.266270380979723e-06, |
| "loss": 1.2033002376556396, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.5772594752186588, |
| "grad_norm": 0.16955770552158356, |
| "learning_rate": 8.244576535005093e-06, |
| "loss": 1.2546216249465942, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.5801749271137027, |
| "grad_norm": 0.702198326587677, |
| "learning_rate": 8.22288274505041e-06, |
| "loss": 1.0031241178512573, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.5830903790087465, |
| "grad_norm": 0.09851932525634766, |
| "learning_rate": 8.201189235194729e-06, |
| "loss": 1.171536922454834, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.58600583090379, |
| "grad_norm": 0.5338625907897949, |
| "learning_rate": 8.179496229514217e-06, |
| "loss": 1.0307410955429077, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.5889212827988337, |
| "grad_norm": 0.17403900623321533, |
| "learning_rate": 8.157803952079832e-06, |
| "loss": 1.2256954908370972, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.5918367346938775, |
| "grad_norm": 0.1747167557477951, |
| "learning_rate": 8.136112626955005e-06, |
| "loss": 1.2137948274612427, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.5947521865889214, |
| "grad_norm": 0.07115664333105087, |
| "learning_rate": 8.114422478193336e-06, |
| "loss": 1.0697215795516968, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.597667638483965, |
| "grad_norm": 0.12972617149353027, |
| "learning_rate": 8.09273372983628e-06, |
| "loss": 1.1039892435073853, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.6005830903790086, |
| "grad_norm": 0.13853909075260162, |
| "learning_rate": 8.071046605910804e-06, |
| "loss": 1.186689853668213, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.6034985422740524, |
| "grad_norm": 0.1802920252084732, |
| "learning_rate": 8.049361330427129e-06, |
| "loss": 1.047842025756836, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.6064139941690962, |
| "grad_norm": 0.15627241134643555, |
| "learning_rate": 8.027678127376353e-06, |
| "loss": 1.081397294998169, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.60932944606414, |
| "grad_norm": 0.13871587812900543, |
| "learning_rate": 8.005997220728181e-06, |
| "loss": 1.129719614982605, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.6122448979591837, |
| "grad_norm": 20.326587677001953, |
| "learning_rate": 7.984318834428607e-06, |
| "loss": 1.1785022020339966, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.6151603498542273, |
| "grad_norm": 0.13852129876613617, |
| "learning_rate": 7.962643192397574e-06, |
| "loss": 1.0734182596206665, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.6180758017492711, |
| "grad_norm": 0.6223950982093811, |
| "learning_rate": 7.940970518526686e-06, |
| "loss": 1.1438935995101929, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.620991253644315, |
| "grad_norm": 0.0528414323925972, |
| "learning_rate": 7.919301036676892e-06, |
| "loss": 0.9696015119552612, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.6239067055393586, |
| "grad_norm": 0.13710257411003113, |
| "learning_rate": 7.897634970676166e-06, |
| "loss": 1.1505471467971802, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.6268221574344022, |
| "grad_norm": 0.16004100441932678, |
| "learning_rate": 7.875972544317203e-06, |
| "loss": 1.2167091369628906, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.629737609329446, |
| "grad_norm": 0.45379891991615295, |
| "learning_rate": 7.854313981355101e-06, |
| "loss": 1.131983757019043, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.6326530612244898, |
| "grad_norm": 0.13307584822177887, |
| "learning_rate": 7.832659505505048e-06, |
| "loss": 1.1805908679962158, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.6355685131195337, |
| "grad_norm": 0.2649403214454651, |
| "learning_rate": 7.811009340440022e-06, |
| "loss": 1.2160626649856567, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.6384839650145773, |
| "grad_norm": 0.16499841213226318, |
| "learning_rate": 7.789363709788472e-06, |
| "loss": 1.2312496900558472, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.6413994169096209, |
| "grad_norm": 0.14581745862960815, |
| "learning_rate": 7.767722837132008e-06, |
| "loss": 0.5785539150238037, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.6443148688046647, |
| "grad_norm": 0.40138673782348633, |
| "learning_rate": 7.746086946003103e-06, |
| "loss": 1.102718472480774, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.6472303206997085, |
| "grad_norm": 0.39575713872909546, |
| "learning_rate": 7.724456259882758e-06, |
| "loss": 0.9496442675590515, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.6501457725947521, |
| "grad_norm": 0.16450181603431702, |
| "learning_rate": 7.702831002198225e-06, |
| "loss": 1.1438281536102295, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.6530612244897958, |
| "grad_norm": 0.10068156570196152, |
| "learning_rate": 7.68121139632068e-06, |
| "loss": 1.2390490770339966, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.6559766763848396, |
| "grad_norm": 0.25964057445526123, |
| "learning_rate": 7.65959766556292e-06, |
| "loss": 1.0381125211715698, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.6588921282798834, |
| "grad_norm": 0.43424177169799805, |
| "learning_rate": 7.637990033177057e-06, |
| "loss": 1.109690546989441, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.6618075801749272, |
| "grad_norm": 0.21539334952831268, |
| "learning_rate": 7.616388722352214e-06, |
| "loss": 1.2123034000396729, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.6647230320699709, |
| "grad_norm": 0.20255622267723083, |
| "learning_rate": 7.594793956212212e-06, |
| "loss": 1.217584490776062, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.6676384839650145, |
| "grad_norm": 0.47754237055778503, |
| "learning_rate": 7.573205957813276e-06, |
| "loss": 0.9803376197814941, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.6705539358600583, |
| "grad_norm": 0.09026843309402466, |
| "learning_rate": 7.551624950141726e-06, |
| "loss": 1.1912260055541992, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.6734693877551021, |
| "grad_norm": 0.11982105672359467, |
| "learning_rate": 7.530051156111669e-06, |
| "loss": 1.1396859884262085, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.6763848396501457, |
| "grad_norm": 0.42154011130332947, |
| "learning_rate": 7.508484798562707e-06, |
| "loss": 1.3917794227600098, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.6793002915451893, |
| "grad_norm": 0.34086376428604126, |
| "learning_rate": 7.486926100257621e-06, |
| "loss": 1.1625425815582275, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.6822157434402332, |
| "grad_norm": 0.33954572677612305, |
| "learning_rate": 7.465375283880084e-06, |
| "loss": 1.1317555904388428, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.685131195335277, |
| "grad_norm": 0.15621435642242432, |
| "learning_rate": 7.44383257203236e-06, |
| "loss": 1.0376930236816406, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.6880466472303208, |
| "grad_norm": 0.16445010900497437, |
| "learning_rate": 7.422298187232988e-06, |
| "loss": 0.6347440481185913, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.6909620991253644, |
| "grad_norm": 0.11221948266029358, |
| "learning_rate": 7.4007723519145005e-06, |
| "loss": 1.2130205631256104, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.693877551020408, |
| "grad_norm": 0.10298870503902435, |
| "learning_rate": 7.37925528842113e-06, |
| "loss": 1.0703403949737549, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.6967930029154519, |
| "grad_norm": 0.05989653244614601, |
| "learning_rate": 7.357747219006487e-06, |
| "loss": 1.0500437021255493, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.6997084548104957, |
| "grad_norm": 0.18388091027736664, |
| "learning_rate": 7.336248365831293e-06, |
| "loss": 1.0820516347885132, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.7026239067055393, |
| "grad_norm": 0.30676501989364624, |
| "learning_rate": 7.314758950961069e-06, |
| "loss": 0.8827295303344727, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.7055393586005831, |
| "grad_norm": 0.1762169450521469, |
| "learning_rate": 7.293279196363844e-06, |
| "loss": 1.1642931699752808, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.7084548104956268, |
| "grad_norm": 0.138104647397995, |
| "learning_rate": 7.271809323907868e-06, |
| "loss": 1.3497681617736816, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.7113702623906706, |
| "grad_norm": 0.04815658926963806, |
| "learning_rate": 7.250349555359316e-06, |
| "loss": 0.9686152935028076, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 0.4449727535247803, |
| "learning_rate": 7.228900112379993e-06, |
| "loss": 0.8205754160881042, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.717201166180758, |
| "grad_norm": 0.19454075396060944, |
| "learning_rate": 7.2074612165250596e-06, |
| "loss": 1.1948063373565674, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.7201166180758016, |
| "grad_norm": 0.1630457043647766, |
| "learning_rate": 7.18603308924072e-06, |
| "loss": 1.122542381286621, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.7230320699708455, |
| "grad_norm": 0.2632548213005066, |
| "learning_rate": 7.164615951861958e-06, |
| "loss": 1.2288137674331665, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.7259475218658893, |
| "grad_norm": 0.185108482837677, |
| "learning_rate": 7.143210025610238e-06, |
| "loss": 1.029456615447998, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.728862973760933, |
| "grad_norm": 0.06753533333539963, |
| "learning_rate": 7.121815531591222e-06, |
| "loss": 0.9876729846000671, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.7317784256559767, |
| "grad_norm": 0.16401244699954987, |
| "learning_rate": 7.100432690792484e-06, |
| "loss": 0.6059045791625977, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.7346938775510203, |
| "grad_norm": 0.2957839369773865, |
| "learning_rate": 7.0790617240812374e-06, |
| "loss": 1.0509564876556396, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.7376093294460642, |
| "grad_norm": 0.13618314266204834, |
| "learning_rate": 7.057702852202037e-06, |
| "loss": 1.1775768995285034, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.740524781341108, |
| "grad_norm": 0.171565443277359, |
| "learning_rate": 7.0363562957745105e-06, |
| "loss": 0.9801825881004333, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.7434402332361516, |
| "grad_norm": 0.09507802128791809, |
| "learning_rate": 7.015022275291084e-06, |
| "loss": 0.969845175743103, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.7463556851311952, |
| "grad_norm": 0.49828192591667175, |
| "learning_rate": 6.993701011114686e-06, |
| "loss": 0.9284896850585938, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.749271137026239, |
| "grad_norm": 0.10986272245645523, |
| "learning_rate": 6.972392723476494e-06, |
| "loss": 1.1610954999923706, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.7521865889212829, |
| "grad_norm": 0.36414283514022827, |
| "learning_rate": 6.9510976324736415e-06, |
| "loss": 0.9902899861335754, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.7551020408163265, |
| "grad_norm": 0.15007393062114716, |
| "learning_rate": 6.929815958066951e-06, |
| "loss": 1.1686747074127197, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.7580174927113703, |
| "grad_norm": 0.09150854498147964, |
| "learning_rate": 6.908547920078671e-06, |
| "loss": 0.9296596050262451, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.760932944606414, |
| "grad_norm": 0.13725019991397858, |
| "learning_rate": 6.887293738190183e-06, |
| "loss": 0.6867948174476624, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.7638483965014577, |
| "grad_norm": 0.2506777346134186, |
| "learning_rate": 6.866053631939756e-06, |
| "loss": 1.1812880039215088, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.7667638483965016, |
| "grad_norm": 0.24459925293922424, |
| "learning_rate": 6.844827820720275e-06, |
| "loss": 1.233087420463562, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.7696793002915452, |
| "grad_norm": 0.18725088238716125, |
| "learning_rate": 6.8236165237769555e-06, |
| "loss": 1.0703694820404053, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.7725947521865888, |
| "grad_norm": 0.08817660808563232, |
| "learning_rate": 6.802419960205095e-06, |
| "loss": 0.9150586724281311, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.7755102040816326, |
| "grad_norm": 0.24206826090812683, |
| "learning_rate": 6.7812383489478216e-06, |
| "loss": 1.2116329669952393, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.7784256559766765, |
| "grad_norm": 0.13627009093761444, |
| "learning_rate": 6.760071908793796e-06, |
| "loss": 0.6978607177734375, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.78134110787172, |
| "grad_norm": 0.19865363836288452, |
| "learning_rate": 6.738920858374991e-06, |
| "loss": 1.0590617656707764, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.784256559766764, |
| "grad_norm": 0.4059164524078369, |
| "learning_rate": 6.717785416164414e-06, |
| "loss": 1.38783860206604, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.7871720116618075, |
| "grad_norm": 0.2919604480266571, |
| "learning_rate": 6.696665800473842e-06, |
| "loss": 1.1487404108047485, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.7900874635568513, |
| "grad_norm": 0.1517525017261505, |
| "learning_rate": 6.675562229451589e-06, |
| "loss": 1.206036925315857, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.7930029154518952, |
| "grad_norm": 0.2847557067871094, |
| "learning_rate": 6.6544749210802305e-06, |
| "loss": 0.8351743817329407, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.7959183673469388, |
| "grad_norm": 0.2792437672615051, |
| "learning_rate": 6.633404093174371e-06, |
| "loss": 0.9937669634819031, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.7988338192419824, |
| "grad_norm": 0.39450135827064514, |
| "learning_rate": 6.612349963378381e-06, |
| "loss": 0.9253970980644226, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.8017492711370262, |
| "grad_norm": 0.26529014110565186, |
| "learning_rate": 6.591312749164154e-06, |
| "loss": 1.1452049016952515, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.80466472303207, |
| "grad_norm": 0.23458294570446014, |
| "learning_rate": 6.570292667828856e-06, |
| "loss": 1.2078217267990112, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.8075801749271136, |
| "grad_norm": 0.13832348585128784, |
| "learning_rate": 6.549289936492693e-06, |
| "loss": 1.2237412929534912, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.8104956268221575, |
| "grad_norm": 0.08728086948394775, |
| "learning_rate": 6.5283047720966505e-06, |
| "loss": 1.1127595901489258, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.813411078717201, |
| "grad_norm": 0.2100764364004135, |
| "learning_rate": 6.5073373914002656e-06, |
| "loss": 1.0868037939071655, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.816326530612245, |
| "grad_norm": 0.13499869406223297, |
| "learning_rate": 6.486388010979388e-06, |
| "loss": 1.119627833366394, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.8192419825072887, |
| "grad_norm": 0.34346649050712585, |
| "learning_rate": 6.465456847223932e-06, |
| "loss": 1.0318715572357178, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.8221574344023324, |
| "grad_norm": 0.07944006472826004, |
| "learning_rate": 6.444544116335655e-06, |
| "loss": 1.1757546663284302, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.825072886297376, |
| "grad_norm": 0.2944159209728241, |
| "learning_rate": 6.423650034325915e-06, |
| "loss": 1.2396355867385864, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.8279883381924198, |
| "grad_norm": 0.18287204205989838, |
| "learning_rate": 6.402774817013442e-06, |
| "loss": 1.097105860710144, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.8309037900874636, |
| "grad_norm": 0.141254261136055, |
| "learning_rate": 6.381918680022112e-06, |
| "loss": 1.0068081617355347, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.8338192419825075, |
| "grad_norm": 0.17386725544929504, |
| "learning_rate": 6.36108183877871e-06, |
| "loss": 1.1032158136367798, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.836734693877551, |
| "grad_norm": 0.22268234193325043, |
| "learning_rate": 6.3402645085107224e-06, |
| "loss": 1.2912282943725586, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.8396501457725947, |
| "grad_norm": 0.411150723695755, |
| "learning_rate": 6.3194669042440976e-06, |
| "loss": 1.129095196723938, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.8425655976676385, |
| "grad_norm": 0.3001119792461395, |
| "learning_rate": 6.298689240801026e-06, |
| "loss": 1.365820050239563, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.8454810495626823, |
| "grad_norm": 0.36252474784851074, |
| "learning_rate": 6.277931732797732e-06, |
| "loss": 1.3998820781707764, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.848396501457726, |
| "grad_norm": 0.29093074798583984, |
| "learning_rate": 6.257194594642254e-06, |
| "loss": 1.0682395696640015, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.8513119533527695, |
| "grad_norm": 0.13126376271247864, |
| "learning_rate": 6.236478040532214e-06, |
| "loss": 1.0302337408065796, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.8542274052478134, |
| "grad_norm": 0.1628250777721405, |
| "learning_rate": 6.215782284452628e-06, |
| "loss": 1.098158359527588, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 0.20393933355808258, |
| "learning_rate": 6.195107540173687e-06, |
| "loss": 1.1833226680755615, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.860058309037901, |
| "grad_norm": 0.2242426872253418, |
| "learning_rate": 6.174454021248537e-06, |
| "loss": 1.2466531991958618, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.8629737609329446, |
| "grad_norm": 0.1543884128332138, |
| "learning_rate": 6.15382194101109e-06, |
| "loss": 0.9692124724388123, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.8658892128279883, |
| "grad_norm": 0.10594581812620163, |
| "learning_rate": 6.133211512573819e-06, |
| "loss": 1.0277884006500244, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.868804664723032, |
| "grad_norm": 0.1760384440422058, |
| "learning_rate": 6.1126229488255416e-06, |
| "loss": 1.0745232105255127, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.871720116618076, |
| "grad_norm": 0.11243575066328049, |
| "learning_rate": 6.092056462429238e-06, |
| "loss": 1.11955988407135, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.8746355685131195, |
| "grad_norm": 0.3004339337348938, |
| "learning_rate": 6.071512265819841e-06, |
| "loss": 1.1129993200302124, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.8775510204081631, |
| "grad_norm": 0.1870323270559311, |
| "learning_rate": 6.0509905712020554e-06, |
| "loss": 1.1004483699798584, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.880466472303207, |
| "grad_norm": 0.15390393137931824, |
| "learning_rate": 6.030491590548157e-06, |
| "loss": 1.1051290035247803, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.8833819241982508, |
| "grad_norm": 0.17591705918312073, |
| "learning_rate": 6.010015535595802e-06, |
| "loss": 1.19423246383667, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.8862973760932946, |
| "grad_norm": 0.517492413520813, |
| "learning_rate": 5.989562617845843e-06, |
| "loss": 0.7528221011161804, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.8892128279883382, |
| "grad_norm": 0.2763058543205261, |
| "learning_rate": 5.969133048560151e-06, |
| "loss": 0.6028561592102051, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.8921282798833818, |
| "grad_norm": 0.1741061955690384, |
| "learning_rate": 5.948727038759415e-06, |
| "loss": 0.9944829344749451, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.8950437317784257, |
| "grad_norm": 0.3421262204647064, |
| "learning_rate": 5.928344799220985e-06, |
| "loss": 1.118728756904602, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.8979591836734695, |
| "grad_norm": 0.42300957441329956, |
| "learning_rate": 5.907986540476678e-06, |
| "loss": 0.7158623337745667, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.900874635568513, |
| "grad_norm": 0.14869055151939392, |
| "learning_rate": 5.887652472810609e-06, |
| "loss": 1.0393644571304321, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.9037900874635567, |
| "grad_norm": 0.07201150804758072, |
| "learning_rate": 5.86734280625702e-06, |
| "loss": 0.5461652874946594, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.9067055393586005, |
| "grad_norm": 0.6429765820503235, |
| "learning_rate": 5.847057750598111e-06, |
| "loss": 1.1324551105499268, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.9096209912536444, |
| "grad_norm": 0.18680232763290405, |
| "learning_rate": 5.826797515361868e-06, |
| "loss": 1.274292230606079, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.9125364431486882, |
| "grad_norm": 0.1953829973936081, |
| "learning_rate": 5.806562309819909e-06, |
| "loss": 1.2884361743927002, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.9154518950437318, |
| "grad_norm": 0.28342682123184204, |
| "learning_rate": 5.7863523429853055e-06, |
| "loss": 1.279549479484558, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.9183673469387754, |
| "grad_norm": 0.45169350504875183, |
| "learning_rate": 5.766167823610443e-06, |
| "loss": 1.074336051940918, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.9212827988338192, |
| "grad_norm": 0.18884071707725525, |
| "learning_rate": 5.746008960184852e-06, |
| "loss": 1.262738585472107, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.924198250728863, |
| "grad_norm": 0.059031542390584946, |
| "learning_rate": 5.725875960933058e-06, |
| "loss": 1.0195709466934204, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.9271137026239067, |
| "grad_norm": 0.11774204671382904, |
| "learning_rate": 5.705769033812431e-06, |
| "loss": 1.04592764377594, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.9300291545189503, |
| "grad_norm": 0.13104864954948425, |
| "learning_rate": 5.685688386511041e-06, |
| "loss": 1.0482321977615356, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.9329446064139941, |
| "grad_norm": 0.15567655861377716, |
| "learning_rate": 5.665634226445501e-06, |
| "loss": 1.2044618129730225, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.935860058309038, |
| "grad_norm": 0.14479920268058777, |
| "learning_rate": 5.645606760758836e-06, |
| "loss": 1.0985395908355713, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.9387755102040818, |
| "grad_norm": 0.1920030266046524, |
| "learning_rate": 5.625606196318347e-06, |
| "loss": 1.4523109197616577, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.9416909620991254, |
| "grad_norm": 0.2637879252433777, |
| "learning_rate": 5.605632739713456e-06, |
| "loss": 1.0658267736434937, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.944606413994169, |
| "grad_norm": 0.08796999603509903, |
| "learning_rate": 5.585686597253593e-06, |
| "loss": 1.0220710039138794, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.9475218658892128, |
| "grad_norm": 0.4936763644218445, |
| "learning_rate": 5.5657679749660455e-06, |
| "loss": 0.5359926223754883, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.9504373177842567, |
| "grad_norm": 0.25524938106536865, |
| "learning_rate": 5.545877078593849e-06, |
| "loss": 1.0832246541976929, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.9533527696793003, |
| "grad_norm": 0.3815828263759613, |
| "learning_rate": 5.52601411359365e-06, |
| "loss": 1.0333139896392822, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.9562682215743439, |
| "grad_norm": 0.1364160180091858, |
| "learning_rate": 5.506179285133582e-06, |
| "loss": 0.8447660207748413, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.9591836734693877, |
| "grad_norm": 0.22036899626255035, |
| "learning_rate": 5.486372798091161e-06, |
| "loss": 1.4143515825271606, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.9620991253644315, |
| "grad_norm": 0.4314256012439728, |
| "learning_rate": 5.466594857051153e-06, |
| "loss": 0.9990249276161194, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.9650145772594754, |
| "grad_norm": 0.15996676683425903, |
| "learning_rate": 5.4468456663034635e-06, |
| "loss": 1.2198452949523926, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.967930029154519, |
| "grad_norm": 0.19972719252109528, |
| "learning_rate": 5.427125429841039e-06, |
| "loss": 1.0296826362609863, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.9708454810495626, |
| "grad_norm": 0.1828991174697876, |
| "learning_rate": 5.4074343513577536e-06, |
| "loss": 1.2304623126983643, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.9737609329446064, |
| "grad_norm": 0.2502359449863434, |
| "learning_rate": 5.387772634246287e-06, |
| "loss": 1.1169551610946655, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.9766763848396502, |
| "grad_norm": 0.1563616245985031, |
| "learning_rate": 5.36814048159606e-06, |
| "loss": 0.818549633026123, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.9795918367346939, |
| "grad_norm": 0.08790906518697739, |
| "learning_rate": 5.348538096191109e-06, |
| "loss": 1.2132847309112549, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.9825072886297375, |
| "grad_norm": 0.3884468376636505, |
| "learning_rate": 5.328965680507991e-06, |
| "loss": 1.1513258218765259, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.9854227405247813, |
| "grad_norm": 0.24757881462574005, |
| "learning_rate": 5.309423436713714e-06, |
| "loss": 0.6811099052429199, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.9883381924198251, |
| "grad_norm": 0.0917486697435379, |
| "learning_rate": 5.289911566663626e-06, |
| "loss": 0.5249199271202087, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.991253644314869, |
| "grad_norm": 0.3590066432952881, |
| "learning_rate": 5.270430271899342e-06, |
| "loss": 1.1386462450027466, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.9941690962099126, |
| "grad_norm": 0.0781368613243103, |
| "learning_rate": 5.250979753646664e-06, |
| "loss": 1.0840882062911987, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.9970845481049562, |
| "grad_norm": 0.3470701277256012, |
| "learning_rate": 5.231560212813487e-06, |
| "loss": 1.0490968227386475, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.13662609457969666, |
| "learning_rate": 5.212171849987743e-06, |
| "loss": 1.1986355781555176, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.002915451895044, |
| "grad_norm": 0.15793374180793762, |
| "learning_rate": 5.1928148654353196e-06, |
| "loss": 0.921393871307373, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.0058309037900877, |
| "grad_norm": 0.4891752600669861, |
| "learning_rate": 5.17348945909799e-06, |
| "loss": 0.9690005779266357, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.008746355685131, |
| "grad_norm": 0.2033310979604721, |
| "learning_rate": 5.1541958305913536e-06, |
| "loss": 1.3568806648254395, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.011661807580175, |
| "grad_norm": 0.1594112515449524, |
| "learning_rate": 5.134934179202771e-06, |
| "loss": 1.033390998840332, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.0145772594752187, |
| "grad_norm": 0.2081524133682251, |
| "learning_rate": 5.115704703889299e-06, |
| "loss": 1.0304166078567505, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.0174927113702625, |
| "grad_norm": 0.38243576884269714, |
| "learning_rate": 5.096507603275648e-06, |
| "loss": 0.9502314925193787, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.020408163265306, |
| "grad_norm": 0.06100543960928917, |
| "learning_rate": 5.077343075652124e-06, |
| "loss": 1.1048611402511597, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.0233236151603498, |
| "grad_norm": 0.386870414018631, |
| "learning_rate": 5.058211318972581e-06, |
| "loss": 1.2929866313934326, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.0262390670553936, |
| "grad_norm": 0.1502365618944168, |
| "learning_rate": 5.0391125308523744e-06, |
| "loss": 1.2062195539474487, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.0291545189504374, |
| "grad_norm": 0.46698620915412903, |
| "learning_rate": 5.020046908566317e-06, |
| "loss": 1.2675377130508423, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.0320699708454812, |
| "grad_norm": 0.2170051783323288, |
| "learning_rate": 5.001014649046655e-06, |
| "loss": 1.0185376405715942, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.0349854227405246, |
| "grad_norm": 0.5570895671844482, |
| "learning_rate": 4.98201594888102e-06, |
| "loss": 1.1238821744918823, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.0379008746355685, |
| "grad_norm": 0.19649037718772888, |
| "learning_rate": 4.963051004310397e-06, |
| "loss": 1.1577717065811157, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.0408163265306123, |
| "grad_norm": 0.3043438494205475, |
| "learning_rate": 4.944120011227115e-06, |
| "loss": 0.945805549621582, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.043731778425656, |
| "grad_norm": 0.8879981637001038, |
| "learning_rate": 4.925223165172808e-06, |
| "loss": 1.0322425365447998, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.0466472303206995, |
| "grad_norm": 0.26241424679756165, |
| "learning_rate": 4.906360661336394e-06, |
| "loss": 1.2149442434310913, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.0495626822157433, |
| "grad_norm": 0.8886216878890991, |
| "learning_rate": 4.887532694552066e-06, |
| "loss": 1.0274255275726318, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.052478134110787, |
| "grad_norm": 0.21257859468460083, |
| "learning_rate": 4.868739459297286e-06, |
| "loss": 1.1855621337890625, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.055393586005831, |
| "grad_norm": 0.14593669772148132, |
| "learning_rate": 4.8499811496907506e-06, |
| "loss": 0.7928017377853394, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.058309037900875, |
| "grad_norm": 0.06642908602952957, |
| "learning_rate": 4.831257959490425e-06, |
| "loss": 1.0738983154296875, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.061224489795918, |
| "grad_norm": 0.3109600841999054, |
| "learning_rate": 4.812570082091498e-06, |
| "loss": 0.8972907662391663, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.064139941690962, |
| "grad_norm": 0.13277745246887207, |
| "learning_rate": 4.793917710524422e-06, |
| "loss": 1.0650956630706787, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.067055393586006, |
| "grad_norm": 0.14433449506759644, |
| "learning_rate": 4.775301037452898e-06, |
| "loss": 1.1586172580718994, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.0699708454810497, |
| "grad_norm": 0.15220968425273895, |
| "learning_rate": 4.756720255171887e-06, |
| "loss": 0.5742167234420776, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.072886297376093, |
| "grad_norm": 0.126608744263649, |
| "learning_rate": 4.738175555605632e-06, |
| "loss": 1.242780327796936, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.075801749271137, |
| "grad_norm": 0.10246127843856812, |
| "learning_rate": 4.719667130305671e-06, |
| "loss": 0.9981814622879028, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.0787172011661808, |
| "grad_norm": 0.2460668534040451, |
| "learning_rate": 4.701195170448857e-06, |
| "loss": 0.8302922248840332, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.0816326530612246, |
| "grad_norm": 0.155581995844841, |
| "learning_rate": 4.682759866835388e-06, |
| "loss": 1.3268355131149292, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.0845481049562684, |
| "grad_norm": 0.10044138133525848, |
| "learning_rate": 4.664361409886829e-06, |
| "loss": 0.9983614087104797, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.087463556851312, |
| "grad_norm": 0.2085467278957367, |
| "learning_rate": 4.645999989644148e-06, |
| "loss": 1.1001629829406738, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.0903790087463556, |
| "grad_norm": 0.33730220794677734, |
| "learning_rate": 4.627675795765761e-06, |
| "loss": 1.3111716508865356, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.0932944606413995, |
| "grad_norm": 0.2143622636795044, |
| "learning_rate": 4.60938901752556e-06, |
| "loss": 0.8293286561965942, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.0962099125364433, |
| "grad_norm": 0.07966610789299011, |
| "learning_rate": 4.591139843810967e-06, |
| "loss": 1.1742640733718872, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.0991253644314867, |
| "grad_norm": 0.18288615345954895, |
| "learning_rate": 4.572928463120982e-06, |
| "loss": 1.1798888444900513, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.1020408163265305, |
| "grad_norm": 0.2549722194671631, |
| "learning_rate": 4.554755063564226e-06, |
| "loss": 1.0986790657043457, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.1049562682215743, |
| "grad_norm": 0.1803271621465683, |
| "learning_rate": 4.536619832857015e-06, |
| "loss": 1.0121634006500244, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.107871720116618, |
| "grad_norm": 0.33244436979293823, |
| "learning_rate": 4.518522958321409e-06, |
| "loss": 1.2030587196350098, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.110787172011662, |
| "grad_norm": 0.07119657844305038, |
| "learning_rate": 4.500464626883276e-06, |
| "loss": 0.6789675354957581, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.1137026239067054, |
| "grad_norm": 0.3919859230518341, |
| "learning_rate": 4.4824450250703755e-06, |
| "loss": 0.8600730895996094, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.116618075801749, |
| "grad_norm": 0.1530391424894333, |
| "learning_rate": 4.464464339010414e-06, |
| "loss": 0.9321385622024536, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.119533527696793, |
| "grad_norm": 0.12812215089797974, |
| "learning_rate": 4.446522754429127e-06, |
| "loss": 1.1020374298095703, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.122448979591837, |
| "grad_norm": 0.2687873840332031, |
| "learning_rate": 4.4286204566483715e-06, |
| "loss": 0.548167884349823, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.1253644314868803, |
| "grad_norm": 0.351572722196579, |
| "learning_rate": 4.410757630584204e-06, |
| "loss": 0.671511709690094, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.128279883381924, |
| "grad_norm": 0.3009466230869293, |
| "learning_rate": 4.392934460744958e-06, |
| "loss": 1.0809369087219238, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.131195335276968, |
| "grad_norm": 0.1647637039422989, |
| "learning_rate": 4.375151131229369e-06, |
| "loss": 1.0825597047805786, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.1341107871720117, |
| "grad_norm": 0.15290948748588562, |
| "learning_rate": 4.357407825724648e-06, |
| "loss": 1.132341742515564, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.1370262390670556, |
| "grad_norm": 0.30983132123947144, |
| "learning_rate": 4.339704727504581e-06, |
| "loss": 1.115373969078064, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.139941690962099, |
| "grad_norm": 0.1616809368133545, |
| "learning_rate": 4.32204201942766e-06, |
| "loss": 1.2571251392364502, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 0.44996944069862366, |
| "learning_rate": 4.304419883935167e-06, |
| "loss": 0.7702177166938782, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.1457725947521866, |
| "grad_norm": 0.08497241884469986, |
| "learning_rate": 4.286838503049309e-06, |
| "loss": 1.0834498405456543, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.1486880466472305, |
| "grad_norm": 0.4060671925544739, |
| "learning_rate": 4.26929805837134e-06, |
| "loss": 1.1200850009918213, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.151603498542274, |
| "grad_norm": 0.17709168791770935, |
| "learning_rate": 4.2517987310796595e-06, |
| "loss": 1.1172959804534912, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.1545189504373177, |
| "grad_norm": 0.1522580236196518, |
| "learning_rate": 4.23434070192797e-06, |
| "loss": 1.168565034866333, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.1574344023323615, |
| "grad_norm": 0.1714070737361908, |
| "learning_rate": 4.216924151243395e-06, |
| "loss": 1.1115281581878662, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.1603498542274053, |
| "grad_norm": 0.13482044637203217, |
| "learning_rate": 4.199549258924615e-06, |
| "loss": 1.2671080827713013, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.163265306122449, |
| "grad_norm": 0.1459122747182846, |
| "learning_rate": 4.18221620444002e-06, |
| "loss": 1.172806739807129, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.1661807580174925, |
| "grad_norm": 0.08871738612651825, |
| "learning_rate": 4.1649251668258475e-06, |
| "loss": 1.045624852180481, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.1690962099125364, |
| "grad_norm": 0.3394921123981476, |
| "learning_rate": 4.147676324684335e-06, |
| "loss": 1.1889164447784424, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.17201166180758, |
| "grad_norm": 0.1473836749792099, |
| "learning_rate": 4.130469856181873e-06, |
| "loss": 1.079075813293457, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.174927113702624, |
| "grad_norm": 0.18347686529159546, |
| "learning_rate": 4.113305939047174e-06, |
| "loss": 1.2786171436309814, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.1778425655976674, |
| "grad_norm": 0.16250960528850555, |
| "learning_rate": 4.096184750569422e-06, |
| "loss": 0.677879273891449, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.1807580174927113, |
| "grad_norm": 0.383709192276001, |
| "learning_rate": 4.07910646759645e-06, |
| "loss": 0.6416628360748291, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.183673469387755, |
| "grad_norm": 0.07085460424423218, |
| "learning_rate": 4.062071266532916e-06, |
| "loss": 1.0884201526641846, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.186588921282799, |
| "grad_norm": 0.10339315980672836, |
| "learning_rate": 4.045079323338477e-06, |
| "loss": 0.8533938527107239, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.1895043731778427, |
| "grad_norm": 0.20028476417064667, |
| "learning_rate": 4.0281308135259705e-06, |
| "loss": 0.9680588841438293, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.192419825072886, |
| "grad_norm": 0.3516143560409546, |
| "learning_rate": 4.0112259121596e-06, |
| "loss": 0.7940521240234375, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.19533527696793, |
| "grad_norm": 0.10385473072528839, |
| "learning_rate": 3.994364793853135e-06, |
| "loss": 1.1375114917755127, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.198250728862974, |
| "grad_norm": 0.10895653814077377, |
| "learning_rate": 3.977547632768095e-06, |
| "loss": 1.1559362411499023, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.2011661807580176, |
| "grad_norm": 0.11289890855550766, |
| "learning_rate": 3.960774602611966e-06, |
| "loss": 1.1142271757125854, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.204081632653061, |
| "grad_norm": 0.11957119405269623, |
| "learning_rate": 3.94404587663639e-06, |
| "loss": 0.997885525226593, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.206997084548105, |
| "grad_norm": 0.1454574018716812, |
| "learning_rate": 3.9273616276353904e-06, |
| "loss": 0.6211732625961304, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.2099125364431487, |
| "grad_norm": 0.2732894718647003, |
| "learning_rate": 3.910722027943569e-06, |
| "loss": 0.7947649955749512, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.2128279883381925, |
| "grad_norm": 0.31755542755126953, |
| "learning_rate": 3.894127249434352e-06, |
| "loss": 0.9824427366256714, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.2157434402332363, |
| "grad_norm": 0.31029990315437317, |
| "learning_rate": 3.877577463518183e-06, |
| "loss": 1.0954536199569702, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.2186588921282797, |
| "grad_norm": 0.13882219791412354, |
| "learning_rate": 3.861072841140779e-06, |
| "loss": 1.1737290620803833, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.2215743440233235, |
| "grad_norm": 0.199194073677063, |
| "learning_rate": 3.8446135527813596e-06, |
| "loss": 1.2562403678894043, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.2244897959183674, |
| "grad_norm": 0.09712310880422592, |
| "learning_rate": 3.828199768450866e-06, |
| "loss": 0.887328028678894, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.227405247813411, |
| "grad_norm": 0.3643515110015869, |
| "learning_rate": 3.8118316576902345e-06, |
| "loss": 0.13481314480304718, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.2303206997084546, |
| "grad_norm": 0.4534083604812622, |
| "learning_rate": 3.7955093895686242e-06, |
| "loss": 1.0862985849380493, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.2332361516034984, |
| "grad_norm": 0.15879718959331512, |
| "learning_rate": 3.779233132681675e-06, |
| "loss": 1.045498013496399, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.2361516034985423, |
| "grad_norm": 0.18001393973827362, |
| "learning_rate": 3.7630030551497728e-06, |
| "loss": 1.1538960933685303, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.239067055393586, |
| "grad_norm": 0.08799666166305542, |
| "learning_rate": 3.746819324616308e-06, |
| "loss": 1.0975581407546997, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.24198250728863, |
| "grad_norm": 0.24161297082901, |
| "learning_rate": 3.730682108245944e-06, |
| "loss": 0.6484414339065552, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.2448979591836733, |
| "grad_norm": 0.08378497511148453, |
| "learning_rate": 3.714591572722891e-06, |
| "loss": 0.9581442475318909, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.247813411078717, |
| "grad_norm": 0.10033685714006424, |
| "learning_rate": 3.698547884249187e-06, |
| "loss": 0.6113779544830322, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.250728862973761, |
| "grad_norm": 0.275552362203598, |
| "learning_rate": 3.6825512085429703e-06, |
| "loss": 1.1037795543670654, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.253644314868805, |
| "grad_norm": 0.5268692374229431, |
| "learning_rate": 3.6666017108367837e-06, |
| "loss": 0.8392840027809143, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.256559766763848, |
| "grad_norm": 0.24270810186862946, |
| "learning_rate": 3.6506995558758586e-06, |
| "loss": 1.0857195854187012, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.259475218658892, |
| "grad_norm": 0.11209052801132202, |
| "learning_rate": 3.6348449079164116e-06, |
| "loss": 1.0408934354782104, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.262390670553936, |
| "grad_norm": 0.3595077097415924, |
| "learning_rate": 3.619037930723958e-06, |
| "loss": 0.41006362438201904, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.2653061224489797, |
| "grad_norm": 0.20681369304656982, |
| "learning_rate": 3.603278787571601e-06, |
| "loss": 1.08263099193573, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.2682215743440235, |
| "grad_norm": 0.1791142076253891, |
| "learning_rate": 3.587567641238369e-06, |
| "loss": 1.1789532899856567, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.271137026239067, |
| "grad_norm": 0.15824060142040253, |
| "learning_rate": 3.5719046540075155e-06, |
| "loss": 1.138330101966858, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.2740524781341107, |
| "grad_norm": 0.08995150774717331, |
| "learning_rate": 3.5562899876648556e-06, |
| "loss": 1.0861237049102783, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.2769679300291545, |
| "grad_norm": 0.20422294735908508, |
| "learning_rate": 3.540723803497084e-06, |
| "loss": 1.068771481513977, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.2798833819241984, |
| "grad_norm": 0.29918450117111206, |
| "learning_rate": 3.5252062622901196e-06, |
| "loss": 1.0257431268692017, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.2827988338192418, |
| "grad_norm": 0.2508153021335602, |
| "learning_rate": 3.5097375243274322e-06, |
| "loss": 0.7228989601135254, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.2857142857142856, |
| "grad_norm": 0.20312649011611938, |
| "learning_rate": 3.494317749388401e-06, |
| "loss": 0.9408363103866577, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.2886297376093294, |
| "grad_norm": 0.18280087411403656, |
| "learning_rate": 3.4789470967466528e-06, |
| "loss": 1.1609010696411133, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.2915451895043732, |
| "grad_norm": 0.4031111001968384, |
| "learning_rate": 3.4636257251684247e-06, |
| "loss": 1.1523736715316772, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.294460641399417, |
| "grad_norm": 0.14943495392799377, |
| "learning_rate": 3.4483537929109212e-06, |
| "loss": 1.0938516855239868, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.2973760932944605, |
| "grad_norm": 0.32287096977233887, |
| "learning_rate": 3.433131457720673e-06, |
| "loss": 0.8949427604675293, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.3002915451895043, |
| "grad_norm": 0.13816498219966888, |
| "learning_rate": 3.4179588768319194e-06, |
| "loss": 1.004232406616211, |
| "step": 1578 |
| }, |
| { |
| "epoch": 2.303206997084548, |
| "grad_norm": 0.17348824441432953, |
| "learning_rate": 3.4028362069649807e-06, |
| "loss": 1.1232084035873413, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.306122448979592, |
| "grad_norm": 0.2952488362789154, |
| "learning_rate": 3.387763604324628e-06, |
| "loss": 1.2846827507019043, |
| "step": 1582 |
| }, |
| { |
| "epoch": 2.3090379008746353, |
| "grad_norm": 0.0930081456899643, |
| "learning_rate": 3.3727412245984863e-06, |
| "loss": 1.0255701541900635, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.311953352769679, |
| "grad_norm": 0.19518348574638367, |
| "learning_rate": 3.3577692229554225e-06, |
| "loss": 0.9602378606796265, |
| "step": 1586 |
| }, |
| { |
| "epoch": 2.314868804664723, |
| "grad_norm": 0.08679629117250443, |
| "learning_rate": 3.3428477540439295e-06, |
| "loss": 1.0191975831985474, |
| "step": 1588 |
| }, |
| { |
| "epoch": 2.317784256559767, |
| "grad_norm": 0.07790417969226837, |
| "learning_rate": 3.3279769719905438e-06, |
| "loss": 1.1509268283843994, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.3206997084548107, |
| "grad_norm": 0.2912391126155853, |
| "learning_rate": 3.3131570303982517e-06, |
| "loss": 0.6687411665916443, |
| "step": 1592 |
| }, |
| { |
| "epoch": 2.323615160349854, |
| "grad_norm": 0.4317520260810852, |
| "learning_rate": 3.2983880823448896e-06, |
| "loss": 0.8183987736701965, |
| "step": 1594 |
| }, |
| { |
| "epoch": 2.326530612244898, |
| "grad_norm": 0.11885584890842438, |
| "learning_rate": 3.283670280381581e-06, |
| "loss": 1.1012320518493652, |
| "step": 1596 |
| }, |
| { |
| "epoch": 2.3294460641399417, |
| "grad_norm": 0.35252460837364197, |
| "learning_rate": 3.269003776531148e-06, |
| "loss": 0.9789476990699768, |
| "step": 1598 |
| }, |
| { |
| "epoch": 2.3323615160349855, |
| "grad_norm": 0.15434707701206207, |
| "learning_rate": 3.2543887222865496e-06, |
| "loss": 1.1043654680252075, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.335276967930029, |
| "grad_norm": 0.16315020620822906, |
| "learning_rate": 3.239825268609309e-06, |
| "loss": 1.0038485527038574, |
| "step": 1602 |
| }, |
| { |
| "epoch": 2.3381924198250728, |
| "grad_norm": 0.39029252529144287, |
| "learning_rate": 3.2253135659279558e-06, |
| "loss": 1.1852213144302368, |
| "step": 1604 |
| }, |
| { |
| "epoch": 2.3411078717201166, |
| "grad_norm": 0.2913620173931122, |
| "learning_rate": 3.2108537641364786e-06, |
| "loss": 0.45255744457244873, |
| "step": 1606 |
| }, |
| { |
| "epoch": 2.3440233236151604, |
| "grad_norm": 0.06582468003034592, |
| "learning_rate": 3.19644601259277e-06, |
| "loss": 1.269538402557373, |
| "step": 1608 |
| }, |
| { |
| "epoch": 2.3469387755102042, |
| "grad_norm": 0.5571786761283875, |
| "learning_rate": 3.1820904601170884e-06, |
| "loss": 0.8519521355628967, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.3498542274052476, |
| "grad_norm": 0.31546610593795776, |
| "learning_rate": 3.1677872549905154e-06, |
| "loss": 1.3262689113616943, |
| "step": 1612 |
| }, |
| { |
| "epoch": 2.3527696793002915, |
| "grad_norm": 0.09515654295682907, |
| "learning_rate": 3.153536544953433e-06, |
| "loss": 0.9249638319015503, |
| "step": 1614 |
| }, |
| { |
| "epoch": 2.3556851311953353, |
| "grad_norm": 0.15578609704971313, |
| "learning_rate": 3.139338477203983e-06, |
| "loss": 1.1823093891143799, |
| "step": 1616 |
| }, |
| { |
| "epoch": 2.358600583090379, |
| "grad_norm": 0.2227763533592224, |
| "learning_rate": 3.125193198396564e-06, |
| "loss": 1.2877289056777954, |
| "step": 1618 |
| }, |
| { |
| "epoch": 2.3615160349854225, |
| "grad_norm": 0.4745902121067047, |
| "learning_rate": 3.111100854640303e-06, |
| "loss": 0.9719488024711609, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.3644314868804663, |
| "grad_norm": 0.24592548608779907, |
| "learning_rate": 3.097061591497555e-06, |
| "loss": 1.0211539268493652, |
| "step": 1622 |
| }, |
| { |
| "epoch": 2.36734693877551, |
| "grad_norm": 0.21700948476791382, |
| "learning_rate": 3.0830755539823942e-06, |
| "loss": 0.9550508260726929, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.370262390670554, |
| "grad_norm": 0.20466458797454834, |
| "learning_rate": 3.0691428865591153e-06, |
| "loss": 0.5767884254455566, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.373177842565598, |
| "grad_norm": 0.14715692400932312, |
| "learning_rate": 3.0552637331407466e-06, |
| "loss": 0.894551694393158, |
| "step": 1628 |
| }, |
| { |
| "epoch": 2.376093294460641, |
| "grad_norm": 0.1368647813796997, |
| "learning_rate": 3.0414382370875628e-06, |
| "loss": 1.2126644849777222, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.379008746355685, |
| "grad_norm": 0.2084326297044754, |
| "learning_rate": 3.027666541205592e-06, |
| "loss": 1.1460554599761963, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.381924198250729, |
| "grad_norm": 0.12772594392299652, |
| "learning_rate": 3.013948787745166e-06, |
| "loss": 0.8425911664962769, |
| "step": 1634 |
| }, |
| { |
| "epoch": 2.3848396501457727, |
| "grad_norm": 0.21220910549163818, |
| "learning_rate": 3.000285118399425e-06, |
| "loss": 1.0760411024093628, |
| "step": 1636 |
| }, |
| { |
| "epoch": 2.387755102040816, |
| "grad_norm": 0.16325032711029053, |
| "learning_rate": 2.9866756743028644e-06, |
| "loss": 1.1195225715637207, |
| "step": 1638 |
| }, |
| { |
| "epoch": 2.39067055393586, |
| "grad_norm": 0.1648532897233963, |
| "learning_rate": 2.973120596029882e-06, |
| "loss": 1.0467681884765625, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.3935860058309038, |
| "grad_norm": 0.5487902164459229, |
| "learning_rate": 2.9596200235933215e-06, |
| "loss": 1.1597939729690552, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.3965014577259476, |
| "grad_norm": 0.15476688742637634, |
| "learning_rate": 2.9461740964430176e-06, |
| "loss": 1.0105078220367432, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.3994169096209914, |
| "grad_norm": 1.1137182712554932, |
| "learning_rate": 2.932782953464373e-06, |
| "loss": 1.0070343017578125, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.402332361516035, |
| "grad_norm": 0.3256247043609619, |
| "learning_rate": 2.9194467329769166e-06, |
| "loss": 0.9948145151138306, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.4052478134110786, |
| "grad_norm": 0.14843417704105377, |
| "learning_rate": 2.9061655727328617e-06, |
| "loss": 1.0339670181274414, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.4081632653061225, |
| "grad_norm": 0.14106328785419464, |
| "learning_rate": 2.8929396099157056e-06, |
| "loss": 1.149165391921997, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.4110787172011663, |
| "grad_norm": 0.1781884729862213, |
| "learning_rate": 2.8797689811387944e-06, |
| "loss": 0.9708322286605835, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.4139941690962097, |
| "grad_norm": 0.16324618458747864, |
| "learning_rate": 2.8666538224439207e-06, |
| "loss": 0.9147579669952393, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.4169096209912535, |
| "grad_norm": 0.10199990123510361, |
| "learning_rate": 2.853594269299919e-06, |
| "loss": 1.1740384101867676, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.4198250728862973, |
| "grad_norm": 0.36128106713294983, |
| "learning_rate": 2.8405904566012634e-06, |
| "loss": 0.9795001149177551, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.422740524781341, |
| "grad_norm": 0.11705031245946884, |
| "learning_rate": 2.827642518666673e-06, |
| "loss": 1.0222880840301514, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.425655976676385, |
| "grad_norm": 0.19340762495994568, |
| "learning_rate": 2.814750589237729e-06, |
| "loss": 1.0553447008132935, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.4285714285714284, |
| "grad_norm": 0.09246297180652618, |
| "learning_rate": 2.8019148014774856e-06, |
| "loss": 1.0741846561431885, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.431486880466472, |
| "grad_norm": 0.23843225836753845, |
| "learning_rate": 2.789135287969106e-06, |
| "loss": 1.1993522644042969, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.434402332361516, |
| "grad_norm": 0.7431137561798096, |
| "learning_rate": 2.7764121807144815e-06, |
| "loss": 0.42419517040252686, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.43731778425656, |
| "grad_norm": 0.11922803521156311, |
| "learning_rate": 2.7637456111328773e-06, |
| "loss": 1.0701881647109985, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.4402332361516033, |
| "grad_norm": 0.238107368350029, |
| "learning_rate": 2.7511357100595675e-06, |
| "loss": 1.0204083919525146, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.443148688046647, |
| "grad_norm": 0.18065865337848663, |
| "learning_rate": 2.738582607744491e-06, |
| "loss": 1.1767973899841309, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.446064139941691, |
| "grad_norm": 0.6328040361404419, |
| "learning_rate": 2.7260864338508944e-06, |
| "loss": 1.2465075254440308, |
| "step": 1678 |
| }, |
| { |
| "epoch": 2.4489795918367347, |
| "grad_norm": 0.32334592938423157, |
| "learning_rate": 2.71364731745401e-06, |
| "loss": 0.9165597558021545, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.4518950437317786, |
| "grad_norm": 0.29830703139305115, |
| "learning_rate": 2.701265387039703e-06, |
| "loss": 1.0425974130630493, |
| "step": 1682 |
| }, |
| { |
| "epoch": 2.454810495626822, |
| "grad_norm": 0.09913703799247742, |
| "learning_rate": 2.688940770503163e-06, |
| "loss": 1.1421351432800293, |
| "step": 1684 |
| }, |
| { |
| "epoch": 2.457725947521866, |
| "grad_norm": 0.19002677500247955, |
| "learning_rate": 2.676673595147574e-06, |
| "loss": 1.14607572555542, |
| "step": 1686 |
| }, |
| { |
| "epoch": 2.4606413994169096, |
| "grad_norm": 0.17399148643016815, |
| "learning_rate": 2.6644639876827903e-06, |
| "loss": 1.0854803323745728, |
| "step": 1688 |
| }, |
| { |
| "epoch": 2.4635568513119535, |
| "grad_norm": 0.18045774102210999, |
| "learning_rate": 2.6523120742240457e-06, |
| "loss": 1.156597375869751, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.466472303206997, |
| "grad_norm": 0.36970221996307373, |
| "learning_rate": 2.6402179802906417e-06, |
| "loss": 1.1326744556427002, |
| "step": 1692 |
| }, |
| { |
| "epoch": 2.4693877551020407, |
| "grad_norm": 0.16106556355953217, |
| "learning_rate": 2.6281818308046466e-06, |
| "loss": 1.1174097061157227, |
| "step": 1694 |
| }, |
| { |
| "epoch": 2.4723032069970845, |
| "grad_norm": 0.23179616034030914, |
| "learning_rate": 2.6162037500896134e-06, |
| "loss": 1.247542381286621, |
| "step": 1696 |
| }, |
| { |
| "epoch": 2.4752186588921283, |
| "grad_norm": 0.20750805735588074, |
| "learning_rate": 2.6042838618692964e-06, |
| "loss": 1.120650291442871, |
| "step": 1698 |
| }, |
| { |
| "epoch": 2.478134110787172, |
| "grad_norm": 0.4005797207355499, |
| "learning_rate": 2.5924222892663607e-06, |
| "loss": 1.1234309673309326, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.481049562682216, |
| "grad_norm": 0.11094089597463608, |
| "learning_rate": 2.580619154801124e-06, |
| "loss": 1.0382579565048218, |
| "step": 1702 |
| }, |
| { |
| "epoch": 2.4839650145772594, |
| "grad_norm": 0.1598607450723648, |
| "learning_rate": 2.5688745803902863e-06, |
| "loss": 0.8054310083389282, |
| "step": 1704 |
| }, |
| { |
| "epoch": 2.486880466472303, |
| "grad_norm": 0.29358312487602234, |
| "learning_rate": 2.557188687345666e-06, |
| "loss": 1.2227270603179932, |
| "step": 1706 |
| }, |
| { |
| "epoch": 2.489795918367347, |
| "grad_norm": 0.10478518158197403, |
| "learning_rate": 2.545561596372957e-06, |
| "loss": 1.0256011486053467, |
| "step": 1708 |
| }, |
| { |
| "epoch": 2.4927113702623904, |
| "grad_norm": 0.19069114327430725, |
| "learning_rate": 2.533993427570471e-06, |
| "loss": 1.003487467765808, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.4956268221574343, |
| "grad_norm": 0.19944234192371368, |
| "learning_rate": 2.522484300427905e-06, |
| "loss": 1.1340402364730835, |
| "step": 1712 |
| }, |
| { |
| "epoch": 2.498542274052478, |
| "grad_norm": 0.206906259059906, |
| "learning_rate": 2.5110343338251055e-06, |
| "loss": 0.7293667793273926, |
| "step": 1714 |
| }, |
| { |
| "epoch": 2.501457725947522, |
| "grad_norm": 0.22807729244232178, |
| "learning_rate": 2.499643646030833e-06, |
| "loss": 0.6911664009094238, |
| "step": 1716 |
| }, |
| { |
| "epoch": 2.5043731778425657, |
| "grad_norm": 0.12783202528953552, |
| "learning_rate": 2.488312354701552e-06, |
| "loss": 1.0861356258392334, |
| "step": 1718 |
| }, |
| { |
| "epoch": 2.5072886297376096, |
| "grad_norm": 0.24884046614170074, |
| "learning_rate": 2.4770405768802087e-06, |
| "loss": 1.2009036540985107, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.510204081632653, |
| "grad_norm": 0.19883911311626434, |
| "learning_rate": 2.4658284289950235e-06, |
| "loss": 1.171090006828308, |
| "step": 1722 |
| }, |
| { |
| "epoch": 2.513119533527697, |
| "grad_norm": 0.2198370397090912, |
| "learning_rate": 2.454676026858288e-06, |
| "loss": 0.6773008704185486, |
| "step": 1724 |
| }, |
| { |
| "epoch": 2.5160349854227406, |
| "grad_norm": 0.3970673084259033, |
| "learning_rate": 2.443583485665172e-06, |
| "loss": 0.9177547693252563, |
| "step": 1726 |
| }, |
| { |
| "epoch": 2.518950437317784, |
| "grad_norm": 0.14196209609508514, |
| "learning_rate": 2.432550919992524e-06, |
| "loss": 1.0238224267959595, |
| "step": 1728 |
| }, |
| { |
| "epoch": 2.521865889212828, |
| "grad_norm": 0.08479610830545425, |
| "learning_rate": 2.4215784437977023e-06, |
| "loss": 1.0351308584213257, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.5247813411078717, |
| "grad_norm": 0.2791972756385803, |
| "learning_rate": 2.4106661704173856e-06, |
| "loss": 1.2357579469680786, |
| "step": 1732 |
| }, |
| { |
| "epoch": 2.5276967930029155, |
| "grad_norm": 0.300520658493042, |
| "learning_rate": 2.3998142125664094e-06, |
| "loss": 0.9955886602401733, |
| "step": 1734 |
| }, |
| { |
| "epoch": 2.5306122448979593, |
| "grad_norm": 0.07155195623636246, |
| "learning_rate": 2.3890226823365984e-06, |
| "loss": 0.9533568024635315, |
| "step": 1736 |
| }, |
| { |
| "epoch": 2.533527696793003, |
| "grad_norm": 0.37421008944511414, |
| "learning_rate": 2.3782916911956072e-06, |
| "loss": 0.7588440179824829, |
| "step": 1738 |
| }, |
| { |
| "epoch": 2.5364431486880465, |
| "grad_norm": 0.21846982836723328, |
| "learning_rate": 2.3676213499857742e-06, |
| "loss": 1.0482406616210938, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.5393586005830904, |
| "grad_norm": 0.22150775790214539, |
| "learning_rate": 2.357011768922975e-06, |
| "loss": 0.9425265789031982, |
| "step": 1742 |
| }, |
| { |
| "epoch": 2.542274052478134, |
| "grad_norm": 0.0946943610906601, |
| "learning_rate": 2.3464630575954748e-06, |
| "loss": 1.0236523151397705, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.5451895043731776, |
| "grad_norm": 0.2336379438638687, |
| "learning_rate": 2.3359753249628156e-06, |
| "loss": 0.9605098962783813, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.5481049562682214, |
| "grad_norm": 0.38517579436302185, |
| "learning_rate": 2.3255486793546735e-06, |
| "loss": 0.7055401802062988, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.5510204081632653, |
| "grad_norm": 0.22488614916801453, |
| "learning_rate": 2.3151832284697437e-06, |
| "loss": 1.3222585916519165, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.553935860058309, |
| "grad_norm": 0.14808881282806396, |
| "learning_rate": 2.304879079374634e-06, |
| "loss": 0.6318288445472717, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.556851311953353, |
| "grad_norm": 0.12122584134340286, |
| "learning_rate": 2.2946363385027555e-06, |
| "loss": 1.0979853868484497, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.5597667638483967, |
| "grad_norm": 0.17218822240829468, |
| "learning_rate": 2.2844551116532164e-06, |
| "loss": 1.1333314180374146, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.56268221574344, |
| "grad_norm": 0.2076103240251541, |
| "learning_rate": 2.274335503989743e-06, |
| "loss": 1.1102957725524902, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.565597667638484, |
| "grad_norm": 0.3147886395454407, |
| "learning_rate": 2.2642776200395825e-06, |
| "loss": 1.0110862255096436, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.568513119533528, |
| "grad_norm": 0.199388787150383, |
| "learning_rate": 2.2542815636924273e-06, |
| "loss": 1.1791144609451294, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.571428571428571, |
| "grad_norm": 0.14399054646492004, |
| "learning_rate": 2.2443474381993418e-06, |
| "loss": 0.6136134266853333, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.574344023323615, |
| "grad_norm": 0.12786594033241272, |
| "learning_rate": 2.2344753461716924e-06, |
| "loss": 1.169732928276062, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.577259475218659, |
| "grad_norm": 0.42270779609680176, |
| "learning_rate": 2.2246653895800945e-06, |
| "loss": 1.167303442955017, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.5801749271137027, |
| "grad_norm": 0.3366575539112091, |
| "learning_rate": 2.2149176697533547e-06, |
| "loss": 0.7395915985107422, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.5830903790087465, |
| "grad_norm": 0.11204802244901657, |
| "learning_rate": 2.2052322873774243e-06, |
| "loss": 1.130765676498413, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.5860058309037903, |
| "grad_norm": 0.40100663900375366, |
| "learning_rate": 2.195609342494358e-06, |
| "loss": 0.9160555601119995, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.5889212827988337, |
| "grad_norm": 0.3878629505634308, |
| "learning_rate": 2.1860489345012882e-06, |
| "loss": 1.1737711429595947, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.5918367346938775, |
| "grad_norm": 0.2504361569881439, |
| "learning_rate": 2.1765511621493837e-06, |
| "loss": 1.1497868299484253, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.5947521865889214, |
| "grad_norm": 0.399038165807724, |
| "learning_rate": 2.1671161235428466e-06, |
| "loss": 1.0515235662460327, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.5976676384839648, |
| "grad_norm": 0.18093329668045044, |
| "learning_rate": 2.1577439161378857e-06, |
| "loss": 1.0114405155181885, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.6005830903790086, |
| "grad_norm": 0.20376266539096832, |
| "learning_rate": 2.1484346367417174e-06, |
| "loss": 1.1349772214889526, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.6034985422740524, |
| "grad_norm": 0.12697869539260864, |
| "learning_rate": 2.139188381511565e-06, |
| "loss": 1.0220611095428467, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.6064139941690962, |
| "grad_norm": 0.17522640526294708, |
| "learning_rate": 2.1300052459536577e-06, |
| "loss": 1.04948890209198, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.60932944606414, |
| "grad_norm": 0.33081164956092834, |
| "learning_rate": 2.120885324922257e-06, |
| "loss": 1.067612648010254, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.612244897959184, |
| "grad_norm": 0.19511879980564117, |
| "learning_rate": 2.1118287126186663e-06, |
| "loss": 1.1198432445526123, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.6151603498542273, |
| "grad_norm": 0.12612418830394745, |
| "learning_rate": 2.102835502590264e-06, |
| "loss": 0.9212133884429932, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.618075801749271, |
| "grad_norm": 1.4945578575134277, |
| "learning_rate": 2.0939057877295337e-06, |
| "loss": 0.9755832552909851, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.620991253644315, |
| "grad_norm": 0.11096255481243134, |
| "learning_rate": 2.085039660273107e-06, |
| "loss": 0.8870418071746826, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.6239067055393583, |
| "grad_norm": 0.16551688313484192, |
| "learning_rate": 2.076237211800807e-06, |
| "loss": 1.1013219356536865, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.626822157434402, |
| "grad_norm": 0.12267225235700607, |
| "learning_rate": 2.067498533234708e-06, |
| "loss": 1.1636854410171509, |
| "step": 1802 |
| }, |
| { |
| "epoch": 2.629737609329446, |
| "grad_norm": 0.21022585034370422, |
| "learning_rate": 2.0588237148381937e-06, |
| "loss": 1.0870646238327026, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.63265306122449, |
| "grad_norm": 0.12315444648265839, |
| "learning_rate": 2.05021284621502e-06, |
| "loss": 1.0031044483184814, |
| "step": 1806 |
| }, |
| { |
| "epoch": 2.6355685131195337, |
| "grad_norm": 0.08722248673439026, |
| "learning_rate": 2.0416660163084007e-06, |
| "loss": 1.1768810749053955, |
| "step": 1808 |
| }, |
| { |
| "epoch": 2.6384839650145775, |
| "grad_norm": 0.14608271420001984, |
| "learning_rate": 2.0331833134000806e-06, |
| "loss": 1.1812292337417603, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.641399416909621, |
| "grad_norm": 0.12209862470626831, |
| "learning_rate": 2.0247648251094187e-06, |
| "loss": 0.5496333241462708, |
| "step": 1812 |
| }, |
| { |
| "epoch": 2.6443148688046647, |
| "grad_norm": 0.14420591294765472, |
| "learning_rate": 2.0164106383924995e-06, |
| "loss": 1.0734022855758667, |
| "step": 1814 |
| }, |
| { |
| "epoch": 2.6472303206997085, |
| "grad_norm": 0.34557104110717773, |
| "learning_rate": 2.008120839541217e-06, |
| "loss": 0.8214896321296692, |
| "step": 1816 |
| }, |
| { |
| "epoch": 2.650145772594752, |
| "grad_norm": 0.19864369928836823, |
| "learning_rate": 1.9998955141823947e-06, |
| "loss": 1.1074302196502686, |
| "step": 1818 |
| }, |
| { |
| "epoch": 2.6530612244897958, |
| "grad_norm": 0.1151181161403656, |
| "learning_rate": 1.9917347472768996e-06, |
| "loss": 1.1880613565444946, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.6559766763848396, |
| "grad_norm": 0.3938349783420563, |
| "learning_rate": 1.983638623118759e-06, |
| "loss": 0.8221843242645264, |
| "step": 1822 |
| }, |
| { |
| "epoch": 2.6588921282798834, |
| "grad_norm": 0.4980735182762146, |
| "learning_rate": 1.9756072253342956e-06, |
| "loss": 1.0243555307388306, |
| "step": 1824 |
| }, |
| { |
| "epoch": 2.6618075801749272, |
| "grad_norm": 0.2903914451599121, |
| "learning_rate": 1.967640636881263e-06, |
| "loss": 1.1823608875274658, |
| "step": 1826 |
| }, |
| { |
| "epoch": 2.664723032069971, |
| "grad_norm": 0.1528269499540329, |
| "learning_rate": 1.9597389400479843e-06, |
| "loss": 1.1882878541946411, |
| "step": 1828 |
| }, |
| { |
| "epoch": 2.6676384839650145, |
| "grad_norm": 0.37738537788391113, |
| "learning_rate": 1.9519022164525086e-06, |
| "loss": 0.8332970142364502, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.6705539358600583, |
| "grad_norm": 0.10077593475580215, |
| "learning_rate": 1.9441305470417622e-06, |
| "loss": 1.1155685186386108, |
| "step": 1832 |
| }, |
| { |
| "epoch": 2.673469387755102, |
| "grad_norm": 0.24888084828853607, |
| "learning_rate": 1.936424012090716e-06, |
| "loss": 1.0899043083190918, |
| "step": 1834 |
| }, |
| { |
| "epoch": 2.6763848396501455, |
| "grad_norm": 0.3049887418746948, |
| "learning_rate": 1.9287826912015588e-06, |
| "loss": 1.3089343309402466, |
| "step": 1836 |
| }, |
| { |
| "epoch": 2.6793002915451893, |
| "grad_norm": 0.15812550485134125, |
| "learning_rate": 1.9212066633028635e-06, |
| "loss": 1.0993826389312744, |
| "step": 1838 |
| }, |
| { |
| "epoch": 2.682215743440233, |
| "grad_norm": 0.265886515378952, |
| "learning_rate": 1.9136960066487884e-06, |
| "loss": 1.0602340698242188, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.685131195335277, |
| "grad_norm": 0.8439386487007141, |
| "learning_rate": 1.9062507988182545e-06, |
| "loss": 1.0067952871322632, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.688046647230321, |
| "grad_norm": 0.45330727100372314, |
| "learning_rate": 1.8988711167141542e-06, |
| "loss": 0.5957139134407043, |
| "step": 1844 |
| }, |
| { |
| "epoch": 2.6909620991253647, |
| "grad_norm": 0.14824670553207397, |
| "learning_rate": 1.8915570365625508e-06, |
| "loss": 1.1712740659713745, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.693877551020408, |
| "grad_norm": 0.10511742532253265, |
| "learning_rate": 1.8843086339118943e-06, |
| "loss": 1.0602518320083618, |
| "step": 1848 |
| }, |
| { |
| "epoch": 2.696793002915452, |
| "grad_norm": 0.07894819229841232, |
| "learning_rate": 1.8771259836322376e-06, |
| "loss": 1.014635682106018, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.6997084548104957, |
| "grad_norm": 0.10334635525941849, |
| "learning_rate": 1.8700091599144688e-06, |
| "loss": 1.0106903314590454, |
| "step": 1852 |
| }, |
| { |
| "epoch": 2.702623906705539, |
| "grad_norm": 0.30136221647262573, |
| "learning_rate": 1.8629582362695395e-06, |
| "loss": 0.673401951789856, |
| "step": 1854 |
| }, |
| { |
| "epoch": 2.705539358600583, |
| "grad_norm": 0.5134400129318237, |
| "learning_rate": 1.8559732855277067e-06, |
| "loss": 1.1158447265625, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.7084548104956268, |
| "grad_norm": 0.35808032751083374, |
| "learning_rate": 1.8490543798377848e-06, |
| "loss": 1.2872017621994019, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.7113702623906706, |
| "grad_norm": 0.04801107197999954, |
| "learning_rate": 1.8422015906663964e-06, |
| "loss": 0.932016909122467, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.7142857142857144, |
| "grad_norm": 0.34277820587158203, |
| "learning_rate": 1.8354149887972297e-06, |
| "loss": 0.6936520338058472, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.7172011661807582, |
| "grad_norm": 0.16731053590774536, |
| "learning_rate": 1.8286946443303187e-06, |
| "loss": 1.1427615880966187, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.7201166180758016, |
| "grad_norm": 0.8489914536476135, |
| "learning_rate": 1.822040626681308e-06, |
| "loss": 1.0948349237442017, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.7230320699708455, |
| "grad_norm": 0.41851627826690674, |
| "learning_rate": 1.8154530045807438e-06, |
| "loss": 1.157147765159607, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.7259475218658893, |
| "grad_norm": 0.09261982142925262, |
| "learning_rate": 1.808931846073361e-06, |
| "loss": 1.0182065963745117, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.7288629737609327, |
| "grad_norm": 0.07328807562589645, |
| "learning_rate": 1.8024772185173758e-06, |
| "loss": 0.9535019397735596, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.7317784256559765, |
| "grad_norm": 0.3953118324279785, |
| "learning_rate": 1.7960891885837988e-06, |
| "loss": 0.5561579465866089, |
| "step": 1874 |
| }, |
| { |
| "epoch": 2.7346938775510203, |
| "grad_norm": 0.7391979694366455, |
| "learning_rate": 1.7897678222557402e-06, |
| "loss": 0.9951037764549255, |
| "step": 1876 |
| }, |
| { |
| "epoch": 2.737609329446064, |
| "grad_norm": 0.16622287034988403, |
| "learning_rate": 1.7835131848277288e-06, |
| "loss": 1.129691243171692, |
| "step": 1878 |
| }, |
| { |
| "epoch": 2.740524781341108, |
| "grad_norm": 0.08795658499002457, |
| "learning_rate": 1.7773253409050398e-06, |
| "loss": 0.9720866680145264, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.743440233236152, |
| "grad_norm": 0.10475818812847137, |
| "learning_rate": 1.7712043544030265e-06, |
| "loss": 0.9624143242835999, |
| "step": 1882 |
| }, |
| { |
| "epoch": 2.746355685131195, |
| "grad_norm": 0.5169785618782043, |
| "learning_rate": 1.7651502885464582e-06, |
| "loss": 0.7830743789672852, |
| "step": 1884 |
| }, |
| { |
| "epoch": 2.749271137026239, |
| "grad_norm": 0.06864479184150696, |
| "learning_rate": 1.7591632058688719e-06, |
| "loss": 1.1376532316207886, |
| "step": 1886 |
| }, |
| { |
| "epoch": 2.752186588921283, |
| "grad_norm": 4.637813091278076, |
| "learning_rate": 1.7532431682119205e-06, |
| "loss": 0.8696690797805786, |
| "step": 1888 |
| }, |
| { |
| "epoch": 2.7551020408163263, |
| "grad_norm": 0.15929657220840454, |
| "learning_rate": 1.7473902367247361e-06, |
| "loss": 1.1236258745193481, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.75801749271137, |
| "grad_norm": 0.3590356707572937, |
| "learning_rate": 1.7416044718633025e-06, |
| "loss": 0.8365395665168762, |
| "step": 1892 |
| }, |
| { |
| "epoch": 2.760932944606414, |
| "grad_norm": 0.1510230451822281, |
| "learning_rate": 1.735885933389825e-06, |
| "loss": 0.6292239427566528, |
| "step": 1894 |
| }, |
| { |
| "epoch": 2.7638483965014577, |
| "grad_norm": 0.18348506093025208, |
| "learning_rate": 1.730234680372116e-06, |
| "loss": 1.1290793418884277, |
| "step": 1896 |
| }, |
| { |
| "epoch": 2.7667638483965016, |
| "grad_norm": 0.16462060809135437, |
| "learning_rate": 1.7246507711829852e-06, |
| "loss": 1.1606987714767456, |
| "step": 1898 |
| }, |
| { |
| "epoch": 2.7696793002915454, |
| "grad_norm": 0.16783565282821655, |
| "learning_rate": 1.719134263499633e-06, |
| "loss": 0.9577206373214722, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.772594752186589, |
| "grad_norm": 0.08972535282373428, |
| "learning_rate": 1.7136852143030605e-06, |
| "loss": 0.9086419343948364, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.7755102040816326, |
| "grad_norm": 0.25966984033584595, |
| "learning_rate": 1.7083036798774771e-06, |
| "loss": 1.16250479221344, |
| "step": 1904 |
| }, |
| { |
| "epoch": 2.7784256559766765, |
| "grad_norm": 0.14714005589485168, |
| "learning_rate": 1.7029897158097191e-06, |
| "loss": 0.6218932867050171, |
| "step": 1906 |
| }, |
| { |
| "epoch": 2.78134110787172, |
| "grad_norm": 0.1505810022354126, |
| "learning_rate": 1.6977433769886777e-06, |
| "loss": 0.9435967206954956, |
| "step": 1908 |
| }, |
| { |
| "epoch": 2.7842565597667637, |
| "grad_norm": 0.5554741621017456, |
| "learning_rate": 1.6925647176047304e-06, |
| "loss": 1.2954356670379639, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.7871720116618075, |
| "grad_norm": 0.7726877331733704, |
| "learning_rate": 1.6874537911491804e-06, |
| "loss": 1.100317120552063, |
| "step": 1912 |
| }, |
| { |
| "epoch": 2.7900874635568513, |
| "grad_norm": 0.1900632381439209, |
| "learning_rate": 1.682410650413707e-06, |
| "loss": 1.1734505891799927, |
| "step": 1914 |
| }, |
| { |
| "epoch": 2.793002915451895, |
| "grad_norm": 0.2996356189250946, |
| "learning_rate": 1.6774353474898176e-06, |
| "loss": 0.6496275067329407, |
| "step": 1916 |
| }, |
| { |
| "epoch": 2.795918367346939, |
| "grad_norm": 0.28916487097740173, |
| "learning_rate": 1.6725279337683096e-06, |
| "loss": 0.8404643535614014, |
| "step": 1918 |
| }, |
| { |
| "epoch": 2.7988338192419824, |
| "grad_norm": 0.30399462580680847, |
| "learning_rate": 1.6676884599387447e-06, |
| "loss": 0.8097843527793884, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.801749271137026, |
| "grad_norm": 0.15744291245937347, |
| "learning_rate": 1.6629169759889167e-06, |
| "loss": 1.1007176637649536, |
| "step": 1922 |
| }, |
| { |
| "epoch": 2.80466472303207, |
| "grad_norm": 0.22451713681221008, |
| "learning_rate": 1.6582135312043415e-06, |
| "loss": 1.1043728590011597, |
| "step": 1924 |
| }, |
| { |
| "epoch": 2.8075801749271134, |
| "grad_norm": 0.16485294699668884, |
| "learning_rate": 1.6535781741677468e-06, |
| "loss": 1.1978418827056885, |
| "step": 1926 |
| }, |
| { |
| "epoch": 2.8104956268221573, |
| "grad_norm": 0.11872020363807678, |
| "learning_rate": 1.6490109527585685e-06, |
| "loss": 1.0319398641586304, |
| "step": 1928 |
| }, |
| { |
| "epoch": 2.813411078717201, |
| "grad_norm": 0.22041387856006622, |
| "learning_rate": 1.6445119141524586e-06, |
| "loss": 1.0383124351501465, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.816326530612245, |
| "grad_norm": 0.1371716856956482, |
| "learning_rate": 1.6400811048207957e-06, |
| "loss": 1.0704172849655151, |
| "step": 1932 |
| }, |
| { |
| "epoch": 2.8192419825072887, |
| "grad_norm": 0.33869630098342896, |
| "learning_rate": 1.6357185705302059e-06, |
| "loss": 0.9032880663871765, |
| "step": 1934 |
| }, |
| { |
| "epoch": 2.8221574344023326, |
| "grad_norm": 0.19506464898586273, |
| "learning_rate": 1.6314243563420908e-06, |
| "loss": 1.1649752855300903, |
| "step": 1936 |
| }, |
| { |
| "epoch": 2.825072886297376, |
| "grad_norm": 0.16767188906669617, |
| "learning_rate": 1.627198506612162e-06, |
| "loss": 1.197486162185669, |
| "step": 1938 |
| }, |
| { |
| "epoch": 2.82798833819242, |
| "grad_norm": 0.17042168974876404, |
| "learning_rate": 1.62304106498998e-06, |
| "loss": 1.065731167793274, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.8309037900874636, |
| "grad_norm": 0.25560781359672546, |
| "learning_rate": 1.6189520744185072e-06, |
| "loss": 0.9224144220352173, |
| "step": 1942 |
| }, |
| { |
| "epoch": 2.8338192419825075, |
| "grad_norm": 0.20863035321235657, |
| "learning_rate": 1.614931577133663e-06, |
| "loss": 1.0565248727798462, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.836734693877551, |
| "grad_norm": 0.19189637899398804, |
| "learning_rate": 1.6109796146638871e-06, |
| "loss": 1.232025384902954, |
| "step": 1946 |
| }, |
| { |
| "epoch": 2.8396501457725947, |
| "grad_norm": 0.6458204984664917, |
| "learning_rate": 1.6070962278297113e-06, |
| "loss": 1.0065245628356934, |
| "step": 1948 |
| }, |
| { |
| "epoch": 2.8425655976676385, |
| "grad_norm": 0.3259865939617157, |
| "learning_rate": 1.6032814567433348e-06, |
| "loss": 1.2361031770706177, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.8454810495626823, |
| "grad_norm": 0.4714111089706421, |
| "learning_rate": 1.5995353408082157e-06, |
| "loss": 1.3339447975158691, |
| "step": 1952 |
| }, |
| { |
| "epoch": 2.848396501457726, |
| "grad_norm": 0.16928227245807648, |
| "learning_rate": 1.5958579187186582e-06, |
| "loss": 1.0442076921463013, |
| "step": 1954 |
| }, |
| { |
| "epoch": 2.8513119533527695, |
| "grad_norm": 0.3731814920902252, |
| "learning_rate": 1.5922492284594174e-06, |
| "loss": 0.878253698348999, |
| "step": 1956 |
| }, |
| { |
| "epoch": 2.8542274052478134, |
| "grad_norm": 0.6527604460716248, |
| "learning_rate": 1.5887093073053036e-06, |
| "loss": 1.0772031545639038, |
| "step": 1958 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.18542839586734772, |
| "learning_rate": 1.5852381918207995e-06, |
| "loss": 1.116060733795166, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.860058309037901, |
| "grad_norm": 0.250535786151886, |
| "learning_rate": 1.5818359178596806e-06, |
| "loss": 1.1924026012420654, |
| "step": 1962 |
| }, |
| { |
| "epoch": 2.8629737609329444, |
| "grad_norm": 0.07601413875818253, |
| "learning_rate": 1.5785025205646468e-06, |
| "loss": 0.9614888429641724, |
| "step": 1964 |
| }, |
| { |
| "epoch": 2.8658892128279883, |
| "grad_norm": 0.17522846162319183, |
| "learning_rate": 1.5752380343669574e-06, |
| "loss": 1.0021862983703613, |
| "step": 1966 |
| }, |
| { |
| "epoch": 2.868804664723032, |
| "grad_norm": 0.22332464158535004, |
| "learning_rate": 1.5720424929860793e-06, |
| "loss": 1.0522475242614746, |
| "step": 1968 |
| }, |
| { |
| "epoch": 2.871720116618076, |
| "grad_norm": 0.39566364884376526, |
| "learning_rate": 1.5689159294293333e-06, |
| "loss": 1.0991871356964111, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.8746355685131197, |
| "grad_norm": 0.3006777763366699, |
| "learning_rate": 1.5658583759915563e-06, |
| "loss": 1.068638801574707, |
| "step": 1972 |
| }, |
| { |
| "epoch": 2.877551020408163, |
| "grad_norm": 0.18835684657096863, |
| "learning_rate": 1.5628698642547674e-06, |
| "loss": 1.0682188272476196, |
| "step": 1974 |
| }, |
| { |
| "epoch": 2.880466472303207, |
| "grad_norm": 0.13527542352676392, |
| "learning_rate": 1.5599504250878434e-06, |
| "loss": 1.0796337127685547, |
| "step": 1976 |
| }, |
| { |
| "epoch": 2.883381924198251, |
| "grad_norm": 0.2289610654115677, |
| "learning_rate": 1.5571000886461946e-06, |
| "loss": 1.1682178974151611, |
| "step": 1978 |
| }, |
| { |
| "epoch": 2.8862973760932946, |
| "grad_norm": 0.3208562731742859, |
| "learning_rate": 1.5543188843714597e-06, |
| "loss": 0.6415768265724182, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.889212827988338, |
| "grad_norm": 0.2707623541355133, |
| "learning_rate": 1.551606840991198e-06, |
| "loss": 0.5584684014320374, |
| "step": 1982 |
| }, |
| { |
| "epoch": 2.892128279883382, |
| "grad_norm": 0.24681639671325684, |
| "learning_rate": 1.5489639865185929e-06, |
| "loss": 0.9024500846862793, |
| "step": 1984 |
| }, |
| { |
| "epoch": 2.8950437317784257, |
| "grad_norm": 0.2885083556175232, |
| "learning_rate": 1.5463903482521637e-06, |
| "loss": 1.0408830642700195, |
| "step": 1986 |
| }, |
| { |
| "epoch": 2.8979591836734695, |
| "grad_norm": 0.2863474190235138, |
| "learning_rate": 1.543885952775484e-06, |
| "loss": 0.5923194289207458, |
| "step": 1988 |
| }, |
| { |
| "epoch": 2.9008746355685133, |
| "grad_norm": 0.13149987161159515, |
| "learning_rate": 1.5414508259569033e-06, |
| "loss": 1.0203630924224854, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.9037900874635567, |
| "grad_norm": 0.08542142808437347, |
| "learning_rate": 1.5390849929492853e-06, |
| "loss": 0.4749288260936737, |
| "step": 1992 |
| }, |
| { |
| "epoch": 2.9067055393586005, |
| "grad_norm": 0.39572906494140625, |
| "learning_rate": 1.5367884781897442e-06, |
| "loss": 0.9975032210350037, |
| "step": 1994 |
| }, |
| { |
| "epoch": 2.9096209912536444, |
| "grad_norm": 0.3944467604160309, |
| "learning_rate": 1.5345613053993947e-06, |
| "loss": 1.2269786596298218, |
| "step": 1996 |
| }, |
| { |
| "epoch": 2.912536443148688, |
| "grad_norm": 0.14900818467140198, |
| "learning_rate": 1.5324034975831053e-06, |
| "loss": 1.2356706857681274, |
| "step": 1998 |
| }, |
| { |
| "epoch": 2.9154518950437316, |
| "grad_norm": 0.31048882007598877, |
| "learning_rate": 1.53031507702926e-06, |
| "loss": 1.218428611755371, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.9183673469387754, |
| "grad_norm": 0.1689174771308899, |
| "learning_rate": 1.5282960653095309e-06, |
| "loss": 0.9620698094367981, |
| "step": 2002 |
| }, |
| { |
| "epoch": 2.9212827988338192, |
| "grad_norm": 0.2305694818496704, |
| "learning_rate": 1.5263464832786536e-06, |
| "loss": 1.2038404941558838, |
| "step": 2004 |
| }, |
| { |
| "epoch": 2.924198250728863, |
| "grad_norm": 0.12036718428134918, |
| "learning_rate": 1.5244663510742102e-06, |
| "loss": 0.9968715310096741, |
| "step": 2006 |
| }, |
| { |
| "epoch": 2.927113702623907, |
| "grad_norm": 0.12467171996831894, |
| "learning_rate": 1.5226556881164256e-06, |
| "loss": 1.0186277627944946, |
| "step": 2008 |
| }, |
| { |
| "epoch": 2.9300291545189503, |
| "grad_norm": 0.13296104967594147, |
| "learning_rate": 1.5209145131079634e-06, |
| "loss": 1.026340365409851, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.932944606413994, |
| "grad_norm": 0.12233509868383408, |
| "learning_rate": 1.5192428440337316e-06, |
| "loss": 1.182348608970642, |
| "step": 2012 |
| }, |
| { |
| "epoch": 2.935860058309038, |
| "grad_norm": 0.1486111879348755, |
| "learning_rate": 1.5176406981607024e-06, |
| "loss": 1.0666353702545166, |
| "step": 2014 |
| }, |
| { |
| "epoch": 2.938775510204082, |
| "grad_norm": 0.5397063493728638, |
| "learning_rate": 1.5161080920377289e-06, |
| "loss": 1.389245629310608, |
| "step": 2016 |
| }, |
| { |
| "epoch": 2.941690962099125, |
| "grad_norm": 0.15026716887950897, |
| "learning_rate": 1.5146450414953738e-06, |
| "loss": 1.0400997400283813, |
| "step": 2018 |
| }, |
| { |
| "epoch": 2.944606413994169, |
| "grad_norm": 0.11009442806243896, |
| "learning_rate": 1.5132515616457505e-06, |
| "loss": 1.001649260520935, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.947521865889213, |
| "grad_norm": 0.7643895745277405, |
| "learning_rate": 1.5119276668823628e-06, |
| "loss": 0.37964844703674316, |
| "step": 2022 |
| }, |
| { |
| "epoch": 2.9504373177842567, |
| "grad_norm": 0.2546994984149933, |
| "learning_rate": 1.510673370879957e-06, |
| "loss": 1.0618635416030884, |
| "step": 2024 |
| }, |
| { |
| "epoch": 2.9533527696793005, |
| "grad_norm": 0.15609286725521088, |
| "learning_rate": 1.5094886865943835e-06, |
| "loss": 1.013123869895935, |
| "step": 2026 |
| }, |
| { |
| "epoch": 2.956268221574344, |
| "grad_norm": 0.09666828066110611, |
| "learning_rate": 1.5083736262624577e-06, |
| "loss": 0.7794107794761658, |
| "step": 2028 |
| }, |
| { |
| "epoch": 2.9591836734693877, |
| "grad_norm": 0.07339915633201599, |
| "learning_rate": 1.5073282014018395e-06, |
| "loss": 1.3735166788101196, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.9620991253644315, |
| "grad_norm": 0.6088920831680298, |
| "learning_rate": 1.5063524228109107e-06, |
| "loss": 0.8808611035346985, |
| "step": 2032 |
| }, |
| { |
| "epoch": 2.9650145772594754, |
| "grad_norm": 0.1744547188282013, |
| "learning_rate": 1.5054463005686626e-06, |
| "loss": 1.1831696033477783, |
| "step": 2034 |
| }, |
| { |
| "epoch": 2.9679300291545188, |
| "grad_norm": 0.24790845811367035, |
| "learning_rate": 1.5046098440345955e-06, |
| "loss": 1.00650155544281, |
| "step": 2036 |
| }, |
| { |
| "epoch": 2.9708454810495626, |
| "grad_norm": 0.18026836216449738, |
| "learning_rate": 1.5038430618486194e-06, |
| "loss": 1.1893560886383057, |
| "step": 2038 |
| }, |
| { |
| "epoch": 2.9737609329446064, |
| "grad_norm": 0.1259116381406784, |
| "learning_rate": 1.5031459619309653e-06, |
| "loss": 1.0219632387161255, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.9766763848396502, |
| "grad_norm": 0.15073135495185852, |
| "learning_rate": 1.502518551482103e-06, |
| "loss": 0.7194128036499023, |
| "step": 2042 |
| }, |
| { |
| "epoch": 2.979591836734694, |
| "grad_norm": 0.05049153417348862, |
| "learning_rate": 1.5019608369826692e-06, |
| "loss": 1.1609373092651367, |
| "step": 2044 |
| }, |
| { |
| "epoch": 2.9825072886297375, |
| "grad_norm": 0.11255478858947754, |
| "learning_rate": 1.501472824193396e-06, |
| "loss": 1.1452926397323608, |
| "step": 2046 |
| }, |
| { |
| "epoch": 2.9854227405247813, |
| "grad_norm": 0.16929762065410614, |
| "learning_rate": 1.5010545181550563e-06, |
| "loss": 0.5922563076019287, |
| "step": 2048 |
| }, |
| { |
| "epoch": 2.988338192419825, |
| "grad_norm": 0.1267116516828537, |
| "learning_rate": 1.5007059231884077e-06, |
| "loss": 0.49650248885154724, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.991253644314869, |
| "grad_norm": 0.1838807910680771, |
| "learning_rate": 1.5004270428941505e-06, |
| "loss": 1.1091796159744263, |
| "step": 2052 |
| }, |
| { |
| "epoch": 2.9941690962099123, |
| "grad_norm": 0.08408603817224503, |
| "learning_rate": 1.500217880152889e-06, |
| "loss": 1.0519981384277344, |
| "step": 2054 |
| }, |
| { |
| "epoch": 2.997084548104956, |
| "grad_norm": 0.36840710043907166, |
| "learning_rate": 1.5000784371251037e-06, |
| "loss": 0.9989621639251709, |
| "step": 2056 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.15688389539718628, |
| "learning_rate": 1.5000087152511266e-06, |
| "loss": 1.1339861154556274, |
| "step": 2058 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 2058, |
| "total_flos": 3.1865440491043553e+18, |
| "train_loss": 1.1440774658359985, |
| "train_runtime": 18974.7516, |
| "train_samples_per_second": 1.735, |
| "train_steps_per_second": 0.108 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 2058, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.1865440491043553e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |