| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 774, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007751937984496124, |
| "grad_norm": 2.372762680053711, |
| "learning_rate": 1.935483870967742e-07, |
| "loss": 4.068140983581543, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.015503875968992248, |
| "grad_norm": 0.1989186704158783, |
| "learning_rate": 5.806451612903226e-07, |
| "loss": 2.024549722671509, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.023255813953488372, |
| "grad_norm": 0.287252277135849, |
| "learning_rate": 9.67741935483871e-07, |
| "loss": 1.9417322874069214, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.031007751937984496, |
| "grad_norm": 0.3818705379962921, |
| "learning_rate": 1.3548387096774193e-06, |
| "loss": 1.9495620727539062, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03875968992248062, |
| "grad_norm": 0.15045583248138428, |
| "learning_rate": 1.7419354838709678e-06, |
| "loss": 2.319340705871582, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.046511627906976744, |
| "grad_norm": 2.149204969406128, |
| "learning_rate": 2.129032258064516e-06, |
| "loss": 3.75003981590271, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.05426356589147287, |
| "grad_norm": 0.2591269314289093, |
| "learning_rate": 2.516129032258065e-06, |
| "loss": 1.8552712202072144, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06201550387596899, |
| "grad_norm": 0.7406859397888184, |
| "learning_rate": 2.9032258064516128e-06, |
| "loss": 2.239339590072632, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.06976744186046512, |
| "grad_norm": 0.9925192594528198, |
| "learning_rate": 3.2903225806451615e-06, |
| "loss": 2.0903704166412354, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.07751937984496124, |
| "grad_norm": 0.29998043179512024, |
| "learning_rate": 3.6774193548387094e-06, |
| "loss": 2.208868980407715, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.08527131782945736, |
| "grad_norm": 2.925621747970581, |
| "learning_rate": 4.064516129032259e-06, |
| "loss": 3.090871810913086, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.09302325581395349, |
| "grad_norm": 0.2913108766078949, |
| "learning_rate": 4.451612903225806e-06, |
| "loss": 1.8751976490020752, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.10077519379844961, |
| "grad_norm": 0.645453929901123, |
| "learning_rate": 4.838709677419355e-06, |
| "loss": 1.761456847190857, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.10852713178294573, |
| "grad_norm": 0.7114430665969849, |
| "learning_rate": 5.2258064516129035e-06, |
| "loss": 1.8018805980682373, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.11627906976744186, |
| "grad_norm": 0.7619907259941101, |
| "learning_rate": 5.612903225806452e-06, |
| "loss": 1.5239174365997314, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.12403100775193798, |
| "grad_norm": 0.16530756652355194, |
| "learning_rate": 6e-06, |
| "loss": 1.5915422439575195, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.13178294573643412, |
| "grad_norm": 0.16290850937366486, |
| "learning_rate": 6.3870967741935485e-06, |
| "loss": 2.3932559490203857, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.13953488372093023, |
| "grad_norm": 0.23374152183532715, |
| "learning_rate": 6.774193548387097e-06, |
| "loss": 1.4996192455291748, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.14728682170542637, |
| "grad_norm": 0.26606833934783936, |
| "learning_rate": 7.161290322580645e-06, |
| "loss": 1.2402312755584717, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.15503875968992248, |
| "grad_norm": 0.12572336196899414, |
| "learning_rate": 7.548387096774193e-06, |
| "loss": 1.5890406370162964, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.16279069767441862, |
| "grad_norm": 0.7024409770965576, |
| "learning_rate": 7.935483870967743e-06, |
| "loss": 1.1217167377471924, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.17054263565891473, |
| "grad_norm": 0.1792139858007431, |
| "learning_rate": 8.32258064516129e-06, |
| "loss": 1.3888567686080933, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.17829457364341086, |
| "grad_norm": 0.16807198524475098, |
| "learning_rate": 8.70967741935484e-06, |
| "loss": 1.746845006942749, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.18604651162790697, |
| "grad_norm": 0.1879073679447174, |
| "learning_rate": 9.096774193548387e-06, |
| "loss": 0.9335119128227234, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1937984496124031, |
| "grad_norm": 0.2711307406425476, |
| "learning_rate": 9.483870967741934e-06, |
| "loss": 0.6987147331237793, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.20155038759689922, |
| "grad_norm": 0.1467754989862442, |
| "learning_rate": 9.870967741935485e-06, |
| "loss": 1.1381912231445312, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.20930232558139536, |
| "grad_norm": 0.5655927062034607, |
| "learning_rate": 1.0258064516129032e-05, |
| "loss": 0.581324577331543, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.21705426356589147, |
| "grad_norm": 0.503146767616272, |
| "learning_rate": 1.0645161290322582e-05, |
| "loss": 1.3841195106506348, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.2248062015503876, |
| "grad_norm": 0.7577893733978271, |
| "learning_rate": 1.1032258064516129e-05, |
| "loss": 1.4041730165481567, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.23255813953488372, |
| "grad_norm": 0.7149530649185181, |
| "learning_rate": 1.1419354838709677e-05, |
| "loss": 1.1680717468261719, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.24031007751937986, |
| "grad_norm": 0.14106588065624237, |
| "learning_rate": 1.1806451612903226e-05, |
| "loss": 1.2515933513641357, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.24806201550387597, |
| "grad_norm": 0.19908978044986725, |
| "learning_rate": 1.2193548387096773e-05, |
| "loss": 1.310453176498413, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.2558139534883721, |
| "grad_norm": 0.2537965476512909, |
| "learning_rate": 1.2580645161290324e-05, |
| "loss": 1.2055346965789795, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.26356589147286824, |
| "grad_norm": 0.14298808574676514, |
| "learning_rate": 1.2967741935483872e-05, |
| "loss": 1.178272008895874, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.2713178294573643, |
| "grad_norm": 0.12258580327033997, |
| "learning_rate": 1.335483870967742e-05, |
| "loss": 1.33051335811615, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.27906976744186046, |
| "grad_norm": 0.1763555109500885, |
| "learning_rate": 1.3741935483870968e-05, |
| "loss": 1.0556892156600952, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.2868217054263566, |
| "grad_norm": 0.28760379552841187, |
| "learning_rate": 1.4129032258064516e-05, |
| "loss": 1.1506415605545044, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.29457364341085274, |
| "grad_norm": 0.3192862868309021, |
| "learning_rate": 1.4516129032258065e-05, |
| "loss": 1.1051461696624756, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.3023255813953488, |
| "grad_norm": 0.17246203124523163, |
| "learning_rate": 1.4903225806451612e-05, |
| "loss": 1.3540679216384888, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.31007751937984496, |
| "grad_norm": 0.07415715605020523, |
| "learning_rate": 1.529032258064516e-05, |
| "loss": 1.1473115682601929, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3178294573643411, |
| "grad_norm": 0.16899384558200836, |
| "learning_rate": 1.567741935483871e-05, |
| "loss": 1.3307069540023804, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.32558139534883723, |
| "grad_norm": 0.16342990100383759, |
| "learning_rate": 1.6064516129032258e-05, |
| "loss": 0.9781491756439209, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.12549830973148346, |
| "learning_rate": 1.6451612903225807e-05, |
| "loss": 1.1001088619232178, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.34108527131782945, |
| "grad_norm": 0.13267701864242554, |
| "learning_rate": 1.6838709677419356e-05, |
| "loss": 1.399463415145874, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.3488372093023256, |
| "grad_norm": 0.4117628037929535, |
| "learning_rate": 1.7225806451612906e-05, |
| "loss": 0.8339037299156189, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.35658914728682173, |
| "grad_norm": 0.3214583992958069, |
| "learning_rate": 1.761290322580645e-05, |
| "loss": 1.1209487915039062, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.3643410852713178, |
| "grad_norm": 0.1995217651128769, |
| "learning_rate": 1.8e-05, |
| "loss": 1.0714277029037476, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.37209302325581395, |
| "grad_norm": 0.0989437848329544, |
| "learning_rate": 1.838709677419355e-05, |
| "loss": 1.192923903465271, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.3798449612403101, |
| "grad_norm": 0.1220928430557251, |
| "learning_rate": 1.87741935483871e-05, |
| "loss": 0.7750011086463928, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.3875968992248062, |
| "grad_norm": 0.13039161264896393, |
| "learning_rate": 1.9161290322580645e-05, |
| "loss": 0.6638532280921936, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3953488372093023, |
| "grad_norm": 0.10157541930675507, |
| "learning_rate": 1.9548387096774194e-05, |
| "loss": 1.3122025728225708, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.40310077519379844, |
| "grad_norm": 0.08540091663599014, |
| "learning_rate": 1.9935483870967743e-05, |
| "loss": 1.203364372253418, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.4108527131782946, |
| "grad_norm": 0.1469828337430954, |
| "learning_rate": 2.032258064516129e-05, |
| "loss": 1.0173336267471313, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.4186046511627907, |
| "grad_norm": 0.09850579500198364, |
| "learning_rate": 2.0709677419354838e-05, |
| "loss": 1.0237982273101807, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.4263565891472868, |
| "grad_norm": 0.08415781706571579, |
| "learning_rate": 2.1096774193548387e-05, |
| "loss": 1.0086685419082642, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.43410852713178294, |
| "grad_norm": 0.08691313862800598, |
| "learning_rate": 2.1483870967741936e-05, |
| "loss": 0.9078419208526611, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.4418604651162791, |
| "grad_norm": 0.1410425454378128, |
| "learning_rate": 2.1870967741935485e-05, |
| "loss": 1.0696989297866821, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.4496124031007752, |
| "grad_norm": 0.11058277636766434, |
| "learning_rate": 2.2258064516129034e-05, |
| "loss": 1.0235304832458496, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.4573643410852713, |
| "grad_norm": 0.17280496656894684, |
| "learning_rate": 2.2645161290322584e-05, |
| "loss": 0.8430470824241638, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.46511627906976744, |
| "grad_norm": 0.0914328321814537, |
| "learning_rate": 2.303225806451613e-05, |
| "loss": 1.2717251777648926, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4728682170542636, |
| "grad_norm": 0.10640335828065872, |
| "learning_rate": 2.341935483870968e-05, |
| "loss": 1.0239697694778442, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.4806201550387597, |
| "grad_norm": 0.10921759903430939, |
| "learning_rate": 2.3806451612903228e-05, |
| "loss": 1.2588107585906982, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.4883720930232558, |
| "grad_norm": 0.21347090601921082, |
| "learning_rate": 2.4193548387096773e-05, |
| "loss": 1.149160385131836, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.49612403100775193, |
| "grad_norm": 0.16161420941352844, |
| "learning_rate": 2.4580645161290323e-05, |
| "loss": 1.1978895664215088, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.5038759689922481, |
| "grad_norm": 0.11207153648138046, |
| "learning_rate": 2.4967741935483872e-05, |
| "loss": 1.1477546691894531, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5116279069767442, |
| "grad_norm": 0.13203421235084534, |
| "learning_rate": 2.535483870967742e-05, |
| "loss": 0.684913694858551, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.5193798449612403, |
| "grad_norm": 0.16211755573749542, |
| "learning_rate": 2.5741935483870967e-05, |
| "loss": 1.135626196861267, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.5271317829457365, |
| "grad_norm": 0.19277559220790863, |
| "learning_rate": 2.6129032258064516e-05, |
| "loss": 1.1265920400619507, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.5348837209302325, |
| "grad_norm": 0.12664702534675598, |
| "learning_rate": 2.6516129032258065e-05, |
| "loss": 1.3438581228256226, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.5426356589147286, |
| "grad_norm": 0.07170073688030243, |
| "learning_rate": 2.690322580645161e-05, |
| "loss": 1.2673969268798828, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5503875968992248, |
| "grad_norm": 0.1509476602077484, |
| "learning_rate": 2.7290322580645163e-05, |
| "loss": 0.6191468834877014, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.5581395348837209, |
| "grad_norm": 0.08468706905841827, |
| "learning_rate": 2.7677419354838713e-05, |
| "loss": 1.3681960105895996, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.5658914728682171, |
| "grad_norm": 0.08381734788417816, |
| "learning_rate": 2.806451612903226e-05, |
| "loss": 1.0066568851470947, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.5736434108527132, |
| "grad_norm": 0.0786685049533844, |
| "learning_rate": 2.8451612903225808e-05, |
| "loss": 0.993072509765625, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.5813953488372093, |
| "grad_norm": 0.16647084057331085, |
| "learning_rate": 2.8838709677419357e-05, |
| "loss": 0.9888545274734497, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5891472868217055, |
| "grad_norm": 0.0718846470117569, |
| "learning_rate": 2.9225806451612906e-05, |
| "loss": 1.2481988668441772, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.5968992248062015, |
| "grad_norm": 0.054430440068244934, |
| "learning_rate": 2.961290322580645e-05, |
| "loss": 1.2282058000564575, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.6046511627906976, |
| "grad_norm": 0.08397103101015091, |
| "learning_rate": 3e-05, |
| "loss": 0.7997589111328125, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.6124031007751938, |
| "grad_norm": 0.16056904196739197, |
| "learning_rate": 2.9994088909931925e-05, |
| "loss": 0.9445045590400696, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.6201550387596899, |
| "grad_norm": 0.0699552372097969, |
| "learning_rate": 2.9976361120666653e-05, |
| "loss": 1.3003367185592651, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.627906976744186, |
| "grad_norm": 0.326346755027771, |
| "learning_rate": 2.9946833069938937e-05, |
| "loss": 1.0931003093719482, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.6356589147286822, |
| "grad_norm": 0.05613398924469948, |
| "learning_rate": 2.990553213703777e-05, |
| "loss": 1.252951741218567, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.6434108527131783, |
| "grad_norm": 0.06693823635578156, |
| "learning_rate": 2.985249661741952e-05, |
| "loss": 0.9843292236328125, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.6511627906976745, |
| "grad_norm": 0.05880317464470863, |
| "learning_rate": 2.9787775687199204e-05, |
| "loss": 1.2224986553192139, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.6589147286821705, |
| "grad_norm": 0.22196456789970398, |
| "learning_rate": 2.9711429357552954e-05, |
| "loss": 0.6978975534439087, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.11266355961561203, |
| "learning_rate": 2.9623528419073845e-05, |
| "loss": 1.1819331645965576, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.6744186046511628, |
| "grad_norm": 0.23993322253227234, |
| "learning_rate": 2.952415437613278e-05, |
| "loss": 0.9013431668281555, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.6821705426356589, |
| "grad_norm": 0.16824588179588318, |
| "learning_rate": 2.9413399371305162e-05, |
| "loss": 0.9518213272094727, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.689922480620155, |
| "grad_norm": 0.10848796367645264, |
| "learning_rate": 2.9291366099933583e-05, |
| "loss": 0.9333250522613525, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.6976744186046512, |
| "grad_norm": 0.07721789926290512, |
| "learning_rate": 2.9158167714905638e-05, |
| "loss": 1.0462111234664917, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7054263565891473, |
| "grad_norm": 0.07368086278438568, |
| "learning_rate": 2.9013927721735172e-05, |
| "loss": 1.1160787343978882, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.7131782945736435, |
| "grad_norm": 0.12432865053415298, |
| "learning_rate": 2.885877986404432e-05, |
| "loss": 0.9108925461769104, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.7209302325581395, |
| "grad_norm": 0.0682280957698822, |
| "learning_rate": 2.86928679995524e-05, |
| "loss": 1.3199962377548218, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.7286821705426356, |
| "grad_norm": 0.0608358271420002, |
| "learning_rate": 2.8516345966686766e-05, |
| "loss": 1.0051369667053223, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.7364341085271318, |
| "grad_norm": 0.16042953729629517, |
| "learning_rate": 2.832937744193922e-05, |
| "loss": 1.0462324619293213, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7441860465116279, |
| "grad_norm": 0.09749440848827362, |
| "learning_rate": 2.8132135788100312e-05, |
| "loss": 1.1752909421920776, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.751937984496124, |
| "grad_norm": 0.057570479810237885, |
| "learning_rate": 2.7924803893512144e-05, |
| "loss": 1.2336088418960571, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.7596899224806202, |
| "grad_norm": 0.05938956141471863, |
| "learning_rate": 2.770757400248891e-05, |
| "loss": 1.2357627153396606, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.7674418604651163, |
| "grad_norm": 0.1574079990386963, |
| "learning_rate": 2.7480647537062183e-05, |
| "loss": 0.6303899884223938, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.7751937984496124, |
| "grad_norm": 0.7175618410110474, |
| "learning_rate": 2.7244234910216427e-05, |
| "loss": 1.449771761894226, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7829457364341085, |
| "grad_norm": 0.12645360827445984, |
| "learning_rate": 2.699855533078781e-05, |
| "loss": 1.136543869972229, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.7906976744186046, |
| "grad_norm": 0.1046365350484848, |
| "learning_rate": 2.6743836600207206e-05, |
| "loss": 1.2583595514297485, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.7984496124031008, |
| "grad_norm": 0.06069144234061241, |
| "learning_rate": 2.6480314901275954e-05, |
| "loss": 1.073771595954895, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.8062015503875969, |
| "grad_norm": 0.08813668042421341, |
| "learning_rate": 2.620823457917009e-05, |
| "loss": 0.8364643454551697, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.813953488372093, |
| "grad_norm": 0.1366778016090393, |
| "learning_rate": 2.592784791487625e-05, |
| "loss": 1.355994701385498, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.8217054263565892, |
| "grad_norm": 0.06360755860805511, |
| "learning_rate": 2.5639414891269164e-05, |
| "loss": 1.226666808128357, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.8294573643410853, |
| "grad_norm": 0.05345217138528824, |
| "learning_rate": 2.534320295204785e-05, |
| "loss": 0.9844304919242859, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.8372093023255814, |
| "grad_norm": 0.13714972138404846, |
| "learning_rate": 2.5039486753753788e-05, |
| "loss": 1.0947370529174805, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.8449612403100775, |
| "grad_norm": 0.08509152382612228, |
| "learning_rate": 2.4728547911101212e-05, |
| "loss": 1.341536283493042, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.8527131782945736, |
| "grad_norm": 0.06335103511810303, |
| "learning_rate": 2.4410674735855555e-05, |
| "loss": 1.2149854898452759, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8604651162790697, |
| "grad_norm": 0.07615257799625397, |
| "learning_rate": 2.4086161969502158e-05, |
| "loss": 0.5183571577072144, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.8682170542635659, |
| "grad_norm": 0.05618668347597122, |
| "learning_rate": 2.3755310509953208e-05, |
| "loss": 1.2107856273651123, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.875968992248062, |
| "grad_norm": 0.08672861009836197, |
| "learning_rate": 2.34184271325462e-05, |
| "loss": 1.2980958223342896, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.8837209302325582, |
| "grad_norm": 0.2590503692626953, |
| "learning_rate": 2.3075824205592707e-05, |
| "loss": 0.9205237030982971, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.8914728682170543, |
| "grad_norm": 0.08758358657360077, |
| "learning_rate": 2.2727819400741172e-05, |
| "loss": 1.058215618133545, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8992248062015504, |
| "grad_norm": 0.07180935889482498, |
| "learning_rate": 2.2374735398422276e-05, |
| "loss": 0.8909934759140015, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.9069767441860465, |
| "grad_norm": 0.07425983250141144, |
| "learning_rate": 2.2016899588650032e-05, |
| "loss": 1.3261955976486206, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.9147286821705426, |
| "grad_norm": 0.10620886832475662, |
| "learning_rate": 2.165464376745598e-05, |
| "loss": 0.7832080125808716, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.9224806201550387, |
| "grad_norm": 0.11008700728416443, |
| "learning_rate": 2.1288303829238058e-05, |
| "loss": 1.243245244026184, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.9302325581395349, |
| "grad_norm": 0.15069428086280823, |
| "learning_rate": 2.091821945530925e-05, |
| "loss": 1.0843262672424316, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.937984496124031, |
| "grad_norm": 0.20396780967712402, |
| "learning_rate": 2.0544733798934988e-05, |
| "loss": 1.2043696641921997, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.9457364341085271, |
| "grad_norm": 0.1518591046333313, |
| "learning_rate": 2.0168193167151183e-05, |
| "loss": 1.0192402601242065, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.9534883720930233, |
| "grad_norm": 0.08409406244754791, |
| "learning_rate": 1.9788946699658032e-05, |
| "loss": 0.7911899089813232, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.9612403100775194, |
| "grad_norm": 0.1363200843334198, |
| "learning_rate": 1.9407346045087278e-05, |
| "loss": 0.7742171883583069, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.9689922480620154, |
| "grad_norm": 0.07617780566215515, |
| "learning_rate": 1.902374503494311e-05, |
| "loss": 1.2457553148269653, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9767441860465116, |
| "grad_norm": 0.0721699595451355, |
| "learning_rate": 1.863849935551905e-05, |
| "loss": 1.016923427581787, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.9844961240310077, |
| "grad_norm": 0.14891768991947174, |
| "learning_rate": 1.825196621809499e-05, |
| "loss": 0.9890077710151672, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.9922480620155039, |
| "grad_norm": 0.28671520948410034, |
| "learning_rate": 1.7864504027720297e-05, |
| "loss": 0.8091475367546082, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.04788220301270485, |
| "learning_rate": 1.747647205088991e-05, |
| "loss": 1.1878516674041748, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.0077519379844961, |
| "grad_norm": 0.06899169087409973, |
| "learning_rate": 1.7088230082421763e-05, |
| "loss": 0.7679018378257751, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.0155038759689923, |
| "grad_norm": 0.0531589537858963, |
| "learning_rate": 1.67001381118443e-05, |
| "loss": 1.128183126449585, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.0232558139534884, |
| "grad_norm": 0.36434194445610046, |
| "learning_rate": 1.6312555989603446e-05, |
| "loss": 0.7761940956115723, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.0310077519379846, |
| "grad_norm": 0.09103479981422424, |
| "learning_rate": 1.5925843093398552e-05, |
| "loss": 0.6045004725456238, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.0387596899224807, |
| "grad_norm": 0.13370856642723083, |
| "learning_rate": 1.554035799495667e-05, |
| "loss": 0.8422982096672058, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.0465116279069768, |
| "grad_norm": 0.0862463042140007, |
| "learning_rate": 1.515645812755415e-05, |
| "loss": 1.0801072120666504, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.054263565891473, |
| "grad_norm": 0.08025830239057541, |
| "learning_rate": 1.4774499454593878e-05, |
| "loss": 0.5319198966026306, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.062015503875969, |
| "grad_norm": 0.12001097202301025, |
| "learning_rate": 1.4394836139545363e-05, |
| "loss": 1.100441575050354, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.069767441860465, |
| "grad_norm": 0.0670214369893074, |
| "learning_rate": 1.4017820217553832e-05, |
| "loss": 0.9006233215332031, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.0775193798449612, |
| "grad_norm": 0.05677079036831856, |
| "learning_rate": 1.3643801269022732e-05, |
| "loss": 0.9063330292701721, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.0852713178294573, |
| "grad_norm": 0.06150190904736519, |
| "learning_rate": 1.3273126095472361e-05, |
| "loss": 0.6792492270469666, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.0930232558139534, |
| "grad_norm": 0.11940456926822662, |
| "learning_rate": 1.2906138397975178e-05, |
| "loss": 0.5880073308944702, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.1007751937984496, |
| "grad_norm": 0.059427861124277115, |
| "learning_rate": 1.2543178458465887e-05, |
| "loss": 0.6459496021270752, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.1085271317829457, |
| "grad_norm": 0.14358076453208923, |
| "learning_rate": 1.2184582824221902e-05, |
| "loss": 0.6029883027076721, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.1162790697674418, |
| "grad_norm": 0.12145813554525375, |
| "learning_rate": 1.1830683995806694e-05, |
| "loss": 0.5689815282821655, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.124031007751938, |
| "grad_norm": 0.06693974882364273, |
| "learning_rate": 1.14818101187653e-05, |
| "loss": 0.9926015138626099, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.1317829457364341, |
| "grad_norm": 0.05298677831888199, |
| "learning_rate": 1.113828467935807e-05, |
| "loss": 0.599225640296936, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.1395348837209303, |
| "grad_norm": 0.1428927630186081, |
| "learning_rate": 1.080042620461448e-05, |
| "loss": 1.0777664184570312, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.1472868217054264, |
| "grad_norm": 0.33007150888442993, |
| "learning_rate": 1.0468547966985433e-05, |
| "loss": 0.592754065990448, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.1550387596899225, |
| "grad_norm": 0.09632085263729095, |
| "learning_rate": 1.0142957693867676e-05, |
| "loss": 0.8235950469970703, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.1627906976744187, |
| "grad_norm": 0.05365417152643204, |
| "learning_rate": 9.823957282269788e-06, |
| "loss": 0.6445293426513672, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.1705426356589148, |
| "grad_norm": 0.05418482422828674, |
| "learning_rate": 9.51184251888427e-06, |
| "loss": 0.8274694085121155, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.178294573643411, |
| "grad_norm": 0.08142586797475815, |
| "learning_rate": 9.206902805825313e-06, |
| "loss": 0.6243199110031128, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.1860465116279069, |
| "grad_norm": 0.09854786843061447, |
| "learning_rate": 8.909420892286522e-06, |
| "loss": 0.9028869867324829, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.193798449612403, |
| "grad_norm": 0.12125203758478165, |
| "learning_rate": 8.619672612367426e-06, |
| "loss": 1.018742322921753, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.2015503875968991, |
| "grad_norm": 0.05682411789894104, |
| "learning_rate": 8.337926629311901e-06, |
| "loss": 0.8793404698371887, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.2093023255813953, |
| "grad_norm": 0.08018923550844193, |
| "learning_rate": 8.064444186395577e-06, |
| "loss": 0.5548562407493591, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.2170542635658914, |
| "grad_norm": 0.4036023020744324, |
| "learning_rate": 7.799478864693305e-06, |
| "loss": 0.40799465775489807, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.2248062015503876, |
| "grad_norm": 0.09002506732940674, |
| "learning_rate": 7.543276347951186e-06, |
| "loss": 0.854184627532959, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.2325581395348837, |
| "grad_norm": 0.05118729546666145, |
| "learning_rate": 7.2960741947813325e-06, |
| "loss": 0.7614866495132446, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.2403100775193798, |
| "grad_norm": 0.048252079635858536, |
| "learning_rate": 7.058101618390395e-06, |
| "loss": 0.9597308039665222, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.248062015503876, |
| "grad_norm": 0.08246824890375137, |
| "learning_rate": 6.829579274046253e-06, |
| "loss": 1.0625540018081665, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.255813953488372, |
| "grad_norm": 0.10916761308908463, |
| "learning_rate": 6.61071905447989e-06, |
| "loss": 0.5724548101425171, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.2635658914728682, |
| "grad_norm": 0.0720956102013588, |
| "learning_rate": 6.40172389341212e-06, |
| "loss": 0.9105185866355896, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.2713178294573644, |
| "grad_norm": 0.1327294260263443, |
| "learning_rate": 6.202787577387384e-06, |
| "loss": 0.6492634415626526, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.2790697674418605, |
| "grad_norm": 0.055963922291994095, |
| "learning_rate": 6.014094566089114e-06, |
| "loss": 0.6872971653938293, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.2868217054263567, |
| "grad_norm": 0.055926598608493805, |
| "learning_rate": 5.835819821303189e-06, |
| "loss": 0.8940346240997314, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.2945736434108528, |
| "grad_norm": 0.11941412091255188, |
| "learning_rate": 5.6681286446881695e-06, |
| "loss": 0.517791748046875, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.302325581395349, |
| "grad_norm": 0.05569684877991676, |
| "learning_rate": 5.511176524502653e-06, |
| "loss": 0.9667401313781738, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.310077519379845, |
| "grad_norm": 0.04035505652427673, |
| "learning_rate": 5.36510899143194e-06, |
| "loss": 0.6123257875442505, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.3178294573643412, |
| "grad_norm": 0.053604647517204285, |
| "learning_rate": 5.2300614836476425e-06, |
| "loss": 0.8501400947570801, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.3255813953488373, |
| "grad_norm": 0.08587184548377991, |
| "learning_rate": 5.106159221225361e-06, |
| "loss": 0.8949810266494751, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.08081138134002686, |
| "learning_rate": 4.993517090036914e-06, |
| "loss": 0.8400573134422302, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.3410852713178294, |
| "grad_norm": 0.5886430740356445, |
| "learning_rate": 4.89223953522472e-06, |
| "loss": 0.24276283383369446, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.3488372093023255, |
| "grad_norm": 0.09961400926113129, |
| "learning_rate": 4.802420464357147e-06, |
| "loss": 0.7755904793739319, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.3565891472868217, |
| "grad_norm": 0.07265698909759521, |
| "learning_rate": 4.724143160354624e-06, |
| "loss": 0.9579305648803711, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.3643410852713178, |
| "grad_norm": 0.057041656225919724, |
| "learning_rate": 4.6574802042672275e-06, |
| "loss": 0.6767364740371704, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.372093023255814, |
| "grad_norm": 0.17740975320339203, |
| "learning_rate": 4.602493407975375e-06, |
| "loss": 0.43329671025276184, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.37984496124031, |
| "grad_norm": 0.10877375304698944, |
| "learning_rate": 4.559233756875995e-06, |
| "loss": 0.9381350874900818, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.3875968992248062, |
| "grad_norm": 0.06163076311349869, |
| "learning_rate": 4.5277413626073554e-06, |
| "loss": 0.6927696466445923, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.3953488372093024, |
| "grad_norm": 0.05219251289963722, |
| "learning_rate": 4.508045425856358e-06, |
| "loss": 0.7913583517074585, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.4031007751937985, |
| "grad_norm": 0.09336534887552261, |
| "learning_rate": 4.500164209282782e-06, |
| "loss": 0.7886365056037903, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.4108527131782946, |
| "grad_norm": 0.05429435521364212, |
| "learning_rate": 4.504105020585611e-06, |
| "loss": 0.7922376394271851, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.4186046511627908, |
| "grad_norm": 0.07881563901901245, |
| "learning_rate": 4.519864205727111e-06, |
| "loss": 0.4149991273880005, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.4263565891472867, |
| "grad_norm": 0.050976116210222244, |
| "learning_rate": 4.547427152320965e-06, |
| "loss": 0.40345892310142517, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.4341085271317828, |
| "grad_norm": 0.13834504783153534, |
| "learning_rate": 4.586768303181312e-06, |
| "loss": 0.8296991586685181, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.441860465116279, |
| "grad_norm": 0.14809437096118927, |
| "learning_rate": 4.637851180020136e-06, |
| "loss": 0.7817572355270386, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.449612403100775, |
| "grad_norm": 0.04827815294265747, |
| "learning_rate": 4.7006284172710145e-06, |
| "loss": 0.7629826664924622, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.4573643410852712, |
| "grad_norm": 0.09472133964300156, |
| "learning_rate": 4.775041806007891e-06, |
| "loss": 0.6470780372619629, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.4651162790697674, |
| "grad_norm": 0.23365041613578796, |
| "learning_rate": 4.861022347918125e-06, |
| "loss": 0.608534038066864, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.4728682170542635, |
| "grad_norm": 0.04631495103240013, |
| "learning_rate": 4.958490319279778e-06, |
| "loss": 0.7042877078056335, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.4806201550387597, |
| "grad_norm": 0.10502998530864716, |
| "learning_rate": 5.067355344883837e-06, |
| "loss": 0.7883775234222412, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.4883720930232558, |
| "grad_norm": 0.1601502150297165, |
| "learning_rate": 5.187516481832796e-06, |
| "loss": 0.6707104444503784, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.496124031007752, |
| "grad_norm": 0.07128335535526276, |
| "learning_rate": 5.318862313137916e-06, |
| "loss": 0.5789697170257568, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.503875968992248, |
| "grad_norm": 0.051307253539562225, |
| "learning_rate": 5.461271051028392e-06, |
| "loss": 0.7344382405281067, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.5116279069767442, |
| "grad_norm": 0.06438810378313065, |
| "learning_rate": 5.614610649876592e-06, |
| "loss": 0.5874720811843872, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.5193798449612403, |
| "grad_norm": 0.12045904248952866, |
| "learning_rate": 5.778738928634702e-06, |
| "loss": 0.6812347769737244, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.5271317829457365, |
| "grad_norm": 0.12421064078807831, |
| "learning_rate": 5.953503702669238e-06, |
| "loss": 0.7551456689834595, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.5348837209302326, |
| "grad_norm": 0.09568459540605545, |
| "learning_rate": 6.138742924871177e-06, |
| "loss": 0.6949661374092102, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.5426356589147288, |
| "grad_norm": 0.1447361260652542, |
| "learning_rate": 6.334284835910859e-06, |
| "loss": 0.7189986705780029, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.550387596899225, |
| "grad_norm": 0.16750486195087433, |
| "learning_rate": 6.53994812349836e-06, |
| "loss": 0.9427580237388611, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.558139534883721, |
| "grad_norm": 0.171998992562294, |
| "learning_rate": 6.755542090501669e-06, |
| "loss": 0.5628240704536438, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.5658914728682172, |
| "grad_norm": 0.04393970966339111, |
| "learning_rate": 6.98086683176673e-06, |
| "loss": 0.8915067315101624, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.5736434108527133, |
| "grad_norm": 0.06884656846523285, |
| "learning_rate": 7.215713419475466e-06, |
| "loss": 0.8232840299606323, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.5813953488372094, |
| "grad_norm": 0.06587037444114685, |
| "learning_rate": 7.459864096869907e-06, |
| "loss": 0.2791080176830292, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.5891472868217056, |
| "grad_norm": 0.07907220721244812, |
| "learning_rate": 7.713092480162712e-06, |
| "loss": 0.7711836695671082, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.5968992248062015, |
| "grad_norm": 0.07207150012254715, |
| "learning_rate": 7.975163768446994e-06, |
| "loss": 0.7907366156578064, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.6046511627906976, |
| "grad_norm": 0.09880537539720535, |
| "learning_rate": 8.245834961410696e-06, |
| "loss": 0.49653783440589905, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.6124031007751938, |
| "grad_norm": 0.05552729591727257, |
| "learning_rate": 8.524855084653766e-06, |
| "loss": 0.44712162017822266, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.62015503875969, |
| "grad_norm": 0.05235432833433151, |
| "learning_rate": 8.811965422399043e-06, |
| "loss": 0.7449421882629395, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.627906976744186, |
| "grad_norm": 0.06001957878470421, |
| "learning_rate": 9.106899757381288e-06, |
| "loss": 0.7992556095123291, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.6356589147286822, |
| "grad_norm": 0.09635739773511887, |
| "learning_rate": 9.409384617691782e-06, |
| "loss": 0.5354418754577637, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.6434108527131783, |
| "grad_norm": 0.06418698281049728, |
| "learning_rate": 9.719139530349551e-06, |
| "loss": 0.8580225110054016, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.6511627906976745, |
| "grad_norm": 0.06972765177488327, |
| "learning_rate": 1.0035877281364364e-05, |
| "loss": 0.6384758949279785, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.6589147286821704, |
| "grad_norm": 0.08585374802350998, |
| "learning_rate": 1.0359304182050086e-05, |
| "loss": 0.88965904712677, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.09853579103946686, |
| "learning_rate": 1.068912034134155e-05, |
| "loss": 0.6674047708511353, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.6744186046511627, |
| "grad_norm": 0.09648997336626053, |
| "learning_rate": 1.1025019943862524e-05, |
| "loss": 0.8478899002075195, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.6821705426356588, |
| "grad_norm": 0.07512516528367996, |
| "learning_rate": 1.1366691533486839e-05, |
| "loss": 0.6321271061897278, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.689922480620155, |
| "grad_norm": 0.052200138568878174, |
| "learning_rate": 1.171381830212979e-05, |
| "loss": 0.6788074970245361, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.697674418604651, |
| "grad_norm": 0.05563069507479668, |
| "learning_rate": 1.2066078383502049e-05, |
| "loss": 0.6934828758239746, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.7054263565891472, |
| "grad_norm": 0.07190771400928497, |
| "learning_rate": 1.242314515155367e-05, |
| "loss": 0.617615282535553, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.7131782945736433, |
| "grad_norm": 0.0535132996737957, |
| "learning_rate": 1.2784687523331521e-05, |
| "loss": 0.7270308136940002, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.7209302325581395, |
| "grad_norm": 0.07645192742347717, |
| "learning_rate": 1.3150370265969243e-05, |
| "loss": 0.5209411978721619, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.7286821705426356, |
| "grad_norm": 0.05032151937484741, |
| "learning_rate": 1.3519854307525166e-05, |
| "loss": 0.8537179231643677, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.7364341085271318, |
| "grad_norm": 0.0833229050040245, |
| "learning_rate": 1.3892797051379974e-05, |
| "loss": 0.8147948980331421, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.744186046511628, |
| "grad_norm": 0.05039038509130478, |
| "learning_rate": 1.4268852693902394e-05, |
| "loss": 0.8937851190567017, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.751937984496124, |
| "grad_norm": 0.046379536390304565, |
| "learning_rate": 1.4647672545088743e-05, |
| "loss": 0.9495609402656555, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.7596899224806202, |
| "grad_norm": 0.15748457610607147, |
| "learning_rate": 1.5028905351878626e-05, |
| "loss": 0.6565471887588501, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.7674418604651163, |
| "grad_norm": 0.06636267155408859, |
| "learning_rate": 1.5412197623847304e-05, |
| "loss": 0.6486067175865173, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.7751937984496124, |
| "grad_norm": 0.2050524801015854, |
| "learning_rate": 1.5797193960972498e-05, |
| "loss": 0.44677096605300903, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.7829457364341086, |
| "grad_norm": 0.06217222660779953, |
| "learning_rate": 1.6183537383171904e-05, |
| "loss": 0.5162793397903442, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.7906976744186047, |
| "grad_norm": 0.040993690490722656, |
| "learning_rate": 1.6570869661305794e-05, |
| "loss": 0.6910752654075623, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.7984496124031009, |
| "grad_norm": 0.08061198890209198, |
| "learning_rate": 1.6958831649337716e-05, |
| "loss": 0.8616082668304443, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.806201550387597, |
| "grad_norm": 0.04601367563009262, |
| "learning_rate": 1.7347063617345443e-05, |
| "loss": 0.7291563153266907, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.8139534883720931, |
| "grad_norm": 0.047357648611068726, |
| "learning_rate": 1.773520558507325e-05, |
| "loss": 0.6221119165420532, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.8217054263565893, |
| "grad_norm": 0.051967669278383255, |
| "learning_rate": 1.812289765571636e-05, |
| "loss": 0.3711574971675873, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.8294573643410854, |
| "grad_norm": 0.06112854182720184, |
| "learning_rate": 1.8509780349628006e-05, |
| "loss": 1.0664476156234741, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.8372093023255816, |
| "grad_norm": 0.05628122389316559, |
| "learning_rate": 1.889549493763964e-05, |
| "loss": 0.517056405544281, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.8449612403100775, |
| "grad_norm": 0.08481237292289734, |
| "learning_rate": 1.9279683773685313e-05, |
| "loss": 0.6204915046691895, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.8527131782945736, |
| "grad_norm": 0.06115817651152611, |
| "learning_rate": 1.9661990626421812e-05, |
| "loss": 0.5971916913986206, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.8604651162790697, |
| "grad_norm": 0.05057134851813316, |
| "learning_rate": 2.0042061009536956e-05, |
| "loss": 0.8469266295433044, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.8682170542635659, |
| "grad_norm": 0.056389980018138885, |
| "learning_rate": 2.041954251043988e-05, |
| "loss": 0.5026958584785461, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.875968992248062, |
| "grad_norm": 0.24311350286006927, |
| "learning_rate": 2.079408511702847e-05, |
| "loss": 0.7969828248023987, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.8837209302325582, |
| "grad_norm": 0.06835257261991501, |
| "learning_rate": 2.1165341542231086e-05, |
| "loss": 0.8827776908874512, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.8914728682170543, |
| "grad_norm": 0.08240044862031937, |
| "learning_rate": 2.15329675460214e-05, |
| "loss": 0.5578112006187439, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.8992248062015504, |
| "grad_norm": 0.18629829585552216, |
| "learning_rate": 2.189662225460808e-05, |
| "loss": 0.6527209281921387, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.9069767441860463, |
| "grad_norm": 0.20755064487457275, |
| "learning_rate": 2.2255968476503105e-05, |
| "loss": 0.5279259085655212, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.9147286821705425, |
| "grad_norm": 0.10236147791147232, |
| "learning_rate": 2.2610673015175764e-05, |
| "loss": 0.5332280397415161, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.9224806201550386, |
| "grad_norm": 0.06894674897193909, |
| "learning_rate": 2.296040697800243e-05, |
| "loss": 0.5644171237945557, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.9302325581395348, |
| "grad_norm": 0.06815814226865768, |
| "learning_rate": 2.3304846081225598e-05, |
| "loss": 0.4328697621822357, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.937984496124031, |
| "grad_norm": 0.05157966539263725, |
| "learning_rate": 2.3643670950639452e-05, |
| "loss": 1.0288827419281006, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.945736434108527, |
| "grad_norm": 0.09934229403734207, |
| "learning_rate": 2.3976567417723124e-05, |
| "loss": 0.7295251488685608, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.9534883720930232, |
| "grad_norm": 0.048644986003637314, |
| "learning_rate": 2.4303226810947168e-05, |
| "loss": 0.6065406203269958, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.9612403100775193, |
| "grad_norm": 0.04483645409345627, |
| "learning_rate": 2.462334624198297e-05, |
| "loss": 0.9802693724632263, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.9689922480620154, |
| "grad_norm": 0.04317508637905121, |
| "learning_rate": 2.4936628886549827e-05, |
| "loss": 0.7508119344711304, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.9767441860465116, |
| "grad_norm": 0.04037319868803024, |
| "learning_rate": 2.524278425963931e-05, |
| "loss": 0.9507177472114563, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.9844961240310077, |
| "grad_norm": 0.09510686993598938, |
| "learning_rate": 2.5541528484861597e-05, |
| "loss": 0.7446141242980957, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.9922480620155039, |
| "grad_norm": 0.05790317803621292, |
| "learning_rate": 2.583258455766412e-05, |
| "loss": 1.0172169208526611, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.06235863268375397, |
| "learning_rate": 2.6115682602178452e-05, |
| "loss": 0.8593966364860535, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.007751937984496, |
| "grad_norm": 0.10687481611967087, |
| "learning_rate": 2.6390560121457266e-05, |
| "loss": 0.49814847111701965, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.0155038759689923, |
| "grad_norm": 0.06080131232738495, |
| "learning_rate": 2.665696224086932e-05, |
| "loss": 0.6426661610603333, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.0232558139534884, |
| "grad_norm": 0.05089246854186058, |
| "learning_rate": 2.6914641944426832e-05, |
| "loss": 0.3929331600666046, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.0310077519379846, |
| "grad_norm": 0.052005868405103683, |
| "learning_rate": 2.7163360303826028e-05, |
| "loss": 0.7454217672348022, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.0387596899224807, |
| "grad_norm": 0.08798030763864517, |
| "learning_rate": 2.7402886699988654e-05, |
| "loss": 0.555336594581604, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.046511627906977, |
| "grad_norm": 0.08520969748497009, |
| "learning_rate": 2.7632999036898793e-05, |
| "loss": 0.7960832118988037, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.054263565891473, |
| "grad_norm": 0.11796343326568604, |
| "learning_rate": 2.7853483947536958e-05, |
| "loss": 0.32746729254722595, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.062015503875969, |
| "grad_norm": 0.08845008909702301, |
| "learning_rate": 2.806413699172034e-05, |
| "loss": 0.6935387253761292, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.0697674418604652, |
| "grad_norm": 0.0798339694738388, |
| "learning_rate": 2.8264762845665833e-05, |
| "loss": 0.43687596917152405, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.0775193798449614, |
| "grad_norm": 0.13349516689777374, |
| "learning_rate": 2.8455175483100068e-05, |
| "loss": 0.5209518671035767, |
| "step": 536 |
| }, |
| { |
| "epoch": 2.0852713178294575, |
| "grad_norm": 0.09369954466819763, |
| "learning_rate": 2.8635198347748558e-05, |
| "loss": 0.6955545544624329, |
| "step": 538 |
| }, |
| { |
| "epoch": 2.0930232558139537, |
| "grad_norm": 0.05283479019999504, |
| "learning_rate": 2.8804664517043884e-05, |
| "loss": 0.5326330661773682, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.10077519379845, |
| "grad_norm": 0.08794603496789932, |
| "learning_rate": 2.896341685690131e-05, |
| "loss": 0.2991466224193573, |
| "step": 542 |
| }, |
| { |
| "epoch": 2.108527131782946, |
| "grad_norm": 0.061321720480918884, |
| "learning_rate": 2.911130816741817e-05, |
| "loss": 0.6528932452201843, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.116279069767442, |
| "grad_norm": 0.054165348410606384, |
| "learning_rate": 2.9248201319362058e-05, |
| "loss": 0.4507254660129547, |
| "step": 546 |
| }, |
| { |
| "epoch": 2.124031007751938, |
| "grad_norm": 0.049841977655887604, |
| "learning_rate": 2.937396938132106e-05, |
| "loss": 0.5575969219207764, |
| "step": 548 |
| }, |
| { |
| "epoch": 2.1317829457364343, |
| "grad_norm": 0.0796908587217331, |
| "learning_rate": 2.94884957373984e-05, |
| "loss": 0.34075814485549927, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.13953488372093, |
| "grad_norm": 0.0508221872150898, |
| "learning_rate": 2.959167419534217e-05, |
| "loss": 0.6530574560165405, |
| "step": 552 |
| }, |
| { |
| "epoch": 2.147286821705426, |
| "grad_norm": 0.04539173096418381, |
| "learning_rate": 2.968340908500995e-05, |
| "loss": 0.6907740831375122, |
| "step": 554 |
| }, |
| { |
| "epoch": 2.1550387596899223, |
| "grad_norm": 0.058205924928188324, |
| "learning_rate": 2.9763615347076983e-05, |
| "loss": 0.44485923647880554, |
| "step": 556 |
| }, |
| { |
| "epoch": 2.1627906976744184, |
| "grad_norm": 0.06929825246334076, |
| "learning_rate": 2.9832218611905778e-05, |
| "loss": 0.8006160259246826, |
| "step": 558 |
| }, |
| { |
| "epoch": 2.1705426356589146, |
| "grad_norm": 0.04289000853896141, |
| "learning_rate": 2.98891552685038e-05, |
| "loss": 0.4540764391422272, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.1782945736434107, |
| "grad_norm": 0.09553489089012146, |
| "learning_rate": 2.993437252350551e-05, |
| "loss": 0.6768143177032471, |
| "step": 562 |
| }, |
| { |
| "epoch": 2.186046511627907, |
| "grad_norm": 0.07250722497701645, |
| "learning_rate": 2.9967828450123938e-05, |
| "loss": 0.37812501192092896, |
| "step": 564 |
| }, |
| { |
| "epoch": 2.193798449612403, |
| "grad_norm": 0.06566791236400604, |
| "learning_rate": 2.998949202702644e-05, |
| "loss": 0.42279544472694397, |
| "step": 566 |
| }, |
| { |
| "epoch": 2.201550387596899, |
| "grad_norm": 0.04562269151210785, |
| "learning_rate": 2.9999343167098628e-05, |
| "loss": 0.5368779301643372, |
| "step": 568 |
| }, |
| { |
| "epoch": 2.2093023255813953, |
| "grad_norm": 0.18373429775238037, |
| "learning_rate": 2.999737273606972e-05, |
| "loss": 0.2325422167778015, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.2170542635658914, |
| "grad_norm": 0.089279904961586, |
| "learning_rate": 2.9983582560982107e-05, |
| "loss": 0.5718221664428711, |
| "step": 572 |
| }, |
| { |
| "epoch": 2.2248062015503876, |
| "grad_norm": 0.05974709987640381, |
| "learning_rate": 2.9957985428497287e-05, |
| "loss": 0.20123861730098724, |
| "step": 574 |
| }, |
| { |
| "epoch": 2.2325581395348837, |
| "grad_norm": 0.09997298568487167, |
| "learning_rate": 2.992060507303966e-05, |
| "loss": 0.4844609797000885, |
| "step": 576 |
| }, |
| { |
| "epoch": 2.24031007751938, |
| "grad_norm": 0.06058935075998306, |
| "learning_rate": 2.98714761547893e-05, |
| "loss": 0.6003465056419373, |
| "step": 578 |
| }, |
| { |
| "epoch": 2.248062015503876, |
| "grad_norm": 0.10959988087415695, |
| "learning_rate": 2.981064422754395e-05, |
| "loss": 0.49536406993865967, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.255813953488372, |
| "grad_norm": 0.03991486132144928, |
| "learning_rate": 2.9738165696480244e-05, |
| "loss": 0.27629104256629944, |
| "step": 582 |
| }, |
| { |
| "epoch": 2.2635658914728682, |
| "grad_norm": 0.054911598563194275, |
| "learning_rate": 2.9654107765853097e-05, |
| "loss": 0.8991298675537109, |
| "step": 584 |
| }, |
| { |
| "epoch": 2.2713178294573644, |
| "grad_norm": 0.15896278619766235, |
| "learning_rate": 2.955854837668194e-05, |
| "loss": 0.4965202808380127, |
| "step": 586 |
| }, |
| { |
| "epoch": 2.2790697674418605, |
| "grad_norm": 0.04574244096875191, |
| "learning_rate": 2.9451576134481485e-05, |
| "loss": 0.8411018252372742, |
| "step": 588 |
| }, |
| { |
| "epoch": 2.2868217054263567, |
| "grad_norm": 0.049831654876470566, |
| "learning_rate": 2.9333290227104026e-05, |
| "loss": 0.39420267939567566, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.294573643410853, |
| "grad_norm": 0.04978864639997482, |
| "learning_rate": 2.9203800332769538e-05, |
| "loss": 0.6820889115333557, |
| "step": 592 |
| }, |
| { |
| "epoch": 2.302325581395349, |
| "grad_norm": 0.047215450555086136, |
| "learning_rate": 2.906322651836873e-05, |
| "loss": 0.5932551622390747, |
| "step": 594 |
| }, |
| { |
| "epoch": 2.310077519379845, |
| "grad_norm": 0.225591778755188, |
| "learning_rate": 2.891169912813347e-05, |
| "loss": 0.6146702170372009, |
| "step": 596 |
| }, |
| { |
| "epoch": 2.317829457364341, |
| "grad_norm": 0.039774637669324875, |
| "learning_rate": 2.8749358662777702e-05, |
| "loss": 0.5908256769180298, |
| "step": 598 |
| }, |
| { |
| "epoch": 2.3255813953488373, |
| "grad_norm": 0.04213051497936249, |
| "learning_rate": 2.857635564922104e-05, |
| "loss": 0.3715899884700775, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.15360774099826813, |
| "learning_rate": 2.8392850501015662e-05, |
| "loss": 0.5040525794029236, |
| "step": 602 |
| }, |
| { |
| "epoch": 2.3410852713178296, |
| "grad_norm": 0.03753961622714996, |
| "learning_rate": 2.819901336960611e-05, |
| "loss": 0.6956257820129395, |
| "step": 604 |
| }, |
| { |
| "epoch": 2.3488372093023258, |
| "grad_norm": 0.11748861521482468, |
| "learning_rate": 2.7995023986559807e-05, |
| "loss": 0.5580706000328064, |
| "step": 606 |
| }, |
| { |
| "epoch": 2.356589147286822, |
| "grad_norm": 0.07652665674686432, |
| "learning_rate": 2.7781071496914573e-05, |
| "loss": 0.7142091989517212, |
| "step": 608 |
| }, |
| { |
| "epoch": 2.3643410852713176, |
| "grad_norm": 0.08042655140161514, |
| "learning_rate": 2.755735428379772e-05, |
| "loss": 0.6314153075218201, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.3720930232558137, |
| "grad_norm": 0.07687707245349884, |
| "learning_rate": 2.7324079784479287e-05, |
| "loss": 0.37621596455574036, |
| "step": 612 |
| }, |
| { |
| "epoch": 2.37984496124031, |
| "grad_norm": 0.0547901950776577, |
| "learning_rate": 2.7081464298030026e-05, |
| "loss": 0.43838873505592346, |
| "step": 614 |
| }, |
| { |
| "epoch": 2.387596899224806, |
| "grad_norm": 0.046424973756074905, |
| "learning_rate": 2.6829732784762465e-05, |
| "loss": 0.6145251989364624, |
| "step": 616 |
| }, |
| { |
| "epoch": 2.395348837209302, |
| "grad_norm": 0.054426416754722595, |
| "learning_rate": 2.656911865764097e-05, |
| "loss": 0.8150652050971985, |
| "step": 618 |
| }, |
| { |
| "epoch": 2.4031007751937983, |
| "grad_norm": 0.04885483533143997, |
| "learning_rate": 2.629986356585431e-05, |
| "loss": 0.5932292342185974, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.4108527131782944, |
| "grad_norm": 0.08931045234203339, |
| "learning_rate": 2.602221717075134e-05, |
| "loss": 0.27623146772384644, |
| "step": 622 |
| }, |
| { |
| "epoch": 2.4186046511627906, |
| "grad_norm": 0.198171466588974, |
| "learning_rate": 2.5736436914347484e-05, |
| "loss": 0.49001190066337585, |
| "step": 624 |
| }, |
| { |
| "epoch": 2.4263565891472867, |
| "grad_norm": 0.06653870642185211, |
| "learning_rate": 2.544278778061694e-05, |
| "loss": 0.362051784992218, |
| "step": 626 |
| }, |
| { |
| "epoch": 2.434108527131783, |
| "grad_norm": 0.045915715396404266, |
| "learning_rate": 2.514154204979152e-05, |
| "loss": 0.7866628766059875, |
| "step": 628 |
| }, |
| { |
| "epoch": 2.441860465116279, |
| "grad_norm": 0.17191638052463531, |
| "learning_rate": 2.483297904589437e-05, |
| "loss": 0.4470142424106598, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.449612403100775, |
| "grad_norm": 0.05101427063345909, |
| "learning_rate": 2.451738487774237e-05, |
| "loss": 0.5275189876556396, |
| "step": 632 |
| }, |
| { |
| "epoch": 2.4573643410852712, |
| "grad_norm": 0.21946240961551666, |
| "learning_rate": 2.419505217365756e-05, |
| "loss": 0.4540758728981018, |
| "step": 634 |
| }, |
| { |
| "epoch": 2.4651162790697674, |
| "grad_norm": 0.04593559727072716, |
| "learning_rate": 2.3866279810133385e-05, |
| "loss": 0.6065298914909363, |
| "step": 636 |
| }, |
| { |
| "epoch": 2.4728682170542635, |
| "grad_norm": 0.059740740805864334, |
| "learning_rate": 2.3531372634707517e-05, |
| "loss": 0.4904002249240875, |
| "step": 638 |
| }, |
| { |
| "epoch": 2.4806201550387597, |
| "grad_norm": 0.050732191652059555, |
| "learning_rate": 2.3190641183298133e-05, |
| "loss": 0.5315344929695129, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.488372093023256, |
| "grad_norm": 0.08542405068874359, |
| "learning_rate": 2.2844401392265773e-05, |
| "loss": 0.5050536394119263, |
| "step": 642 |
| }, |
| { |
| "epoch": 2.496124031007752, |
| "grad_norm": 0.05612539127469063, |
| "learning_rate": 2.2492974305467652e-05, |
| "loss": 0.6769103407859802, |
| "step": 644 |
| }, |
| { |
| "epoch": 2.503875968992248, |
| "grad_norm": 0.08466104418039322, |
| "learning_rate": 2.213668577657632e-05, |
| "loss": 0.6569755673408508, |
| "step": 646 |
| }, |
| { |
| "epoch": 2.511627906976744, |
| "grad_norm": 0.06182940676808357, |
| "learning_rate": 2.177586616693837e-05, |
| "loss": 0.8741461038589478, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.5193798449612403, |
| "grad_norm": 0.10157312452793121, |
| "learning_rate": 2.141085003925353e-05, |
| "loss": 0.25412437319755554, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.5271317829457365, |
| "grad_norm": 0.048401493579149246, |
| "learning_rate": 2.1041975847358226e-05, |
| "loss": 0.5729293823242188, |
| "step": 652 |
| }, |
| { |
| "epoch": 2.5348837209302326, |
| "grad_norm": 0.04819415509700775, |
| "learning_rate": 2.066958562240101e-05, |
| "loss": 0.38875052332878113, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.5426356589147288, |
| "grad_norm": 0.055878717452287674, |
| "learning_rate": 2.029402465570114e-05, |
| "loss": 0.5112055540084839, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.550387596899225, |
| "grad_norm": 0.07780204713344574, |
| "learning_rate": 1.9915641178584115e-05, |
| "loss": 0.6484737992286682, |
| "step": 658 |
| }, |
| { |
| "epoch": 2.558139534883721, |
| "grad_norm": 0.05842196196317673, |
| "learning_rate": 1.953478603949128e-05, |
| "loss": 0.6154912114143372, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.565891472868217, |
| "grad_norm": 0.060033708810806274, |
| "learning_rate": 1.9151812378662645e-05, |
| "loss": 0.6602538228034973, |
| "step": 662 |
| }, |
| { |
| "epoch": 2.5736434108527133, |
| "grad_norm": 0.06244450435042381, |
| "learning_rate": 1.8767075300694745e-05, |
| "loss": 0.3542620837688446, |
| "step": 664 |
| }, |
| { |
| "epoch": 2.5813953488372094, |
| "grad_norm": 0.04792879521846771, |
| "learning_rate": 1.8380931545277047e-05, |
| "loss": 0.5548843145370483, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.5891472868217056, |
| "grad_norm": 0.06219867989420891, |
| "learning_rate": 1.7993739156412294e-05, |
| "loss": 0.4414609968662262, |
| "step": 668 |
| }, |
| { |
| "epoch": 2.5968992248062017, |
| "grad_norm": 0.20640681684017181, |
| "learning_rate": 1.7605857150427447e-05, |
| "loss": 0.5872040390968323, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.604651162790698, |
| "grad_norm": 0.10361964255571365, |
| "learning_rate": 1.7217645183082965e-05, |
| "loss": 0.42644187808036804, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.612403100775194, |
| "grad_norm": 0.04928445816040039, |
| "learning_rate": 1.682946321608938e-05, |
| "loss": 0.7362091541290283, |
| "step": 674 |
| }, |
| { |
| "epoch": 2.62015503875969, |
| "grad_norm": 0.047589972615242004, |
| "learning_rate": 1.6441671183339962e-05, |
| "loss": 0.6339392066001892, |
| "step": 676 |
| }, |
| { |
| "epoch": 2.6279069767441863, |
| "grad_norm": 0.0819360688328743, |
| "learning_rate": 1.605462865716936e-05, |
| "loss": 0.9860416650772095, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.6356589147286824, |
| "grad_norm": 0.04943787679076195, |
| "learning_rate": 1.566869451494735e-05, |
| "loss": 0.8548033237457275, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.6434108527131785, |
| "grad_norm": 0.04377022013068199, |
| "learning_rate": 1.5284226606317093e-05, |
| "loss": 0.44147443771362305, |
| "step": 682 |
| }, |
| { |
| "epoch": 2.6511627906976747, |
| "grad_norm": 0.07998047769069672, |
| "learning_rate": 1.4901581421386185e-05, |
| "loss": 0.6076138615608215, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.6589147286821704, |
| "grad_norm": 0.06454984098672867, |
| "learning_rate": 1.4521113760178527e-05, |
| "loss": 0.5302262902259827, |
| "step": 686 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.04626493155956268, |
| "learning_rate": 1.414317640365306e-05, |
| "loss": 0.34584927558898926, |
| "step": 688 |
| }, |
| { |
| "epoch": 2.6744186046511627, |
| "grad_norm": 0.051621273159980774, |
| "learning_rate": 1.3768119786594849e-05, |
| "loss": 0.409042626619339, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.682170542635659, |
| "grad_norm": 0.04060710594058037, |
| "learning_rate": 1.3396291672681374e-05, |
| "loss": 0.6091200113296509, |
| "step": 692 |
| }, |
| { |
| "epoch": 2.689922480620155, |
| "grad_norm": 0.08434680104255676, |
| "learning_rate": 1.3028036832025761e-05, |
| "loss": 0.5412120819091797, |
| "step": 694 |
| }, |
| { |
| "epoch": 2.697674418604651, |
| "grad_norm": 0.08353015780448914, |
| "learning_rate": 1.2663696721495657e-05, |
| "loss": 0.4864289462566376, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.705426356589147, |
| "grad_norm": 0.05606602877378464, |
| "learning_rate": 1.2303609168104158e-05, |
| "loss": 0.6230313777923584, |
| "step": 698 |
| }, |
| { |
| "epoch": 2.7131782945736433, |
| "grad_norm": 0.05251370370388031, |
| "learning_rate": 1.1948108055766654e-05, |
| "loss": 0.5568596124649048, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.7209302325581395, |
| "grad_norm": 0.06997047364711761, |
| "learning_rate": 1.159752301571363e-05, |
| "loss": 0.6038510799407959, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.7286821705426356, |
| "grad_norm": 0.055278290063142776, |
| "learning_rate": 1.1252179120846778e-05, |
| "loss": 0.4581022262573242, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.7364341085271318, |
| "grad_norm": 0.09902060031890869, |
| "learning_rate": 1.0912396584321704e-05, |
| "loss": 0.3432044982910156, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.744186046511628, |
| "grad_norm": 0.039606984704732895, |
| "learning_rate": 1.0578490462636758e-05, |
| "loss": 0.47076573967933655, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.751937984496124, |
| "grad_norm": 0.0477338470518589, |
| "learning_rate": 1.0250770363503242e-05, |
| "loss": 0.5192364454269409, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.75968992248062, |
| "grad_norm": 0.05086624622344971, |
| "learning_rate": 9.929540158767916e-06, |
| "loss": 0.5344876050949097, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.7674418604651163, |
| "grad_norm": 0.054383717477321625, |
| "learning_rate": 9.615097702653961e-06, |
| "loss": 0.43612009286880493, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.7751937984496124, |
| "grad_norm": 0.14214906096458435, |
| "learning_rate": 9.307734555581662e-06, |
| "loss": 0.3253142535686493, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.7829457364341086, |
| "grad_norm": 0.0564068928360939, |
| "learning_rate": 9.007735713824893e-06, |
| "loss": 0.6681756973266602, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.7906976744186047, |
| "grad_norm": 0.06708779186010361, |
| "learning_rate": 8.715379345254077e-06, |
| "loss": 0.5442537665367126, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.798449612403101, |
| "grad_norm": 0.0515008307993412, |
| "learning_rate": 8.43093653141064e-06, |
| "loss": 0.4479949176311493, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.806201550387597, |
| "grad_norm": 0.05638190731406212, |
| "learning_rate": 8.15467101615213e-06, |
| "loss": 0.5014258623123169, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.813953488372093, |
| "grad_norm": 0.10201210528612137, |
| "learning_rate": 7.886838961101036e-06, |
| "loss": 0.46058180928230286, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.8217054263565893, |
| "grad_norm": 0.04419756308197975, |
| "learning_rate": 7.627688708124106e-06, |
| "loss": 0.5526507496833801, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.8294573643410854, |
| "grad_norm": 0.04845201596617699, |
| "learning_rate": 7.377460549062367e-06, |
| "loss": 0.5275202989578247, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.8372093023255816, |
| "grad_norm": 0.04706037789583206, |
| "learning_rate": 7.13638650292535e-06, |
| "loss": 0.507082998752594, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.8449612403100772, |
| "grad_norm": 0.09409839659929276, |
| "learning_rate": 6.904690100756159e-06, |
| "loss": 0.47032397985458374, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.8527131782945734, |
| "grad_norm": 0.052645258605480194, |
| "learning_rate": 6.682586178366833e-06, |
| "loss": 0.5226138830184937, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.8604651162790695, |
| "grad_norm": 0.1135181188583374, |
| "learning_rate": 6.470280677136127e-06, |
| "loss": 0.47321808338165283, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.8682170542635657, |
| "grad_norm": 0.17483681440353394, |
| "learning_rate": 6.267970453054588e-06, |
| "loss": 0.3441992700099945, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.875968992248062, |
| "grad_norm": 0.0963001623749733, |
| "learning_rate": 6.075843094193802e-06, |
| "loss": 0.4783623516559601, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.883720930232558, |
| "grad_norm": 0.06458084285259247, |
| "learning_rate": 5.894076746769099e-06, |
| "loss": 0.55689537525177, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.891472868217054, |
| "grad_norm": 0.08305259048938751, |
| "learning_rate": 5.722839949957098e-06, |
| "loss": 0.46318957209587097, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.89922480620155, |
| "grad_norm": 0.04614809900522232, |
| "learning_rate": 5.56229147962116e-06, |
| "loss": 0.5878427624702454, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.9069767441860463, |
| "grad_norm": 0.06380455940961838, |
| "learning_rate": 5.412580201089678e-06, |
| "loss": 0.3637682795524597, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.9147286821705425, |
| "grad_norm": 0.08269240707159042, |
| "learning_rate": 5.273844931123672e-06, |
| "loss": 0.5682251453399658, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.9224806201550386, |
| "grad_norm": 0.04752229154109955, |
| "learning_rate": 5.146214309201799e-06, |
| "loss": 0.4354322850704193, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.9302325581395348, |
| "grad_norm": 0.08373939245939255, |
| "learning_rate": 5.029806678241959e-06, |
| "loss": 0.801591694355011, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.937984496124031, |
| "grad_norm": 0.047429829835891724, |
| "learning_rate": 4.924729974870227e-06, |
| "loss": 0.6289284229278564, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.945736434108527, |
| "grad_norm": 0.07736939191818237, |
| "learning_rate": 4.831081629338789e-06, |
| "loss": 0.6334935426712036, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.953488372093023, |
| "grad_norm": 0.04397986829280853, |
| "learning_rate": 4.74894847518571e-06, |
| "loss": 0.6884579658508301, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.9612403100775193, |
| "grad_norm": 0.05032786726951599, |
| "learning_rate": 4.678406668720287e-06, |
| "loss": 0.7347701191902161, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.9689922480620154, |
| "grad_norm": 0.11958435922861099, |
| "learning_rate": 4.61952161840865e-06, |
| "loss": 0.24074648320674896, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.9767441860465116, |
| "grad_norm": 0.078817218542099, |
| "learning_rate": 4.572347924225084e-06, |
| "loss": 0.32004672288894653, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.9844961240310077, |
| "grad_norm": 0.036485981196165085, |
| "learning_rate": 4.536929327025298e-06, |
| "loss": 0.3227121829986572, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.992248062015504, |
| "grad_norm": 0.041053567081689835, |
| "learning_rate": 4.51329866798861e-06, |
| "loss": 0.6057068109512329, |
| "step": 772 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.11451369524002075, |
| "learning_rate": 4.501477858166617e-06, |
| "loss": 0.16319799423217773, |
| "step": 774 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 774, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 99999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.2487544184132076e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|