| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 2077, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00048155059290916753, |
| "grad_norm": 9.47897720336914, |
| "learning_rate": 0.0, |
| "loss": 2.5979, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009631011858183351, |
| "grad_norm": 13.405498504638672, |
| "learning_rate": 3.1746031746031746e-06, |
| "loss": 2.8498, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0014446517787275025, |
| "grad_norm": 6.596500873565674, |
| "learning_rate": 6.349206349206349e-06, |
| "loss": 1.8474, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0019262023716366701, |
| "grad_norm": 8.559901237487793, |
| "learning_rate": 9.523809523809523e-06, |
| "loss": 2.3071, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0024077529645458377, |
| "grad_norm": 7.220107555389404, |
| "learning_rate": 1.2698412698412699e-05, |
| "loss": 2.4772, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002889303557455005, |
| "grad_norm": 9.46921157836914, |
| "learning_rate": 1.5873015873015872e-05, |
| "loss": 2.0344, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0033708541503641725, |
| "grad_norm": 7.574646949768066, |
| "learning_rate": 1.9047619047619046e-05, |
| "loss": 2.9056, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0038524047432733403, |
| "grad_norm": 10.66491985321045, |
| "learning_rate": 2.2222222222222223e-05, |
| "loss": 2.5144, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.004333955336182508, |
| "grad_norm": 5.920387268066406, |
| "learning_rate": 2.5396825396825397e-05, |
| "loss": 2.1156, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.004815505929091675, |
| "grad_norm": 5.609206676483154, |
| "learning_rate": 2.857142857142857e-05, |
| "loss": 2.2243, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005297056522000842, |
| "grad_norm": 6.140780448913574, |
| "learning_rate": 3.1746031746031745e-05, |
| "loss": 2.0492, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00577860711491001, |
| "grad_norm": 6.842236042022705, |
| "learning_rate": 3.492063492063492e-05, |
| "loss": 1.2736, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.006260157707819178, |
| "grad_norm": 9.14393424987793, |
| "learning_rate": 3.809523809523809e-05, |
| "loss": 1.8261, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.006741708300728345, |
| "grad_norm": 8.761028289794922, |
| "learning_rate": 4.126984126984127e-05, |
| "loss": 1.5156, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.007223258893637513, |
| "grad_norm": 7.065422534942627, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 2.0811, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0077048094865466805, |
| "grad_norm": 5.040136814117432, |
| "learning_rate": 4.761904761904762e-05, |
| "loss": 1.8507, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.008186360079455848, |
| "grad_norm": 5.030274868011475, |
| "learning_rate": 5.0793650793650794e-05, |
| "loss": 1.3307, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.008667910672365015, |
| "grad_norm": 6.842209339141846, |
| "learning_rate": 5.396825396825397e-05, |
| "loss": 1.6775, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.009149461265274182, |
| "grad_norm": 3.107877016067505, |
| "learning_rate": 5.714285714285714e-05, |
| "loss": 1.6994, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.00963101185818335, |
| "grad_norm": 8.357791900634766, |
| "learning_rate": 6.0317460317460316e-05, |
| "loss": 2.017, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.010112562451092518, |
| "grad_norm": 5.375106334686279, |
| "learning_rate": 6.349206349206349e-05, |
| "loss": 1.2195, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.010594113044001685, |
| "grad_norm": 8.254910469055176, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 1.7287, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.011075663636910853, |
| "grad_norm": 1.862160086631775, |
| "learning_rate": 6.984126984126984e-05, |
| "loss": 0.9883, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01155721422982002, |
| "grad_norm": 4.388360500335693, |
| "learning_rate": 7.301587301587302e-05, |
| "loss": 0.8999, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.012038764822729187, |
| "grad_norm": 3.909996509552002, |
| "learning_rate": 7.619047619047618e-05, |
| "loss": 1.0851, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.012520315415638356, |
| "grad_norm": 6.695411205291748, |
| "learning_rate": 7.936507936507937e-05, |
| "loss": 1.3302, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.013001866008547523, |
| "grad_norm": 5.960219383239746, |
| "learning_rate": 8.253968253968255e-05, |
| "loss": 1.111, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.01348341660145669, |
| "grad_norm": 4.400773525238037, |
| "learning_rate": 8.571428571428571e-05, |
| "loss": 0.7887, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.013964967194365858, |
| "grad_norm": 5.381070137023926, |
| "learning_rate": 8.888888888888889e-05, |
| "loss": 2.0415, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.014446517787275025, |
| "grad_norm": 2.605618715286255, |
| "learning_rate": 9.206349206349206e-05, |
| "loss": 1.0637, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.014928068380184192, |
| "grad_norm": 4.758686065673828, |
| "learning_rate": 9.523809523809524e-05, |
| "loss": 0.815, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.015409618973093361, |
| "grad_norm": 3.952995777130127, |
| "learning_rate": 9.841269841269841e-05, |
| "loss": 1.5256, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.01589116956600253, |
| "grad_norm": 6.927408695220947, |
| "learning_rate": 0.00010158730158730159, |
| "loss": 1.2863, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.016372720158911697, |
| "grad_norm": 3.77854323387146, |
| "learning_rate": 0.00010476190476190477, |
| "loss": 1.2303, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.016854270751820864, |
| "grad_norm": 3.4671757221221924, |
| "learning_rate": 0.00010793650793650794, |
| "loss": 0.8535, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01733582134473003, |
| "grad_norm": 3.3394312858581543, |
| "learning_rate": 0.00011111111111111112, |
| "loss": 1.7921, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.017817371937639197, |
| "grad_norm": 3.3127732276916504, |
| "learning_rate": 0.00011428571428571428, |
| "loss": 1.0765, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.018298922530548364, |
| "grad_norm": 3.850238800048828, |
| "learning_rate": 0.00011746031746031746, |
| "loss": 1.2267, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.018780473123457535, |
| "grad_norm": 2.822495460510254, |
| "learning_rate": 0.00012063492063492063, |
| "loss": 0.9256, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0192620237163667, |
| "grad_norm": 3.967958688735962, |
| "learning_rate": 0.0001238095238095238, |
| "loss": 1.9019, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01974357430927587, |
| "grad_norm": 3.5611379146575928, |
| "learning_rate": 0.00012698412698412698, |
| "loss": 0.8901, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.020225124902185036, |
| "grad_norm": 40.8494758605957, |
| "learning_rate": 0.00013015873015873017, |
| "loss": 1.0527, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.020706675495094203, |
| "grad_norm": 3.99414324760437, |
| "learning_rate": 0.00013333333333333334, |
| "loss": 1.1458, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.02118822608800337, |
| "grad_norm": 3.9491536617279053, |
| "learning_rate": 0.0001365079365079365, |
| "loss": 1.125, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.02166977668091254, |
| "grad_norm": 2.9987173080444336, |
| "learning_rate": 0.00013968253968253967, |
| "loss": 1.0546, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.022151327273821707, |
| "grad_norm": 3.976832628250122, |
| "learning_rate": 0.00014285714285714287, |
| "loss": 1.1407, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.022632877866730874, |
| "grad_norm": 10.798648834228516, |
| "learning_rate": 0.00014603174603174603, |
| "loss": 0.6169, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02311442845964004, |
| "grad_norm": 3.4334845542907715, |
| "learning_rate": 0.00014920634920634923, |
| "loss": 1.1717, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.023595979052549208, |
| "grad_norm": 4.682596206665039, |
| "learning_rate": 0.00015238095238095237, |
| "loss": 0.9471, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.024077529645458375, |
| "grad_norm": 6.495236396789551, |
| "learning_rate": 0.00015555555555555556, |
| "loss": 1.2659, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.024559080238367545, |
| "grad_norm": 4.563192367553711, |
| "learning_rate": 0.00015873015873015873, |
| "loss": 1.5277, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.025040630831276712, |
| "grad_norm": 2.5679526329040527, |
| "learning_rate": 0.00016190476190476192, |
| "loss": 1.5084, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.02552218142418588, |
| "grad_norm": 3.2736315727233887, |
| "learning_rate": 0.0001650793650793651, |
| "loss": 0.925, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.026003732017095046, |
| "grad_norm": 3.326249837875366, |
| "learning_rate": 0.00016825396825396826, |
| "loss": 0.7348, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.026485282610004213, |
| "grad_norm": 4.137823104858398, |
| "learning_rate": 0.00017142857142857143, |
| "loss": 1.5321, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.02696683320291338, |
| "grad_norm": 5.634565353393555, |
| "learning_rate": 0.00017460317460317462, |
| "loss": 0.8261, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.02744838379582255, |
| "grad_norm": 5.409355163574219, |
| "learning_rate": 0.00017777777777777779, |
| "loss": 1.1813, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.027929934388731717, |
| "grad_norm": 3.174149513244629, |
| "learning_rate": 0.00018095238095238095, |
| "loss": 0.979, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.028411484981640884, |
| "grad_norm": 3.773308038711548, |
| "learning_rate": 0.00018412698412698412, |
| "loss": 0.9456, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.02889303557455005, |
| "grad_norm": 4.3084306716918945, |
| "learning_rate": 0.00018730158730158731, |
| "loss": 1.0249, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.029374586167459218, |
| "grad_norm": 1.6852308511734009, |
| "learning_rate": 0.00019047619047619048, |
| "loss": 0.9553, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.029856136760368385, |
| "grad_norm": 4.570802211761475, |
| "learning_rate": 0.00019365079365079365, |
| "loss": 0.8408, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.030337687353277555, |
| "grad_norm": 4.909348011016846, |
| "learning_rate": 0.00019682539682539682, |
| "loss": 1.1777, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.030819237946186722, |
| "grad_norm": 3.654968500137329, |
| "learning_rate": 0.0002, |
| "loss": 1.04, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.03130078853909589, |
| "grad_norm": 3.1528286933898926, |
| "learning_rate": 0.00019999987833918285, |
| "loss": 0.824, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03178233913200506, |
| "grad_norm": 4.7957024574279785, |
| "learning_rate": 0.00019999951335702735, |
| "loss": 1.0617, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.03226388972491422, |
| "grad_norm": 3.9064583778381348, |
| "learning_rate": 0.00019999890505442158, |
| "loss": 0.854, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.03274544031782339, |
| "grad_norm": 4.40252161026001, |
| "learning_rate": 0.0001999980534328457, |
| "loss": 1.219, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.03322699091073256, |
| "grad_norm": 10.936585426330566, |
| "learning_rate": 0.0001999969584943719, |
| "loss": 1.3314, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03370854150364173, |
| "grad_norm": 5.040441036224365, |
| "learning_rate": 0.00019999562024166438, |
| "loss": 0.7201, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03419009209655089, |
| "grad_norm": 3.9674477577209473, |
| "learning_rate": 0.0001999940386779794, |
| "loss": 0.6603, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03467164268946006, |
| "grad_norm": 2.814729928970337, |
| "learning_rate": 0.00019999221380716527, |
| "loss": 0.829, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.03515319328236923, |
| "grad_norm": 4.869168281555176, |
| "learning_rate": 0.00019999014563366226, |
| "loss": 1.014, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.035634743875278395, |
| "grad_norm": 6.23966121673584, |
| "learning_rate": 0.00019998783416250268, |
| "loss": 1.0555, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.036116294468187565, |
| "grad_norm": 4.9502458572387695, |
| "learning_rate": 0.0001999852793993109, |
| "loss": 1.2713, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.03659784506109673, |
| "grad_norm": 4.012925148010254, |
| "learning_rate": 0.00019998248135030315, |
| "loss": 1.0993, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.0370793956540059, |
| "grad_norm": 7.407429218292236, |
| "learning_rate": 0.00019997944002228774, |
| "loss": 1.2082, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.03756094624691507, |
| "grad_norm": 6.095324993133545, |
| "learning_rate": 0.00019997615542266482, |
| "loss": 0.9925, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.03804249683982423, |
| "grad_norm": 3.40785813331604, |
| "learning_rate": 0.00019997262755942655, |
| "loss": 0.7746, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0385240474327334, |
| "grad_norm": 9.24764633178711, |
| "learning_rate": 0.000199968856441157, |
| "loss": 0.7769, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03900559802564257, |
| "grad_norm": 7.706933975219727, |
| "learning_rate": 0.0001999648420770321, |
| "loss": 1.0516, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.03948714861855174, |
| "grad_norm": 3.0405354499816895, |
| "learning_rate": 0.0001999605844768197, |
| "loss": 1.1086, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.0399686992114609, |
| "grad_norm": 5.7575225830078125, |
| "learning_rate": 0.00019995608365087946, |
| "loss": 1.1402, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.04045024980437007, |
| "grad_norm": 4.292261600494385, |
| "learning_rate": 0.0001999513396101628, |
| "loss": 1.0188, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.04093180039727924, |
| "grad_norm": 4.21382474899292, |
| "learning_rate": 0.00019994635236621306, |
| "loss": 0.82, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.041413350990188405, |
| "grad_norm": 2.9905776977539062, |
| "learning_rate": 0.00019994112193116528, |
| "loss": 1.0823, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.041894901583097575, |
| "grad_norm": 2.917043685913086, |
| "learning_rate": 0.00019993564831774618, |
| "loss": 0.9047, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.04237645217600674, |
| "grad_norm": 2.6178548336029053, |
| "learning_rate": 0.00019992993153927432, |
| "loss": 0.8579, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.04285800276891591, |
| "grad_norm": 5.876251220703125, |
| "learning_rate": 0.00019992397160965982, |
| "loss": 0.8726, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.04333955336182508, |
| "grad_norm": 15.610270500183105, |
| "learning_rate": 0.0001999177685434045, |
| "loss": 1.3112, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.04382110395473424, |
| "grad_norm": 3.1741201877593994, |
| "learning_rate": 0.00019991132235560176, |
| "loss": 1.0837, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.044302654547643414, |
| "grad_norm": 3.8556501865386963, |
| "learning_rate": 0.00019990463306193652, |
| "loss": 0.947, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.04478420514055258, |
| "grad_norm": 4.010826110839844, |
| "learning_rate": 0.00019989770067868533, |
| "loss": 0.726, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.04526575573346175, |
| "grad_norm": 3.4739131927490234, |
| "learning_rate": 0.00019989052522271622, |
| "loss": 0.9653, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.04574730632637091, |
| "grad_norm": 2.507850170135498, |
| "learning_rate": 0.00019988310671148848, |
| "loss": 0.8077, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04622885691928008, |
| "grad_norm": 3.4519567489624023, |
| "learning_rate": 0.00019987544516305311, |
| "loss": 0.897, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.04671040751218925, |
| "grad_norm": 3.6631219387054443, |
| "learning_rate": 0.00019986754059605222, |
| "loss": 0.912, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.047191958105098415, |
| "grad_norm": 3.770810842514038, |
| "learning_rate": 0.00019985939302971938, |
| "loss": 0.8759, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.047673508698007586, |
| "grad_norm": 5.926509857177734, |
| "learning_rate": 0.00019985100248387933, |
| "loss": 1.2971, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.04815505929091675, |
| "grad_norm": 3.8625011444091797, |
| "learning_rate": 0.00019984236897894816, |
| "loss": 0.5941, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04863660988382592, |
| "grad_norm": 5.259921073913574, |
| "learning_rate": 0.000199833492535933, |
| "loss": 0.8503, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.04911816047673509, |
| "grad_norm": 4.988558769226074, |
| "learning_rate": 0.00019982437317643217, |
| "loss": 1.1631, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.04959971106964425, |
| "grad_norm": 7.225802898406982, |
| "learning_rate": 0.00019981501092263503, |
| "loss": 1.3616, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.050081261662553424, |
| "grad_norm": 3.8858981132507324, |
| "learning_rate": 0.00019980540579732196, |
| "loss": 0.6591, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.05056281225546259, |
| "grad_norm": 3.4471282958984375, |
| "learning_rate": 0.00019979555782386434, |
| "loss": 0.7594, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.05104436284837176, |
| "grad_norm": 4.011205196380615, |
| "learning_rate": 0.00019978546702622443, |
| "loss": 1.0898, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.05152591344128093, |
| "grad_norm": 3.6832756996154785, |
| "learning_rate": 0.00019977513342895532, |
| "loss": 0.6194, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.05200746403419009, |
| "grad_norm": 6.4003376960754395, |
| "learning_rate": 0.00019976455705720083, |
| "loss": 0.7407, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.05248901462709926, |
| "grad_norm": 2.832329034805298, |
| "learning_rate": 0.0001997537379366956, |
| "loss": 1.4024, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.052970565220008425, |
| "grad_norm": 4.148341178894043, |
| "learning_rate": 0.00019974267609376494, |
| "loss": 1.3058, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.053452115812917596, |
| "grad_norm": 6.119814872741699, |
| "learning_rate": 0.00019973137155532462, |
| "loss": 0.8581, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.05393366640582676, |
| "grad_norm": 8.218791961669922, |
| "learning_rate": 0.00019971982434888107, |
| "loss": 0.9871, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05441521699873593, |
| "grad_norm": 2.336301326751709, |
| "learning_rate": 0.00019970803450253114, |
| "loss": 1.2561, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0548967675916451, |
| "grad_norm": 10.881673812866211, |
| "learning_rate": 0.0001996960020449621, |
| "loss": 1.3601, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.055378318184554264, |
| "grad_norm": 1.6918381452560425, |
| "learning_rate": 0.00019968372700545145, |
| "loss": 0.9491, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.055859868777463434, |
| "grad_norm": 3.8625855445861816, |
| "learning_rate": 0.00019967120941386709, |
| "loss": 1.3005, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.0563414193703726, |
| "grad_norm": 4.829530715942383, |
| "learning_rate": 0.000199658449300667, |
| "loss": 0.9137, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.05682296996328177, |
| "grad_norm": 6.8195719718933105, |
| "learning_rate": 0.00019964544669689928, |
| "loss": 1.6891, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.05730452055619094, |
| "grad_norm": 4.785418510437012, |
| "learning_rate": 0.00019963220163420214, |
| "loss": 1.4208, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0577860711491001, |
| "grad_norm": 3.4616124629974365, |
| "learning_rate": 0.0001996187141448036, |
| "loss": 0.6869, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05826762174200927, |
| "grad_norm": 4.4392924308776855, |
| "learning_rate": 0.0001996049842615217, |
| "loss": 0.7986, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.058749172334918436, |
| "grad_norm": 3.8779265880584717, |
| "learning_rate": 0.0001995910120177642, |
| "loss": 0.7087, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.059230722927827606, |
| "grad_norm": 6.253561496734619, |
| "learning_rate": 0.00019957679744752859, |
| "loss": 1.2118, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.05971227352073677, |
| "grad_norm": 5.026823043823242, |
| "learning_rate": 0.00019956234058540195, |
| "loss": 0.8501, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.06019382411364594, |
| "grad_norm": 4.044717788696289, |
| "learning_rate": 0.00019954764146656105, |
| "loss": 0.5874, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.06067537470655511, |
| "grad_norm": 3.3693103790283203, |
| "learning_rate": 0.00019953270012677195, |
| "loss": 0.5689, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.061156925299464274, |
| "grad_norm": 3.7752881050109863, |
| "learning_rate": 0.00019951751660239015, |
| "loss": 0.9354, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.061638475892373444, |
| "grad_norm": 4.32492208480835, |
| "learning_rate": 0.00019950209093036052, |
| "loss": 0.8582, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.06212002648528261, |
| "grad_norm": 4.800130367279053, |
| "learning_rate": 0.000199486423148217, |
| "loss": 1.0017, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.06260157707819178, |
| "grad_norm": 2.9463722705841064, |
| "learning_rate": 0.00019947051329408276, |
| "loss": 1.076, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06308312767110094, |
| "grad_norm": 1.6262086629867554, |
| "learning_rate": 0.00019945436140666981, |
| "loss": 1.2582, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06356467826401012, |
| "grad_norm": 7.490293979644775, |
| "learning_rate": 0.0001994379675252793, |
| "loss": 1.4111, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06404622885691928, |
| "grad_norm": 2.8741366863250732, |
| "learning_rate": 0.00019942133168980103, |
| "loss": 1.4064, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.06452777944982845, |
| "grad_norm": 4.327383995056152, |
| "learning_rate": 0.00019940445394071355, |
| "loss": 1.0118, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.06500933004273761, |
| "grad_norm": 2.9505062103271484, |
| "learning_rate": 0.0001993873343190842, |
| "loss": 0.9643, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06549088063564679, |
| "grad_norm": 3.9040603637695312, |
| "learning_rate": 0.00019936997286656855, |
| "loss": 1.075, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.06597243122855595, |
| "grad_norm": 4.2163896560668945, |
| "learning_rate": 0.00019935236962541092, |
| "loss": 1.1182, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.06645398182146511, |
| "grad_norm": 3.8160250186920166, |
| "learning_rate": 0.00019933452463844376, |
| "loss": 1.1374, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.06693553241437429, |
| "grad_norm": 4.615425109863281, |
| "learning_rate": 0.00019931643794908772, |
| "loss": 0.7229, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.06741708300728345, |
| "grad_norm": 5.407428741455078, |
| "learning_rate": 0.00019929810960135172, |
| "loss": 1.258, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06789863360019262, |
| "grad_norm": 2.296764612197876, |
| "learning_rate": 0.00019927953963983254, |
| "loss": 0.8528, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.06838018419310178, |
| "grad_norm": 2.644909381866455, |
| "learning_rate": 0.00019926072810971492, |
| "loss": 1.2323, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.06886173478601096, |
| "grad_norm": 3.273026943206787, |
| "learning_rate": 0.00019924167505677137, |
| "loss": 1.1116, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.06934328537892012, |
| "grad_norm": 2.8930141925811768, |
| "learning_rate": 0.00019922238052736215, |
| "loss": 0.5674, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.06982483597182929, |
| "grad_norm": 11.064878463745117, |
| "learning_rate": 0.00019920284456843498, |
| "loss": 1.054, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.07030638656473846, |
| "grad_norm": 7.081421375274658, |
| "learning_rate": 0.00019918306722752505, |
| "loss": 1.2431, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.07078793715764763, |
| "grad_norm": 7.764263153076172, |
| "learning_rate": 0.00019916304855275497, |
| "loss": 1.4613, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.07126948775055679, |
| "grad_norm": 8.821840286254883, |
| "learning_rate": 0.00019914278859283445, |
| "loss": 1.0356, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.07175103834346597, |
| "grad_norm": 4.817983150482178, |
| "learning_rate": 0.0001991222873970604, |
| "loss": 0.6992, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.07223258893637513, |
| "grad_norm": 2.4728589057922363, |
| "learning_rate": 0.00019910154501531663, |
| "loss": 0.7112, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0727141395292843, |
| "grad_norm": 9.176413536071777, |
| "learning_rate": 0.0001990805614980739, |
| "loss": 0.8131, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.07319569012219346, |
| "grad_norm": 4.006031036376953, |
| "learning_rate": 0.00019905933689638955, |
| "loss": 0.9721, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.07367724071510263, |
| "grad_norm": 3.409881114959717, |
| "learning_rate": 0.00019903787126190772, |
| "loss": 0.9142, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.0741587913080118, |
| "grad_norm": 4.4478373527526855, |
| "learning_rate": 0.00019901616464685888, |
| "loss": 0.9991, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.07464034190092096, |
| "grad_norm": 5.569812774658203, |
| "learning_rate": 0.00019899421710405996, |
| "loss": 0.9713, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.07512189249383014, |
| "grad_norm": 3.1611955165863037, |
| "learning_rate": 0.00019897202868691407, |
| "loss": 0.839, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0756034430867393, |
| "grad_norm": 3.547825574874878, |
| "learning_rate": 0.00019894959944941038, |
| "loss": 0.6568, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.07608499367964847, |
| "grad_norm": 4.14447546005249, |
| "learning_rate": 0.0001989269294461242, |
| "loss": 0.7445, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.07656654427255763, |
| "grad_norm": 3.5642924308776855, |
| "learning_rate": 0.0001989040187322164, |
| "loss": 1.1176, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0770480948654668, |
| "grad_norm": 3.2829089164733887, |
| "learning_rate": 0.00019888086736343384, |
| "loss": 1.2296, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.07752964545837597, |
| "grad_norm": 3.4885025024414062, |
| "learning_rate": 0.0001988574753961087, |
| "loss": 0.827, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.07801119605128513, |
| "grad_norm": 4.431697845458984, |
| "learning_rate": 0.00019883384288715874, |
| "loss": 0.9405, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.07849274664419431, |
| "grad_norm": 4.44993782043457, |
| "learning_rate": 0.000198809969894087, |
| "loss": 0.9186, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.07897429723710347, |
| "grad_norm": 1.9886109828948975, |
| "learning_rate": 0.0001987858564749816, |
| "loss": 1.2175, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.07945584783001264, |
| "grad_norm": 1.904360055923462, |
| "learning_rate": 0.00019876150268851572, |
| "loss": 0.9003, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.0799373984229218, |
| "grad_norm": 1.732532024383545, |
| "learning_rate": 0.00019873690859394737, |
| "loss": 0.8792, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.08041894901583098, |
| "grad_norm": 5.374270439147949, |
| "learning_rate": 0.0001987120742511193, |
| "loss": 1.2873, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.08090049960874014, |
| "grad_norm": 4.2816572189331055, |
| "learning_rate": 0.0001986869997204589, |
| "loss": 0.855, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.0813820502016493, |
| "grad_norm": 2.6494359970092773, |
| "learning_rate": 0.00019866168506297788, |
| "loss": 0.9457, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.08186360079455848, |
| "grad_norm": 4.345731258392334, |
| "learning_rate": 0.00019863613034027224, |
| "loss": 0.8582, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.08234515138746765, |
| "grad_norm": 4.3999342918396, |
| "learning_rate": 0.00019861033561452223, |
| "loss": 0.7079, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.08282670198037681, |
| "grad_norm": 4.363504409790039, |
| "learning_rate": 0.00019858430094849195, |
| "loss": 0.808, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.08330825257328599, |
| "grad_norm": 2.4355199337005615, |
| "learning_rate": 0.0001985580264055294, |
| "loss": 0.833, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.08378980316619515, |
| "grad_norm": 3.622019052505493, |
| "learning_rate": 0.00019853151204956616, |
| "loss": 0.8358, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.08427135375910431, |
| "grad_norm": 4.48416805267334, |
| "learning_rate": 0.00019850475794511749, |
| "loss": 0.9484, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08475290435201348, |
| "grad_norm": 4.607672214508057, |
| "learning_rate": 0.00019847776415728185, |
| "loss": 0.7357, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.08523445494492266, |
| "grad_norm": 2.3138136863708496, |
| "learning_rate": 0.000198450530751741, |
| "loss": 1.2186, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.08571600553783182, |
| "grad_norm": 3.8758018016815186, |
| "learning_rate": 0.00019842305779475968, |
| "loss": 0.9932, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.08619755613074098, |
| "grad_norm": 3.34867000579834, |
| "learning_rate": 0.00019839534535318558, |
| "loss": 1.1485, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.08667910672365016, |
| "grad_norm": 1.9022514820098877, |
| "learning_rate": 0.00019836739349444899, |
| "loss": 0.7949, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08716065731655932, |
| "grad_norm": 3.0250067710876465, |
| "learning_rate": 0.00019833920228656292, |
| "loss": 0.9396, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.08764220790946849, |
| "grad_norm": 3.734602689743042, |
| "learning_rate": 0.0001983107717981226, |
| "loss": 1.1539, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.08812375850237765, |
| "grad_norm": 2.532639741897583, |
| "learning_rate": 0.00019828210209830562, |
| "loss": 0.836, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.08860530909528683, |
| "grad_norm": 3.711615800857544, |
| "learning_rate": 0.00019825319325687154, |
| "loss": 0.9482, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.08908685968819599, |
| "grad_norm": 4.490362167358398, |
| "learning_rate": 0.00019822404534416182, |
| "loss": 1.0918, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.08956841028110515, |
| "grad_norm": 2.6988461017608643, |
| "learning_rate": 0.00019819465843109963, |
| "loss": 0.9532, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.09004996087401433, |
| "grad_norm": 4.821238040924072, |
| "learning_rate": 0.00019816503258918969, |
| "loss": 0.6697, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.0905315114669235, |
| "grad_norm": 5.357283592224121, |
| "learning_rate": 0.00019813516789051808, |
| "loss": 0.8587, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.09101306205983266, |
| "grad_norm": 4.648171901702881, |
| "learning_rate": 0.0001981050644077521, |
| "loss": 1.11, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.09149461265274182, |
| "grad_norm": 3.6666064262390137, |
| "learning_rate": 0.00019807472221414002, |
| "loss": 0.6605, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.091976163245651, |
| "grad_norm": 5.507065773010254, |
| "learning_rate": 0.00019804414138351094, |
| "loss": 0.9696, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.09245771383856016, |
| "grad_norm": 2.8789749145507812, |
| "learning_rate": 0.00019801332199027467, |
| "loss": 1.1445, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.09293926443146933, |
| "grad_norm": 3.538658380508423, |
| "learning_rate": 0.00019798226410942146, |
| "loss": 1.2154, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.0934208150243785, |
| "grad_norm": 5.083014011383057, |
| "learning_rate": 0.00019795096781652182, |
| "loss": 1.3403, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.09390236561728767, |
| "grad_norm": 4.016115665435791, |
| "learning_rate": 0.00019791943318772643, |
| "loss": 1.0433, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.09438391621019683, |
| "grad_norm": 3.3291447162628174, |
| "learning_rate": 0.00019788766029976587, |
| "loss": 1.0221, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.09486546680310601, |
| "grad_norm": 4.776575565338135, |
| "learning_rate": 0.0001978556492299504, |
| "loss": 0.9671, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.09534701739601517, |
| "grad_norm": 2.6845924854278564, |
| "learning_rate": 0.00019782340005616996, |
| "loss": 0.9646, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.09582856798892433, |
| "grad_norm": 3.3309099674224854, |
| "learning_rate": 0.0001977909128568937, |
| "loss": 0.8776, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.0963101185818335, |
| "grad_norm": 5.5021586418151855, |
| "learning_rate": 0.00019775818771117, |
| "loss": 1.0165, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09679166917474268, |
| "grad_norm": 4.549814224243164, |
| "learning_rate": 0.00019772522469862626, |
| "loss": 0.803, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.09727321976765184, |
| "grad_norm": 4.1988067626953125, |
| "learning_rate": 0.00019769202389946863, |
| "loss": 1.2152, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.097754770360561, |
| "grad_norm": 2.6205949783325195, |
| "learning_rate": 0.0001976585853944818, |
| "loss": 0.8637, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.09823632095347018, |
| "grad_norm": 2.774397850036621, |
| "learning_rate": 0.0001976249092650289, |
| "loss": 1.0912, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.09871787154637934, |
| "grad_norm": 1.7028182744979858, |
| "learning_rate": 0.00019759099559305124, |
| "loss": 0.8919, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.0991994221392885, |
| "grad_norm": 3.3020026683807373, |
| "learning_rate": 0.00019755684446106812, |
| "loss": 1.0172, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.09968097273219767, |
| "grad_norm": 3.2804148197174072, |
| "learning_rate": 0.00019752245595217662, |
| "loss": 1.3593, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.10016252332510685, |
| "grad_norm": 2.095794677734375, |
| "learning_rate": 0.00019748783015005144, |
| "loss": 1.0033, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.10064407391801601, |
| "grad_norm": 5.280991077423096, |
| "learning_rate": 0.00019745296713894465, |
| "loss": 0.8974, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.10112562451092517, |
| "grad_norm": 2.9938979148864746, |
| "learning_rate": 0.00019741786700368548, |
| "loss": 0.8715, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.10160717510383435, |
| "grad_norm": 6.764945030212402, |
| "learning_rate": 0.00019738252982968017, |
| "loss": 0.8115, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.10208872569674352, |
| "grad_norm": 2.588499069213867, |
| "learning_rate": 0.00019734695570291168, |
| "loss": 0.9158, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.10257027628965268, |
| "grad_norm": 3.7589855194091797, |
| "learning_rate": 0.00019731114470993962, |
| "loss": 1.2896, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.10305182688256186, |
| "grad_norm": 6.584591865539551, |
| "learning_rate": 0.0001972750969378998, |
| "loss": 1.2588, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.10353337747547102, |
| "grad_norm": 4.74730110168457, |
| "learning_rate": 0.00019723881247450434, |
| "loss": 0.8416, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.10401492806838018, |
| "grad_norm": 3.6451597213745117, |
| "learning_rate": 0.0001972022914080411, |
| "loss": 1.3575, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.10449647866128935, |
| "grad_norm": 4.290178298950195, |
| "learning_rate": 0.00019716553382737379, |
| "loss": 0.8757, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.10497802925419852, |
| "grad_norm": 4.238255023956299, |
| "learning_rate": 0.00019712853982194152, |
| "loss": 0.928, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.10545957984710769, |
| "grad_norm": 4.077386856079102, |
| "learning_rate": 0.00019709130948175876, |
| "loss": 1.1232, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.10594113044001685, |
| "grad_norm": 2.5770697593688965, |
| "learning_rate": 0.0001970538428974149, |
| "loss": 0.867, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.10642268103292603, |
| "grad_norm": 3.7551944255828857, |
| "learning_rate": 0.00019701614016007436, |
| "loss": 1.0874, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.10690423162583519, |
| "grad_norm": 3.312821626663208, |
| "learning_rate": 0.00019697820136147597, |
| "loss": 0.7091, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.10738578221874436, |
| "grad_norm": 2.8703348636627197, |
| "learning_rate": 0.00019694002659393305, |
| "loss": 0.8432, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.10786733281165352, |
| "grad_norm": 2.452773332595825, |
| "learning_rate": 0.0001969016159503331, |
| "loss": 1.2779, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.1083488834045627, |
| "grad_norm": 2.704692840576172, |
| "learning_rate": 0.00019686296952413747, |
| "loss": 0.651, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.10883043399747186, |
| "grad_norm": 3.8419394493103027, |
| "learning_rate": 0.0001968240874093813, |
| "loss": 0.741, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.10931198459038102, |
| "grad_norm": 4.401157379150391, |
| "learning_rate": 0.00019678496970067325, |
| "loss": 0.8972, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.1097935351832902, |
| "grad_norm": 3.6739308834075928, |
| "learning_rate": 0.0001967456164931951, |
| "loss": 0.4634, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.11027508577619936, |
| "grad_norm": 5.284419059753418, |
| "learning_rate": 0.0001967060278827017, |
| "loss": 0.9738, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.11075663636910853, |
| "grad_norm": 4.1912360191345215, |
| "learning_rate": 0.00019666620396552076, |
| "loss": 1.0792, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.11123818696201769, |
| "grad_norm": 3.6482601165771484, |
| "learning_rate": 0.00019662614483855246, |
| "loss": 1.3046, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.11171973755492687, |
| "grad_norm": 6.06255578994751, |
| "learning_rate": 0.00019658585059926934, |
| "loss": 0.796, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.11220128814783603, |
| "grad_norm": 2.2104992866516113, |
| "learning_rate": 0.00019654532134571594, |
| "loss": 0.8634, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.1126828387407452, |
| "grad_norm": 5.140758991241455, |
| "learning_rate": 0.00019650455717650878, |
| "loss": 0.6197, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.11316438933365437, |
| "grad_norm": 3.7386419773101807, |
| "learning_rate": 0.00019646355819083589, |
| "loss": 0.8885, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.11364593992656354, |
| "grad_norm": 4.816273212432861, |
| "learning_rate": 0.0001964223244884566, |
| "loss": 0.9825, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1141274905194727, |
| "grad_norm": 5.175439834594727, |
| "learning_rate": 0.00019638085616970153, |
| "loss": 1.0349, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.11460904111238188, |
| "grad_norm": 7.430823802947998, |
| "learning_rate": 0.00019633915333547202, |
| "loss": 0.7472, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.11509059170529104, |
| "grad_norm": 5.103323936462402, |
| "learning_rate": 0.00019629721608724004, |
| "loss": 1.0827, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1155721422982002, |
| "grad_norm": 3.144728660583496, |
| "learning_rate": 0.0001962550445270481, |
| "loss": 0.6904, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.11605369289110937, |
| "grad_norm": 3.113306760787964, |
| "learning_rate": 0.00019621263875750864, |
| "loss": 1.2587, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.11653524348401854, |
| "grad_norm": 3.700697898864746, |
| "learning_rate": 0.00019616999888180406, |
| "loss": 0.8636, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.11701679407692771, |
| "grad_norm": 3.9976747035980225, |
| "learning_rate": 0.0001961271250036865, |
| "loss": 0.8097, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.11749834466983687, |
| "grad_norm": 3.022249937057495, |
| "learning_rate": 0.0001960840172274773, |
| "loss": 0.616, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.11797989526274605, |
| "grad_norm": 5.003868579864502, |
| "learning_rate": 0.00019604067565806704, |
| "loss": 0.9634, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.11846144585565521, |
| "grad_norm": 3.217082977294922, |
| "learning_rate": 0.00019599710040091512, |
| "loss": 0.8464, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.11894299644856438, |
| "grad_norm": 3.279885768890381, |
| "learning_rate": 0.00019595329156204955, |
| "loss": 0.9137, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.11942454704147354, |
| "grad_norm": 7.89387845993042, |
| "learning_rate": 0.00019590924924806676, |
| "loss": 0.7351, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.11990609763438272, |
| "grad_norm": 3.444643020629883, |
| "learning_rate": 0.0001958649735661312, |
| "loss": 1.1217, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.12038764822729188, |
| "grad_norm": 3.371429443359375, |
| "learning_rate": 0.00019582046462397515, |
| "loss": 0.6736, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.12086919882020104, |
| "grad_norm": 2.9329636096954346, |
| "learning_rate": 0.00019577572252989854, |
| "loss": 0.91, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.12135074941311022, |
| "grad_norm": 4.010715007781982, |
| "learning_rate": 0.00019573074739276858, |
| "loss": 1.1179, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.12183230000601938, |
| "grad_norm": 3.4689531326293945, |
| "learning_rate": 0.00019568553932201947, |
| "loss": 0.8237, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.12231385059892855, |
| "grad_norm": 2.893638849258423, |
| "learning_rate": 0.00019564009842765225, |
| "loss": 1.3797, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.12279540119183771, |
| "grad_norm": 3.4823315143585205, |
| "learning_rate": 0.00019559442482023444, |
| "loss": 1.0072, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.12327695178474689, |
| "grad_norm": 4.071844100952148, |
| "learning_rate": 0.0001955485186108998, |
| "loss": 0.9674, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.12375850237765605, |
| "grad_norm": 3.1438372135162354, |
| "learning_rate": 0.00019550237991134805, |
| "loss": 0.8143, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.12424005297056522, |
| "grad_norm": 1.9269888401031494, |
| "learning_rate": 0.00019545600883384467, |
| "loss": 0.9445, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.12472160356347439, |
| "grad_norm": 4.249316215515137, |
| "learning_rate": 0.0001954094054912205, |
| "loss": 0.9279, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.12520315415638356, |
| "grad_norm": 2.100592851638794, |
| "learning_rate": 0.00019536256999687157, |
| "loss": 0.566, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.12568470474929272, |
| "grad_norm": 2.685699462890625, |
| "learning_rate": 0.00019531550246475876, |
| "loss": 0.6129, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.12616625534220188, |
| "grad_norm": 4.864408493041992, |
| "learning_rate": 0.00019526820300940756, |
| "loss": 1.4781, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.12664780593511105, |
| "grad_norm": 5.040529727935791, |
| "learning_rate": 0.00019522067174590778, |
| "loss": 0.9867, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.12712935652802024, |
| "grad_norm": 4.969310760498047, |
| "learning_rate": 0.00019517290878991324, |
| "loss": 0.8467, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1276109071209294, |
| "grad_norm": 4.031760215759277, |
| "learning_rate": 0.0001951249142576416, |
| "loss": 1.3096, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.12809245771383856, |
| "grad_norm": 4.020783424377441, |
| "learning_rate": 0.00019507668826587387, |
| "loss": 1.1767, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.12857400830674773, |
| "grad_norm": 4.138341426849365, |
| "learning_rate": 0.0001950282309319544, |
| "loss": 1.2766, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.1290555588996569, |
| "grad_norm": 2.7963201999664307, |
| "learning_rate": 0.0001949795423737903, |
| "loss": 0.9197, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.12953710949256605, |
| "grad_norm": 4.995326042175293, |
| "learning_rate": 0.00019493062270985144, |
| "loss": 1.0874, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.13001866008547522, |
| "grad_norm": 2.315774917602539, |
| "learning_rate": 0.00019488147205916985, |
| "loss": 0.7577, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1305002106783844, |
| "grad_norm": 6.237738132476807, |
| "learning_rate": 0.00019483209054133976, |
| "loss": 1.0659, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.13098176127129357, |
| "grad_norm": 2.8713154792785645, |
| "learning_rate": 0.00019478247827651708, |
| "loss": 0.965, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.13146331186420274, |
| "grad_norm": 3.7494618892669678, |
| "learning_rate": 0.00019473263538541914, |
| "loss": 0.708, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.1319448624571119, |
| "grad_norm": 5.115126132965088, |
| "learning_rate": 0.00019468256198932455, |
| "loss": 1.1001, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.13242641305002106, |
| "grad_norm": 1.991768479347229, |
| "learning_rate": 0.00019463225821007268, |
| "loss": 1.5559, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.13290796364293023, |
| "grad_norm": 2.3148343563079834, |
| "learning_rate": 0.00019458172417006347, |
| "loss": 0.7299, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.1333895142358394, |
| "grad_norm": 5.019519329071045, |
| "learning_rate": 0.00019453095999225726, |
| "loss": 0.9006, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.13387106482874858, |
| "grad_norm": 2.933354616165161, |
| "learning_rate": 0.0001944799658001742, |
| "loss": 0.8045, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.13435261542165775, |
| "grad_norm": 3.3015851974487305, |
| "learning_rate": 0.00019442874171789418, |
| "loss": 0.8641, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.1348341660145669, |
| "grad_norm": 3.103376626968384, |
| "learning_rate": 0.00019437728787005657, |
| "loss": 1.2227, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.13531571660747607, |
| "grad_norm": 3.754467725753784, |
| "learning_rate": 0.00019432560438185963, |
| "loss": 0.6292, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.13579726720038524, |
| "grad_norm": 3.083435297012329, |
| "learning_rate": 0.00019427369137906046, |
| "loss": 1.0215, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.1362788177932944, |
| "grad_norm": 2.066274404525757, |
| "learning_rate": 0.00019422154898797472, |
| "loss": 0.9194, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.13676036838620356, |
| "grad_norm": 2.431302785873413, |
| "learning_rate": 0.00019416917733547603, |
| "loss": 1.2826, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.13724191897911275, |
| "grad_norm": 2.993353843688965, |
| "learning_rate": 0.00019411657654899597, |
| "loss": 0.8271, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.13772346957202192, |
| "grad_norm": 5.987199783325195, |
| "learning_rate": 0.0001940637467565237, |
| "loss": 1.0192, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.13820502016493108, |
| "grad_norm": 1.7869144678115845, |
| "learning_rate": 0.00019401068808660546, |
| "loss": 1.1415, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.13868657075784024, |
| "grad_norm": 1.75114905834198, |
| "learning_rate": 0.0001939574006683445, |
| "loss": 0.9718, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.1391681213507494, |
| "grad_norm": 3.034210443496704, |
| "learning_rate": 0.00019390388463140065, |
| "loss": 0.4041, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.13964967194365857, |
| "grad_norm": 7.397146224975586, |
| "learning_rate": 0.00019385014010598998, |
| "loss": 0.9913, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.14013122253656773, |
| "grad_norm": 2.123467206954956, |
| "learning_rate": 0.00019379616722288456, |
| "loss": 0.8688, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.14061277312947693, |
| "grad_norm": 3.545257329940796, |
| "learning_rate": 0.0001937419661134121, |
| "loss": 1.1841, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.1410943237223861, |
| "grad_norm": 3.6970181465148926, |
| "learning_rate": 0.0001936875369094556, |
| "loss": 1.241, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.14157587431529525, |
| "grad_norm": 2.9844279289245605, |
| "learning_rate": 0.0001936328797434531, |
| "loss": 0.9453, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.14205742490820442, |
| "grad_norm": 1.6827529668807983, |
| "learning_rate": 0.00019357799474839735, |
| "loss": 0.9734, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.14253897550111358, |
| "grad_norm": 3.7799909114837646, |
| "learning_rate": 0.00019352288205783536, |
| "loss": 0.7606, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.14302052609402274, |
| "grad_norm": 3.0586202144622803, |
| "learning_rate": 0.00019346754180586825, |
| "loss": 0.6152, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.14350207668693193, |
| "grad_norm": 1.6172605752944946, |
| "learning_rate": 0.00019341197412715082, |
| "loss": 0.7054, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1439836272798411, |
| "grad_norm": 2.8775248527526855, |
| "learning_rate": 0.00019335617915689128, |
| "loss": 1.1397, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.14446517787275026, |
| "grad_norm": 3.2396507263183594, |
| "learning_rate": 0.00019330015703085082, |
| "loss": 0.6854, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.14494672846565942, |
| "grad_norm": 4.7253289222717285, |
| "learning_rate": 0.00019324390788534343, |
| "loss": 0.9446, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.1454282790585686, |
| "grad_norm": 5.362252712249756, |
| "learning_rate": 0.00019318743185723546, |
| "loss": 0.8447, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.14590982965147775, |
| "grad_norm": 2.6400959491729736, |
| "learning_rate": 0.00019313072908394525, |
| "loss": 0.8309, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.14639138024438691, |
| "grad_norm": 2.0676944255828857, |
| "learning_rate": 0.00019307379970344294, |
| "loss": 1.0386, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.1468729308372961, |
| "grad_norm": 2.806190013885498, |
| "learning_rate": 0.00019301664385425004, |
| "loss": 0.8906, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.14735448143020527, |
| "grad_norm": 2.275996446609497, |
| "learning_rate": 0.0001929592616754391, |
| "loss": 0.6389, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.14783603202311443, |
| "grad_norm": 1.520571231842041, |
| "learning_rate": 0.00019290165330663336, |
| "loss": 1.0456, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.1483175826160236, |
| "grad_norm": 2.8603453636169434, |
| "learning_rate": 0.00019284381888800647, |
| "loss": 0.9411, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.14879913320893276, |
| "grad_norm": 3.3105409145355225, |
| "learning_rate": 0.00019278575856028206, |
| "loss": 0.9477, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.14928068380184192, |
| "grad_norm": 2.5645644664764404, |
| "learning_rate": 0.00019272747246473345, |
| "loss": 0.54, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1497622343947511, |
| "grad_norm": 10.714997291564941, |
| "learning_rate": 0.00019266896074318334, |
| "loss": 1.3273, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.15024378498766028, |
| "grad_norm": 3.5276308059692383, |
| "learning_rate": 0.00019261022353800344, |
| "loss": 0.9848, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.15072533558056944, |
| "grad_norm": 4.048092365264893, |
| "learning_rate": 0.00019255126099211402, |
| "loss": 0.7335, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.1512068861734786, |
| "grad_norm": 1.8157590627670288, |
| "learning_rate": 0.00019249207324898376, |
| "loss": 1.0381, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.15168843676638777, |
| "grad_norm": 3.7452712059020996, |
| "learning_rate": 0.0001924326604526292, |
| "loss": 0.6327, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.15216998735929693, |
| "grad_norm": 3.773587942123413, |
| "learning_rate": 0.00019237302274761458, |
| "loss": 0.5525, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.1526515379522061, |
| "grad_norm": 3.6880061626434326, |
| "learning_rate": 0.0001923131602790513, |
| "loss": 1.2273, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.15313308854511526, |
| "grad_norm": 3.083782196044922, |
| "learning_rate": 0.00019225307319259768, |
| "loss": 1.0963, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.15361463913802445, |
| "grad_norm": 5.022973537445068, |
| "learning_rate": 0.00019219276163445862, |
| "loss": 0.7697, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.1540961897309336, |
| "grad_norm": 3.2828280925750732, |
| "learning_rate": 0.00019213222575138522, |
| "loss": 1.069, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.15457774032384278, |
| "grad_norm": 4.293641567230225, |
| "learning_rate": 0.00019207146569067435, |
| "loss": 0.6866, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.15505929091675194, |
| "grad_norm": 4.453739166259766, |
| "learning_rate": 0.00019201048160016838, |
| "loss": 0.939, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.1555408415096611, |
| "grad_norm": 2.3290882110595703, |
| "learning_rate": 0.00019194927362825478, |
| "loss": 0.8467, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.15602239210257027, |
| "grad_norm": 4.609375476837158, |
| "learning_rate": 0.00019188784192386587, |
| "loss": 1.064, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.15650394269547943, |
| "grad_norm": 2.483145236968994, |
| "learning_rate": 0.00019182618663647817, |
| "loss": 0.6174, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.15698549328838862, |
| "grad_norm": 4.485541820526123, |
| "learning_rate": 0.0001917643079161124, |
| "loss": 0.9137, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.15746704388129779, |
| "grad_norm": 4.135148525238037, |
| "learning_rate": 0.00019170220591333283, |
| "loss": 0.7697, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.15794859447420695, |
| "grad_norm": 4.6411333084106445, |
| "learning_rate": 0.00019163988077924713, |
| "loss": 1.1936, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.1584301450671161, |
| "grad_norm": 3.8218069076538086, |
| "learning_rate": 0.00019157733266550575, |
| "loss": 0.7865, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.15891169566002528, |
| "grad_norm": 5.044341564178467, |
| "learning_rate": 0.00019151456172430183, |
| "loss": 1.0328, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15939324625293444, |
| "grad_norm": 5.211885929107666, |
| "learning_rate": 0.0001914515681083707, |
| "loss": 1.096, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.1598747968458436, |
| "grad_norm": 4.778816223144531, |
| "learning_rate": 0.00019138835197098937, |
| "loss": 0.7164, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.1603563474387528, |
| "grad_norm": 3.1588540077209473, |
| "learning_rate": 0.00019132491346597643, |
| "loss": 1.1062, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.16083789803166196, |
| "grad_norm": 2.6961734294891357, |
| "learning_rate": 0.00019126125274769145, |
| "loss": 0.7453, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.16131944862457112, |
| "grad_norm": 2.4815406799316406, |
| "learning_rate": 0.00019119736997103476, |
| "loss": 0.9451, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.16180099921748028, |
| "grad_norm": 1.7432652711868286, |
| "learning_rate": 0.000191133265291447, |
| "loss": 0.8675, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.16228254981038945, |
| "grad_norm": 2.9783449172973633, |
| "learning_rate": 0.00019106893886490864, |
| "loss": 1.1438, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.1627641004032986, |
| "grad_norm": 3.031538248062134, |
| "learning_rate": 0.00019100439084793989, |
| "loss": 0.7219, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.1632456509962078, |
| "grad_norm": 6.661623001098633, |
| "learning_rate": 0.00019093962139759998, |
| "loss": 1.3528, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.16372720158911697, |
| "grad_norm": 3.402763843536377, |
| "learning_rate": 0.000190874630671487, |
| "loss": 0.9113, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.16420875218202613, |
| "grad_norm": 3.233682632446289, |
| "learning_rate": 0.00019080941882773745, |
| "loss": 1.1422, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.1646903027749353, |
| "grad_norm": 3.8870816230773926, |
| "learning_rate": 0.00019074398602502584, |
| "loss": 0.6947, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.16517185336784446, |
| "grad_norm": 4.976253509521484, |
| "learning_rate": 0.00019067833242256442, |
| "loss": 1.066, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.16565340396075362, |
| "grad_norm": 3.8390371799468994, |
| "learning_rate": 0.0001906124581801025, |
| "loss": 0.7719, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.16613495455366278, |
| "grad_norm": 6.181506633758545, |
| "learning_rate": 0.0001905463634579264, |
| "loss": 0.6594, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.16661650514657197, |
| "grad_norm": 2.2047243118286133, |
| "learning_rate": 0.00019048004841685888, |
| "loss": 0.8504, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.16709805573948114, |
| "grad_norm": 7.163036346435547, |
| "learning_rate": 0.00019041351321825883, |
| "loss": 1.1279, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.1675796063323903, |
| "grad_norm": 3.1026716232299805, |
| "learning_rate": 0.00019034675802402068, |
| "loss": 1.1557, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.16806115692529947, |
| "grad_norm": 2.942558526992798, |
| "learning_rate": 0.00019027978299657436, |
| "loss": 0.4541, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.16854270751820863, |
| "grad_norm": 2.2551536560058594, |
| "learning_rate": 0.00019021258829888456, |
| "loss": 0.7569, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1690242581111178, |
| "grad_norm": 3.777118682861328, |
| "learning_rate": 0.00019014517409445052, |
| "loss": 1.0268, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.16950580870402696, |
| "grad_norm": 2.533811330795288, |
| "learning_rate": 0.00019007754054730554, |
| "loss": 0.6828, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.16998735929693615, |
| "grad_norm": 3.8600101470947266, |
| "learning_rate": 0.00019000968782201675, |
| "loss": 0.5744, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.1704689098898453, |
| "grad_norm": 3.22756028175354, |
| "learning_rate": 0.00018994161608368448, |
| "loss": 1.3307, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.17095046048275447, |
| "grad_norm": 2.4903550148010254, |
| "learning_rate": 0.00018987332549794196, |
| "loss": 1.0393, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.17143201107566364, |
| "grad_norm": 2.447472333908081, |
| "learning_rate": 0.00018980481623095502, |
| "loss": 0.7863, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.1719135616685728, |
| "grad_norm": 4.140078544616699, |
| "learning_rate": 0.00018973608844942148, |
| "loss": 0.8217, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.17239511226148196, |
| "grad_norm": 2.152505874633789, |
| "learning_rate": 0.00018966714232057094, |
| "loss": 1.1535, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.17287666285439113, |
| "grad_norm": 3.840864419937134, |
| "learning_rate": 0.00018959797801216418, |
| "loss": 0.6864, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.17335821344730032, |
| "grad_norm": 4.183706283569336, |
| "learning_rate": 0.000189528595692493, |
| "loss": 0.7509, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.17383976404020948, |
| "grad_norm": 3.3056836128234863, |
| "learning_rate": 0.00018945899553037956, |
| "loss": 0.8942, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.17432131463311865, |
| "grad_norm": 4.057851314544678, |
| "learning_rate": 0.00018938917769517613, |
| "loss": 1.4257, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.1748028652260278, |
| "grad_norm": 2.97546648979187, |
| "learning_rate": 0.00018931914235676458, |
| "loss": 0.9936, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.17528441581893697, |
| "grad_norm": 0.8970528841018677, |
| "learning_rate": 0.00018924888968555606, |
| "loss": 0.6008, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.17576596641184614, |
| "grad_norm": 4.94012975692749, |
| "learning_rate": 0.00018917841985249055, |
| "loss": 0.985, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1762475170047553, |
| "grad_norm": 3.416455030441284, |
| "learning_rate": 0.0001891077330290363, |
| "loss": 0.9864, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.1767290675976645, |
| "grad_norm": 2.498899221420288, |
| "learning_rate": 0.00018903682938718977, |
| "loss": 0.8499, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.17721061819057365, |
| "grad_norm": 3.770181655883789, |
| "learning_rate": 0.00018896570909947475, |
| "loss": 0.8731, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.17769216878348282, |
| "grad_norm": 4.896841049194336, |
| "learning_rate": 0.00018889437233894234, |
| "loss": 1.0454, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.17817371937639198, |
| "grad_norm": 2.264261245727539, |
| "learning_rate": 0.0001888228192791703, |
| "loss": 1.0061, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.17865526996930114, |
| "grad_norm": 3.097073793411255, |
| "learning_rate": 0.00018875105009426272, |
| "loss": 0.6645, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.1791368205622103, |
| "grad_norm": 1.4488924741744995, |
| "learning_rate": 0.00018867906495884955, |
| "loss": 1.1148, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.17961837115511947, |
| "grad_norm": 3.17714524269104, |
| "learning_rate": 0.0001886068640480862, |
| "loss": 0.7299, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.18009992174802866, |
| "grad_norm": 5.30600118637085, |
| "learning_rate": 0.00018853444753765306, |
| "loss": 0.797, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.18058147234093783, |
| "grad_norm": 2.218719959259033, |
| "learning_rate": 0.00018846181560375525, |
| "loss": 0.6802, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.181063022933847, |
| "grad_norm": 1.5951119661331177, |
| "learning_rate": 0.0001883889684231219, |
| "loss": 0.9983, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.18154457352675615, |
| "grad_norm": 4.742170810699463, |
| "learning_rate": 0.000188315906173006, |
| "loss": 1.3679, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.18202612411966532, |
| "grad_norm": 4.083590507507324, |
| "learning_rate": 0.0001882426290311838, |
| "loss": 0.6986, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.18250767471257448, |
| "grad_norm": 4.625772476196289, |
| "learning_rate": 0.00018816913717595445, |
| "loss": 0.9917, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.18298922530548364, |
| "grad_norm": 3.811450958251953, |
| "learning_rate": 0.00018809543078613953, |
| "loss": 0.7278, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.18347077589839283, |
| "grad_norm": 2.0530614852905273, |
| "learning_rate": 0.00018802151004108263, |
| "loss": 0.574, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.183952326491302, |
| "grad_norm": 3.4029970169067383, |
| "learning_rate": 0.0001879473751206489, |
| "loss": 0.8849, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.18443387708421116, |
| "grad_norm": 5.864663124084473, |
| "learning_rate": 0.00018787302620522467, |
| "loss": 0.5902, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.18491542767712033, |
| "grad_norm": 2.628844976425171, |
| "learning_rate": 0.00018779846347571693, |
| "loss": 0.9543, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1853969782700295, |
| "grad_norm": 5.040539741516113, |
| "learning_rate": 0.0001877236871135529, |
| "loss": 0.7266, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.18587852886293865, |
| "grad_norm": 3.538259983062744, |
| "learning_rate": 0.00018764869730067968, |
| "loss": 1.0102, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.18636007945584784, |
| "grad_norm": 3.6347954273223877, |
| "learning_rate": 0.0001875734942195637, |
| "loss": 0.4102, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.186841630048757, |
| "grad_norm": 3.6020264625549316, |
| "learning_rate": 0.0001874980780531903, |
| "loss": 1.0311, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.18732318064166617, |
| "grad_norm": 3.3939337730407715, |
| "learning_rate": 0.00018742244898506337, |
| "loss": 0.8185, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.18780473123457533, |
| "grad_norm": 3.194336175918579, |
| "learning_rate": 0.00018734660719920475, |
| "loss": 1.0069, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1882862818274845, |
| "grad_norm": 3.444998025894165, |
| "learning_rate": 0.00018727055288015397, |
| "loss": 1.0042, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.18876783242039366, |
| "grad_norm": 1.6734000444412231, |
| "learning_rate": 0.00018719428621296764, |
| "loss": 1.1157, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.18924938301330282, |
| "grad_norm": 3.978752374649048, |
| "learning_rate": 0.00018711780738321897, |
| "loss": 0.5787, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.18973093360621202, |
| "grad_norm": 3.7722957134246826, |
| "learning_rate": 0.00018704111657699758, |
| "loss": 0.7111, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.19021248419912118, |
| "grad_norm": 1.650201678276062, |
| "learning_rate": 0.0001869642139809088, |
| "loss": 1.1438, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.19069403479203034, |
| "grad_norm": 8.692386627197266, |
| "learning_rate": 0.00018688709978207323, |
| "loss": 0.8174, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.1911755853849395, |
| "grad_norm": 1.9491546154022217, |
| "learning_rate": 0.00018680977416812644, |
| "loss": 1.0687, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.19165713597784867, |
| "grad_norm": 2.265296459197998, |
| "learning_rate": 0.00018673223732721837, |
| "loss": 0.739, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.19213868657075783, |
| "grad_norm": 3.7249464988708496, |
| "learning_rate": 0.0001866544894480129, |
| "loss": 1.0551, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.192620237163667, |
| "grad_norm": 2.0633394718170166, |
| "learning_rate": 0.00018657653071968747, |
| "loss": 0.8448, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1931017877565762, |
| "grad_norm": 3.448591947555542, |
| "learning_rate": 0.00018649836133193253, |
| "loss": 0.7965, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.19358333834948535, |
| "grad_norm": 1.4229322671890259, |
| "learning_rate": 0.00018641998147495112, |
| "loss": 0.4359, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.19406488894239451, |
| "grad_norm": 2.978297233581543, |
| "learning_rate": 0.00018634139133945837, |
| "loss": 0.632, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.19454643953530368, |
| "grad_norm": 2.407181978225708, |
| "learning_rate": 0.00018626259111668105, |
| "loss": 1.0891, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.19502799012821284, |
| "grad_norm": 3.066446542739868, |
| "learning_rate": 0.00018618358099835723, |
| "loss": 1.3327, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.195509540721122, |
| "grad_norm": 1.5488284826278687, |
| "learning_rate": 0.00018610436117673555, |
| "loss": 0.42, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.19599109131403117, |
| "grad_norm": 1.5752339363098145, |
| "learning_rate": 0.00018602493184457505, |
| "loss": 0.6942, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.19647264190694036, |
| "grad_norm": 3.5753307342529297, |
| "learning_rate": 0.00018594529319514437, |
| "loss": 0.9352, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.19695419249984952, |
| "grad_norm": 2.1090338230133057, |
| "learning_rate": 0.00018586544542222169, |
| "loss": 0.8635, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.1974357430927587, |
| "grad_norm": 4.924689292907715, |
| "learning_rate": 0.00018578538872009384, |
| "loss": 0.6627, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.19791729368566785, |
| "grad_norm": 0.7094942331314087, |
| "learning_rate": 0.00018570512328355612, |
| "loss": 0.4438, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.198398844278577, |
| "grad_norm": 3.230691909790039, |
| "learning_rate": 0.00018562464930791167, |
| "loss": 0.7968, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.19888039487148618, |
| "grad_norm": 3.516850709915161, |
| "learning_rate": 0.00018554396698897116, |
| "loss": 0.7121, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.19936194546439534, |
| "grad_norm": 2.1137967109680176, |
| "learning_rate": 0.00018546307652305205, |
| "loss": 0.8463, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.19984349605730453, |
| "grad_norm": 2.3233392238616943, |
| "learning_rate": 0.00018538197810697842, |
| "loss": 0.8193, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.2003250466502137, |
| "grad_norm": 6.277778625488281, |
| "learning_rate": 0.0001853006719380802, |
| "loss": 0.8697, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.20080659724312286, |
| "grad_norm": 3.4568729400634766, |
| "learning_rate": 0.00018521915821419284, |
| "loss": 0.5947, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.20128814783603202, |
| "grad_norm": 2.8246490955352783, |
| "learning_rate": 0.00018513743713365698, |
| "loss": 0.8121, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.20176969842894119, |
| "grad_norm": 3.8738162517547607, |
| "learning_rate": 0.00018505550889531765, |
| "loss": 0.7239, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.20225124902185035, |
| "grad_norm": 3.2353687286376953, |
| "learning_rate": 0.00018497337369852395, |
| "loss": 0.6751, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2027327996147595, |
| "grad_norm": 5.0770039558410645, |
| "learning_rate": 0.0001848910317431286, |
| "loss": 1.2165, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.2032143502076687, |
| "grad_norm": 3.751051902770996, |
| "learning_rate": 0.00018480848322948739, |
| "loss": 0.661, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.20369590080057787, |
| "grad_norm": 3.767159938812256, |
| "learning_rate": 0.00018472572835845873, |
| "loss": 0.7486, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.20417745139348703, |
| "grad_norm": 2.8906075954437256, |
| "learning_rate": 0.00018464276733140306, |
| "loss": 0.7135, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2046590019863962, |
| "grad_norm": 2.5436739921569824, |
| "learning_rate": 0.0001845596003501826, |
| "loss": 0.7165, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.20514055257930536, |
| "grad_norm": 2.0007944107055664, |
| "learning_rate": 0.00018447622761716057, |
| "loss": 0.8495, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.20562210317221452, |
| "grad_norm": 1.651319146156311, |
| "learning_rate": 0.00018439264933520084, |
| "loss": 0.9067, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.2061036537651237, |
| "grad_norm": 1.483290433883667, |
| "learning_rate": 0.00018430886570766747, |
| "loss": 0.7198, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.20658520435803288, |
| "grad_norm": 4.108277797698975, |
| "learning_rate": 0.0001842248769384242, |
| "loss": 1.5626, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.20706675495094204, |
| "grad_norm": 2.4852449893951416, |
| "learning_rate": 0.00018414068323183375, |
| "loss": 0.7769, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2075483055438512, |
| "grad_norm": 4.069541931152344, |
| "learning_rate": 0.00018405628479275775, |
| "loss": 0.663, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.20802985613676037, |
| "grad_norm": 2.137789011001587, |
| "learning_rate": 0.00018397168182655583, |
| "loss": 0.5468, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.20851140672966953, |
| "grad_norm": 3.0423202514648438, |
| "learning_rate": 0.00018388687453908527, |
| "loss": 0.8064, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.2089929573225787, |
| "grad_norm": 2.8734021186828613, |
| "learning_rate": 0.00018380186313670058, |
| "loss": 1.0275, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.20947450791548788, |
| "grad_norm": 2.459599733352661, |
| "learning_rate": 0.00018371664782625287, |
| "loss": 0.7337, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.20995605850839705, |
| "grad_norm": 4.528045177459717, |
| "learning_rate": 0.00018363122881508945, |
| "loss": 1.0966, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.2104376091013062, |
| "grad_norm": 2.942962169647217, |
| "learning_rate": 0.00018354560631105328, |
| "loss": 0.6714, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.21091915969421537, |
| "grad_norm": 5.867403507232666, |
| "learning_rate": 0.00018345978052248233, |
| "loss": 0.9835, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.21140071028712454, |
| "grad_norm": 1.642223834991455, |
| "learning_rate": 0.00018337375165820944, |
| "loss": 0.8807, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.2118822608800337, |
| "grad_norm": 5.436519145965576, |
| "learning_rate": 0.00018328751992756137, |
| "loss": 0.8824, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.21236381147294286, |
| "grad_norm": 5.564542770385742, |
| "learning_rate": 0.0001832010855403586, |
| "loss": 1.0045, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.21284536206585206, |
| "grad_norm": 2.1191842555999756, |
| "learning_rate": 0.0001831144487069147, |
| "loss": 0.7834, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.21332691265876122, |
| "grad_norm": 2.1237781047821045, |
| "learning_rate": 0.0001830276096380358, |
| "loss": 0.3141, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.21380846325167038, |
| "grad_norm": 4.568416118621826, |
| "learning_rate": 0.0001829405685450202, |
| "loss": 1.2394, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.21429001384457955, |
| "grad_norm": 2.3887364864349365, |
| "learning_rate": 0.00018285332563965765, |
| "loss": 1.1355, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.2147715644374887, |
| "grad_norm": 4.617615222930908, |
| "learning_rate": 0.00018276588113422905, |
| "loss": 0.8803, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.21525311503039787, |
| "grad_norm": 2.1679558753967285, |
| "learning_rate": 0.00018267823524150575, |
| "loss": 0.9606, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.21573466562330704, |
| "grad_norm": 2.1338422298431396, |
| "learning_rate": 0.00018259038817474923, |
| "loss": 0.9403, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.21621621621621623, |
| "grad_norm": 2.041907548904419, |
| "learning_rate": 0.0001825023401477104, |
| "loss": 0.6543, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.2166977668091254, |
| "grad_norm": 1.8490098714828491, |
| "learning_rate": 0.0001824140913746291, |
| "loss": 0.9038, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.21717931740203456, |
| "grad_norm": 3.1015329360961914, |
| "learning_rate": 0.00018232564207023376, |
| "loss": 0.6252, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.21766086799494372, |
| "grad_norm": 2.5276334285736084, |
| "learning_rate": 0.00018223699244974064, |
| "loss": 0.653, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.21814241858785288, |
| "grad_norm": 2.063218116760254, |
| "learning_rate": 0.00018214814272885343, |
| "loss": 0.8085, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.21862396918076205, |
| "grad_norm": 2.227787494659424, |
| "learning_rate": 0.00018205909312376276, |
| "loss": 0.9719, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2191055197736712, |
| "grad_norm": 2.54950213432312, |
| "learning_rate": 0.00018196984385114554, |
| "loss": 0.9854, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.2195870703665804, |
| "grad_norm": 2.7499783039093018, |
| "learning_rate": 0.0001818803951281646, |
| "loss": 0.7189, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.22006862095948956, |
| "grad_norm": 4.1418070793151855, |
| "learning_rate": 0.000181790747172468, |
| "loss": 0.7065, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.22055017155239873, |
| "grad_norm": 1.8730262517929077, |
| "learning_rate": 0.00018170090020218864, |
| "loss": 1.164, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2210317221453079, |
| "grad_norm": 2.4339802265167236, |
| "learning_rate": 0.00018161085443594365, |
| "loss": 0.614, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.22151327273821705, |
| "grad_norm": 1.8113713264465332, |
| "learning_rate": 0.00018152061009283382, |
| "loss": 0.5136, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.22199482333112622, |
| "grad_norm": 4.9851250648498535, |
| "learning_rate": 0.00018143016739244314, |
| "loss": 0.9962, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.22247637392403538, |
| "grad_norm": 3.5249273777008057, |
| "learning_rate": 0.0001813395265548383, |
| "loss": 0.7784, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.22295792451694457, |
| "grad_norm": 4.574691295623779, |
| "learning_rate": 0.00018124868780056814, |
| "loss": 0.9796, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.22343947510985374, |
| "grad_norm": 4.666108131408691, |
| "learning_rate": 0.0001811576513506629, |
| "loss": 0.749, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2239210257027629, |
| "grad_norm": 2.94307541847229, |
| "learning_rate": 0.00018106641742663397, |
| "loss": 0.9141, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.22440257629567206, |
| "grad_norm": 3.553006887435913, |
| "learning_rate": 0.00018097498625047328, |
| "loss": 0.7818, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.22488412688858123, |
| "grad_norm": 3.156838893890381, |
| "learning_rate": 0.00018088335804465258, |
| "loss": 0.8416, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.2253656774814904, |
| "grad_norm": 3.0316860675811768, |
| "learning_rate": 0.00018079153303212318, |
| "loss": 0.777, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.22584722807439955, |
| "grad_norm": 4.4010443687438965, |
| "learning_rate": 0.0001806995114363152, |
| "loss": 1.5936, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.22632877866730874, |
| "grad_norm": 2.7999866008758545, |
| "learning_rate": 0.00018060729348113707, |
| "loss": 0.662, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2268103292602179, |
| "grad_norm": 1.6529977321624756, |
| "learning_rate": 0.00018051487939097505, |
| "loss": 0.6979, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.22729187985312707, |
| "grad_norm": 9.427469253540039, |
| "learning_rate": 0.00018042226939069255, |
| "loss": 1.0829, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.22777343044603623, |
| "grad_norm": 3.0257017612457275, |
| "learning_rate": 0.00018032946370562982, |
| "loss": 0.8094, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.2282549810389454, |
| "grad_norm": 3.00710391998291, |
| "learning_rate": 0.00018023646256160313, |
| "loss": 0.4987, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.22873653163185456, |
| "grad_norm": 3.28983473777771, |
| "learning_rate": 0.00018014326618490437, |
| "loss": 0.7542, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.22921808222476375, |
| "grad_norm": 4.3024210929870605, |
| "learning_rate": 0.0001800498748023005, |
| "loss": 1.2465, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.22969963281767292, |
| "grad_norm": 1.8468772172927856, |
| "learning_rate": 0.000179956288641033, |
| "loss": 0.5073, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.23018118341058208, |
| "grad_norm": 6.304190635681152, |
| "learning_rate": 0.00017986250792881718, |
| "loss": 0.4624, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.23066273400349124, |
| "grad_norm": 2.0171825885772705, |
| "learning_rate": 0.00017976853289384184, |
| "loss": 0.886, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2311442845964004, |
| "grad_norm": 3.8221256732940674, |
| "learning_rate": 0.00017967436376476855, |
| "loss": 1.182, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.23162583518930957, |
| "grad_norm": 2.4394781589508057, |
| "learning_rate": 0.0001795800007707312, |
| "loss": 0.8758, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.23210738578221873, |
| "grad_norm": 3.8338496685028076, |
| "learning_rate": 0.00017948544414133534, |
| "loss": 0.4596, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.23258893637512792, |
| "grad_norm": 6.3291239738464355, |
| "learning_rate": 0.00017939069410665773, |
| "loss": 0.7862, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.2330704869680371, |
| "grad_norm": 3.5340969562530518, |
| "learning_rate": 0.0001792957508972457, |
| "loss": 0.857, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.23355203756094625, |
| "grad_norm": 3.2382187843322754, |
| "learning_rate": 0.00017920061474411658, |
| "loss": 0.4476, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.23403358815385542, |
| "grad_norm": 2.6530380249023438, |
| "learning_rate": 0.00017910528587875729, |
| "loss": 0.7092, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.23451513874676458, |
| "grad_norm": 2.1890785694122314, |
| "learning_rate": 0.00017900976453312352, |
| "loss": 0.6607, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.23499668933967374, |
| "grad_norm": 3.7793309688568115, |
| "learning_rate": 0.00017891405093963938, |
| "loss": 0.805, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.2354782399325829, |
| "grad_norm": 3.355741500854492, |
| "learning_rate": 0.00017881814533119675, |
| "loss": 1.1384, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2359597905254921, |
| "grad_norm": 4.145727157592773, |
| "learning_rate": 0.00017872204794115474, |
| "loss": 0.8834, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.23644134111840126, |
| "grad_norm": 2.3569817543029785, |
| "learning_rate": 0.0001786257590033391, |
| "loss": 0.678, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.23692289171131042, |
| "grad_norm": 3.5754706859588623, |
| "learning_rate": 0.00017852927875204163, |
| "loss": 1.1182, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.2374044423042196, |
| "grad_norm": 4.857983112335205, |
| "learning_rate": 0.00017843260742201963, |
| "loss": 0.8987, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.23788599289712875, |
| "grad_norm": 1.9549206495285034, |
| "learning_rate": 0.00017833574524849535, |
| "loss": 0.8174, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.23836754349003791, |
| "grad_norm": 6.188207626342773, |
| "learning_rate": 0.00017823869246715553, |
| "loss": 1.0608, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.23884909408294708, |
| "grad_norm": 5.574507713317871, |
| "learning_rate": 0.00017814144931415043, |
| "loss": 1.1371, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.23933064467585627, |
| "grad_norm": 4.148524284362793, |
| "learning_rate": 0.0001780440160260938, |
| "loss": 1.213, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.23981219526876543, |
| "grad_norm": 1.1119147539138794, |
| "learning_rate": 0.00017794639284006184, |
| "loss": 0.9442, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.2402937458616746, |
| "grad_norm": 2.266169548034668, |
| "learning_rate": 0.0001778485799935929, |
| "loss": 0.8459, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.24077529645458376, |
| "grad_norm": 3.1551809310913086, |
| "learning_rate": 0.00017775057772468679, |
| "loss": 1.3422, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.24125684704749292, |
| "grad_norm": 1.7529497146606445, |
| "learning_rate": 0.00017765238627180424, |
| "loss": 0.5388, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2417383976404021, |
| "grad_norm": 1.8274682760238647, |
| "learning_rate": 0.00017755400587386632, |
| "loss": 0.679, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.24221994823331125, |
| "grad_norm": 3.1747913360595703, |
| "learning_rate": 0.00017745543677025378, |
| "loss": 1.0639, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.24270149882622044, |
| "grad_norm": 3.80859637260437, |
| "learning_rate": 0.00017735667920080661, |
| "loss": 0.9085, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.2431830494191296, |
| "grad_norm": 3.507260322570801, |
| "learning_rate": 0.0001772577334058233, |
| "loss": 1.2649, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.24366460001203877, |
| "grad_norm": 2.216651201248169, |
| "learning_rate": 0.00017715859962606043, |
| "loss": 0.6616, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.24414615060494793, |
| "grad_norm": 2.3983209133148193, |
| "learning_rate": 0.00017705927810273187, |
| "loss": 0.5882, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2446277011978571, |
| "grad_norm": 2.6623141765594482, |
| "learning_rate": 0.00017695976907750844, |
| "loss": 0.5488, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.24510925179076626, |
| "grad_norm": 2.3083388805389404, |
| "learning_rate": 0.00017686007279251706, |
| "loss": 0.5942, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.24559080238367542, |
| "grad_norm": 2.294562578201294, |
| "learning_rate": 0.00017676018949034045, |
| "loss": 0.9633, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.2460723529765846, |
| "grad_norm": 3.9046053886413574, |
| "learning_rate": 0.0001766601194140162, |
| "loss": 1.0614, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.24655390356949378, |
| "grad_norm": 2.0208675861358643, |
| "learning_rate": 0.0001765598628070365, |
| "loss": 0.8143, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.24703545416240294, |
| "grad_norm": 4.851849555969238, |
| "learning_rate": 0.00017645941991334732, |
| "loss": 1.0167, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.2475170047553121, |
| "grad_norm": 2.7783102989196777, |
| "learning_rate": 0.00017635879097734804, |
| "loss": 0.9836, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.24799855534822127, |
| "grad_norm": 3.420109510421753, |
| "learning_rate": 0.00017625797624389055, |
| "loss": 0.8772, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.24848010594113043, |
| "grad_norm": 2.0368940830230713, |
| "learning_rate": 0.00017615697595827897, |
| "loss": 0.6702, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.2489616565340396, |
| "grad_norm": 3.138749599456787, |
| "learning_rate": 0.0001760557903662688, |
| "loss": 0.8044, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.24944320712694878, |
| "grad_norm": 3.4814579486846924, |
| "learning_rate": 0.00017595441971406648, |
| "loss": 1.1824, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.24992475771985795, |
| "grad_norm": 2.961376190185547, |
| "learning_rate": 0.00017585286424832874, |
| "loss": 0.9286, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.2504063083127671, |
| "grad_norm": 2.2734553813934326, |
| "learning_rate": 0.00017575112421616202, |
| "loss": 0.8274, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2508878589056763, |
| "grad_norm": 2.522198438644409, |
| "learning_rate": 0.0001756491998651218, |
| "loss": 0.8287, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.25136940949858544, |
| "grad_norm": 3.1421101093292236, |
| "learning_rate": 0.0001755470914432121, |
| "loss": 1.0712, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.2518509600914946, |
| "grad_norm": 2.7851064205169678, |
| "learning_rate": 0.0001754447991988848, |
| "loss": 0.7893, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.25233251068440377, |
| "grad_norm": 3.5308754444122314, |
| "learning_rate": 0.00017534232338103903, |
| "loss": 1.0271, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.25281406127731293, |
| "grad_norm": 2.5698599815368652, |
| "learning_rate": 0.0001752396642390207, |
| "loss": 1.2494, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.2532956118702221, |
| "grad_norm": 3.077817440032959, |
| "learning_rate": 0.00017513682202262163, |
| "loss": 1.2176, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.25377716246313126, |
| "grad_norm": 2.7685060501098633, |
| "learning_rate": 0.00017503379698207918, |
| "loss": 1.017, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.2542587130560405, |
| "grad_norm": 1.3066401481628418, |
| "learning_rate": 0.00017493058936807562, |
| "loss": 0.9528, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.25474026364894964, |
| "grad_norm": 2.54870343208313, |
| "learning_rate": 0.00017482719943173739, |
| "loss": 0.8665, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2552218142418588, |
| "grad_norm": 1.8381497859954834, |
| "learning_rate": 0.00017472362742463455, |
| "loss": 0.6502, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.25570336483476797, |
| "grad_norm": 3.0634922981262207, |
| "learning_rate": 0.0001746198735987802, |
| "loss": 0.7654, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.25618491542767713, |
| "grad_norm": 2.2630629539489746, |
| "learning_rate": 0.00017451593820662988, |
| "loss": 0.6992, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.2566664660205863, |
| "grad_norm": 2.220201015472412, |
| "learning_rate": 0.00017441182150108086, |
| "loss": 1.0391, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.25714801661349546, |
| "grad_norm": 1.3255223035812378, |
| "learning_rate": 0.0001743075237354716, |
| "loss": 1.1717, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.2576295672064046, |
| "grad_norm": 0.7325202822685242, |
| "learning_rate": 0.00017420304516358113, |
| "loss": 0.6, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.2581111177993138, |
| "grad_norm": 2.993429660797119, |
| "learning_rate": 0.00017409838603962843, |
| "loss": 0.8133, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.25859266839222295, |
| "grad_norm": 1.611694097518921, |
| "learning_rate": 0.00017399354661827178, |
| "loss": 0.8367, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.2590742189851321, |
| "grad_norm": 4.344441890716553, |
| "learning_rate": 0.00017388852715460819, |
| "loss": 0.7335, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.2595557695780413, |
| "grad_norm": 2.118799924850464, |
| "learning_rate": 0.00017378332790417273, |
| "loss": 0.8942, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.26003732017095044, |
| "grad_norm": 3.4612903594970703, |
| "learning_rate": 0.00017367794912293794, |
| "loss": 0.997, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.26051887076385966, |
| "grad_norm": 2.6911776065826416, |
| "learning_rate": 0.00017357239106731317, |
| "loss": 0.59, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.2610004213567688, |
| "grad_norm": 3.0154871940612793, |
| "learning_rate": 0.00017346665399414405, |
| "loss": 0.8628, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.261481971949678, |
| "grad_norm": 2.9015610218048096, |
| "learning_rate": 0.00017336073816071168, |
| "loss": 0.7398, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.26196352254258715, |
| "grad_norm": 3.7456789016723633, |
| "learning_rate": 0.00017325464382473226, |
| "loss": 1.2309, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.2624450731354963, |
| "grad_norm": 3.10002064704895, |
| "learning_rate": 0.00017314837124435622, |
| "loss": 0.8035, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2629266237284055, |
| "grad_norm": 3.767289876937866, |
| "learning_rate": 0.00017304192067816782, |
| "loss": 0.9462, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.26340817432131464, |
| "grad_norm": 1.6517798900604248, |
| "learning_rate": 0.00017293529238518422, |
| "loss": 0.7511, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.2638897249142238, |
| "grad_norm": 3.3719449043273926, |
| "learning_rate": 0.0001728284866248552, |
| "loss": 0.8092, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.26437127550713296, |
| "grad_norm": 2.3722736835479736, |
| "learning_rate": 0.00017272150365706224, |
| "loss": 0.8951, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.2648528261000421, |
| "grad_norm": 4.863512992858887, |
| "learning_rate": 0.00017261434374211802, |
| "loss": 0.6823, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.2653343766929513, |
| "grad_norm": 2.9136412143707275, |
| "learning_rate": 0.00017250700714076586, |
| "loss": 0.7394, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.26581592728586045, |
| "grad_norm": 1.59123957157135, |
| "learning_rate": 0.00017239949411417888, |
| "loss": 0.9012, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2662974778787696, |
| "grad_norm": 3.566239356994629, |
| "learning_rate": 0.0001722918049239596, |
| "loss": 1.3038, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.2667790284716788, |
| "grad_norm": 1.6607437133789062, |
| "learning_rate": 0.00017218393983213902, |
| "loss": 0.5825, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.267260579064588, |
| "grad_norm": 2.2083866596221924, |
| "learning_rate": 0.00017207589910117634, |
| "loss": 0.6033, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.26774212965749716, |
| "grad_norm": 6.2903523445129395, |
| "learning_rate": 0.00017196768299395797, |
| "loss": 0.9105, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.2682236802504063, |
| "grad_norm": 1.9703174829483032, |
| "learning_rate": 0.00017185929177379714, |
| "loss": 0.7654, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.2687052308433155, |
| "grad_norm": 3.087989568710327, |
| "learning_rate": 0.00017175072570443312, |
| "loss": 0.9272, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.26918678143622465, |
| "grad_norm": 4.169714450836182, |
| "learning_rate": 0.00017164198505003066, |
| "loss": 0.6082, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.2696683320291338, |
| "grad_norm": 2.0831711292266846, |
| "learning_rate": 0.0001715330700751793, |
| "loss": 0.5273, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.270149882622043, |
| "grad_norm": 2.3498497009277344, |
| "learning_rate": 0.00017142398104489273, |
| "loss": 0.6253, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.27063143321495214, |
| "grad_norm": 3.0425565242767334, |
| "learning_rate": 0.00017131471822460814, |
| "loss": 0.8767, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.2711129838078613, |
| "grad_norm": 3.1900765895843506, |
| "learning_rate": 0.00017120528188018565, |
| "loss": 0.8782, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.27159453440077047, |
| "grad_norm": 4.279084205627441, |
| "learning_rate": 0.00017109567227790754, |
| "loss": 0.5321, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.27207608499367963, |
| "grad_norm": 1.9123119115829468, |
| "learning_rate": 0.00017098588968447766, |
| "loss": 1.0175, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.2725576355865888, |
| "grad_norm": 3.689532995223999, |
| "learning_rate": 0.00017087593436702084, |
| "loss": 0.9917, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.27303918617949796, |
| "grad_norm": 2.9351844787597656, |
| "learning_rate": 0.00017076580659308222, |
| "loss": 1.1911, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.2735207367724071, |
| "grad_norm": 1.8407542705535889, |
| "learning_rate": 0.00017065550663062634, |
| "loss": 0.6958, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.27400228736531634, |
| "grad_norm": 3.2096939086914062, |
| "learning_rate": 0.00017054503474803702, |
| "loss": 1.0163, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.2744838379582255, |
| "grad_norm": 4.783024311065674, |
| "learning_rate": 0.00017043439121411618, |
| "loss": 0.9086, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.27496538855113467, |
| "grad_norm": 3.336801290512085, |
| "learning_rate": 0.0001703235762980835, |
| "loss": 0.6395, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.27544693914404383, |
| "grad_norm": 6.958205223083496, |
| "learning_rate": 0.00017021259026957567, |
| "loss": 1.0467, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.275928489736953, |
| "grad_norm": 3.0236618518829346, |
| "learning_rate": 0.00017010143339864562, |
| "loss": 0.8194, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.27641004032986216, |
| "grad_norm": 2.2934484481811523, |
| "learning_rate": 0.0001699901059557621, |
| "loss": 0.7926, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.2768915909227713, |
| "grad_norm": 3.157127618789673, |
| "learning_rate": 0.00016987860821180895, |
| "loss": 0.849, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.2773731415156805, |
| "grad_norm": 2.1495730876922607, |
| "learning_rate": 0.00016976694043808416, |
| "loss": 0.9138, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.27785469210858965, |
| "grad_norm": 2.7366628646850586, |
| "learning_rate": 0.00016965510290629972, |
| "loss": 0.7798, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2783362427014988, |
| "grad_norm": 1.7467342615127563, |
| "learning_rate": 0.00016954309588858044, |
| "loss": 0.5792, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.278817793294408, |
| "grad_norm": 2.1908276081085205, |
| "learning_rate": 0.00016943091965746366, |
| "loss": 0.6596, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.27929934388731714, |
| "grad_norm": 4.674194812774658, |
| "learning_rate": 0.00016931857448589845, |
| "loss": 0.9044, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2797808944802263, |
| "grad_norm": 1.4668383598327637, |
| "learning_rate": 0.00016920606064724488, |
| "loss": 0.5977, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.28026244507313547, |
| "grad_norm": 1.8525112867355347, |
| "learning_rate": 0.00016909337841527344, |
| "loss": 0.7542, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.2807439956660447, |
| "grad_norm": 2.2113821506500244, |
| "learning_rate": 0.00016898052806416444, |
| "loss": 0.8215, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.28122554625895385, |
| "grad_norm": 3.6429834365844727, |
| "learning_rate": 0.00016886750986850718, |
| "loss": 1.0825, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.281707096851863, |
| "grad_norm": 2.043597459793091, |
| "learning_rate": 0.00016875432410329934, |
| "loss": 0.5168, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.2821886474447722, |
| "grad_norm": 3.5646705627441406, |
| "learning_rate": 0.0001686409710439464, |
| "loss": 0.8117, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.28267019803768134, |
| "grad_norm": 2.080101251602173, |
| "learning_rate": 0.00016852745096626088, |
| "loss": 1.0391, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.2831517486305905, |
| "grad_norm": 4.150976181030273, |
| "learning_rate": 0.0001684137641464617, |
| "loss": 1.3853, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.28363329922349967, |
| "grad_norm": 3.107977867126465, |
| "learning_rate": 0.0001682999108611735, |
| "loss": 0.6732, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.28411484981640883, |
| "grad_norm": 1.3921109437942505, |
| "learning_rate": 0.00016818589138742587, |
| "loss": 0.5883, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.284596400409318, |
| "grad_norm": 3.5491769313812256, |
| "learning_rate": 0.00016807170600265296, |
| "loss": 0.7489, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.28507795100222716, |
| "grad_norm": 2.4986155033111572, |
| "learning_rate": 0.00016795735498469246, |
| "loss": 0.9951, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.2855595015951363, |
| "grad_norm": 2.9514119625091553, |
| "learning_rate": 0.00016784283861178513, |
| "loss": 0.9067, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.2860410521880455, |
| "grad_norm": 2.5975046157836914, |
| "learning_rate": 0.00016772815716257412, |
| "loss": 0.6998, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.28652260278095465, |
| "grad_norm": 3.2922167778015137, |
| "learning_rate": 0.00016761331091610416, |
| "loss": 1.1803, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.28700415337386387, |
| "grad_norm": 2.982027053833008, |
| "learning_rate": 0.00016749830015182107, |
| "loss": 0.9353, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.28748570396677303, |
| "grad_norm": 1.3692221641540527, |
| "learning_rate": 0.00016738312514957086, |
| "loss": 0.5095, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.2879672545596822, |
| "grad_norm": 2.186056613922119, |
| "learning_rate": 0.00016726778618959926, |
| "loss": 0.8501, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.28844880515259136, |
| "grad_norm": 1.2103948593139648, |
| "learning_rate": 0.00016715228355255093, |
| "loss": 0.5927, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.2889303557455005, |
| "grad_norm": 2.340742349624634, |
| "learning_rate": 0.00016703661751946874, |
| "loss": 0.7416, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2894119063384097, |
| "grad_norm": 3.421393871307373, |
| "learning_rate": 0.00016692078837179318, |
| "loss": 1.001, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.28989345693131885, |
| "grad_norm": 2.6955082416534424, |
| "learning_rate": 0.00016680479639136163, |
| "loss": 0.7825, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.290375007524228, |
| "grad_norm": 2.7069554328918457, |
| "learning_rate": 0.0001666886418604077, |
| "loss": 0.5969, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.2908565581171372, |
| "grad_norm": 2.855012893676758, |
| "learning_rate": 0.0001665723250615604, |
| "loss": 0.6626, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.29133810871004634, |
| "grad_norm": 3.311091423034668, |
| "learning_rate": 0.00016645584627784381, |
| "loss": 1.0357, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.2918196593029555, |
| "grad_norm": 3.6477015018463135, |
| "learning_rate": 0.0001663392057926759, |
| "loss": 0.8369, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.29230120989586467, |
| "grad_norm": 4.4921345710754395, |
| "learning_rate": 0.00016622240388986824, |
| "loss": 0.6446, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.29278276048877383, |
| "grad_norm": 7.8272013664245605, |
| "learning_rate": 0.0001661054408536251, |
| "loss": 1.0073, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.293264311081683, |
| "grad_norm": 3.6183552742004395, |
| "learning_rate": 0.00016598831696854288, |
| "loss": 1.0384, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.2937458616745922, |
| "grad_norm": 3.8852734565734863, |
| "learning_rate": 0.00016587103251960937, |
| "loss": 0.9137, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2942274122675014, |
| "grad_norm": 4.892505645751953, |
| "learning_rate": 0.00016575358779220294, |
| "loss": 0.8421, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.29470896286041054, |
| "grad_norm": 5.816843032836914, |
| "learning_rate": 0.00016563598307209204, |
| "loss": 0.9884, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.2951905134533197, |
| "grad_norm": 3.2219488620758057, |
| "learning_rate": 0.0001655182186454344, |
| "loss": 0.549, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.29567206404622887, |
| "grad_norm": 1.8293180465698242, |
| "learning_rate": 0.00016540029479877638, |
| "loss": 1.0032, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.29615361463913803, |
| "grad_norm": 2.564122200012207, |
| "learning_rate": 0.00016528221181905217, |
| "loss": 0.8074, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2966351652320472, |
| "grad_norm": 2.1164186000823975, |
| "learning_rate": 0.00016516396999358322, |
| "loss": 0.7476, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.29711671582495636, |
| "grad_norm": 1.6424773931503296, |
| "learning_rate": 0.00016504556961007748, |
| "loss": 0.505, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.2975982664178655, |
| "grad_norm": 2.669262170791626, |
| "learning_rate": 0.00016492701095662866, |
| "loss": 0.5681, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.2980798170107747, |
| "grad_norm": 3.94612455368042, |
| "learning_rate": 0.00016480829432171564, |
| "loss": 1.0503, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.29856136760368385, |
| "grad_norm": 1.6237881183624268, |
| "learning_rate": 0.0001646894199942017, |
| "loss": 0.7969, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.299042918196593, |
| "grad_norm": 1.9715096950531006, |
| "learning_rate": 0.0001645703882633338, |
| "loss": 0.9146, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.2995244687895022, |
| "grad_norm": 4.666619300842285, |
| "learning_rate": 0.00016445119941874183, |
| "loss": 0.4521, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.30000601938241134, |
| "grad_norm": 5.420642375946045, |
| "learning_rate": 0.00016433185375043809, |
| "loss": 0.8339, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.30048756997532056, |
| "grad_norm": 3.869925022125244, |
| "learning_rate": 0.00016421235154881638, |
| "loss": 0.9278, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3009691205682297, |
| "grad_norm": 2.435767889022827, |
| "learning_rate": 0.00016409269310465146, |
| "loss": 0.4431, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.3014506711611389, |
| "grad_norm": 3.733633041381836, |
| "learning_rate": 0.00016397287870909813, |
| "loss": 1.1122, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.30193222175404805, |
| "grad_norm": 2.7327606678009033, |
| "learning_rate": 0.00016385290865369079, |
| "loss": 1.8192, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.3024137723469572, |
| "grad_norm": 2.777545213699341, |
| "learning_rate": 0.00016373278323034255, |
| "loss": 0.762, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.3028953229398664, |
| "grad_norm": 3.036078691482544, |
| "learning_rate": 0.0001636125027313445, |
| "loss": 0.9722, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.30337687353277554, |
| "grad_norm": 1.9548509120941162, |
| "learning_rate": 0.00016349206744936518, |
| "loss": 0.909, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.3038584241256847, |
| "grad_norm": 1.862438678741455, |
| "learning_rate": 0.00016337147767744967, |
| "loss": 0.9632, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.30433997471859386, |
| "grad_norm": 3.5347414016723633, |
| "learning_rate": 0.0001632507337090189, |
| "loss": 0.6586, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.304821525311503, |
| "grad_norm": 3.137190818786621, |
| "learning_rate": 0.0001631298358378692, |
| "loss": 0.8476, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.3053030759044122, |
| "grad_norm": 4.065056800842285, |
| "learning_rate": 0.00016300878435817113, |
| "loss": 0.9919, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.30578462649732135, |
| "grad_norm": 3.0076253414154053, |
| "learning_rate": 0.00016288757956446918, |
| "loss": 0.8693, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.3062661770902305, |
| "grad_norm": 4.386903762817383, |
| "learning_rate": 0.00016276622175168083, |
| "loss": 0.5319, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.30674772768313974, |
| "grad_norm": 2.739208459854126, |
| "learning_rate": 0.0001626447112150959, |
| "loss": 1.0968, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3072292782760489, |
| "grad_norm": 2.428248882293701, |
| "learning_rate": 0.00016252304825037576, |
| "loss": 0.9611, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.30771082886895806, |
| "grad_norm": 4.173031330108643, |
| "learning_rate": 0.0001624012331535528, |
| "loss": 1.0045, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.3081923794618672, |
| "grad_norm": 2.0858750343322754, |
| "learning_rate": 0.00016227926622102947, |
| "loss": 0.4524, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3086739300547764, |
| "grad_norm": 3.544163703918457, |
| "learning_rate": 0.00016215714774957772, |
| "loss": 1.0543, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.30915548064768555, |
| "grad_norm": 5.18956995010376, |
| "learning_rate": 0.00016203487803633822, |
| "loss": 1.7808, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.3096370312405947, |
| "grad_norm": 4.177835464477539, |
| "learning_rate": 0.00016191245737881956, |
| "loss": 0.6678, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.3101185818335039, |
| "grad_norm": 2.31697678565979, |
| "learning_rate": 0.00016178988607489777, |
| "loss": 0.8606, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.31060013242641304, |
| "grad_norm": 3.908705949783325, |
| "learning_rate": 0.00016166716442281528, |
| "loss": 0.9769, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.3110816830193222, |
| "grad_norm": 3.6292724609375, |
| "learning_rate": 0.0001615442927211805, |
| "loss": 0.7383, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.31156323361223137, |
| "grad_norm": 1.9753597974777222, |
| "learning_rate": 0.0001614212712689668, |
| "loss": 0.6893, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.31204478420514054, |
| "grad_norm": 2.6053380966186523, |
| "learning_rate": 0.00016129810036551198, |
| "loss": 0.8903, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.3125263347980497, |
| "grad_norm": 4.439826965332031, |
| "learning_rate": 0.00016117478031051755, |
| "loss": 0.6233, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.31300788539095886, |
| "grad_norm": 2.590153694152832, |
| "learning_rate": 0.00016105131140404787, |
| "loss": 0.8171, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3134894359838681, |
| "grad_norm": 2.094128370285034, |
| "learning_rate": 0.00016092769394652947, |
| "loss": 0.9461, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.31397098657677724, |
| "grad_norm": 2.2599637508392334, |
| "learning_rate": 0.0001608039282387504, |
| "loss": 0.7661, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.3144525371696864, |
| "grad_norm": 2.008302927017212, |
| "learning_rate": 0.00016068001458185936, |
| "loss": 0.7038, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.31493408776259557, |
| "grad_norm": 1.2377398014068604, |
| "learning_rate": 0.0001605559532773651, |
| "loss": 0.7293, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.31541563835550473, |
| "grad_norm": 1.3407477140426636, |
| "learning_rate": 0.00016043174462713566, |
| "loss": 0.4857, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.3158971889484139, |
| "grad_norm": 1.853411078453064, |
| "learning_rate": 0.00016030738893339753, |
| "loss": 0.8002, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.31637873954132306, |
| "grad_norm": 2.6167731285095215, |
| "learning_rate": 0.00016018288649873497, |
| "loss": 0.8817, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.3168602901342322, |
| "grad_norm": 2.0071139335632324, |
| "learning_rate": 0.0001600582376260894, |
| "loss": 0.7619, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.3173418407271414, |
| "grad_norm": 1.7096420526504517, |
| "learning_rate": 0.00015993344261875847, |
| "loss": 0.3737, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.31782339132005055, |
| "grad_norm": 1.3771498203277588, |
| "learning_rate": 0.00015980850178039547, |
| "loss": 0.8174, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3183049419129597, |
| "grad_norm": 1.803589105606079, |
| "learning_rate": 0.00015968341541500842, |
| "loss": 0.886, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.3187864925058689, |
| "grad_norm": 2.595926523208618, |
| "learning_rate": 0.00015955818382695953, |
| "loss": 0.6081, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.31926804309877804, |
| "grad_norm": 3.571012496948242, |
| "learning_rate": 0.00015943280732096438, |
| "loss": 0.8304, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.3197495936916872, |
| "grad_norm": 3.7733047008514404, |
| "learning_rate": 0.00015930728620209113, |
| "loss": 0.9931, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.3202311442845964, |
| "grad_norm": 2.0570201873779297, |
| "learning_rate": 0.00015918162077575976, |
| "loss": 0.6084, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.3207126948775056, |
| "grad_norm": 2.1481802463531494, |
| "learning_rate": 0.00015905581134774153, |
| "loss": 0.637, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.32119424547041475, |
| "grad_norm": 2.172410488128662, |
| "learning_rate": 0.0001589298582241579, |
| "loss": 0.8891, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.3216757960633239, |
| "grad_norm": 1.3450566530227661, |
| "learning_rate": 0.00015880376171148014, |
| "loss": 0.743, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.3221573466562331, |
| "grad_norm": 2.8985812664031982, |
| "learning_rate": 0.00015867752211652831, |
| "loss": 1.0136, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.32263889724914224, |
| "grad_norm": 2.9778859615325928, |
| "learning_rate": 0.00015855113974647068, |
| "loss": 1.1762, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.3231204478420514, |
| "grad_norm": 2.839142322540283, |
| "learning_rate": 0.0001584246149088229, |
| "loss": 0.8616, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.32360199843496057, |
| "grad_norm": 2.5461061000823975, |
| "learning_rate": 0.0001582979479114472, |
| "loss": 0.883, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.32408354902786973, |
| "grad_norm": 2.5526607036590576, |
| "learning_rate": 0.0001581711390625519, |
| "loss": 1.0749, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.3245650996207789, |
| "grad_norm": 1.8686296939849854, |
| "learning_rate": 0.0001580441886706903, |
| "loss": 0.5283, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.32504665021368806, |
| "grad_norm": 3.071974277496338, |
| "learning_rate": 0.00015791709704476015, |
| "loss": 0.8488, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3255282008065972, |
| "grad_norm": 2.5691075325012207, |
| "learning_rate": 0.00015778986449400292, |
| "loss": 0.6839, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.3260097513995064, |
| "grad_norm": 3.5297348499298096, |
| "learning_rate": 0.00015766249132800292, |
| "loss": 0.7555, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.3264913019924156, |
| "grad_norm": 1.9047622680664062, |
| "learning_rate": 0.00015753497785668663, |
| "loss": 0.8286, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.32697285258532477, |
| "grad_norm": 1.899084210395813, |
| "learning_rate": 0.00015740732439032187, |
| "loss": 0.8663, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.32745440317823393, |
| "grad_norm": 1.7221845388412476, |
| "learning_rate": 0.00015727953123951716, |
| "loss": 1.0307, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3279359537711431, |
| "grad_norm": 2.8478286266326904, |
| "learning_rate": 0.00015715159871522086, |
| "loss": 0.9206, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.32841750436405226, |
| "grad_norm": 2.7343266010284424, |
| "learning_rate": 0.00015702352712872056, |
| "loss": 0.7745, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.3288990549569614, |
| "grad_norm": 2.272082567214966, |
| "learning_rate": 0.00015689531679164204, |
| "loss": 0.7247, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.3293806055498706, |
| "grad_norm": 3.404125690460205, |
| "learning_rate": 0.00015676696801594886, |
| "loss": 1.185, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.32986215614277975, |
| "grad_norm": 2.005213499069214, |
| "learning_rate": 0.00015663848111394132, |
| "loss": 1.0382, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.3303437067356889, |
| "grad_norm": 3.435062885284424, |
| "learning_rate": 0.00015650985639825585, |
| "loss": 0.937, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.3308252573285981, |
| "grad_norm": 2.289677858352661, |
| "learning_rate": 0.00015638109418186424, |
| "loss": 0.5748, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.33130680792150724, |
| "grad_norm": 3.6423025131225586, |
| "learning_rate": 0.00015625219477807277, |
| "loss": 0.9767, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.3317883585144164, |
| "grad_norm": 2.872910737991333, |
| "learning_rate": 0.00015612315850052166, |
| "loss": 0.6958, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.33226990910732557, |
| "grad_norm": 3.16129469871521, |
| "learning_rate": 0.00015599398566318396, |
| "loss": 1.0489, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.33275145970023473, |
| "grad_norm": 2.4782161712646484, |
| "learning_rate": 0.00015586467658036524, |
| "loss": 0.6644, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.33323301029314395, |
| "grad_norm": 4.447420597076416, |
| "learning_rate": 0.00015573523156670244, |
| "loss": 1.2536, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.3337145608860531, |
| "grad_norm": 4.194264888763428, |
| "learning_rate": 0.0001556056509371633, |
| "loss": 0.9997, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.3341961114789623, |
| "grad_norm": 3.0863115787506104, |
| "learning_rate": 0.00015547593500704547, |
| "loss": 0.9827, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.33467766207187144, |
| "grad_norm": 7.232437610626221, |
| "learning_rate": 0.00015534608409197592, |
| "loss": 0.5336, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3351592126647806, |
| "grad_norm": 2.40484881401062, |
| "learning_rate": 0.00015521609850791004, |
| "loss": 0.4391, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.33564076325768977, |
| "grad_norm": 2.6389102935791016, |
| "learning_rate": 0.0001550859785711308, |
| "loss": 0.8676, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.33612231385059893, |
| "grad_norm": 1.548851490020752, |
| "learning_rate": 0.0001549557245982482, |
| "loss": 0.8892, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3366038644435081, |
| "grad_norm": 1.7563083171844482, |
| "learning_rate": 0.00015482533690619837, |
| "loss": 0.755, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.33708541503641726, |
| "grad_norm": 4.792996883392334, |
| "learning_rate": 0.00015469481581224272, |
| "loss": 0.6721, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3375669656293264, |
| "grad_norm": 2.4070699214935303, |
| "learning_rate": 0.0001545641616339673, |
| "loss": 0.8127, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.3380485162222356, |
| "grad_norm": 3.3054072856903076, |
| "learning_rate": 0.00015443337468928206, |
| "loss": 0.5389, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.33853006681514475, |
| "grad_norm": 2.826061725616455, |
| "learning_rate": 0.00015430245529641986, |
| "loss": 0.58, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.3390116174080539, |
| "grad_norm": 2.2573275566101074, |
| "learning_rate": 0.00015417140377393596, |
| "loss": 0.9465, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.3394931680009631, |
| "grad_norm": 2.5977699756622314, |
| "learning_rate": 0.00015404022044070704, |
| "loss": 0.707, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.3399747185938723, |
| "grad_norm": 4.191378116607666, |
| "learning_rate": 0.00015390890561593052, |
| "loss": 0.8705, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.34045626918678146, |
| "grad_norm": 2.229558229446411, |
| "learning_rate": 0.0001537774596191238, |
| "loss": 0.6722, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.3409378197796906, |
| "grad_norm": 3.5231106281280518, |
| "learning_rate": 0.00015364588277012344, |
| "loss": 0.6765, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.3414193703725998, |
| "grad_norm": 2.406374216079712, |
| "learning_rate": 0.00015351417538908435, |
| "loss": 1.1367, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.34190092096550895, |
| "grad_norm": 2.4399731159210205, |
| "learning_rate": 0.0001533823377964791, |
| "loss": 0.8311, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3423824715584181, |
| "grad_norm": 3.170764207839966, |
| "learning_rate": 0.00015325037031309704, |
| "loss": 1.1677, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.3428640221513273, |
| "grad_norm": 2.4215619564056396, |
| "learning_rate": 0.00015311827326004363, |
| "loss": 0.7897, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.34334557274423644, |
| "grad_norm": 2.092327117919922, |
| "learning_rate": 0.0001529860469587396, |
| "loss": 0.6389, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.3438271233371456, |
| "grad_norm": 2.5935378074645996, |
| "learning_rate": 0.00015285369173092015, |
| "loss": 0.6437, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.34430867393005476, |
| "grad_norm": 2.7213246822357178, |
| "learning_rate": 0.00015272120789863413, |
| "loss": 1.2337, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.34479022452296393, |
| "grad_norm": 2.3775582313537598, |
| "learning_rate": 0.00015258859578424342, |
| "loss": 0.9354, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.3452717751158731, |
| "grad_norm": 2.3401012420654297, |
| "learning_rate": 0.00015245585571042194, |
| "loss": 0.573, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.34575332570878226, |
| "grad_norm": 2.1838347911834717, |
| "learning_rate": 0.00015232298800015506, |
| "loss": 0.5484, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.3462348763016914, |
| "grad_norm": 2.1311140060424805, |
| "learning_rate": 0.00015218999297673862, |
| "loss": 0.695, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.34671642689460064, |
| "grad_norm": 3.2175323963165283, |
| "learning_rate": 0.0001520568709637783, |
| "loss": 0.8665, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.3471979774875098, |
| "grad_norm": 2.6574606895446777, |
| "learning_rate": 0.00015192362228518875, |
| "loss": 0.6068, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.34767952808041896, |
| "grad_norm": 3.131312847137451, |
| "learning_rate": 0.00015179024726519284, |
| "loss": 1.0317, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.34816107867332813, |
| "grad_norm": 2.886371612548828, |
| "learning_rate": 0.00015165674622832085, |
| "loss": 0.6881, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.3486426292662373, |
| "grad_norm": 2.9966607093811035, |
| "learning_rate": 0.0001515231194994097, |
| "loss": 1.6059, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.34912417985914646, |
| "grad_norm": 5.8844404220581055, |
| "learning_rate": 0.00015138936740360207, |
| "loss": 0.8733, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.3496057304520556, |
| "grad_norm": 2.432682752609253, |
| "learning_rate": 0.00015125549026634585, |
| "loss": 0.4045, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.3500872810449648, |
| "grad_norm": 2.9346506595611572, |
| "learning_rate": 0.00015112148841339295, |
| "loss": 0.6577, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.35056883163787395, |
| "grad_norm": 3.6856017112731934, |
| "learning_rate": 0.000150987362170799, |
| "loss": 0.7203, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.3510503822307831, |
| "grad_norm": 3.772768974304199, |
| "learning_rate": 0.00015085311186492206, |
| "loss": 0.961, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.3515319328236923, |
| "grad_norm": 2.937117576599121, |
| "learning_rate": 0.00015071873782242223, |
| "loss": 0.5519, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.35201348341660144, |
| "grad_norm": 3.9652099609375, |
| "learning_rate": 0.0001505842403702606, |
| "loss": 0.9024, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.3524950340095106, |
| "grad_norm": 2.1614396572113037, |
| "learning_rate": 0.00015044961983569856, |
| "loss": 0.6737, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.3529765846024198, |
| "grad_norm": 2.625931978225708, |
| "learning_rate": 0.00015031487654629702, |
| "loss": 0.6265, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.353458135195329, |
| "grad_norm": 3.378445863723755, |
| "learning_rate": 0.00015018001082991553, |
| "loss": 0.6916, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.35393968578823815, |
| "grad_norm": 1.6671521663665771, |
| "learning_rate": 0.0001500450230147116, |
| "loss": 0.5809, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.3544212363811473, |
| "grad_norm": 2.095771074295044, |
| "learning_rate": 0.00014990991342913974, |
| "loss": 1.0634, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.35490278697405647, |
| "grad_norm": 2.0476694107055664, |
| "learning_rate": 0.00014977468240195084, |
| "loss": 0.7652, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.35538433756696564, |
| "grad_norm": 2.9106428623199463, |
| "learning_rate": 0.0001496393302621912, |
| "loss": 1.1553, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.3558658881598748, |
| "grad_norm": 2.1478304862976074, |
| "learning_rate": 0.00014950385733920188, |
| "loss": 0.7608, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.35634743875278396, |
| "grad_norm": 1.8038551807403564, |
| "learning_rate": 0.00014936826396261783, |
| "loss": 0.6694, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.3568289893456931, |
| "grad_norm": 3.3769569396972656, |
| "learning_rate": 0.00014923255046236705, |
| "loss": 1.2689, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.3573105399386023, |
| "grad_norm": 4.4860334396362305, |
| "learning_rate": 0.00014909671716866984, |
| "loss": 0.852, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.35779209053151145, |
| "grad_norm": 4.017233371734619, |
| "learning_rate": 0.00014896076441203802, |
| "loss": 0.8332, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.3582736411244206, |
| "grad_norm": 2.824586868286133, |
| "learning_rate": 0.000148824692523274, |
| "loss": 1.215, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.3587551917173298, |
| "grad_norm": 3.6129872798919678, |
| "learning_rate": 0.0001486885018334702, |
| "loss": 1.1055, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.35923674231023894, |
| "grad_norm": 3.1985294818878174, |
| "learning_rate": 0.00014855219267400797, |
| "loss": 0.8963, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.35971829290314816, |
| "grad_norm": 3.0053601264953613, |
| "learning_rate": 0.00014841576537655705, |
| "loss": 0.8728, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.3601998434960573, |
| "grad_norm": 2.2497479915618896, |
| "learning_rate": 0.00014827922027307451, |
| "loss": 0.9084, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.3606813940889665, |
| "grad_norm": 3.9402804374694824, |
| "learning_rate": 0.00014814255769580415, |
| "loss": 0.609, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.36116294468187565, |
| "grad_norm": 2.3622281551361084, |
| "learning_rate": 0.00014800577797727558, |
| "loss": 1.0189, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3616444952747848, |
| "grad_norm": 1.9683716297149658, |
| "learning_rate": 0.00014786888145030343, |
| "loss": 0.8275, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.362126045867694, |
| "grad_norm": 0.9872303009033203, |
| "learning_rate": 0.0001477318684479866, |
| "loss": 0.3827, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.36260759646060314, |
| "grad_norm": 3.7244014739990234, |
| "learning_rate": 0.00014759473930370736, |
| "loss": 0.6359, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.3630891470535123, |
| "grad_norm": 1.6438435316085815, |
| "learning_rate": 0.0001474574943511306, |
| "loss": 0.879, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.36357069764642147, |
| "grad_norm": 2.410429000854492, |
| "learning_rate": 0.0001473201339242029, |
| "loss": 1.2406, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.36405224823933063, |
| "grad_norm": 5.812607765197754, |
| "learning_rate": 0.000147182658357152, |
| "loss": 0.4273, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.3645337988322398, |
| "grad_norm": 3.4380412101745605, |
| "learning_rate": 0.00014704506798448566, |
| "loss": 0.5765, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.36501534942514896, |
| "grad_norm": 10.264464378356934, |
| "learning_rate": 0.00014690736314099101, |
| "loss": 0.8553, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.3654969000180581, |
| "grad_norm": 2.479084014892578, |
| "learning_rate": 0.00014676954416173373, |
| "loss": 0.8062, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.3659784506109673, |
| "grad_norm": 3.285261392593384, |
| "learning_rate": 0.00014663161138205724, |
| "loss": 0.9467, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.3664600012038765, |
| "grad_norm": 2.4864413738250732, |
| "learning_rate": 0.00014649356513758176, |
| "loss": 0.8893, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.36694155179678567, |
| "grad_norm": 1.9362248182296753, |
| "learning_rate": 0.00014635540576420374, |
| "loss": 0.6744, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.36742310238969483, |
| "grad_norm": 2.7063558101654053, |
| "learning_rate": 0.0001462171335980948, |
| "loss": 0.4627, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.367904652982604, |
| "grad_norm": 1.6518278121948242, |
| "learning_rate": 0.00014607874897570105, |
| "loss": 0.6235, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.36838620357551316, |
| "grad_norm": 1.9559590816497803, |
| "learning_rate": 0.0001459402522337422, |
| "loss": 0.6709, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.3688677541684223, |
| "grad_norm": 2.930201292037964, |
| "learning_rate": 0.00014580164370921078, |
| "loss": 0.5976, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.3693493047613315, |
| "grad_norm": 2.146150588989258, |
| "learning_rate": 0.0001456629237393713, |
| "loss": 0.7809, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.36983085535424065, |
| "grad_norm": 3.9922261238098145, |
| "learning_rate": 0.00014552409266175952, |
| "loss": 0.6659, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.3703124059471498, |
| "grad_norm": 1.5614209175109863, |
| "learning_rate": 0.00014538515081418142, |
| "loss": 0.6743, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.370793956540059, |
| "grad_norm": 4.6989970207214355, |
| "learning_rate": 0.00014524609853471264, |
| "loss": 0.8936, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.37127550713296814, |
| "grad_norm": 3.576082229614258, |
| "learning_rate": 0.00014510693616169741, |
| "loss": 0.9577, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.3717570577258773, |
| "grad_norm": 0.9595773816108704, |
| "learning_rate": 0.0001449676640337479, |
| "loss": 0.7148, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.37223860831878647, |
| "grad_norm": 4.762831687927246, |
| "learning_rate": 0.00014482828248974335, |
| "loss": 0.4595, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.3727201589116957, |
| "grad_norm": 3.842872381210327, |
| "learning_rate": 0.00014468879186882916, |
| "loss": 1.3252, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.37320170950460485, |
| "grad_norm": 1.4984766244888306, |
| "learning_rate": 0.00014454919251041622, |
| "loss": 0.5666, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.373683260097514, |
| "grad_norm": 2.2089576721191406, |
| "learning_rate": 0.00014440948475418, |
| "loss": 1.341, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3741648106904232, |
| "grad_norm": 1.6905694007873535, |
| "learning_rate": 0.00014426966894005966, |
| "loss": 0.6712, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.37464636128333234, |
| "grad_norm": 2.767066478729248, |
| "learning_rate": 0.0001441297454082573, |
| "loss": 0.9909, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.3751279118762415, |
| "grad_norm": 2.3299660682678223, |
| "learning_rate": 0.00014398971449923722, |
| "loss": 0.5103, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.37560946246915067, |
| "grad_norm": 1.6824946403503418, |
| "learning_rate": 0.00014384957655372483, |
| "loss": 0.6759, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.37609101306205983, |
| "grad_norm": 1.1821125745773315, |
| "learning_rate": 0.00014370933191270617, |
| "loss": 0.5259, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.376572563654969, |
| "grad_norm": 1.904584288597107, |
| "learning_rate": 0.0001435689809174267, |
| "loss": 0.9894, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.37705411424787816, |
| "grad_norm": 2.2008063793182373, |
| "learning_rate": 0.0001434285239093908, |
| "loss": 1.3456, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.3775356648407873, |
| "grad_norm": 2.202802896499634, |
| "learning_rate": 0.00014328796123036071, |
| "loss": 0.4833, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.3780172154336965, |
| "grad_norm": 1.7282605171203613, |
| "learning_rate": 0.0001431472932223559, |
| "loss": 0.9194, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.37849876602660565, |
| "grad_norm": 1.2373805046081543, |
| "learning_rate": 0.00014300652022765207, |
| "loss": 0.466, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3789803166195148, |
| "grad_norm": 4.87825870513916, |
| "learning_rate": 0.00014286564258878033, |
| "loss": 0.9176, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.37946186721242403, |
| "grad_norm": 3.325873613357544, |
| "learning_rate": 0.00014272466064852644, |
| "loss": 0.4595, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.3799434178053332, |
| "grad_norm": 5.119507789611816, |
| "learning_rate": 0.00014258357474993, |
| "loss": 0.8462, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.38042496839824236, |
| "grad_norm": 4.062798976898193, |
| "learning_rate": 0.0001424423852362835, |
| "loss": 0.7553, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3809065189911515, |
| "grad_norm": 1.8997843265533447, |
| "learning_rate": 0.00014230109245113158, |
| "loss": 0.968, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.3813880695840607, |
| "grad_norm": 3.648345470428467, |
| "learning_rate": 0.00014215969673827018, |
| "loss": 0.7866, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.38186962017696985, |
| "grad_norm": 3.1891438961029053, |
| "learning_rate": 0.00014201819844174564, |
| "loss": 0.7841, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.382351170769879, |
| "grad_norm": 3.960712432861328, |
| "learning_rate": 0.0001418765979058539, |
| "loss": 0.8922, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.3828327213627882, |
| "grad_norm": 1.958216667175293, |
| "learning_rate": 0.00014173489547513973, |
| "loss": 0.9929, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.38331427195569734, |
| "grad_norm": 2.851674795150757, |
| "learning_rate": 0.00014159309149439582, |
| "loss": 0.7668, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.3837958225486065, |
| "grad_norm": 2.4043354988098145, |
| "learning_rate": 0.00014145118630866187, |
| "loss": 0.5076, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.38427737314151567, |
| "grad_norm": 2.548334836959839, |
| "learning_rate": 0.000141309180263224, |
| "loss": 0.5664, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.38475892373442483, |
| "grad_norm": 7.332959175109863, |
| "learning_rate": 0.0001411670737036135, |
| "loss": 0.6663, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.385240474327334, |
| "grad_norm": 2.5418202877044678, |
| "learning_rate": 0.0001410248669756065, |
| "loss": 0.6912, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.38572202492024316, |
| "grad_norm": 1.9166046380996704, |
| "learning_rate": 0.00014088256042522264, |
| "loss": 0.5785, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.3862035755131524, |
| "grad_norm": 1.9484670162200928, |
| "learning_rate": 0.00014074015439872458, |
| "loss": 0.7789, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.38668512610606154, |
| "grad_norm": 1.2977544069290161, |
| "learning_rate": 0.00014059764924261703, |
| "loss": 0.448, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.3871666766989707, |
| "grad_norm": 3.7243142127990723, |
| "learning_rate": 0.00014045504530364584, |
| "loss": 0.3638, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.38764822729187987, |
| "grad_norm": 2.931234836578369, |
| "learning_rate": 0.00014031234292879725, |
| "loss": 0.6048, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.38812977788478903, |
| "grad_norm": 2.235635757446289, |
| "learning_rate": 0.00014016954246529696, |
| "loss": 0.741, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.3886113284776982, |
| "grad_norm": 2.4995760917663574, |
| "learning_rate": 0.00014002664426060942, |
| "loss": 0.8794, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.38909287907060736, |
| "grad_norm": 5.880919456481934, |
| "learning_rate": 0.00013988364866243693, |
| "loss": 0.8441, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.3895744296635165, |
| "grad_norm": 2.27232027053833, |
| "learning_rate": 0.00013974055601871868, |
| "loss": 0.5837, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.3900559802564257, |
| "grad_norm": 1.846911907196045, |
| "learning_rate": 0.00013959736667762998, |
| "loss": 0.9346, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.39053753084933485, |
| "grad_norm": 1.3983654975891113, |
| "learning_rate": 0.00013945408098758156, |
| "loss": 1.0296, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.391019081442244, |
| "grad_norm": 1.4359188079833984, |
| "learning_rate": 0.0001393106992972184, |
| "loss": 0.5791, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.3915006320351532, |
| "grad_norm": 2.0418739318847656, |
| "learning_rate": 0.00013916722195541926, |
| "loss": 0.5045, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.39198218262806234, |
| "grad_norm": 3.6216964721679688, |
| "learning_rate": 0.00013902364931129557, |
| "loss": 0.748, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.39246373322097156, |
| "grad_norm": 2.5726840496063232, |
| "learning_rate": 0.00013887998171419058, |
| "loss": 0.8588, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.3929452838138807, |
| "grad_norm": 2.4166088104248047, |
| "learning_rate": 0.00013873621951367862, |
| "loss": 0.8306, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.3934268344067899, |
| "grad_norm": 2.8156697750091553, |
| "learning_rate": 0.00013859236305956425, |
| "loss": 0.7893, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.39390838499969905, |
| "grad_norm": 2.6449344158172607, |
| "learning_rate": 0.00013844841270188132, |
| "loss": 0.7843, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.3943899355926082, |
| "grad_norm": 1.73700749874115, |
| "learning_rate": 0.00013830436879089228, |
| "loss": 0.7855, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.3948714861855174, |
| "grad_norm": 2.2804300785064697, |
| "learning_rate": 0.00013816023167708704, |
| "loss": 0.6568, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.39535303677842654, |
| "grad_norm": 2.1226370334625244, |
| "learning_rate": 0.00013801600171118244, |
| "loss": 0.5294, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.3958345873713357, |
| "grad_norm": 2.894469738006592, |
| "learning_rate": 0.00013787167924412112, |
| "loss": 0.8773, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.39631613796424486, |
| "grad_norm": 2.5202245712280273, |
| "learning_rate": 0.0001377272646270709, |
| "loss": 0.381, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.396797688557154, |
| "grad_norm": 3.8328442573547363, |
| "learning_rate": 0.00013758275821142382, |
| "loss": 0.5329, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.3972792391500632, |
| "grad_norm": 2.541353940963745, |
| "learning_rate": 0.00013743816034879523, |
| "loss": 0.5578, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.39776078974297235, |
| "grad_norm": 2.1383888721466064, |
| "learning_rate": 0.000137293471391023, |
| "loss": 0.8876, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.3982423403358815, |
| "grad_norm": 2.633044719696045, |
| "learning_rate": 0.00013714869169016667, |
| "loss": 0.4708, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.3987238909287907, |
| "grad_norm": 4.365309715270996, |
| "learning_rate": 0.00013700382159850656, |
| "loss": 0.4944, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.3992054415216999, |
| "grad_norm": 3.652635097503662, |
| "learning_rate": 0.00013685886146854297, |
| "loss": 0.8842, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.39968699211460906, |
| "grad_norm": 2.1585693359375, |
| "learning_rate": 0.00013671381165299525, |
| "loss": 0.7996, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.4001685427075182, |
| "grad_norm": 2.429353952407837, |
| "learning_rate": 0.00013656867250480098, |
| "loss": 0.8529, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.4006500933004274, |
| "grad_norm": 2.0044384002685547, |
| "learning_rate": 0.00013642344437711512, |
| "loss": 0.5831, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.40113164389333655, |
| "grad_norm": 3.845720052719116, |
| "learning_rate": 0.00013627812762330912, |
| "loss": 1.0989, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.4016131944862457, |
| "grad_norm": 2.4020707607269287, |
| "learning_rate": 0.00013613272259697007, |
| "loss": 0.734, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.4020947450791549, |
| "grad_norm": 1.2454372644424438, |
| "learning_rate": 0.00013598722965189986, |
| "loss": 1.1921, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.40257629567206404, |
| "grad_norm": 2.901397228240967, |
| "learning_rate": 0.0001358416491421143, |
| "loss": 0.919, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.4030578462649732, |
| "grad_norm": 2.4206631183624268, |
| "learning_rate": 0.00013569598142184225, |
| "loss": 0.8408, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.40353939685788237, |
| "grad_norm": 3.595640182495117, |
| "learning_rate": 0.00013555022684552483, |
| "loss": 1.245, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.40402094745079153, |
| "grad_norm": 1.4770573377609253, |
| "learning_rate": 0.00013540438576781441, |
| "loss": 0.4859, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.4045024980437007, |
| "grad_norm": 2.7039146423339844, |
| "learning_rate": 0.0001352584585435739, |
| "loss": 0.9747, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.40498404863660986, |
| "grad_norm": 2.615344524383545, |
| "learning_rate": 0.00013511244552787583, |
| "loss": 0.6801, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.405465599229519, |
| "grad_norm": 3.7409796714782715, |
| "learning_rate": 0.00013496634707600147, |
| "loss": 1.1876, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.40594714982242824, |
| "grad_norm": 2.508939743041992, |
| "learning_rate": 0.0001348201635434399, |
| "loss": 0.7794, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.4064287004153374, |
| "grad_norm": 3.5549421310424805, |
| "learning_rate": 0.0001346738952858873, |
| "loss": 1.2157, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.40691025100824657, |
| "grad_norm": 0.9292539954185486, |
| "learning_rate": 0.000134527542659246, |
| "loss": 1.4636, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.40739180160115573, |
| "grad_norm": 2.5853211879730225, |
| "learning_rate": 0.00013438110601962362, |
| "loss": 0.6864, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.4078733521940649, |
| "grad_norm": 2.0483453273773193, |
| "learning_rate": 0.00013423458572333214, |
| "loss": 0.816, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.40835490278697406, |
| "grad_norm": 1.1379551887512207, |
| "learning_rate": 0.0001340879821268872, |
| "loss": 0.3914, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.4088364533798832, |
| "grad_norm": 4.26398229598999, |
| "learning_rate": 0.000133941295587007, |
| "loss": 0.7028, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.4093180039727924, |
| "grad_norm": 2.3732380867004395, |
| "learning_rate": 0.00013379452646061164, |
| "loss": 0.5584, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.40979955456570155, |
| "grad_norm": 2.4338433742523193, |
| "learning_rate": 0.0001336476751048222, |
| "loss": 0.9941, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.4102811051586107, |
| "grad_norm": 2.9080970287323, |
| "learning_rate": 0.00013350074187695979, |
| "loss": 1.3025, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.4107626557515199, |
| "grad_norm": 2.6538901329040527, |
| "learning_rate": 0.00013335372713454467, |
| "loss": 0.7301, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.41124420634442904, |
| "grad_norm": 2.5008554458618164, |
| "learning_rate": 0.0001332066312352956, |
| "loss": 0.7566, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.4117257569373382, |
| "grad_norm": 2.0874619483947754, |
| "learning_rate": 0.00013305945453712868, |
| "loss": 0.9365, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.4122073075302474, |
| "grad_norm": 2.2397348880767822, |
| "learning_rate": 0.0001329121973981567, |
| "loss": 0.9617, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.4126888581231566, |
| "grad_norm": 2.2722036838531494, |
| "learning_rate": 0.00013276486017668807, |
| "loss": 0.4104, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.41317040871606575, |
| "grad_norm": 2.11865234375, |
| "learning_rate": 0.0001326174432312262, |
| "loss": 0.7596, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.4136519593089749, |
| "grad_norm": 2.2710089683532715, |
| "learning_rate": 0.00013246994692046836, |
| "loss": 0.9763, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.4141335099018841, |
| "grad_norm": 3.1512913703918457, |
| "learning_rate": 0.000132322371603305, |
| "loss": 0.8637, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.41461506049479324, |
| "grad_norm": 5.3608622550964355, |
| "learning_rate": 0.0001321747176388188, |
| "loss": 0.4573, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.4150966110877024, |
| "grad_norm": 1.7726064920425415, |
| "learning_rate": 0.00013202698538628376, |
| "loss": 1.0072, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.41557816168061157, |
| "grad_norm": 1.9994593858718872, |
| "learning_rate": 0.00013187917520516448, |
| "loss": 0.6097, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.41605971227352073, |
| "grad_norm": 2.665196418762207, |
| "learning_rate": 0.00013173128745511508, |
| "loss": 0.8823, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.4165412628664299, |
| "grad_norm": 1.9170902967453003, |
| "learning_rate": 0.0001315833224959784, |
| "loss": 0.7834, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.41702281345933906, |
| "grad_norm": 1.7230511903762817, |
| "learning_rate": 0.00013143528068778525, |
| "loss": 1.2682, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.4175043640522482, |
| "grad_norm": 1.5421873331069946, |
| "learning_rate": 0.00013128716239075338, |
| "loss": 0.4533, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.4179859146451574, |
| "grad_norm": 2.3257434368133545, |
| "learning_rate": 0.00013113896796528664, |
| "loss": 0.7117, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.41846746523806655, |
| "grad_norm": 2.053032398223877, |
| "learning_rate": 0.00013099069777197412, |
| "loss": 0.8121, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.41894901583097577, |
| "grad_norm": 5.06425142288208, |
| "learning_rate": 0.0001308423521715893, |
| "loss": 0.7408, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.41943056642388493, |
| "grad_norm": 1.0767431259155273, |
| "learning_rate": 0.00013069393152508906, |
| "loss": 0.6687, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.4199121170167941, |
| "grad_norm": 1.6607890129089355, |
| "learning_rate": 0.00013054543619361303, |
| "loss": 0.6322, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.42039366760970326, |
| "grad_norm": 1.7440071105957031, |
| "learning_rate": 0.0001303968665384824, |
| "loss": 0.889, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.4208752182026124, |
| "grad_norm": 1.4886302947998047, |
| "learning_rate": 0.00013024822292119934, |
| "loss": 0.7009, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.4213567687955216, |
| "grad_norm": 3.8570821285247803, |
| "learning_rate": 0.0001300995057034459, |
| "loss": 0.7772, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.42183831938843075, |
| "grad_norm": 2.4680871963500977, |
| "learning_rate": 0.00012995071524708325, |
| "loss": 0.7877, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.4223198699813399, |
| "grad_norm": 2.5244038105010986, |
| "learning_rate": 0.00012980185191415074, |
| "loss": 0.5928, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.4228014205742491, |
| "grad_norm": 1.7207748889923096, |
| "learning_rate": 0.0001296529160668651, |
| "loss": 0.7075, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.42328297116715824, |
| "grad_norm": 1.7171252965927124, |
| "learning_rate": 0.00012950390806761944, |
| "loss": 0.8689, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.4237645217600674, |
| "grad_norm": 1.759418249130249, |
| "learning_rate": 0.0001293548282789825, |
| "loss": 0.4545, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.42424607235297657, |
| "grad_norm": 1.7909297943115234, |
| "learning_rate": 0.00012920567706369758, |
| "loss": 1.3034, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.42472762294588573, |
| "grad_norm": 1.4877880811691284, |
| "learning_rate": 0.00012905645478468192, |
| "loss": 0.3629, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.4252091735387949, |
| "grad_norm": 3.6452713012695312, |
| "learning_rate": 0.00012890716180502564, |
| "loss": 0.6314, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.4256907241317041, |
| "grad_norm": 2.424837589263916, |
| "learning_rate": 0.00012875779848799078, |
| "loss": 0.9437, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.4261722747246133, |
| "grad_norm": 2.7232539653778076, |
| "learning_rate": 0.00012860836519701063, |
| "loss": 0.9839, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.42665382531752244, |
| "grad_norm": 1.2569257020950317, |
| "learning_rate": 0.00012845886229568873, |
| "loss": 0.8196, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.4271353759104316, |
| "grad_norm": 2.5531201362609863, |
| "learning_rate": 0.00012830929014779797, |
| "loss": 0.8545, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.42761692650334077, |
| "grad_norm": 6.38144588470459, |
| "learning_rate": 0.0001281596491172797, |
| "loss": 0.5451, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.42809847709624993, |
| "grad_norm": 1.4901047945022583, |
| "learning_rate": 0.00012800993956824303, |
| "loss": 0.9357, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.4285800276891591, |
| "grad_norm": 2.5083794593811035, |
| "learning_rate": 0.00012786016186496358, |
| "loss": 0.9034, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.42906157828206826, |
| "grad_norm": 2.4690587520599365, |
| "learning_rate": 0.000127710316371883, |
| "loss": 0.7518, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.4295431288749774, |
| "grad_norm": 1.5724667310714722, |
| "learning_rate": 0.0001275604034536077, |
| "loss": 0.4883, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4300246794678866, |
| "grad_norm": 1.8662641048431396, |
| "learning_rate": 0.0001274104234749083, |
| "loss": 0.6713, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.43050623006079575, |
| "grad_norm": 2.4959278106689453, |
| "learning_rate": 0.00012726037680071853, |
| "loss": 0.6975, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.4309877806537049, |
| "grad_norm": 2.854491710662842, |
| "learning_rate": 0.00012711026379613434, |
| "loss": 0.5982, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.4314693312466141, |
| "grad_norm": 2.513932704925537, |
| "learning_rate": 0.00012696008482641325, |
| "loss": 0.6691, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.43195088183952324, |
| "grad_norm": 2.1765429973602295, |
| "learning_rate": 0.00012680984025697313, |
| "loss": 0.4283, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.43243243243243246, |
| "grad_norm": 3.172271966934204, |
| "learning_rate": 0.00012665953045339152, |
| "loss": 1.1573, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.4329139830253416, |
| "grad_norm": 1.5411570072174072, |
| "learning_rate": 0.0001265091557814047, |
| "loss": 0.603, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.4333955336182508, |
| "grad_norm": 4.4379496574401855, |
| "learning_rate": 0.00012635871660690676, |
| "loss": 0.4462, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.43387708421115995, |
| "grad_norm": 4.627134799957275, |
| "learning_rate": 0.0001262082132959488, |
| "loss": 0.7033, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.4343586348040691, |
| "grad_norm": 5.416947364807129, |
| "learning_rate": 0.00012605764621473792, |
| "loss": 1.0499, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.4348401853969783, |
| "grad_norm": 2.7960314750671387, |
| "learning_rate": 0.00012590701572963642, |
| "loss": 1.0619, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.43532173598988744, |
| "grad_norm": 4.81326961517334, |
| "learning_rate": 0.00012575632220716078, |
| "loss": 0.925, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.4358032865827966, |
| "grad_norm": 1.967151403427124, |
| "learning_rate": 0.000125605566013981, |
| "loss": 0.8593, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.43628483717570576, |
| "grad_norm": 3.3491618633270264, |
| "learning_rate": 0.00012545474751691953, |
| "loss": 0.884, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.4367663877686149, |
| "grad_norm": 1.8660701513290405, |
| "learning_rate": 0.00012530386708295036, |
| "loss": 0.7831, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.4372479383615241, |
| "grad_norm": 2.808755874633789, |
| "learning_rate": 0.00012515292507919829, |
| "loss": 0.8822, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.43772948895443325, |
| "grad_norm": 1.8173489570617676, |
| "learning_rate": 0.0001250019218729378, |
| "loss": 0.6888, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.4382110395473424, |
| "grad_norm": 2.033569097518921, |
| "learning_rate": 0.00012485085783159238, |
| "loss": 0.3951, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.43869259014025164, |
| "grad_norm": 2.698009729385376, |
| "learning_rate": 0.00012469973332273354, |
| "loss": 0.5259, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.4391741407331608, |
| "grad_norm": 1.5774399042129517, |
| "learning_rate": 0.00012454854871407994, |
| "loss": 0.4619, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.43965569132606996, |
| "grad_norm": 1.6870514154434204, |
| "learning_rate": 0.00012439730437349635, |
| "loss": 0.4305, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.4401372419189791, |
| "grad_norm": 1.871408224105835, |
| "learning_rate": 0.00012424600066899302, |
| "loss": 0.6, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.4406187925118883, |
| "grad_norm": 2.036029577255249, |
| "learning_rate": 0.00012409463796872464, |
| "loss": 1.0179, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.44110034310479745, |
| "grad_norm": 1.8108471632003784, |
| "learning_rate": 0.0001239432166409893, |
| "loss": 0.8426, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.4415818936977066, |
| "grad_norm": 3.0200490951538086, |
| "learning_rate": 0.00012379173705422795, |
| "loss": 0.9675, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.4420634442906158, |
| "grad_norm": 2.3675003051757812, |
| "learning_rate": 0.00012364019957702315, |
| "loss": 0.6689, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.44254499488352494, |
| "grad_norm": 3.1368930339813232, |
| "learning_rate": 0.00012348860457809838, |
| "loss": 0.8873, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.4430265454764341, |
| "grad_norm": 1.4245840311050415, |
| "learning_rate": 0.00012333695242631705, |
| "loss": 0.8424, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.44350809606934327, |
| "grad_norm": 3.4238197803497314, |
| "learning_rate": 0.0001231852434906817, |
| "loss": 1.2067, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.44398964666225244, |
| "grad_norm": 2.398881196975708, |
| "learning_rate": 0.00012303347814033292, |
| "loss": 0.7952, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.4444711972551616, |
| "grad_norm": 2.153909206390381, |
| "learning_rate": 0.0001228816567445487, |
| "loss": 0.9368, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.44495274784807076, |
| "grad_norm": 2.978900194168091, |
| "learning_rate": 0.0001227297796727433, |
| "loss": 0.6853, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.44543429844098, |
| "grad_norm": 1.9314982891082764, |
| "learning_rate": 0.00012257784729446656, |
| "loss": 0.7932, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.44591584903388914, |
| "grad_norm": 2.3480403423309326, |
| "learning_rate": 0.00012242585997940275, |
| "loss": 0.7998, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.4463973996267983, |
| "grad_norm": 4.056048393249512, |
| "learning_rate": 0.0001222738180973699, |
| "loss": 0.6552, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.44687895021970747, |
| "grad_norm": 3.2654383182525635, |
| "learning_rate": 0.00012212172201831885, |
| "loss": 0.6561, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.44736050081261663, |
| "grad_norm": 3.2897989749908447, |
| "learning_rate": 0.00012196957211233222, |
| "loss": 1.0814, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.4478420514055258, |
| "grad_norm": 1.9905271530151367, |
| "learning_rate": 0.00012181736874962371, |
| "loss": 1.0158, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.44832360199843496, |
| "grad_norm": 2.3979315757751465, |
| "learning_rate": 0.00012166511230053696, |
| "loss": 1.0173, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.4488051525913441, |
| "grad_norm": 2.0289697647094727, |
| "learning_rate": 0.00012151280313554486, |
| "loss": 0.9401, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.4492867031842533, |
| "grad_norm": 2.8876144886016846, |
| "learning_rate": 0.00012136044162524858, |
| "loss": 0.8686, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.44976825377716245, |
| "grad_norm": 3.0945162773132324, |
| "learning_rate": 0.00012120802814037663, |
| "loss": 0.3943, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.4502498043700716, |
| "grad_norm": 2.6051065921783447, |
| "learning_rate": 0.00012105556305178399, |
| "loss": 0.5688, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.4507313549629808, |
| "grad_norm": 3.5704925060272217, |
| "learning_rate": 0.00012090304673045123, |
| "loss": 0.627, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.45121290555588994, |
| "grad_norm": 4.448431491851807, |
| "learning_rate": 0.00012075047954748353, |
| "loss": 1.0867, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.4516944561487991, |
| "grad_norm": 3.0634100437164307, |
| "learning_rate": 0.00012059786187410984, |
| "loss": 0.7893, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.4521760067417083, |
| "grad_norm": 2.6939537525177, |
| "learning_rate": 0.000120445194081682, |
| "loss": 0.4777, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.4526575573346175, |
| "grad_norm": 1.9854243993759155, |
| "learning_rate": 0.00012029247654167379, |
| "loss": 0.4645, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.45313910792752665, |
| "grad_norm": 2.8124136924743652, |
| "learning_rate": 0.00012013970962568002, |
| "loss": 0.6528, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.4536206585204358, |
| "grad_norm": 2.101633310317993, |
| "learning_rate": 0.00011998689370541562, |
| "loss": 0.72, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.454102209113345, |
| "grad_norm": 2.0931015014648438, |
| "learning_rate": 0.00011983402915271478, |
| "loss": 0.6483, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.45458375970625414, |
| "grad_norm": 3.0055315494537354, |
| "learning_rate": 0.00011968111633953007, |
| "loss": 0.9383, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.4550653102991633, |
| "grad_norm": 2.681931734085083, |
| "learning_rate": 0.0001195281556379314, |
| "loss": 0.7556, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.45554686089207247, |
| "grad_norm": 7.055514335632324, |
| "learning_rate": 0.0001193751474201053, |
| "loss": 0.4541, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.45602841148498163, |
| "grad_norm": 2.157724618911743, |
| "learning_rate": 0.00011922209205835382, |
| "loss": 0.5006, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.4565099620778908, |
| "grad_norm": 1.7895299196243286, |
| "learning_rate": 0.0001190689899250938, |
| "loss": 0.8087, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.45699151267079996, |
| "grad_norm": 2.361053228378296, |
| "learning_rate": 0.00011891584139285582, |
| "loss": 1.002, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.4574730632637091, |
| "grad_norm": 2.3107917308807373, |
| "learning_rate": 0.00011876264683428344, |
| "loss": 0.5038, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.4579546138566183, |
| "grad_norm": 2.193125009536743, |
| "learning_rate": 0.00011860940662213211, |
| "loss": 0.5385, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.4584361644495275, |
| "grad_norm": 2.572331190109253, |
| "learning_rate": 0.00011845612112926843, |
| "loss": 0.7153, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.45891771504243667, |
| "grad_norm": 4.683573246002197, |
| "learning_rate": 0.00011830279072866921, |
| "loss": 0.7073, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.45939926563534583, |
| "grad_norm": 2.2095165252685547, |
| "learning_rate": 0.00011814941579342044, |
| "loss": 0.706, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.459880816228255, |
| "grad_norm": 1.5698320865631104, |
| "learning_rate": 0.00011799599669671654, |
| "loss": 0.364, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.46036236682116416, |
| "grad_norm": 1.8080724477767944, |
| "learning_rate": 0.00011784253381185937, |
| "loss": 0.8959, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.4608439174140733, |
| "grad_norm": 1.1480696201324463, |
| "learning_rate": 0.0001176890275122573, |
| "loss": 0.522, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.4613254680069825, |
| "grad_norm": 2.714405059814453, |
| "learning_rate": 0.0001175354781714244, |
| "loss": 0.4145, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.46180701859989165, |
| "grad_norm": 3.001786470413208, |
| "learning_rate": 0.0001173818861629794, |
| "loss": 0.9095, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.4622885691928008, |
| "grad_norm": 1.477941870689392, |
| "learning_rate": 0.00011722825186064494, |
| "loss": 0.3998, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.46277011978571, |
| "grad_norm": 2.0230369567871094, |
| "learning_rate": 0.00011707457563824646, |
| "loss": 0.7196, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.46325167037861914, |
| "grad_norm": 2.4260199069976807, |
| "learning_rate": 0.00011692085786971149, |
| "loss": 0.5469, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.4637332209715283, |
| "grad_norm": 2.5478789806365967, |
| "learning_rate": 0.00011676709892906858, |
| "loss": 0.4603, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.46421477156443747, |
| "grad_norm": 2.9565882682800293, |
| "learning_rate": 0.00011661329919044656, |
| "loss": 0.8948, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.46469632215734663, |
| "grad_norm": 2.064451217651367, |
| "learning_rate": 0.00011645945902807341, |
| "loss": 0.4803, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.46517787275025585, |
| "grad_norm": 2.4505929946899414, |
| "learning_rate": 0.00011630557881627553, |
| "loss": 0.6063, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.465659423343165, |
| "grad_norm": 2.06998348236084, |
| "learning_rate": 0.0001161516589294768, |
| "loss": 1.0691, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.4661409739360742, |
| "grad_norm": 2.934462785720825, |
| "learning_rate": 0.00011599769974219757, |
| "loss": 0.5514, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.46662252452898334, |
| "grad_norm": 3.6781392097473145, |
| "learning_rate": 0.0001158437016290539, |
| "loss": 0.7883, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.4671040751218925, |
| "grad_norm": 5.928903102874756, |
| "learning_rate": 0.00011568966496475649, |
| "loss": 0.7908, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.46758562571480167, |
| "grad_norm": 4.225213050842285, |
| "learning_rate": 0.00011553559012410984, |
| "loss": 0.642, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.46806717630771083, |
| "grad_norm": 4.230667591094971, |
| "learning_rate": 0.00011538147748201138, |
| "loss": 1.0245, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.46854872690062, |
| "grad_norm": 2.045747995376587, |
| "learning_rate": 0.00011522732741345053, |
| "loss": 0.8693, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.46903027749352916, |
| "grad_norm": 2.93965744972229, |
| "learning_rate": 0.00011507314029350776, |
| "loss": 0.7032, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.4695118280864383, |
| "grad_norm": 2.0694057941436768, |
| "learning_rate": 0.00011491891649735366, |
| "loss": 0.7536, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.4699933786793475, |
| "grad_norm": 2.6590757369995117, |
| "learning_rate": 0.00011476465640024814, |
| "loss": 0.8599, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.47047492927225665, |
| "grad_norm": 2.7925920486450195, |
| "learning_rate": 0.00011461036037753934, |
| "loss": 0.4626, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.4709564798651658, |
| "grad_norm": 1.8391474485397339, |
| "learning_rate": 0.00011445602880466288, |
| "loss": 0.5219, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.471438030458075, |
| "grad_norm": 1.3266628980636597, |
| "learning_rate": 0.00011430166205714088, |
| "loss": 0.6874, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.4719195810509842, |
| "grad_norm": 2.141636848449707, |
| "learning_rate": 0.00011414726051058102, |
| "loss": 0.6873, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.47240113164389336, |
| "grad_norm": 2.55141019821167, |
| "learning_rate": 0.0001139928245406757, |
| "loss": 0.6919, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.4728826822368025, |
| "grad_norm": 1.6124935150146484, |
| "learning_rate": 0.00011383835452320097, |
| "loss": 0.982, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.4733642328297117, |
| "grad_norm": 2.0726158618927, |
| "learning_rate": 0.00011368385083401585, |
| "loss": 1.022, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.47384578342262085, |
| "grad_norm": 3.476106882095337, |
| "learning_rate": 0.00011352931384906125, |
| "loss": 0.5655, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.47432733401553, |
| "grad_norm": 2.5833818912506104, |
| "learning_rate": 0.00011337474394435908, |
| "loss": 0.7119, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.4748088846084392, |
| "grad_norm": 1.4606103897094727, |
| "learning_rate": 0.00011322014149601136, |
| "loss": 0.6343, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.47529043520134834, |
| "grad_norm": 3.8119499683380127, |
| "learning_rate": 0.00011306550688019926, |
| "loss": 0.7238, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.4757719857942575, |
| "grad_norm": 2.314828872680664, |
| "learning_rate": 0.0001129108404731823, |
| "loss": 0.7181, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.47625353638716666, |
| "grad_norm": 1.842475175857544, |
| "learning_rate": 0.0001127561426512973, |
| "loss": 0.7928, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.47673508698007583, |
| "grad_norm": 2.3919920921325684, |
| "learning_rate": 0.0001126014137909575, |
| "loss": 0.6528, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.477216637572985, |
| "grad_norm": 1.3240762948989868, |
| "learning_rate": 0.00011244665426865174, |
| "loss": 0.543, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.47769818816589416, |
| "grad_norm": 2.4185304641723633, |
| "learning_rate": 0.00011229186446094338, |
| "loss": 0.5988, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.4781797387588034, |
| "grad_norm": 1.5180091857910156, |
| "learning_rate": 0.00011213704474446951, |
| "loss": 0.8106, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.47866128935171254, |
| "grad_norm": 3.8423268795013428, |
| "learning_rate": 0.00011198219549594, |
| "loss": 0.7134, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.4791428399446217, |
| "grad_norm": 4.835480213165283, |
| "learning_rate": 0.00011182731709213659, |
| "loss": 0.5784, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.47962439053753086, |
| "grad_norm": 4.3305511474609375, |
| "learning_rate": 0.00011167240990991192, |
| "loss": 0.6444, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.48010594113044003, |
| "grad_norm": 1.8703162670135498, |
| "learning_rate": 0.00011151747432618871, |
| "loss": 0.6062, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.4805874917233492, |
| "grad_norm": 3.5100109577178955, |
| "learning_rate": 0.00011136251071795871, |
| "loss": 0.5488, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.48106904231625836, |
| "grad_norm": 2.39043927192688, |
| "learning_rate": 0.00011120751946228197, |
| "loss": 0.7438, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.4815505929091675, |
| "grad_norm": 3.630051612854004, |
| "learning_rate": 0.00011105250093628565, |
| "loss": 0.8574, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4820321435020767, |
| "grad_norm": 2.519382953643799, |
| "learning_rate": 0.00011089745551716344, |
| "loss": 0.8414, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.48251369409498585, |
| "grad_norm": 2.2886648178100586, |
| "learning_rate": 0.00011074238358217437, |
| "loss": 0.9677, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.482995244687895, |
| "grad_norm": 1.639683723449707, |
| "learning_rate": 0.00011058728550864197, |
| "loss": 0.4151, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.4834767952808042, |
| "grad_norm": 2.246243715286255, |
| "learning_rate": 0.00011043216167395344, |
| "loss": 0.8334, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.48395834587371334, |
| "grad_norm": 4.043992519378662, |
| "learning_rate": 0.00011027701245555865, |
| "loss": 1.1405, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.4844398964666225, |
| "grad_norm": 5.069822788238525, |
| "learning_rate": 0.00011012183823096917, |
| "loss": 0.7535, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.4849214470595317, |
| "grad_norm": 1.6714848279953003, |
| "learning_rate": 0.00010996663937775751, |
| "loss": 0.4408, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.4854029976524409, |
| "grad_norm": 1.6782582998275757, |
| "learning_rate": 0.000109811416273556, |
| "loss": 1.055, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.48588454824535005, |
| "grad_norm": 2.3483331203460693, |
| "learning_rate": 0.00010965616929605609, |
| "loss": 0.7248, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.4863660988382592, |
| "grad_norm": 2.866668701171875, |
| "learning_rate": 0.0001095008988230072, |
| "loss": 0.8629, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.48684764943116837, |
| "grad_norm": 3.7616584300994873, |
| "learning_rate": 0.00010934560523221602, |
| "loss": 0.952, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.48732920002407754, |
| "grad_norm": 5.05987548828125, |
| "learning_rate": 0.00010919028890154543, |
| "loss": 0.7482, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.4878107506169867, |
| "grad_norm": 3.6084094047546387, |
| "learning_rate": 0.00010903495020891375, |
| "loss": 0.8013, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.48829230120989586, |
| "grad_norm": 2.3544795513153076, |
| "learning_rate": 0.00010887958953229349, |
| "loss": 0.9513, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.488773851802805, |
| "grad_norm": 4.078423500061035, |
| "learning_rate": 0.00010872420724971088, |
| "loss": 0.8901, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.4892554023957142, |
| "grad_norm": 3.262572765350342, |
| "learning_rate": 0.0001085688037392446, |
| "loss": 0.7107, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.48973695298862335, |
| "grad_norm": 3.4895589351654053, |
| "learning_rate": 0.000108413379379025, |
| "loss": 0.5975, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.4902185035815325, |
| "grad_norm": 2.3548641204833984, |
| "learning_rate": 0.00010825793454723325, |
| "loss": 0.71, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.4907000541744417, |
| "grad_norm": 2.5070619583129883, |
| "learning_rate": 0.00010810246962210018, |
| "loss": 0.8754, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.49118160476735084, |
| "grad_norm": 2.6222572326660156, |
| "learning_rate": 0.00010794698498190557, |
| "loss": 0.7779, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.49166315536026006, |
| "grad_norm": 2.8594443798065186, |
| "learning_rate": 0.00010779148100497722, |
| "loss": 0.5911, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.4921447059531692, |
| "grad_norm": 3.380793333053589, |
| "learning_rate": 0.00010763595806968996, |
| "loss": 0.8463, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.4926262565460784, |
| "grad_norm": 3.048558235168457, |
| "learning_rate": 0.00010748041655446473, |
| "loss": 1.1503, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.49310780713898755, |
| "grad_norm": 3.162221670150757, |
| "learning_rate": 0.00010732485683776768, |
| "loss": 0.9634, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.4935893577318967, |
| "grad_norm": 1.7662273645401, |
| "learning_rate": 0.00010716927929810925, |
| "loss": 0.9218, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.4940709083248059, |
| "grad_norm": 6.701080322265625, |
| "learning_rate": 0.00010701368431404326, |
| "loss": 0.6088, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.49455245891771504, |
| "grad_norm": 1.6572067737579346, |
| "learning_rate": 0.00010685807226416598, |
| "loss": 0.4006, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.4950340095106242, |
| "grad_norm": 2.3362746238708496, |
| "learning_rate": 0.00010670244352711518, |
| "loss": 0.4711, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.49551556010353337, |
| "grad_norm": 3.27119779586792, |
| "learning_rate": 0.00010654679848156925, |
| "loss": 0.5751, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.49599711069644253, |
| "grad_norm": 2.6703665256500244, |
| "learning_rate": 0.00010639113750624625, |
| "loss": 0.3203, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.4964786612893517, |
| "grad_norm": 2.749845027923584, |
| "learning_rate": 0.00010623546097990303, |
| "loss": 0.7552, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.49696021188226086, |
| "grad_norm": 1.693564772605896, |
| "learning_rate": 0.00010607976928133423, |
| "loss": 0.3451, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.49744176247517, |
| "grad_norm": 2.492354154586792, |
| "learning_rate": 0.00010592406278937144, |
| "loss": 0.6278, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.4979233130680792, |
| "grad_norm": 3.982508897781372, |
| "learning_rate": 0.00010576834188288226, |
| "loss": 0.9494, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.4984048636609884, |
| "grad_norm": 3.6745517253875732, |
| "learning_rate": 0.00010561260694076935, |
| "loss": 0.8115, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.49888641425389757, |
| "grad_norm": 1.9711278676986694, |
| "learning_rate": 0.00010545685834196948, |
| "loss": 0.7224, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.49936796484680673, |
| "grad_norm": 3.948199510574341, |
| "learning_rate": 0.00010530109646545272, |
| "loss": 0.7509, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.4998495154397159, |
| "grad_norm": 4.0536041259765625, |
| "learning_rate": 0.0001051453216902214, |
| "loss": 0.8095, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.500331066032625, |
| "grad_norm": 3.6049885749816895, |
| "learning_rate": 0.00010498953439530925, |
| "loss": 0.8699, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.5008126166255342, |
| "grad_norm": 1.3765301704406738, |
| "learning_rate": 0.00010483373495978046, |
| "loss": 0.6613, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.5012941672184434, |
| "grad_norm": 1.5237274169921875, |
| "learning_rate": 0.00010467792376272877, |
| "loss": 0.8436, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.5017757178113526, |
| "grad_norm": 2.1992526054382324, |
| "learning_rate": 0.00010452210118327652, |
| "loss": 0.429, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.5022572684042618, |
| "grad_norm": 4.125129222869873, |
| "learning_rate": 0.00010436626760057378, |
| "loss": 0.7708, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.5027388189971709, |
| "grad_norm": 2.204009771347046, |
| "learning_rate": 0.00010421042339379732, |
| "loss": 0.5653, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.5032203695900801, |
| "grad_norm": 4.470865726470947, |
| "learning_rate": 0.00010405456894214987, |
| "loss": 0.7858, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.5037019201829892, |
| "grad_norm": 1.1038165092468262, |
| "learning_rate": 0.00010389870462485902, |
| "loss": 1.4328, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.5041834707758984, |
| "grad_norm": 1.9314682483673096, |
| "learning_rate": 0.00010374283082117635, |
| "loss": 0.3706, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.5046650213688075, |
| "grad_norm": 2.6393468379974365, |
| "learning_rate": 0.00010358694791037653, |
| "loss": 1.1257, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.5051465719617168, |
| "grad_norm": 3.338649034500122, |
| "learning_rate": 0.00010343105627175644, |
| "loss": 0.8054, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.5056281225546259, |
| "grad_norm": 1.6628873348236084, |
| "learning_rate": 0.00010327515628463415, |
| "loss": 0.4518, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5061096731475351, |
| "grad_norm": 3.3070363998413086, |
| "learning_rate": 0.00010311924832834808, |
| "loss": 1.2035, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.5065912237404442, |
| "grad_norm": 2.4879815578460693, |
| "learning_rate": 0.00010296333278225599, |
| "loss": 0.5938, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.5070727743333534, |
| "grad_norm": 3.5677437782287598, |
| "learning_rate": 0.00010280741002573413, |
| "loss": 0.3152, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.5075543249262625, |
| "grad_norm": 2.475534200668335, |
| "learning_rate": 0.00010265148043817632, |
| "loss": 0.789, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.5080358755191717, |
| "grad_norm": 3.422375202178955, |
| "learning_rate": 0.00010249554439899298, |
| "loss": 0.8623, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.508517426112081, |
| "grad_norm": 1.3005175590515137, |
| "learning_rate": 0.00010233960228761022, |
| "loss": 0.6675, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.5089989767049901, |
| "grad_norm": 1.275846004486084, |
| "learning_rate": 0.00010218365448346893, |
| "loss": 0.7612, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.5094805272978993, |
| "grad_norm": 3.3997249603271484, |
| "learning_rate": 0.00010202770136602388, |
| "loss": 0.839, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.5099620778908084, |
| "grad_norm": 1.7658668756484985, |
| "learning_rate": 0.00010187174331474271, |
| "loss": 0.4518, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.5104436284837176, |
| "grad_norm": 2.3334131240844727, |
| "learning_rate": 0.00010171578070910512, |
| "loss": 0.4001, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.5109251790766267, |
| "grad_norm": 2.203070878982544, |
| "learning_rate": 0.00010155981392860185, |
| "loss": 0.8666, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.5114067296695359, |
| "grad_norm": 1.5210875272750854, |
| "learning_rate": 0.00010140384335273386, |
| "loss": 0.8547, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.511888280262445, |
| "grad_norm": 2.5150206089019775, |
| "learning_rate": 0.00010124786936101127, |
| "loss": 0.6131, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.5123698308553543, |
| "grad_norm": 2.087355852127075, |
| "learning_rate": 0.00010109189233295255, |
| "loss": 0.7018, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.5128513814482634, |
| "grad_norm": 3.2802398204803467, |
| "learning_rate": 0.00010093591264808358, |
| "loss": 0.6533, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.5133329320411726, |
| "grad_norm": 2.5115907192230225, |
| "learning_rate": 0.00010077993068593663, |
| "loss": 0.8199, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.5138144826340817, |
| "grad_norm": 3.236037015914917, |
| "learning_rate": 0.00010062394682604963, |
| "loss": 0.649, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.5142960332269909, |
| "grad_norm": 2.0290400981903076, |
| "learning_rate": 0.00010046796144796497, |
| "loss": 0.5048, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.5147775838199001, |
| "grad_norm": 2.3132944107055664, |
| "learning_rate": 0.0001003119749312289, |
| "loss": 0.8111, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.5152591344128092, |
| "grad_norm": 3.8110101222991943, |
| "learning_rate": 0.00010015598765539031, |
| "loss": 0.831, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.5157406850057185, |
| "grad_norm": 2.6409425735473633, |
| "learning_rate": 0.0001, |
| "loss": 0.5713, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.5162222355986276, |
| "grad_norm": 2.540348768234253, |
| "learning_rate": 9.984401234460971e-05, |
| "loss": 0.896, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.5167037861915368, |
| "grad_norm": 1.605286717414856, |
| "learning_rate": 9.968802506877111e-05, |
| "loss": 0.7826, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.5171853367844459, |
| "grad_norm": 1.9852914810180664, |
| "learning_rate": 9.953203855203504e-05, |
| "loss": 0.484, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.5176668873773551, |
| "grad_norm": 2.697453498840332, |
| "learning_rate": 9.93760531739504e-05, |
| "loss": 0.5764, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.5181484379702642, |
| "grad_norm": 3.257183074951172, |
| "learning_rate": 9.922006931406338e-05, |
| "loss": 0.8809, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.5186299885631734, |
| "grad_norm": 1.4868934154510498, |
| "learning_rate": 9.906408735191643e-05, |
| "loss": 0.3895, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.5191115391560825, |
| "grad_norm": 2.801379919052124, |
| "learning_rate": 9.890810766704745e-05, |
| "loss": 0.6476, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.5195930897489918, |
| "grad_norm": 5.3302321434021, |
| "learning_rate": 9.875213063898875e-05, |
| "loss": 0.7303, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.5200746403419009, |
| "grad_norm": 8.286264419555664, |
| "learning_rate": 9.859615664726615e-05, |
| "loss": 0.8864, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5205561909348101, |
| "grad_norm": 2.5525264739990234, |
| "learning_rate": 9.844018607139818e-05, |
| "loss": 1.2073, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.5210377415277193, |
| "grad_norm": 3.0127081871032715, |
| "learning_rate": 9.828421929089493e-05, |
| "loss": 0.7991, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.5215192921206284, |
| "grad_norm": 3.983294725418091, |
| "learning_rate": 9.812825668525733e-05, |
| "loss": 0.821, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.5220008427135376, |
| "grad_norm": 7.565732479095459, |
| "learning_rate": 9.797229863397615e-05, |
| "loss": 1.2835, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.5224823933064467, |
| "grad_norm": 2.560930013656616, |
| "learning_rate": 9.781634551653108e-05, |
| "loss": 0.7872, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.522963943899356, |
| "grad_norm": 3.7823336124420166, |
| "learning_rate": 9.766039771238982e-05, |
| "loss": 0.9539, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.5234454944922651, |
| "grad_norm": 1.7851648330688477, |
| "learning_rate": 9.750445560100706e-05, |
| "loss": 0.7338, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.5239270450851743, |
| "grad_norm": 2.3627817630767822, |
| "learning_rate": 9.73485195618237e-05, |
| "loss": 0.87, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.5244085956780834, |
| "grad_norm": 3.479341745376587, |
| "learning_rate": 9.719258997426588e-05, |
| "loss": 0.872, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.5248901462709926, |
| "grad_norm": 3.8782670497894287, |
| "learning_rate": 9.703666721774402e-05, |
| "loss": 0.4405, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.5253716968639017, |
| "grad_norm": 1.5596513748168945, |
| "learning_rate": 9.688075167165194e-05, |
| "loss": 0.5061, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.525853247456811, |
| "grad_norm": 2.221703290939331, |
| "learning_rate": 9.672484371536586e-05, |
| "loss": 0.4747, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.52633479804972, |
| "grad_norm": 5.022744178771973, |
| "learning_rate": 9.656894372824358e-05, |
| "loss": 1.0149, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.5268163486426293, |
| "grad_norm": 1.9501501321792603, |
| "learning_rate": 9.64130520896235e-05, |
| "loss": 0.7204, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.5272978992355384, |
| "grad_norm": 1.2803010940551758, |
| "learning_rate": 9.625716917882367e-05, |
| "loss": 0.5088, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.5277794498284476, |
| "grad_norm": 1.8832592964172363, |
| "learning_rate": 9.6101295375141e-05, |
| "loss": 0.921, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.5282610004213568, |
| "grad_norm": 2.0987727642059326, |
| "learning_rate": 9.594543105785013e-05, |
| "loss": 0.8486, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.5287425510142659, |
| "grad_norm": 3.9266583919525146, |
| "learning_rate": 9.578957660620267e-05, |
| "loss": 0.5983, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.5292241016071751, |
| "grad_norm": 2.6706717014312744, |
| "learning_rate": 9.563373239942623e-05, |
| "loss": 0.617, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.5297056522000843, |
| "grad_norm": 1.8419418334960938, |
| "learning_rate": 9.547789881672348e-05, |
| "loss": 0.4538, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5301872027929935, |
| "grad_norm": 1.9119439125061035, |
| "learning_rate": 9.532207623727126e-05, |
| "loss": 0.7275, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.5306687533859026, |
| "grad_norm": 3.1396830081939697, |
| "learning_rate": 9.516626504021957e-05, |
| "loss": 0.6206, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.5311503039788118, |
| "grad_norm": 2.5384531021118164, |
| "learning_rate": 9.501046560469079e-05, |
| "loss": 1.0057, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.5316318545717209, |
| "grad_norm": 3.143725872039795, |
| "learning_rate": 9.485467830977864e-05, |
| "loss": 1.1685, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.5321134051646301, |
| "grad_norm": 4.282426357269287, |
| "learning_rate": 9.469890353454732e-05, |
| "loss": 0.6259, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.5325949557575392, |
| "grad_norm": 2.5603525638580322, |
| "learning_rate": 9.454314165803054e-05, |
| "loss": 0.6818, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.5330765063504485, |
| "grad_norm": 2.4884443283081055, |
| "learning_rate": 9.438739305923067e-05, |
| "loss": 0.7338, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.5335580569433576, |
| "grad_norm": 3.4453368186950684, |
| "learning_rate": 9.423165811711777e-05, |
| "loss": 0.8649, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.5340396075362668, |
| "grad_norm": 2.136265516281128, |
| "learning_rate": 9.407593721062859e-05, |
| "loss": 0.5932, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.534521158129176, |
| "grad_norm": 2.1778738498687744, |
| "learning_rate": 9.39202307186658e-05, |
| "loss": 0.3037, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5350027087220851, |
| "grad_norm": 1.9080350399017334, |
| "learning_rate": 9.3764539020097e-05, |
| "loss": 0.7092, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.5354842593149943, |
| "grad_norm": 2.198676824569702, |
| "learning_rate": 9.360886249375376e-05, |
| "loss": 1.0817, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.5359658099079034, |
| "grad_norm": 1.1177189350128174, |
| "learning_rate": 9.345320151843078e-05, |
| "loss": 0.5078, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.5364473605008127, |
| "grad_norm": 3.8491134643554688, |
| "learning_rate": 9.329755647288485e-05, |
| "loss": 0.9873, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.5369289110937218, |
| "grad_norm": 4.839039325714111, |
| "learning_rate": 9.314192773583403e-05, |
| "loss": 0.8585, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.537410461686631, |
| "grad_norm": 3.628781795501709, |
| "learning_rate": 9.298631568595674e-05, |
| "loss": 0.9069, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.5378920122795401, |
| "grad_norm": 1.936665415763855, |
| "learning_rate": 9.283072070189075e-05, |
| "loss": 0.6665, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.5383735628724493, |
| "grad_norm": 1.807746171951294, |
| "learning_rate": 9.267514316223234e-05, |
| "loss": 0.7337, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.5388551134653584, |
| "grad_norm": 2.178152322769165, |
| "learning_rate": 9.251958344553528e-05, |
| "loss": 0.701, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.5393366640582676, |
| "grad_norm": 2.263169765472412, |
| "learning_rate": 9.23640419303101e-05, |
| "loss": 0.6482, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5398182146511767, |
| "grad_norm": 2.1158978939056396, |
| "learning_rate": 9.220851899502283e-05, |
| "loss": 0.9083, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.540299765244086, |
| "grad_norm": 2.0844359397888184, |
| "learning_rate": 9.205301501809448e-05, |
| "loss": 0.9297, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.5407813158369951, |
| "grad_norm": 1.7951438426971436, |
| "learning_rate": 9.189753037789987e-05, |
| "loss": 0.7921, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.5412628664299043, |
| "grad_norm": 2.2726521492004395, |
| "learning_rate": 9.174206545276677e-05, |
| "loss": 0.8874, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.5417444170228135, |
| "grad_norm": 3.393622875213623, |
| "learning_rate": 9.158662062097501e-05, |
| "loss": 1.0911, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.5422259676157226, |
| "grad_norm": 1.4040405750274658, |
| "learning_rate": 9.143119626075542e-05, |
| "loss": 0.5292, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.5427075182086318, |
| "grad_norm": 1.302949070930481, |
| "learning_rate": 9.127579275028914e-05, |
| "loss": 1.026, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.5431890688015409, |
| "grad_norm": 2.1794188022613525, |
| "learning_rate": 9.112041046770653e-05, |
| "loss": 0.8072, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.5436706193944502, |
| "grad_norm": 2.3347835540771484, |
| "learning_rate": 9.096504979108629e-05, |
| "loss": 0.8512, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.5441521699873593, |
| "grad_norm": 2.353959083557129, |
| "learning_rate": 9.080971109845458e-05, |
| "loss": 0.9363, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5446337205802685, |
| "grad_norm": 2.1609857082366943, |
| "learning_rate": 9.0654394767784e-05, |
| "loss": 0.3391, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.5451152711731776, |
| "grad_norm": 3.1173667907714844, |
| "learning_rate": 9.049910117699281e-05, |
| "loss": 0.5835, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.5455968217660868, |
| "grad_norm": 3.2934017181396484, |
| "learning_rate": 9.034383070394393e-05, |
| "loss": 0.9396, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.5460783723589959, |
| "grad_norm": 3.2750277519226074, |
| "learning_rate": 9.0188583726444e-05, |
| "loss": 0.8517, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.5465599229519051, |
| "grad_norm": 0.8598925471305847, |
| "learning_rate": 9.00333606222425e-05, |
| "loss": 0.5358, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.5470414735448142, |
| "grad_norm": 2.2244086265563965, |
| "learning_rate": 8.987816176903082e-05, |
| "loss": 0.3203, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.5475230241377235, |
| "grad_norm": 2.2185556888580322, |
| "learning_rate": 8.972298754444136e-05, |
| "loss": 1.0547, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.5480045747306327, |
| "grad_norm": 1.47505784034729, |
| "learning_rate": 8.956783832604654e-05, |
| "loss": 0.4243, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.5484861253235418, |
| "grad_norm": 3.940340757369995, |
| "learning_rate": 8.941271449135806e-05, |
| "loss": 0.8955, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.548967675916451, |
| "grad_norm": 2.3822591304779053, |
| "learning_rate": 8.925761641782567e-05, |
| "loss": 0.6393, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5494492265093601, |
| "grad_norm": 1.8161604404449463, |
| "learning_rate": 8.910254448283659e-05, |
| "loss": 0.4928, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.5499307771022693, |
| "grad_norm": 3.2381865978240967, |
| "learning_rate": 8.894749906371439e-05, |
| "loss": 0.862, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.5504123276951784, |
| "grad_norm": 2.2119295597076416, |
| "learning_rate": 8.87924805377181e-05, |
| "loss": 0.4778, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.5508938782880877, |
| "grad_norm": 2.175503730773926, |
| "learning_rate": 8.863748928204131e-05, |
| "loss": 0.3811, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.5513754288809968, |
| "grad_norm": 3.0040910243988037, |
| "learning_rate": 8.848252567381131e-05, |
| "loss": 0.5659, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.551856979473906, |
| "grad_norm": 2.7777600288391113, |
| "learning_rate": 8.83275900900881e-05, |
| "loss": 0.5106, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.5523385300668151, |
| "grad_norm": 4.878298282623291, |
| "learning_rate": 8.817268290786343e-05, |
| "loss": 0.5554, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.5528200806597243, |
| "grad_norm": 5.596711158752441, |
| "learning_rate": 8.801780450406002e-05, |
| "loss": 0.4911, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.5533016312526334, |
| "grad_norm": 3.1435718536376953, |
| "learning_rate": 8.786295525553053e-05, |
| "loss": 0.3324, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.5537831818455426, |
| "grad_norm": 3.044595956802368, |
| "learning_rate": 8.770813553905664e-05, |
| "loss": 0.6101, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.5542647324384519, |
| "grad_norm": 2.739715576171875, |
| "learning_rate": 8.755334573134829e-05, |
| "loss": 0.6972, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.554746283031361, |
| "grad_norm": 1.3641911745071411, |
| "learning_rate": 8.739858620904251e-05, |
| "loss": 0.4947, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.5552278336242702, |
| "grad_norm": 1.8812917470932007, |
| "learning_rate": 8.724385734870271e-05, |
| "loss": 0.8228, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.5557093842171793, |
| "grad_norm": 3.0910966396331787, |
| "learning_rate": 8.708915952681769e-05, |
| "loss": 0.5776, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.5561909348100885, |
| "grad_norm": 2.192817449569702, |
| "learning_rate": 8.693449311980074e-05, |
| "loss": 0.935, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.5566724854029976, |
| "grad_norm": 2.3270866870880127, |
| "learning_rate": 8.677985850398866e-05, |
| "loss": 0.5251, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.5571540359959068, |
| "grad_norm": 3.0047972202301025, |
| "learning_rate": 8.662525605564093e-05, |
| "loss": 0.9796, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.557635586588816, |
| "grad_norm": 2.3164725303649902, |
| "learning_rate": 8.647068615093875e-05, |
| "loss": 1.551, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.5581171371817252, |
| "grad_norm": 3.4601895809173584, |
| "learning_rate": 8.631614916598419e-05, |
| "loss": 0.7455, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.5585986877746343, |
| "grad_norm": 3.388256549835205, |
| "learning_rate": 8.616164547679906e-05, |
| "loss": 0.5484, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.5590802383675435, |
| "grad_norm": 2.2302229404449463, |
| "learning_rate": 8.600717545932435e-05, |
| "loss": 0.5789, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.5595617889604526, |
| "grad_norm": 2.5507445335388184, |
| "learning_rate": 8.5852739489419e-05, |
| "loss": 0.2962, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.5600433395533618, |
| "grad_norm": 2.2931394577026367, |
| "learning_rate": 8.569833794285915e-05, |
| "loss": 0.9057, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.5605248901462709, |
| "grad_norm": 2.3694357872009277, |
| "learning_rate": 8.554397119533714e-05, |
| "loss": 0.9051, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.5610064407391802, |
| "grad_norm": 2.3861167430877686, |
| "learning_rate": 8.538963962246069e-05, |
| "loss": 0.6481, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.5614879913320894, |
| "grad_norm": 3.3635268211364746, |
| "learning_rate": 8.523534359975189e-05, |
| "loss": 0.6873, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.5619695419249985, |
| "grad_norm": 2.3280465602874756, |
| "learning_rate": 8.508108350264635e-05, |
| "loss": 0.4409, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.5624510925179077, |
| "grad_norm": 2.3025784492492676, |
| "learning_rate": 8.492685970649228e-05, |
| "loss": 0.3629, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.5629326431108168, |
| "grad_norm": 4.259292125701904, |
| "learning_rate": 8.477267258654949e-05, |
| "loss": 0.7646, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.563414193703726, |
| "grad_norm": 3.045973777770996, |
| "learning_rate": 8.461852251798866e-05, |
| "loss": 0.8309, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.5638957442966351, |
| "grad_norm": 2.590165376663208, |
| "learning_rate": 8.44644098758902e-05, |
| "loss": 0.4435, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.5643772948895444, |
| "grad_norm": 2.0724105834960938, |
| "learning_rate": 8.431033503524354e-05, |
| "loss": 0.4976, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.5648588454824535, |
| "grad_norm": 3.144411087036133, |
| "learning_rate": 8.415629837094611e-05, |
| "loss": 0.9775, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.5653403960753627, |
| "grad_norm": 2.584644079208374, |
| "learning_rate": 8.400230025780243e-05, |
| "loss": 0.6065, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.5658219466682718, |
| "grad_norm": 1.8154007196426392, |
| "learning_rate": 8.384834107052321e-05, |
| "loss": 0.3035, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.566303497261181, |
| "grad_norm": 3.097371816635132, |
| "learning_rate": 8.369442118372447e-05, |
| "loss": 0.6747, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.5667850478540901, |
| "grad_norm": 1.322751522064209, |
| "learning_rate": 8.35405409719266e-05, |
| "loss": 0.6194, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.5672665984469993, |
| "grad_norm": 2.8619985580444336, |
| "learning_rate": 8.338670080955349e-05, |
| "loss": 0.9159, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.5677481490399086, |
| "grad_norm": 1.4597111940383911, |
| "learning_rate": 8.323290107093143e-05, |
| "loss": 0.5528, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.5682296996328177, |
| "grad_norm": 1.321386694908142, |
| "learning_rate": 8.307914213028856e-05, |
| "loss": 0.5454, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5687112502257269, |
| "grad_norm": 2.653350591659546, |
| "learning_rate": 8.292542436175356e-05, |
| "loss": 0.6959, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.569192800818636, |
| "grad_norm": 3.2664124965667725, |
| "learning_rate": 8.277174813935508e-05, |
| "loss": 0.9298, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.5696743514115452, |
| "grad_norm": 3.0547754764556885, |
| "learning_rate": 8.261811383702061e-05, |
| "loss": 0.7422, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.5701559020044543, |
| "grad_norm": 2.19242000579834, |
| "learning_rate": 8.246452182857562e-05, |
| "loss": 0.7436, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.5706374525973635, |
| "grad_norm": 4.479813098907471, |
| "learning_rate": 8.231097248774274e-05, |
| "loss": 1.1304, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.5711190031902726, |
| "grad_norm": 2.662180185317993, |
| "learning_rate": 8.215746618814067e-05, |
| "loss": 0.6066, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.5716005537831819, |
| "grad_norm": 3.7930872440338135, |
| "learning_rate": 8.200400330328348e-05, |
| "loss": 0.7421, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.572082104376091, |
| "grad_norm": 2.9955811500549316, |
| "learning_rate": 8.185058420657957e-05, |
| "loss": 1.1659, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.5725636549690002, |
| "grad_norm": 1.8238601684570312, |
| "learning_rate": 8.16972092713308e-05, |
| "loss": 0.636, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.5730452055619093, |
| "grad_norm": 2.3906164169311523, |
| "learning_rate": 8.154387887073158e-05, |
| "loss": 0.4951, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.5735267561548185, |
| "grad_norm": 1.853758692741394, |
| "learning_rate": 8.139059337786792e-05, |
| "loss": 0.7715, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.5740083067477277, |
| "grad_norm": 2.8323585987091064, |
| "learning_rate": 8.12373531657166e-05, |
| "loss": 1.0706, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.5744898573406368, |
| "grad_norm": 1.3406877517700195, |
| "learning_rate": 8.108415860714418e-05, |
| "loss": 0.3461, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.5749714079335461, |
| "grad_norm": 2.031278371810913, |
| "learning_rate": 8.093101007490622e-05, |
| "loss": 0.8868, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.5754529585264552, |
| "grad_norm": 3.345834255218506, |
| "learning_rate": 8.077790794164619e-05, |
| "loss": 0.4278, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.5759345091193644, |
| "grad_norm": 2.130840301513672, |
| "learning_rate": 8.062485257989471e-05, |
| "loss": 1.0242, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.5764160597122735, |
| "grad_norm": 2.4846746921539307, |
| "learning_rate": 8.047184436206864e-05, |
| "loss": 0.7, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.5768976103051827, |
| "grad_norm": 2.193743944168091, |
| "learning_rate": 8.031888366046998e-05, |
| "loss": 0.6467, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.5773791608980918, |
| "grad_norm": 1.9895037412643433, |
| "learning_rate": 8.016597084728526e-05, |
| "loss": 0.7244, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.577860711491001, |
| "grad_norm": 2.5619122982025146, |
| "learning_rate": 8.001310629458443e-05, |
| "loss": 0.9385, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5783422620839102, |
| "grad_norm": 1.9781352281570435, |
| "learning_rate": 7.986029037432002e-05, |
| "loss": 0.8172, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.5788238126768194, |
| "grad_norm": 3.2591843605041504, |
| "learning_rate": 7.970752345832623e-05, |
| "loss": 0.7278, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.5793053632697285, |
| "grad_norm": 1.6107450723648071, |
| "learning_rate": 7.9554805918318e-05, |
| "loss": 0.3799, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.5797869138626377, |
| "grad_norm": 3.175673484802246, |
| "learning_rate": 7.940213812589018e-05, |
| "loss": 0.5979, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.5802684644555468, |
| "grad_norm": 2.0690531730651855, |
| "learning_rate": 7.92495204525165e-05, |
| "loss": 0.6842, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.580750015048456, |
| "grad_norm": 3.2871673107147217, |
| "learning_rate": 7.909695326954878e-05, |
| "loss": 1.0002, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.5812315656413652, |
| "grad_norm": 1.0341432094573975, |
| "learning_rate": 7.894443694821602e-05, |
| "loss": 0.516, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.5817131162342744, |
| "grad_norm": 2.580730676651001, |
| "learning_rate": 7.879197185962339e-05, |
| "loss": 0.7898, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.5821946668271836, |
| "grad_norm": 4.864838600158691, |
| "learning_rate": 7.863955837475144e-05, |
| "loss": 0.8172, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.5826762174200927, |
| "grad_norm": 1.060434341430664, |
| "learning_rate": 7.848719686445515e-05, |
| "loss": 0.3784, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.5831577680130019, |
| "grad_norm": 3.191971778869629, |
| "learning_rate": 7.833488769946306e-05, |
| "loss": 0.8063, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.583639318605911, |
| "grad_norm": 2.525768518447876, |
| "learning_rate": 7.818263125037633e-05, |
| "loss": 0.6985, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.5841208691988202, |
| "grad_norm": 2.8149242401123047, |
| "learning_rate": 7.803042788766777e-05, |
| "loss": 1.064, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.5846024197917293, |
| "grad_norm": 3.0168797969818115, |
| "learning_rate": 7.787827798168115e-05, |
| "loss": 0.387, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.5850839703846386, |
| "grad_norm": 2.1874630451202393, |
| "learning_rate": 7.772618190263009e-05, |
| "loss": 0.5811, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.5855655209775477, |
| "grad_norm": 1.8578369617462158, |
| "learning_rate": 7.757414002059726e-05, |
| "loss": 0.5424, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.5860470715704569, |
| "grad_norm": 3.507887840270996, |
| "learning_rate": 7.742215270553349e-05, |
| "loss": 0.5704, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.586528622163366, |
| "grad_norm": 1.8217120170593262, |
| "learning_rate": 7.727022032725672e-05, |
| "loss": 0.72, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.5870101727562752, |
| "grad_norm": 1.2904176712036133, |
| "learning_rate": 7.711834325545135e-05, |
| "loss": 0.4966, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.5874917233491844, |
| "grad_norm": 1.9854986667633057, |
| "learning_rate": 7.696652185966711e-05, |
| "loss": 0.6202, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.5879732739420935, |
| "grad_norm": 2.831481456756592, |
| "learning_rate": 7.681475650931834e-05, |
| "loss": 0.568, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.5884548245350028, |
| "grad_norm": 2.528315305709839, |
| "learning_rate": 7.666304757368297e-05, |
| "loss": 0.9762, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.5889363751279119, |
| "grad_norm": 1.8668657541275024, |
| "learning_rate": 7.651139542190164e-05, |
| "loss": 0.7539, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.5894179257208211, |
| "grad_norm": 2.6514816284179688, |
| "learning_rate": 7.635980042297687e-05, |
| "loss": 0.6104, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.5898994763137302, |
| "grad_norm": 2.8228659629821777, |
| "learning_rate": 7.620826294577208e-05, |
| "loss": 0.5398, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.5903810269066394, |
| "grad_norm": 1.8238624334335327, |
| "learning_rate": 7.605678335901071e-05, |
| "loss": 0.4965, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.5908625774995485, |
| "grad_norm": 2.332958221435547, |
| "learning_rate": 7.59053620312754e-05, |
| "loss": 1.0528, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.5913441280924577, |
| "grad_norm": 3.5963058471679688, |
| "learning_rate": 7.575399933100697e-05, |
| "loss": 0.5706, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.5918256786853668, |
| "grad_norm": 3.345517873764038, |
| "learning_rate": 7.560269562650368e-05, |
| "loss": 1.0137, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.5923072292782761, |
| "grad_norm": 5.635433673858643, |
| "learning_rate": 7.54514512859201e-05, |
| "loss": 0.4088, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.5927887798711852, |
| "grad_norm": 2.0128109455108643, |
| "learning_rate": 7.530026667726645e-05, |
| "loss": 0.5574, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.5932703304640944, |
| "grad_norm": 2.09451961517334, |
| "learning_rate": 7.51491421684076e-05, |
| "loss": 0.9239, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.5937518810570036, |
| "grad_norm": 3.1210248470306396, |
| "learning_rate": 7.49980781270622e-05, |
| "loss": 0.9972, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.5942334316499127, |
| "grad_norm": 2.9423022270202637, |
| "learning_rate": 7.484707492080172e-05, |
| "loss": 0.9545, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.5947149822428219, |
| "grad_norm": 8.651620864868164, |
| "learning_rate": 7.469613291704962e-05, |
| "loss": 0.9859, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.595196532835731, |
| "grad_norm": 2.376633644104004, |
| "learning_rate": 7.45452524830805e-05, |
| "loss": 0.6038, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.5956780834286403, |
| "grad_norm": 1.0200681686401367, |
| "learning_rate": 7.439443398601903e-05, |
| "loss": 0.4735, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.5961596340215494, |
| "grad_norm": 3.08100962638855, |
| "learning_rate": 7.424367779283926e-05, |
| "loss": 0.9614, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.5966411846144586, |
| "grad_norm": 1.8693490028381348, |
| "learning_rate": 7.409298427036364e-05, |
| "loss": 0.4885, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.5971227352073677, |
| "grad_norm": 3.25297474861145, |
| "learning_rate": 7.39423537852621e-05, |
| "loss": 0.7068, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.5976042858002769, |
| "grad_norm": 2.2627036571502686, |
| "learning_rate": 7.379178670405123e-05, |
| "loss": 0.9651, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.598085836393186, |
| "grad_norm": 3.4235429763793945, |
| "learning_rate": 7.364128339309326e-05, |
| "loss": 0.9293, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.5985673869860952, |
| "grad_norm": 2.710484743118286, |
| "learning_rate": 7.349084421859533e-05, |
| "loss": 0.6263, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.5990489375790043, |
| "grad_norm": 2.2872800827026367, |
| "learning_rate": 7.334046954660852e-05, |
| "loss": 0.4224, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.5995304881719136, |
| "grad_norm": 1.7476857900619507, |
| "learning_rate": 7.31901597430269e-05, |
| "loss": 0.7634, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.6000120387648227, |
| "grad_norm": 2.7267417907714844, |
| "learning_rate": 7.303991517358678e-05, |
| "loss": 0.845, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.6004935893577319, |
| "grad_norm": 2.053980827331543, |
| "learning_rate": 7.288973620386568e-05, |
| "loss": 0.8618, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.6009751399506411, |
| "grad_norm": 1.9945694208145142, |
| "learning_rate": 7.273962319928151e-05, |
| "loss": 0.7425, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.6014566905435502, |
| "grad_norm": 1.3418025970458984, |
| "learning_rate": 7.258957652509171e-05, |
| "loss": 0.6352, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.6019382411364594, |
| "grad_norm": 2.392909288406372, |
| "learning_rate": 7.24395965463923e-05, |
| "loss": 0.4533, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.6024197917293685, |
| "grad_norm": 2.3010342121124268, |
| "learning_rate": 7.228968362811702e-05, |
| "loss": 0.4342, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.6029013423222778, |
| "grad_norm": 3.485913038253784, |
| "learning_rate": 7.21398381350364e-05, |
| "loss": 0.9649, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.6033828929151869, |
| "grad_norm": 0.9633323550224304, |
| "learning_rate": 7.199006043175698e-05, |
| "loss": 0.7225, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.6038644435080961, |
| "grad_norm": 2.767479658126831, |
| "learning_rate": 7.184035088272028e-05, |
| "loss": 0.5086, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.6043459941010052, |
| "grad_norm": 3.417263984680176, |
| "learning_rate": 7.169070985220208e-05, |
| "loss": 0.7542, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.6048275446939144, |
| "grad_norm": 1.0457793474197388, |
| "learning_rate": 7.154113770431132e-05, |
| "loss": 1.0051, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.6053090952868235, |
| "grad_norm": 1.780932068824768, |
| "learning_rate": 7.13916348029894e-05, |
| "loss": 0.7171, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.6057906458797327, |
| "grad_norm": 3.2504794597625732, |
| "learning_rate": 7.124220151200926e-05, |
| "loss": 0.4477, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.6062721964726419, |
| "grad_norm": 3.2658979892730713, |
| "learning_rate": 7.10928381949744e-05, |
| "loss": 0.5208, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.6067537470655511, |
| "grad_norm": 2.36083984375, |
| "learning_rate": 7.094354521531807e-05, |
| "loss": 0.8187, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.6072352976584603, |
| "grad_norm": 1.9257503747940063, |
| "learning_rate": 7.079432293630244e-05, |
| "loss": 0.9669, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.6077168482513694, |
| "grad_norm": 3.070887804031372, |
| "learning_rate": 7.064517172101753e-05, |
| "loss": 0.8842, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.6081983988442786, |
| "grad_norm": 2.950284481048584, |
| "learning_rate": 7.04960919323806e-05, |
| "loss": 0.6997, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.6086799494371877, |
| "grad_norm": 3.656165838241577, |
| "learning_rate": 7.034708393313493e-05, |
| "loss": 0.7774, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.609161500030097, |
| "grad_norm": 3.879746198654175, |
| "learning_rate": 7.019814808584928e-05, |
| "loss": 0.6871, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.609643050623006, |
| "grad_norm": 2.684112310409546, |
| "learning_rate": 7.004928475291678e-05, |
| "loss": 0.36, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.6101246012159153, |
| "grad_norm": 4.9579644203186035, |
| "learning_rate": 6.990049429655412e-05, |
| "loss": 0.888, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.6106061518088244, |
| "grad_norm": 2.2652103900909424, |
| "learning_rate": 6.97517770788007e-05, |
| "loss": 0.6242, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.6110877024017336, |
| "grad_norm": 2.9428718090057373, |
| "learning_rate": 6.960313346151761e-05, |
| "loss": 0.5431, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.6115692529946427, |
| "grad_norm": 3.530306339263916, |
| "learning_rate": 6.9454563806387e-05, |
| "loss": 1.0434, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.6120508035875519, |
| "grad_norm": 1.2338889837265015, |
| "learning_rate": 6.930606847491094e-05, |
| "loss": 0.7309, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.612532354180461, |
| "grad_norm": 2.873732328414917, |
| "learning_rate": 6.915764782841072e-05, |
| "loss": 0.8321, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.6130139047733703, |
| "grad_norm": 2.2155025005340576, |
| "learning_rate": 6.900930222802588e-05, |
| "loss": 0.3917, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.6134954553662795, |
| "grad_norm": 1.7441500425338745, |
| "learning_rate": 6.886103203471337e-05, |
| "loss": 0.587, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.6139770059591886, |
| "grad_norm": 3.1032984256744385, |
| "learning_rate": 6.871283760924665e-05, |
| "loss": 0.6219, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.6144585565520978, |
| "grad_norm": 1.5018118619918823, |
| "learning_rate": 6.856471931221478e-05, |
| "loss": 0.8532, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.6149401071450069, |
| "grad_norm": 2.236863851547241, |
| "learning_rate": 6.841667750402162e-05, |
| "loss": 0.4704, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.6154216577379161, |
| "grad_norm": 2.4868059158325195, |
| "learning_rate": 6.826871254488496e-05, |
| "loss": 0.688, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.6159032083308252, |
| "grad_norm": 1.9790339469909668, |
| "learning_rate": 6.812082479483553e-05, |
| "loss": 0.2572, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.6163847589237345, |
| "grad_norm": 8.326330184936523, |
| "learning_rate": 6.797301461371625e-05, |
| "loss": 0.7398, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.6168663095166436, |
| "grad_norm": 2.0423824787139893, |
| "learning_rate": 6.782528236118124e-05, |
| "loss": 0.6242, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.6173478601095528, |
| "grad_norm": 5.033036708831787, |
| "learning_rate": 6.767762839669503e-05, |
| "loss": 0.9255, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.6178294107024619, |
| "grad_norm": 1.7515184879302979, |
| "learning_rate": 6.753005307953167e-05, |
| "loss": 0.536, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.6183109612953711, |
| "grad_norm": 3.3410611152648926, |
| "learning_rate": 6.738255676877381e-05, |
| "loss": 0.6655, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.6187925118882802, |
| "grad_norm": 2.5926554203033447, |
| "learning_rate": 6.723513982331195e-05, |
| "loss": 0.7555, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.6192740624811894, |
| "grad_norm": 3.253159761428833, |
| "learning_rate": 6.708780260184333e-05, |
| "loss": 0.5316, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.6197556130740985, |
| "grad_norm": 3.1270864009857178, |
| "learning_rate": 6.694054546287132e-05, |
| "loss": 0.6255, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.6202371636670078, |
| "grad_norm": 5.128495216369629, |
| "learning_rate": 6.679336876470441e-05, |
| "loss": 0.7771, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.620718714259917, |
| "grad_norm": 1.7940768003463745, |
| "learning_rate": 6.664627286545535e-05, |
| "loss": 0.7788, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.6212002648528261, |
| "grad_norm": 2.9516167640686035, |
| "learning_rate": 6.649925812304025e-05, |
| "loss": 0.3909, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.6216818154457353, |
| "grad_norm": 1.452250599861145, |
| "learning_rate": 6.635232489517782e-05, |
| "loss": 0.6476, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.6221633660386444, |
| "grad_norm": 1.570677638053894, |
| "learning_rate": 6.620547353938836e-05, |
| "loss": 0.4986, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.6226449166315536, |
| "grad_norm": 2.5657029151916504, |
| "learning_rate": 6.605870441299302e-05, |
| "loss": 0.7346, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.6231264672244627, |
| "grad_norm": 3.6876044273376465, |
| "learning_rate": 6.591201787311285e-05, |
| "loss": 1.2753, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.623608017817372, |
| "grad_norm": 2.211846113204956, |
| "learning_rate": 6.57654142766679e-05, |
| "loss": 0.8497, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.6240895684102811, |
| "grad_norm": 2.1840100288391113, |
| "learning_rate": 6.561889398037643e-05, |
| "loss": 0.4188, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.6245711190031903, |
| "grad_norm": 1.7685606479644775, |
| "learning_rate": 6.547245734075403e-05, |
| "loss": 0.6529, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.6250526695960994, |
| "grad_norm": 1.6937798261642456, |
| "learning_rate": 6.532610471411274e-05, |
| "loss": 0.5592, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.6255342201890086, |
| "grad_norm": 2.8329896926879883, |
| "learning_rate": 6.517983645656014e-05, |
| "loss": 0.6343, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.6260157707819177, |
| "grad_norm": 2.330113172531128, |
| "learning_rate": 6.503365292399857e-05, |
| "loss": 1.0539, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.6264973213748269, |
| "grad_norm": 2.074939489364624, |
| "learning_rate": 6.488755447212418e-05, |
| "loss": 0.7005, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.6269788719677362, |
| "grad_norm": 1.091862678527832, |
| "learning_rate": 6.474154145642612e-05, |
| "loss": 0.9088, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.6274604225606453, |
| "grad_norm": 2.839646577835083, |
| "learning_rate": 6.459561423218561e-05, |
| "loss": 0.8255, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.6279419731535545, |
| "grad_norm": 2.919734477996826, |
| "learning_rate": 6.444977315447521e-05, |
| "loss": 0.5693, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.6284235237464636, |
| "grad_norm": 3.9232397079467773, |
| "learning_rate": 6.430401857815776e-05, |
| "loss": 0.9091, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.6289050743393728, |
| "grad_norm": 2.6297950744628906, |
| "learning_rate": 6.415835085788575e-05, |
| "loss": 0.6015, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.6293866249322819, |
| "grad_norm": 4.350391387939453, |
| "learning_rate": 6.401277034810017e-05, |
| "loss": 0.4089, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.6298681755251911, |
| "grad_norm": 2.0582656860351562, |
| "learning_rate": 6.386727740302994e-05, |
| "loss": 0.5737, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.6303497261181003, |
| "grad_norm": 2.8593883514404297, |
| "learning_rate": 6.37218723766909e-05, |
| "loss": 0.5349, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.6308312767110095, |
| "grad_norm": 2.048414945602417, |
| "learning_rate": 6.357655562288488e-05, |
| "loss": 0.928, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.6313128273039186, |
| "grad_norm": 2.7405006885528564, |
| "learning_rate": 6.343132749519902e-05, |
| "loss": 0.9519, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.6317943778968278, |
| "grad_norm": 1.9664356708526611, |
| "learning_rate": 6.328618834700474e-05, |
| "loss": 0.5531, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.6322759284897369, |
| "grad_norm": 3.6196768283843994, |
| "learning_rate": 6.314113853145703e-05, |
| "loss": 0.9089, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.6327574790826461, |
| "grad_norm": 2.040229082107544, |
| "learning_rate": 6.299617840149349e-05, |
| "loss": 0.7539, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.6332390296755553, |
| "grad_norm": 3.340404748916626, |
| "learning_rate": 6.285130830983339e-05, |
| "loss": 0.4569, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.6337205802684645, |
| "grad_norm": 1.9362350702285767, |
| "learning_rate": 6.270652860897704e-05, |
| "loss": 0.6094, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.6342021308613737, |
| "grad_norm": 3.0248935222625732, |
| "learning_rate": 6.25618396512048e-05, |
| "loss": 0.9306, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.6346836814542828, |
| "grad_norm": 4.514108657836914, |
| "learning_rate": 6.24172417885762e-05, |
| "loss": 0.672, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.635165232047192, |
| "grad_norm": 1.5764905214309692, |
| "learning_rate": 6.227273537292911e-05, |
| "loss": 0.8099, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.6356467826401011, |
| "grad_norm": 1.775439739227295, |
| "learning_rate": 6.212832075587891e-05, |
| "loss": 0.6518, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.6361283332330103, |
| "grad_norm": 2.8492040634155273, |
| "learning_rate": 6.19839982888176e-05, |
| "loss": 0.6318, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.6366098838259194, |
| "grad_norm": 3.787897825241089, |
| "learning_rate": 6.183976832291296e-05, |
| "loss": 0.546, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.6370914344188287, |
| "grad_norm": 1.812030553817749, |
| "learning_rate": 6.169563120910775e-05, |
| "loss": 0.9272, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.6375729850117378, |
| "grad_norm": 3.6685822010040283, |
| "learning_rate": 6.155158729811867e-05, |
| "loss": 0.9627, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.638054535604647, |
| "grad_norm": 2.6122891902923584, |
| "learning_rate": 6.140763694043578e-05, |
| "loss": 0.6109, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.6385360861975561, |
| "grad_norm": 1.1181974411010742, |
| "learning_rate": 6.126378048632139e-05, |
| "loss": 0.6805, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.6390176367904653, |
| "grad_norm": 2.0275444984436035, |
| "learning_rate": 6.112001828580944e-05, |
| "loss": 0.9841, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.6394991873833744, |
| "grad_norm": 2.3112661838531494, |
| "learning_rate": 6.0976350688704455e-05, |
| "loss": 0.4051, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.6399807379762836, |
| "grad_norm": 2.2878177165985107, |
| "learning_rate": 6.083277804458072e-05, |
| "loss": 0.6933, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.6404622885691929, |
| "grad_norm": 2.3939578533172607, |
| "learning_rate": 6.068930070278159e-05, |
| "loss": 0.7104, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.640943839162102, |
| "grad_norm": 2.7695722579956055, |
| "learning_rate": 6.054591901241846e-05, |
| "loss": 0.592, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.6414253897550112, |
| "grad_norm": 1.4362547397613525, |
| "learning_rate": 6.040263332237002e-05, |
| "loss": 0.7355, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.6419069403479203, |
| "grad_norm": 1.8200515508651733, |
| "learning_rate": 6.025944398128137e-05, |
| "loss": 0.6226, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.6423884909408295, |
| "grad_norm": 3.0382542610168457, |
| "learning_rate": 6.011635133756309e-05, |
| "loss": 0.5577, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.6428700415337386, |
| "grad_norm": 1.6607255935668945, |
| "learning_rate": 5.99733557393906e-05, |
| "loss": 0.7285, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.6433515921266478, |
| "grad_norm": 5.5869951248168945, |
| "learning_rate": 5.983045753470308e-05, |
| "loss": 1.0281, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.6438331427195569, |
| "grad_norm": 1.662786841392517, |
| "learning_rate": 5.96876570712028e-05, |
| "loss": 0.9, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.6443146933124662, |
| "grad_norm": 1.6657735109329224, |
| "learning_rate": 5.954495469635417e-05, |
| "loss": 0.3676, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.6447962439053753, |
| "grad_norm": 2.297683000564575, |
| "learning_rate": 5.940235075738296e-05, |
| "loss": 0.8609, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.6452777944982845, |
| "grad_norm": 3.4080722332000732, |
| "learning_rate": 5.925984560127542e-05, |
| "loss": 1.11, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.6457593450911936, |
| "grad_norm": 5.633896350860596, |
| "learning_rate": 5.911743957477739e-05, |
| "loss": 1.1069, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.6462408956841028, |
| "grad_norm": 3.4594554901123047, |
| "learning_rate": 5.897513302439355e-05, |
| "loss": 0.5313, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.646722446277012, |
| "grad_norm": 2.593113660812378, |
| "learning_rate": 5.883292629638651e-05, |
| "loss": 0.7902, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.6472039968699211, |
| "grad_norm": 1.7572481632232666, |
| "learning_rate": 5.869081973677604e-05, |
| "loss": 0.6139, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.6476855474628304, |
| "grad_norm": 2.4023494720458984, |
| "learning_rate": 5.8548813691338134e-05, |
| "loss": 0.9859, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.6481670980557395, |
| "grad_norm": 2.5072319507598877, |
| "learning_rate": 5.84069085056042e-05, |
| "loss": 1.0116, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.6486486486486487, |
| "grad_norm": 4.375543117523193, |
| "learning_rate": 5.826510452486027e-05, |
| "loss": 0.4556, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.6491301992415578, |
| "grad_norm": 2.4621849060058594, |
| "learning_rate": 5.81234020941461e-05, |
| "loss": 0.422, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.649611749834467, |
| "grad_norm": 3.9239776134490967, |
| "learning_rate": 5.798180155825437e-05, |
| "loss": 0.8455, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.6500933004273761, |
| "grad_norm": 2.0162253379821777, |
| "learning_rate": 5.784030326172981e-05, |
| "loss": 0.9106, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.6505748510202853, |
| "grad_norm": 2.1763408184051514, |
| "learning_rate": 5.7698907548868395e-05, |
| "loss": 0.3975, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.6510564016131944, |
| "grad_norm": 1.5609049797058105, |
| "learning_rate": 5.755761476371653e-05, |
| "loss": 0.4149, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.6515379522061037, |
| "grad_norm": 2.1389899253845215, |
| "learning_rate": 5.741642525007003e-05, |
| "loss": 1.0683, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.6520195027990128, |
| "grad_norm": 2.574646472930908, |
| "learning_rate": 5.727533935147359e-05, |
| "loss": 0.6677, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.652501053391922, |
| "grad_norm": 3.0155136585235596, |
| "learning_rate": 5.713435741121975e-05, |
| "loss": 0.5586, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.6529826039848312, |
| "grad_norm": 2.190906524658203, |
| "learning_rate": 5.699347977234799e-05, |
| "loss": 0.7389, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.6534641545777403, |
| "grad_norm": 3.7313098907470703, |
| "learning_rate": 5.685270677764412e-05, |
| "loss": 0.6318, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.6539457051706495, |
| "grad_norm": 1.6453399658203125, |
| "learning_rate": 5.671203876963931e-05, |
| "loss": 0.6455, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.6544272557635586, |
| "grad_norm": 2.063249111175537, |
| "learning_rate": 5.657147609060924e-05, |
| "loss": 0.7916, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.6549088063564679, |
| "grad_norm": 2.5297508239746094, |
| "learning_rate": 5.643101908257333e-05, |
| "loss": 0.7939, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.655390356949377, |
| "grad_norm": 3.1960248947143555, |
| "learning_rate": 5.629066808729385e-05, |
| "loss": 0.4917, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.6558719075422862, |
| "grad_norm": 2.5569260120391846, |
| "learning_rate": 5.6150423446275144e-05, |
| "loss": 0.54, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.6563534581351953, |
| "grad_norm": 2.1092185974121094, |
| "learning_rate": 5.601028550076277e-05, |
| "loss": 0.5214, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.6568350087281045, |
| "grad_norm": 1.965410828590393, |
| "learning_rate": 5.587025459174271e-05, |
| "loss": 0.5952, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.6573165593210136, |
| "grad_norm": 1.7591605186462402, |
| "learning_rate": 5.573033105994038e-05, |
| "loss": 0.7113, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.6577981099139228, |
| "grad_norm": 1.566158652305603, |
| "learning_rate": 5.559051524582002e-05, |
| "loss": 0.7087, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.658279660506832, |
| "grad_norm": 2.3267264366149902, |
| "learning_rate": 5.5450807489583777e-05, |
| "loss": 0.673, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.6587612110997412, |
| "grad_norm": 2.1096274852752686, |
| "learning_rate": 5.531120813117085e-05, |
| "loss": 0.6511, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.6592427616926503, |
| "grad_norm": 2.117785692214966, |
| "learning_rate": 5.517171751025667e-05, |
| "loss": 0.6863, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.6597243122855595, |
| "grad_norm": 2.8077008724212646, |
| "learning_rate": 5.5032335966252103e-05, |
| "loss": 0.3785, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.6602058628784687, |
| "grad_norm": 3.291800022125244, |
| "learning_rate": 5.489306383830258e-05, |
| "loss": 0.4787, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.6606874134713778, |
| "grad_norm": 2.1516401767730713, |
| "learning_rate": 5.475390146528738e-05, |
| "loss": 0.6011, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.661168964064287, |
| "grad_norm": 1.5693684816360474, |
| "learning_rate": 5.461484918581858e-05, |
| "loss": 0.4216, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.6616505146571962, |
| "grad_norm": 3.2931742668151855, |
| "learning_rate": 5.4475907338240494e-05, |
| "loss": 0.4253, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.6621320652501054, |
| "grad_norm": 4.190717697143555, |
| "learning_rate": 5.43370762606287e-05, |
| "loss": 1.0326, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.6626136158430145, |
| "grad_norm": 2.5259127616882324, |
| "learning_rate": 5.4198356290789276e-05, |
| "loss": 0.738, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.6630951664359237, |
| "grad_norm": 3.3138790130615234, |
| "learning_rate": 5.405974776625785e-05, |
| "loss": 0.4473, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.6635767170288328, |
| "grad_norm": 1.9713718891143799, |
| "learning_rate": 5.392125102429899e-05, |
| "loss": 0.6931, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.664058267621742, |
| "grad_norm": 1.5703426599502563, |
| "learning_rate": 5.378286640190522e-05, |
| "loss": 0.6073, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.6645398182146511, |
| "grad_norm": 2.653319835662842, |
| "learning_rate": 5.364459423579629e-05, |
| "loss": 0.7751, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.6650213688075604, |
| "grad_norm": 1.566805124282837, |
| "learning_rate": 5.350643486241825e-05, |
| "loss": 0.4636, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.6655029194004695, |
| "grad_norm": 1.8555259704589844, |
| "learning_rate": 5.33683886179428e-05, |
| "loss": 0.4216, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.6659844699933787, |
| "grad_norm": 5.087174892425537, |
| "learning_rate": 5.3230455838266266e-05, |
| "loss": 0.5842, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.6664660205862879, |
| "grad_norm": 3.006080150604248, |
| "learning_rate": 5.309263685900898e-05, |
| "loss": 0.4825, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.666947571179197, |
| "grad_norm": 1.3487999439239502, |
| "learning_rate": 5.295493201551433e-05, |
| "loss": 0.4206, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.6674291217721062, |
| "grad_norm": 3.11458683013916, |
| "learning_rate": 5.281734164284802e-05, |
| "loss": 0.7871, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.6679106723650153, |
| "grad_norm": 2.8389225006103516, |
| "learning_rate": 5.26798660757971e-05, |
| "loss": 0.7236, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.6683922229579246, |
| "grad_norm": 2.2241663932800293, |
| "learning_rate": 5.2542505648869434e-05, |
| "loss": 0.7008, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.6688737735508337, |
| "grad_norm": 1.7703624963760376, |
| "learning_rate": 5.240526069629265e-05, |
| "loss": 0.7416, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.6693553241437429, |
| "grad_norm": 2.581017017364502, |
| "learning_rate": 5.22681315520134e-05, |
| "loss": 1.014, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.669836874736652, |
| "grad_norm": 1.8126165866851807, |
| "learning_rate": 5.213111854969661e-05, |
| "loss": 0.7268, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.6703184253295612, |
| "grad_norm": 2.5835635662078857, |
| "learning_rate": 5.199422202272448e-05, |
| "loss": 0.6623, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.6707999759224703, |
| "grad_norm": 1.6041021347045898, |
| "learning_rate": 5.185744230419589e-05, |
| "loss": 0.6665, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.6712815265153795, |
| "grad_norm": 1.2473118305206299, |
| "learning_rate": 5.172077972692553e-05, |
| "loss": 0.5992, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.6717630771082886, |
| "grad_norm": 2.415090560913086, |
| "learning_rate": 5.1584234623442974e-05, |
| "loss": 0.7947, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.6722446277011979, |
| "grad_norm": 1.156660795211792, |
| "learning_rate": 5.1447807325992025e-05, |
| "loss": 0.4969, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.6727261782941071, |
| "grad_norm": 1.7031502723693848, |
| "learning_rate": 5.13114981665298e-05, |
| "loss": 0.7214, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.6732077288870162, |
| "grad_norm": 1.3150161504745483, |
| "learning_rate": 5.117530747672603e-05, |
| "loss": 0.6689, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.6736892794799254, |
| "grad_norm": 4.105159282684326, |
| "learning_rate": 5.103923558796203e-05, |
| "loss": 0.8401, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.6741708300728345, |
| "grad_norm": 1.6519443988800049, |
| "learning_rate": 5.090328283133019e-05, |
| "loss": 0.603, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6746523806657437, |
| "grad_norm": 2.9713187217712402, |
| "learning_rate": 5.0767449537632986e-05, |
| "loss": 0.5331, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.6751339312586528, |
| "grad_norm": 2.6898117065429688, |
| "learning_rate": 5.06317360373822e-05, |
| "loss": 0.7912, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.6756154818515621, |
| "grad_norm": 2.4432663917541504, |
| "learning_rate": 5.049614266079813e-05, |
| "loss": 0.5751, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.6760970324444712, |
| "grad_norm": 2.470055103302002, |
| "learning_rate": 5.036066973780882e-05, |
| "loss": 0.483, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.6765785830373804, |
| "grad_norm": 1.4328922033309937, |
| "learning_rate": 5.022531759804918e-05, |
| "loss": 0.6776, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.6770601336302895, |
| "grad_norm": 3.8581573963165283, |
| "learning_rate": 5.009008657086025e-05, |
| "loss": 1.1248, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.6775416842231987, |
| "grad_norm": 2.339750289916992, |
| "learning_rate": 4.9954976985288395e-05, |
| "loss": 0.644, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.6780232348161078, |
| "grad_norm": 4.777864456176758, |
| "learning_rate": 4.981998917008448e-05, |
| "loss": 0.6807, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.678504785409017, |
| "grad_norm": 1.9307043552398682, |
| "learning_rate": 4.9685123453703e-05, |
| "loss": 0.6934, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.6789863360019261, |
| "grad_norm": 2.9564454555511475, |
| "learning_rate": 4.955038016430149e-05, |
| "loss": 0.6737, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.6794678865948354, |
| "grad_norm": 2.540689468383789, |
| "learning_rate": 4.9415759629739455e-05, |
| "loss": 0.5258, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.6799494371877446, |
| "grad_norm": 1.9452462196350098, |
| "learning_rate": 4.928126217757782e-05, |
| "loss": 0.9632, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.6804309877806537, |
| "grad_norm": 2.1210763454437256, |
| "learning_rate": 4.914688813507797e-05, |
| "loss": 0.981, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.6809125383735629, |
| "grad_norm": 2.85506010055542, |
| "learning_rate": 4.901263782920105e-05, |
| "loss": 0.6188, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.681394088966472, |
| "grad_norm": 2.3669497966766357, |
| "learning_rate": 4.887851158660706e-05, |
| "loss": 0.4394, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.6818756395593812, |
| "grad_norm": 1.358422040939331, |
| "learning_rate": 4.8744509733654184e-05, |
| "loss": 0.6346, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.6823571901522903, |
| "grad_norm": 1.982836127281189, |
| "learning_rate": 4.861063259639793e-05, |
| "loss": 0.5481, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.6828387407451996, |
| "grad_norm": 2.6741561889648438, |
| "learning_rate": 4.847688050059033e-05, |
| "loss": 0.7687, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.6833202913381087, |
| "grad_norm": 2.1041994094848633, |
| "learning_rate": 4.8343253771679155e-05, |
| "loss": 0.6466, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.6838018419310179, |
| "grad_norm": 1.9289573431015015, |
| "learning_rate": 4.82097527348072e-05, |
| "loss": 0.8381, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.684283392523927, |
| "grad_norm": 1.3552838563919067, |
| "learning_rate": 4.8076377714811284e-05, |
| "loss": 0.654, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.6847649431168362, |
| "grad_norm": 2.0205588340759277, |
| "learning_rate": 4.7943129036221735e-05, |
| "loss": 0.6172, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.6852464937097453, |
| "grad_norm": 1.8615128993988037, |
| "learning_rate": 4.781000702326142e-05, |
| "loss": 0.502, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.6857280443026545, |
| "grad_norm": 3.413642406463623, |
| "learning_rate": 4.767701199984497e-05, |
| "loss": 0.7401, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.6862095948955638, |
| "grad_norm": 1.818594217300415, |
| "learning_rate": 4.7544144289578066e-05, |
| "loss": 0.3476, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.6866911454884729, |
| "grad_norm": 2.0355210304260254, |
| "learning_rate": 4.7411404215756594e-05, |
| "loss": 0.6143, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.6871726960813821, |
| "grad_norm": 2.3590846061706543, |
| "learning_rate": 4.7278792101365866e-05, |
| "loss": 0.7234, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.6876542466742912, |
| "grad_norm": 4.059364318847656, |
| "learning_rate": 4.714630826907985e-05, |
| "loss": 0.8293, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.6881357972672004, |
| "grad_norm": 5.450283050537109, |
| "learning_rate": 4.701395304126038e-05, |
| "loss": 0.9174, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.6886173478601095, |
| "grad_norm": 1.6425899267196655, |
| "learning_rate": 4.6881726739956375e-05, |
| "loss": 0.482, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.6890988984530187, |
| "grad_norm": 1.5643469095230103, |
| "learning_rate": 4.6749629686902984e-05, |
| "loss": 0.8827, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.6895804490459279, |
| "grad_norm": 4.321664333343506, |
| "learning_rate": 4.661766220352097e-05, |
| "loss": 0.5119, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.6900619996388371, |
| "grad_norm": 3.4666759967803955, |
| "learning_rate": 4.64858246109157e-05, |
| "loss": 1.1991, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.6905435502317462, |
| "grad_norm": 4.904367923736572, |
| "learning_rate": 4.63541172298766e-05, |
| "loss": 0.8087, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.6910251008246554, |
| "grad_norm": 2.414597272872925, |
| "learning_rate": 4.622254038087622e-05, |
| "loss": 0.9569, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.6915066514175645, |
| "grad_norm": 2.7692532539367676, |
| "learning_rate": 4.60910943840695e-05, |
| "loss": 0.705, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.6919882020104737, |
| "grad_norm": 1.4934767484664917, |
| "learning_rate": 4.5959779559292985e-05, |
| "loss": 0.3923, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.6924697526033828, |
| "grad_norm": 1.7524209022521973, |
| "learning_rate": 4.582859622606406e-05, |
| "loss": 0.4808, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.6929513031962921, |
| "grad_norm": 1.5376161336898804, |
| "learning_rate": 4.569754470358014e-05, |
| "loss": 0.7108, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.6934328537892013, |
| "grad_norm": 2.8887248039245605, |
| "learning_rate": 4.556662531071796e-05, |
| "loss": 0.7365, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.6939144043821104, |
| "grad_norm": 1.2393244504928589, |
| "learning_rate": 4.54358383660327e-05, |
| "loss": 0.6929, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.6943959549750196, |
| "grad_norm": 1.984318494796753, |
| "learning_rate": 4.530518418775733e-05, |
| "loss": 0.5759, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.6948775055679287, |
| "grad_norm": 3.4806067943573, |
| "learning_rate": 4.5174663093801674e-05, |
| "loss": 0.963, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.6953590561608379, |
| "grad_norm": 0.9544948935508728, |
| "learning_rate": 4.504427540175181e-05, |
| "loss": 0.4253, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.695840606753747, |
| "grad_norm": 1.5310953855514526, |
| "learning_rate": 4.491402142886922e-05, |
| "loss": 0.6396, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.6963221573466563, |
| "grad_norm": 1.5412194728851318, |
| "learning_rate": 4.4783901492089984e-05, |
| "loss": 0.8048, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.6968037079395654, |
| "grad_norm": 0.9687153100967407, |
| "learning_rate": 4.465391590802407e-05, |
| "loss": 0.3689, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.6972852585324746, |
| "grad_norm": 2.8478314876556396, |
| "learning_rate": 4.4524064992954516e-05, |
| "loss": 0.6788, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.6977668091253837, |
| "grad_norm": 4.530721187591553, |
| "learning_rate": 4.4394349062836736e-05, |
| "loss": 0.7302, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.6982483597182929, |
| "grad_norm": 2.6478817462921143, |
| "learning_rate": 4.4264768433297565e-05, |
| "loss": 0.8899, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.698729910311202, |
| "grad_norm": 2.0213184356689453, |
| "learning_rate": 4.4135323419634766e-05, |
| "loss": 0.9633, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.6992114609041112, |
| "grad_norm": 3.5041468143463135, |
| "learning_rate": 4.4006014336816035e-05, |
| "loss": 1.0225, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.6996930114970205, |
| "grad_norm": 1.1038386821746826, |
| "learning_rate": 4.387684149947837e-05, |
| "loss": 0.3247, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.7001745620899296, |
| "grad_norm": 2.288525342941284, |
| "learning_rate": 4.374780522192726e-05, |
| "loss": 1.0528, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.7006561126828388, |
| "grad_norm": 4.361599922180176, |
| "learning_rate": 4.3618905818135805e-05, |
| "loss": 0.5694, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.7011376632757479, |
| "grad_norm": 2.766280174255371, |
| "learning_rate": 4.349014360174417e-05, |
| "loss": 0.5461, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.7016192138686571, |
| "grad_norm": 2.252498149871826, |
| "learning_rate": 4.336151888605871e-05, |
| "loss": 0.6858, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.7021007644615662, |
| "grad_norm": 2.4805188179016113, |
| "learning_rate": 4.323303198405117e-05, |
| "loss": 0.9368, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.7025823150544754, |
| "grad_norm": 3.281759738922119, |
| "learning_rate": 4.310468320835796e-05, |
| "loss": 0.9059, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.7030638656473845, |
| "grad_norm": 1.8752919435501099, |
| "learning_rate": 4.297647287127946e-05, |
| "loss": 0.3884, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.7035454162402938, |
| "grad_norm": 2.1307055950164795, |
| "learning_rate": 4.284840128477913e-05, |
| "loss": 0.8951, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.7040269668332029, |
| "grad_norm": 1.1077980995178223, |
| "learning_rate": 4.2720468760482854e-05, |
| "loss": 0.5871, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.7045085174261121, |
| "grad_norm": 1.9439555406570435, |
| "learning_rate": 4.2592675609678135e-05, |
| "loss": 0.5813, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.7049900680190212, |
| "grad_norm": 2.4993746280670166, |
| "learning_rate": 4.24650221433134e-05, |
| "loss": 0.671, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.7054716186119304, |
| "grad_norm": 2.181760787963867, |
| "learning_rate": 4.2337508671997086e-05, |
| "loss": 0.4199, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.7059531692048396, |
| "grad_norm": 2.025681495666504, |
| "learning_rate": 4.221013550599707e-05, |
| "loss": 0.4202, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.7064347197977487, |
| "grad_norm": 3.3956892490386963, |
| "learning_rate": 4.208290295523984e-05, |
| "loss": 0.8027, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.706916270390658, |
| "grad_norm": 2.7422797679901123, |
| "learning_rate": 4.1955811329309746e-05, |
| "loss": 1.2046, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.7073978209835671, |
| "grad_norm": 2.5726048946380615, |
| "learning_rate": 4.182886093744813e-05, |
| "loss": 1.1736, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.7078793715764763, |
| "grad_norm": 2.2286531925201416, |
| "learning_rate": 4.170205208855281e-05, |
| "loss": 0.5392, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.7083609221693854, |
| "grad_norm": 1.7045278549194336, |
| "learning_rate": 4.157538509117714e-05, |
| "loss": 0.7592, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.7088424727622946, |
| "grad_norm": 3.2489068508148193, |
| "learning_rate": 4.144886025352934e-05, |
| "loss": 0.6095, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.7093240233552037, |
| "grad_norm": 2.089620351791382, |
| "learning_rate": 4.13224778834717e-05, |
| "loss": 0.4248, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.7098055739481129, |
| "grad_norm": 1.5423088073730469, |
| "learning_rate": 4.1196238288519874e-05, |
| "loss": 0.2669, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.710287124541022, |
| "grad_norm": 1.412553071975708, |
| "learning_rate": 4.107014177584211e-05, |
| "loss": 0.3754, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.7107686751339313, |
| "grad_norm": 1.9968864917755127, |
| "learning_rate": 4.094418865225853e-05, |
| "loss": 0.5111, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.7112502257268404, |
| "grad_norm": 2.4750101566314697, |
| "learning_rate": 4.081837922424027e-05, |
| "loss": 0.6448, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.7117317763197496, |
| "grad_norm": 2.239208221435547, |
| "learning_rate": 4.069271379790891e-05, |
| "loss": 0.4287, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.7122133269126587, |
| "grad_norm": 7.368317604064941, |
| "learning_rate": 4.0567192679035636e-05, |
| "loss": 1.0325, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.7126948775055679, |
| "grad_norm": 1.8279064893722534, |
| "learning_rate": 4.044181617304048e-05, |
| "loss": 0.2988, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.7131764280984771, |
| "grad_norm": 1.781420111656189, |
| "learning_rate": 4.03165845849916e-05, |
| "loss": 0.5195, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.7136579786913863, |
| "grad_norm": 4.007209777832031, |
| "learning_rate": 4.019149821960455e-05, |
| "loss": 0.6838, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.7141395292842955, |
| "grad_norm": 2.4389944076538086, |
| "learning_rate": 4.006655738124152e-05, |
| "loss": 0.773, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.7146210798772046, |
| "grad_norm": 1.686699628829956, |
| "learning_rate": 3.9941762373910586e-05, |
| "loss": 0.3119, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.7151026304701138, |
| "grad_norm": 3.3148396015167236, |
| "learning_rate": 3.9817113501265016e-05, |
| "loss": 0.5543, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.7155841810630229, |
| "grad_norm": 1.4443938732147217, |
| "learning_rate": 3.9692611066602516e-05, |
| "loss": 0.6294, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.7160657316559321, |
| "grad_norm": 1.5491441488265991, |
| "learning_rate": 3.956825537286436e-05, |
| "loss": 0.312, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.7165472822488412, |
| "grad_norm": 2.059058427810669, |
| "learning_rate": 3.944404672263494e-05, |
| "loss": 0.7961, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.7170288328417505, |
| "grad_norm": 2.7234976291656494, |
| "learning_rate": 3.931998541814069e-05, |
| "loss": 0.7906, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.7175103834346596, |
| "grad_norm": 3.2691609859466553, |
| "learning_rate": 3.919607176124966e-05, |
| "loss": 0.9895, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.7179919340275688, |
| "grad_norm": 4.118697166442871, |
| "learning_rate": 3.9072306053470566e-05, |
| "loss": 0.5686, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.7184734846204779, |
| "grad_norm": 0.7744289040565491, |
| "learning_rate": 3.8948688595952164e-05, |
| "loss": 0.3498, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.7189550352133871, |
| "grad_norm": 2.5896894931793213, |
| "learning_rate": 3.882521968948246e-05, |
| "loss": 0.9491, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.7194365858062963, |
| "grad_norm": 3.2697811126708984, |
| "learning_rate": 3.8701899634488014e-05, |
| "loss": 0.6669, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.7199181363992054, |
| "grad_norm": 1.5220059156417847, |
| "learning_rate": 3.857872873103322e-05, |
| "loss": 0.5544, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.7203996869921147, |
| "grad_norm": 2.07112717628479, |
| "learning_rate": 3.8455707278819507e-05, |
| "loss": 0.5567, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.7208812375850238, |
| "grad_norm": 2.2222588062286377, |
| "learning_rate": 3.833283557718471e-05, |
| "loss": 0.4158, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.721362788177933, |
| "grad_norm": 2.1287026405334473, |
| "learning_rate": 3.821011392510228e-05, |
| "loss": 0.4907, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.7218443387708421, |
| "grad_norm": 3.9196484088897705, |
| "learning_rate": 3.808754262118046e-05, |
| "loss": 0.3054, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.7223258893637513, |
| "grad_norm": 1.4229110479354858, |
| "learning_rate": 3.796512196366182e-05, |
| "loss": 0.7671, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.7228074399566604, |
| "grad_norm": 1.968087911605835, |
| "learning_rate": 3.784285225042229e-05, |
| "loss": 0.9449, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.7232889905495696, |
| "grad_norm": 3.0822694301605225, |
| "learning_rate": 3.772073377897052e-05, |
| "loss": 0.8556, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.7237705411424787, |
| "grad_norm": 2.186847686767578, |
| "learning_rate": 3.7598766846447184e-05, |
| "loss": 0.364, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.724252091735388, |
| "grad_norm": 2.3544540405273438, |
| "learning_rate": 3.747695174962423e-05, |
| "loss": 0.9461, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.7247336423282971, |
| "grad_norm": 2.437185525894165, |
| "learning_rate": 3.7355288784904116e-05, |
| "loss": 0.6636, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.7252151929212063, |
| "grad_norm": 4.94728422164917, |
| "learning_rate": 3.7233778248319176e-05, |
| "loss": 0.4558, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.7256967435141155, |
| "grad_norm": 1.2178354263305664, |
| "learning_rate": 3.7112420435530845e-05, |
| "loss": 0.4775, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.7261782941070246, |
| "grad_norm": 2.4430301189422607, |
| "learning_rate": 3.69912156418289e-05, |
| "loss": 0.385, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.7266598446999338, |
| "grad_norm": 1.444451093673706, |
| "learning_rate": 3.687016416213084e-05, |
| "loss": 0.5812, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.7271413952928429, |
| "grad_norm": 1.2562270164489746, |
| "learning_rate": 3.674926629098113e-05, |
| "loss": 0.2545, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.7276229458857522, |
| "grad_norm": 3.6411561965942383, |
| "learning_rate": 3.6628522322550394e-05, |
| "loss": 0.4228, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.7281044964786613, |
| "grad_norm": 2.335496425628662, |
| "learning_rate": 3.6507932550634846e-05, |
| "loss": 0.4863, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.7285860470715705, |
| "grad_norm": 2.558220386505127, |
| "learning_rate": 3.638749726865552e-05, |
| "loss": 0.31, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.7290675976644796, |
| "grad_norm": 1.8757025003433228, |
| "learning_rate": 3.6267216769657485e-05, |
| "loss": 0.722, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.7295491482573888, |
| "grad_norm": 1.8420372009277344, |
| "learning_rate": 3.6147091346309224e-05, |
| "loss": 0.7818, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.7300306988502979, |
| "grad_norm": 3.078178644180298, |
| "learning_rate": 3.602712129090189e-05, |
| "loss": 0.5627, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.7305122494432071, |
| "grad_norm": 1.5147814750671387, |
| "learning_rate": 3.590730689534857e-05, |
| "loss": 0.6291, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.7309938000361162, |
| "grad_norm": 1.8767939805984497, |
| "learning_rate": 3.578764845118362e-05, |
| "loss": 0.3796, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.7314753506290255, |
| "grad_norm": 3.034921646118164, |
| "learning_rate": 3.566814624956194e-05, |
| "loss": 0.5662, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.7319569012219346, |
| "grad_norm": 0.8887065649032593, |
| "learning_rate": 3.554880058125819e-05, |
| "loss": 0.4554, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.7324384518148438, |
| "grad_norm": 3.390536308288574, |
| "learning_rate": 3.5429611736666235e-05, |
| "loss": 0.543, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.732920002407753, |
| "grad_norm": 1.751465082168579, |
| "learning_rate": 3.53105800057983e-05, |
| "loss": 0.4838, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.7334015530006621, |
| "grad_norm": 1.9569932222366333, |
| "learning_rate": 3.519170567828435e-05, |
| "loss": 0.557, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.7338831035935713, |
| "grad_norm": 1.79653799533844, |
| "learning_rate": 3.507298904337134e-05, |
| "loss": 0.7246, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.7343646541864804, |
| "grad_norm": 1.8159929513931274, |
| "learning_rate": 3.495443038992253e-05, |
| "loss": 0.3555, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.7348462047793897, |
| "grad_norm": 3.199507474899292, |
| "learning_rate": 3.4836030006416775e-05, |
| "loss": 0.5046, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.7353277553722988, |
| "grad_norm": 2.1682991981506348, |
| "learning_rate": 3.471778818094785e-05, |
| "loss": 0.7456, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.735809305965208, |
| "grad_norm": 1.0989952087402344, |
| "learning_rate": 3.459970520122364e-05, |
| "loss": 0.5804, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.7362908565581171, |
| "grad_norm": 1.0416721105575562, |
| "learning_rate": 3.44817813545656e-05, |
| "loss": 0.3529, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.7367724071510263, |
| "grad_norm": 4.703090667724609, |
| "learning_rate": 3.4364016927907974e-05, |
| "loss": 0.5961, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.7372539577439354, |
| "grad_norm": 2.4348576068878174, |
| "learning_rate": 3.424641220779711e-05, |
| "loss": 1.0376, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.7377355083368446, |
| "grad_norm": 1.9386546611785889, |
| "learning_rate": 3.412896748039067e-05, |
| "loss": 0.5634, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.7382170589297538, |
| "grad_norm": 1.232994556427002, |
| "learning_rate": 3.401168303145713e-05, |
| "loss": 0.4579, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.738698609522663, |
| "grad_norm": 1.5528676509857178, |
| "learning_rate": 3.3894559146374924e-05, |
| "loss": 0.4419, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.7391801601155722, |
| "grad_norm": 2.1310155391693115, |
| "learning_rate": 3.37775961101318e-05, |
| "loss": 0.5981, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.7396617107084813, |
| "grad_norm": 3.4211032390594482, |
| "learning_rate": 3.366079420732413e-05, |
| "loss": 1.0065, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.7401432613013905, |
| "grad_norm": 2.1246776580810547, |
| "learning_rate": 3.3544153722156216e-05, |
| "loss": 0.4723, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.7406248118942996, |
| "grad_norm": 3.7902305126190186, |
| "learning_rate": 3.3427674938439594e-05, |
| "loss": 0.7686, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.7411063624872088, |
| "grad_norm": 1.1903222799301147, |
| "learning_rate": 3.3311358139592317e-05, |
| "loss": 0.425, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.741587913080118, |
| "grad_norm": 2.2395949363708496, |
| "learning_rate": 3.319520360863837e-05, |
| "loss": 0.511, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.7420694636730272, |
| "grad_norm": 3.4029898643493652, |
| "learning_rate": 3.3079211628206854e-05, |
| "loss": 0.3296, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.7425510142659363, |
| "grad_norm": 2.042520523071289, |
| "learning_rate": 3.296338248053129e-05, |
| "loss": 0.3447, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.7430325648588455, |
| "grad_norm": 1.8394999504089355, |
| "learning_rate": 3.2847716447449096e-05, |
| "loss": 0.8341, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.7435141154517546, |
| "grad_norm": 1.8122001886367798, |
| "learning_rate": 3.2732213810400745e-05, |
| "loss": 0.5026, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.7439956660446638, |
| "grad_norm": 1.094519853591919, |
| "learning_rate": 3.261687485042915e-05, |
| "loss": 0.3282, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.7444772166375729, |
| "grad_norm": 1.8451745510101318, |
| "learning_rate": 3.250169984817897e-05, |
| "loss": 0.598, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.7449587672304822, |
| "grad_norm": 2.1285202503204346, |
| "learning_rate": 3.238668908389586e-05, |
| "loss": 0.5509, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.7454403178233914, |
| "grad_norm": 5.398340702056885, |
| "learning_rate": 3.227184283742591e-05, |
| "loss": 0.8054, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.7459218684163005, |
| "grad_norm": 4.164507865905762, |
| "learning_rate": 3.215716138821488e-05, |
| "loss": 0.6616, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.7464034190092097, |
| "grad_norm": 2.449247360229492, |
| "learning_rate": 3.204264501530756e-05, |
| "loss": 0.5928, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.7468849696021188, |
| "grad_norm": 2.73966121673584, |
| "learning_rate": 3.192829399734706e-05, |
| "loss": 0.8957, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.747366520195028, |
| "grad_norm": 1.7563470602035522, |
| "learning_rate": 3.181410861257413e-05, |
| "loss": 0.7951, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.7478480707879371, |
| "grad_norm": 2.797558307647705, |
| "learning_rate": 3.170008913882656e-05, |
| "loss": 0.4921, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.7483296213808464, |
| "grad_norm": 0.7847899794578552, |
| "learning_rate": 3.1586235853538325e-05, |
| "loss": 0.6741, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.7488111719737555, |
| "grad_norm": 3.5709075927734375, |
| "learning_rate": 3.1472549033739126e-05, |
| "loss": 0.3847, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.7492927225666647, |
| "grad_norm": 2.81365704536438, |
| "learning_rate": 3.1359028956053615e-05, |
| "loss": 0.4534, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.7497742731595738, |
| "grad_norm": 2.762085199356079, |
| "learning_rate": 3.1245675896700685e-05, |
| "loss": 1.0397, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.750255823752483, |
| "grad_norm": 1.423474907875061, |
| "learning_rate": 3.113249013149284e-05, |
| "loss": 0.3242, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.7507373743453921, |
| "grad_norm": 2.8285672664642334, |
| "learning_rate": 3.101947193583557e-05, |
| "loss": 0.7633, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.7512189249383013, |
| "grad_norm": 1.8334670066833496, |
| "learning_rate": 3.0906621584726546e-05, |
| "loss": 0.9668, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.7517004755312104, |
| "grad_norm": 4.472631454467773, |
| "learning_rate": 3.079393935275513e-05, |
| "loss": 0.752, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.7521820261241197, |
| "grad_norm": 1.2868741750717163, |
| "learning_rate": 3.068142551410155e-05, |
| "loss": 0.2786, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.7526635767170289, |
| "grad_norm": 1.9108686447143555, |
| "learning_rate": 3.0569080342536347e-05, |
| "loss": 0.4188, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.753145127309938, |
| "grad_norm": 3.5402438640594482, |
| "learning_rate": 3.0456904111419572e-05, |
| "loss": 0.7682, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.7536266779028472, |
| "grad_norm": 2.0236620903015137, |
| "learning_rate": 3.034489709370033e-05, |
| "loss": 0.4578, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.7541082284957563, |
| "grad_norm": 1.6213475465774536, |
| "learning_rate": 3.0233059561915855e-05, |
| "loss": 0.625, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.7545897790886655, |
| "grad_norm": 2.4623217582702637, |
| "learning_rate": 3.01213917881911e-05, |
| "loss": 0.6995, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.7550713296815746, |
| "grad_norm": 7.693182468414307, |
| "learning_rate": 3.0009894044237907e-05, |
| "loss": 0.5778, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.7555528802744839, |
| "grad_norm": 1.2127306461334229, |
| "learning_rate": 2.9898566601354418e-05, |
| "loss": 0.7593, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.756034430867393, |
| "grad_norm": 2.333127021789551, |
| "learning_rate": 2.9787409730424374e-05, |
| "loss": 0.5861, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.7565159814603022, |
| "grad_norm": 2.391322612762451, |
| "learning_rate": 2.96764237019165e-05, |
| "loss": 0.6207, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.7569975320532113, |
| "grad_norm": 2.2498581409454346, |
| "learning_rate": 2.9565608785883815e-05, |
| "loss": 0.7908, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.7574790826461205, |
| "grad_norm": 1.9790698289871216, |
| "learning_rate": 2.9454965251962973e-05, |
| "loss": 0.9975, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.7579606332390296, |
| "grad_norm": 1.974587321281433, |
| "learning_rate": 2.9344493369373637e-05, |
| "loss": 0.4825, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.7584421838319388, |
| "grad_norm": 4.309758186340332, |
| "learning_rate": 2.9234193406917833e-05, |
| "loss": 0.6722, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.7589237344248481, |
| "grad_norm": 8.978368759155273, |
| "learning_rate": 2.912406563297916e-05, |
| "loss": 1.2436, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.7594052850177572, |
| "grad_norm": 2.3713653087615967, |
| "learning_rate": 2.901411031552236e-05, |
| "loss": 0.6931, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.7598868356106664, |
| "grad_norm": 2.390103340148926, |
| "learning_rate": 2.8904327722092495e-05, |
| "loss": 0.7875, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.7603683862035755, |
| "grad_norm": 2.7292089462280273, |
| "learning_rate": 2.879471811981437e-05, |
| "loss": 0.9226, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.7608499367964847, |
| "grad_norm": 1.6569634675979614, |
| "learning_rate": 2.868528177539187e-05, |
| "loss": 0.9006, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.7613314873893938, |
| "grad_norm": 2.9300882816314697, |
| "learning_rate": 2.8576018955107285e-05, |
| "loss": 0.7848, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.761813037982303, |
| "grad_norm": 3.6729085445404053, |
| "learning_rate": 2.8466929924820705e-05, |
| "loss": 0.9429, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.7622945885752122, |
| "grad_norm": 2.4138500690460205, |
| "learning_rate": 2.8358014949969334e-05, |
| "loss": 0.8423, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.7627761391681214, |
| "grad_norm": 2.518306255340576, |
| "learning_rate": 2.8249274295566864e-05, |
| "loss": 0.5066, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.7632576897610305, |
| "grad_norm": 2.244164228439331, |
| "learning_rate": 2.8140708226202884e-05, |
| "loss": 0.5005, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.7637392403539397, |
| "grad_norm": 2.3682243824005127, |
| "learning_rate": 2.803231700604204e-05, |
| "loss": 0.5431, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.7642207909468488, |
| "grad_norm": 2.6888134479522705, |
| "learning_rate": 2.7924100898823702e-05, |
| "loss": 0.6596, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.764702341539758, |
| "grad_norm": 2.3632571697235107, |
| "learning_rate": 2.7816060167861002e-05, |
| "loss": 0.6924, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.7651838921326672, |
| "grad_norm": 1.6576805114746094, |
| "learning_rate": 2.7708195076040445e-05, |
| "loss": 0.5694, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.7656654427255764, |
| "grad_norm": 1.40910005569458, |
| "learning_rate": 2.760050588582114e-05, |
| "loss": 0.6316, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.7661469933184856, |
| "grad_norm": 4.426036834716797, |
| "learning_rate": 2.749299285923417e-05, |
| "loss": 0.7936, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.7666285439113947, |
| "grad_norm": 2.89945387840271, |
| "learning_rate": 2.7385656257881997e-05, |
| "loss": 0.4305, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.7671100945043039, |
| "grad_norm": 7.421449184417725, |
| "learning_rate": 2.7278496342937788e-05, |
| "loss": 0.4538, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.767591645097213, |
| "grad_norm": 1.8523188829421997, |
| "learning_rate": 2.717151337514482e-05, |
| "loss": 0.4911, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.7680731956901222, |
| "grad_norm": 1.7590643167495728, |
| "learning_rate": 2.7064707614815776e-05, |
| "loss": 0.5798, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.7685547462830313, |
| "grad_norm": 2.223667860031128, |
| "learning_rate": 2.6958079321832185e-05, |
| "loss": 0.4897, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.7690362968759406, |
| "grad_norm": 2.7905259132385254, |
| "learning_rate": 2.6851628755643776e-05, |
| "loss": 0.5537, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.7695178474688497, |
| "grad_norm": 4.461797714233398, |
| "learning_rate": 2.6745356175267765e-05, |
| "loss": 0.4501, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.7699993980617589, |
| "grad_norm": 4.621416091918945, |
| "learning_rate": 2.6639261839288343e-05, |
| "loss": 0.5464, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.770480948654668, |
| "grad_norm": 6.821358680725098, |
| "learning_rate": 2.6533346005855987e-05, |
| "loss": 0.824, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7709624992475772, |
| "grad_norm": 1.811643123626709, |
| "learning_rate": 2.6427608932686843e-05, |
| "loss": 0.43, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.7714440498404863, |
| "grad_norm": 4.649533271789551, |
| "learning_rate": 2.6322050877062064e-05, |
| "loss": 0.7568, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.7719256004333955, |
| "grad_norm": 4.929988861083984, |
| "learning_rate": 2.6216672095827266e-05, |
| "loss": 0.8517, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.7724071510263048, |
| "grad_norm": 1.7828702926635742, |
| "learning_rate": 2.6111472845391827e-05, |
| "loss": 0.5364, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.7728887016192139, |
| "grad_norm": 0.7335327863693237, |
| "learning_rate": 2.6006453381728236e-05, |
| "loss": 0.306, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.7733702522121231, |
| "grad_norm": 2.5753631591796875, |
| "learning_rate": 2.5901613960371585e-05, |
| "loss": 0.3027, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.7738518028050322, |
| "grad_norm": 2.3606786727905273, |
| "learning_rate": 2.5796954836418884e-05, |
| "loss": 0.6378, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.7743333533979414, |
| "grad_norm": 2.3741252422332764, |
| "learning_rate": 2.569247626452842e-05, |
| "loss": 0.7027, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.7748149039908505, |
| "grad_norm": 3.609069347381592, |
| "learning_rate": 2.558817849891918e-05, |
| "loss": 0.5579, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.7752964545837597, |
| "grad_norm": 3.4135854244232178, |
| "learning_rate": 2.548406179337015e-05, |
| "loss": 0.6868, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.7757780051766688, |
| "grad_norm": 1.7354921102523804, |
| "learning_rate": 2.5380126401219807e-05, |
| "loss": 0.4444, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.7762595557695781, |
| "grad_norm": 3.9232022762298584, |
| "learning_rate": 2.527637257536547e-05, |
| "loss": 0.5952, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.7767411063624872, |
| "grad_norm": 3.5635814666748047, |
| "learning_rate": 2.517280056826262e-05, |
| "loss": 0.6536, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.7772226569553964, |
| "grad_norm": 2.9686269760131836, |
| "learning_rate": 2.5069410631924385e-05, |
| "loss": 0.8749, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.7777042075483055, |
| "grad_norm": 3.6469061374664307, |
| "learning_rate": 2.4966203017920818e-05, |
| "loss": 0.6617, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.7781857581412147, |
| "grad_norm": 2.2301876544952393, |
| "learning_rate": 2.4863177977378392e-05, |
| "loss": 0.5759, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.7786673087341239, |
| "grad_norm": 1.627908706665039, |
| "learning_rate": 2.4760335760979312e-05, |
| "loss": 0.6987, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.779148859327033, |
| "grad_norm": 3.955803632736206, |
| "learning_rate": 2.4657676618960944e-05, |
| "loss": 0.8698, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.7796304099199423, |
| "grad_norm": 2.231527328491211, |
| "learning_rate": 2.455520080111522e-05, |
| "loss": 0.996, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.7801119605128514, |
| "grad_norm": 3.9988443851470947, |
| "learning_rate": 2.4452908556787912e-05, |
| "loss": 0.771, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.7805935111057606, |
| "grad_norm": 2.5822057723999023, |
| "learning_rate": 2.4350800134878203e-05, |
| "loss": 0.6595, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.7810750616986697, |
| "grad_norm": 2.1049022674560547, |
| "learning_rate": 2.4248875783837987e-05, |
| "loss": 0.2905, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.7815566122915789, |
| "grad_norm": 3.057828426361084, |
| "learning_rate": 2.414713575167129e-05, |
| "loss": 0.455, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.782038162884488, |
| "grad_norm": 3.8854563236236572, |
| "learning_rate": 2.4045580285933557e-05, |
| "loss": 0.5088, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.7825197134773972, |
| "grad_norm": 3.774930715560913, |
| "learning_rate": 2.3944209633731242e-05, |
| "loss": 0.828, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.7830012640703063, |
| "grad_norm": 2.168914794921875, |
| "learning_rate": 2.3843024041721053e-05, |
| "loss": 0.499, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.7834828146632156, |
| "grad_norm": 4.615421772003174, |
| "learning_rate": 2.3742023756109456e-05, |
| "loss": 0.579, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.7839643652561247, |
| "grad_norm": 2.246866226196289, |
| "learning_rate": 2.3641209022651976e-05, |
| "loss": 0.7131, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.7844459158490339, |
| "grad_norm": 2.1677560806274414, |
| "learning_rate": 2.3540580086652675e-05, |
| "loss": 0.7653, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.7849274664419431, |
| "grad_norm": 2.1894803047180176, |
| "learning_rate": 2.344013719296353e-05, |
| "loss": 0.5755, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.7854090170348522, |
| "grad_norm": 1.5638082027435303, |
| "learning_rate": 2.3339880585983842e-05, |
| "loss": 0.8477, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.7858905676277614, |
| "grad_norm": 0.8193626999855042, |
| "learning_rate": 2.3239810509659597e-05, |
| "loss": 0.5981, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.7863721182206705, |
| "grad_norm": 2.181163787841797, |
| "learning_rate": 2.313992720748295e-05, |
| "loss": 0.6006, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.7868536688135798, |
| "grad_norm": 2.4740288257598877, |
| "learning_rate": 2.304023092249159e-05, |
| "loss": 0.7376, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.7873352194064889, |
| "grad_norm": 3.2138454914093018, |
| "learning_rate": 2.2940721897268136e-05, |
| "loss": 1.0772, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.7878167699993981, |
| "grad_norm": 3.062891960144043, |
| "learning_rate": 2.2841400373939592e-05, |
| "loss": 0.9387, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.7882983205923072, |
| "grad_norm": 1.856158971786499, |
| "learning_rate": 2.274226659417671e-05, |
| "loss": 0.891, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.7887798711852164, |
| "grad_norm": 1.4928967952728271, |
| "learning_rate": 2.2643320799193402e-05, |
| "loss": 0.3832, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.7892614217781255, |
| "grad_norm": 2.0665640830993652, |
| "learning_rate": 2.2544563229746218e-05, |
| "loss": 0.5602, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.7897429723710347, |
| "grad_norm": 2.119544506072998, |
| "learning_rate": 2.2445994126133708e-05, |
| "loss": 0.8366, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.7902245229639439, |
| "grad_norm": 1.5489791631698608, |
| "learning_rate": 2.234761372819577e-05, |
| "loss": 0.5582, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.7907060735568531, |
| "grad_norm": 1.4361308813095093, |
| "learning_rate": 2.2249422275313214e-05, |
| "loss": 0.3052, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.7911876241497622, |
| "grad_norm": 2.6924610137939453, |
| "learning_rate": 2.215142000640714e-05, |
| "loss": 0.4776, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.7916691747426714, |
| "grad_norm": 2.889003038406372, |
| "learning_rate": 2.2053607159938195e-05, |
| "loss": 0.5702, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.7921507253355806, |
| "grad_norm": 3.6985273361206055, |
| "learning_rate": 2.1955983973906236e-05, |
| "loss": 0.6528, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.7926322759284897, |
| "grad_norm": 1.6268264055252075, |
| "learning_rate": 2.1858550685849578e-05, |
| "loss": 0.7206, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.793113826521399, |
| "grad_norm": 2.64440655708313, |
| "learning_rate": 2.17613075328445e-05, |
| "loss": 0.742, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.793595377114308, |
| "grad_norm": 2.0996243953704834, |
| "learning_rate": 2.1664254751504642e-05, |
| "loss": 0.6661, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.7940769277072173, |
| "grad_norm": 1.764198660850525, |
| "learning_rate": 2.1567392577980393e-05, |
| "loss": 0.3963, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.7945584783001264, |
| "grad_norm": 2.0087742805480957, |
| "learning_rate": 2.1470721247958404e-05, |
| "loss": 0.906, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7950400288930356, |
| "grad_norm": 2.0296401977539062, |
| "learning_rate": 2.137424099666091e-05, |
| "loss": 0.7582, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.7955215794859447, |
| "grad_norm": 4.00960636138916, |
| "learning_rate": 2.1277952058845284e-05, |
| "loss": 0.5171, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.7960031300788539, |
| "grad_norm": 1.5805654525756836, |
| "learning_rate": 2.118185466880327e-05, |
| "loss": 0.867, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.796484680671763, |
| "grad_norm": 1.1289174556732178, |
| "learning_rate": 2.1085949060360654e-05, |
| "loss": 0.7591, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.7969662312646723, |
| "grad_norm": 2.465733528137207, |
| "learning_rate": 2.0990235466876517e-05, |
| "loss": 0.7738, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.7974477818575814, |
| "grad_norm": 1.9576550722122192, |
| "learning_rate": 2.089471412124274e-05, |
| "loss": 0.5989, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.7979293324504906, |
| "grad_norm": 2.245087146759033, |
| "learning_rate": 2.079938525588342e-05, |
| "loss": 0.6204, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.7984108830433998, |
| "grad_norm": 2.1866295337677, |
| "learning_rate": 2.0704249102754324e-05, |
| "loss": 0.986, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.7988924336363089, |
| "grad_norm": 2.3913660049438477, |
| "learning_rate": 2.0609305893342278e-05, |
| "loss": 0.4221, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.7993739842292181, |
| "grad_norm": 2.4920084476470947, |
| "learning_rate": 2.0514555858664663e-05, |
| "loss": 0.7775, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.7998555348221272, |
| "grad_norm": 1.5726318359375, |
| "learning_rate": 2.0419999229268805e-05, |
| "loss": 0.3526, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.8003370854150365, |
| "grad_norm": 3.00834584236145, |
| "learning_rate": 2.032563623523147e-05, |
| "loss": 0.7506, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.8008186360079456, |
| "grad_norm": 2.3384523391723633, |
| "learning_rate": 2.0231467106158186e-05, |
| "loss": 0.5321, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.8013001866008548, |
| "grad_norm": 2.5204219818115234, |
| "learning_rate": 2.0137492071182863e-05, |
| "loss": 0.8753, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.8017817371937639, |
| "grad_norm": 1.7203079462051392, |
| "learning_rate": 2.0043711358967043e-05, |
| "loss": 1.6074, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.8022632877866731, |
| "grad_norm": 3.3237717151641846, |
| "learning_rate": 1.9950125197699508e-05, |
| "loss": 0.5971, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.8027448383795822, |
| "grad_norm": 3.3281543254852295, |
| "learning_rate": 1.985673381509565e-05, |
| "loss": 0.6587, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.8032263889724914, |
| "grad_norm": 3.3492562770843506, |
| "learning_rate": 1.9763537438396894e-05, |
| "loss": 0.9115, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.8037079395654005, |
| "grad_norm": 1.2501789331436157, |
| "learning_rate": 1.96705362943702e-05, |
| "loss": 0.4328, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.8041894901583098, |
| "grad_norm": 1.199196219444275, |
| "learning_rate": 1.9577730609307454e-05, |
| "loss": 0.283, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.804671040751219, |
| "grad_norm": 2.56499981880188, |
| "learning_rate": 1.9485120609024975e-05, |
| "loss": 0.6122, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.8051525913441281, |
| "grad_norm": 2.273665189743042, |
| "learning_rate": 1.9392706518862935e-05, |
| "loss": 0.9137, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.8056341419370373, |
| "grad_norm": 2.3677797317504883, |
| "learning_rate": 1.9300488563684804e-05, |
| "loss": 0.398, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.8061156925299464, |
| "grad_norm": 1.3238352537155151, |
| "learning_rate": 1.920846696787684e-05, |
| "loss": 0.8935, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.8065972431228556, |
| "grad_norm": 2.1932382583618164, |
| "learning_rate": 1.9116641955347446e-05, |
| "loss": 0.5614, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.8070787937157647, |
| "grad_norm": 1.0041841268539429, |
| "learning_rate": 1.9025013749526767e-05, |
| "loss": 0.5811, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.807560344308674, |
| "grad_norm": 3.299774169921875, |
| "learning_rate": 1.8933582573366036e-05, |
| "loss": 0.8145, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.8080418949015831, |
| "grad_norm": 2.4343202114105225, |
| "learning_rate": 1.8842348649337116e-05, |
| "loss": 0.6614, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.8085234454944923, |
| "grad_norm": 2.8237218856811523, |
| "learning_rate": 1.875131219943187e-05, |
| "loss": 1.1075, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.8090049960874014, |
| "grad_norm": 2.5553195476531982, |
| "learning_rate": 1.8660473445161663e-05, |
| "loss": 0.604, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.8094865466803106, |
| "grad_norm": 1.5066719055175781, |
| "learning_rate": 1.856983260755686e-05, |
| "loss": 0.6099, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.8099680972732197, |
| "grad_norm": 1.4844105243682861, |
| "learning_rate": 1.8479389907166223e-05, |
| "loss": 0.2163, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.8104496478661289, |
| "grad_norm": 2.3299903869628906, |
| "learning_rate": 1.8389145564056387e-05, |
| "loss": 0.4654, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.810931198459038, |
| "grad_norm": 1.9507116079330444, |
| "learning_rate": 1.829909979781137e-05, |
| "loss": 0.3359, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.8114127490519473, |
| "grad_norm": 2.243999481201172, |
| "learning_rate": 1.820925282753201e-05, |
| "loss": 0.5519, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.8118942996448565, |
| "grad_norm": 1.5406464338302612, |
| "learning_rate": 1.8119604871835437e-05, |
| "loss": 0.6571, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.8123758502377656, |
| "grad_norm": 2.3631114959716797, |
| "learning_rate": 1.8030156148854492e-05, |
| "loss": 0.7404, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.8128574008306748, |
| "grad_norm": 5.411351680755615, |
| "learning_rate": 1.7940906876237284e-05, |
| "loss": 0.4588, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.8133389514235839, |
| "grad_norm": 1.9725611209869385, |
| "learning_rate": 1.78518572711466e-05, |
| "loss": 0.6191, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.8138205020164931, |
| "grad_norm": 2.150035858154297, |
| "learning_rate": 1.776300755025939e-05, |
| "loss": 1.0107, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.8143020526094022, |
| "grad_norm": 1.634321928024292, |
| "learning_rate": 1.767435792976626e-05, |
| "loss": 0.42, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.8147836032023115, |
| "grad_norm": 1.756266474723816, |
| "learning_rate": 1.7585908625370905e-05, |
| "loss": 0.7629, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.8152651537952206, |
| "grad_norm": 2.3073647022247314, |
| "learning_rate": 1.749765985228963e-05, |
| "loss": 0.8333, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.8157467043881298, |
| "grad_norm": 2.3972604274749756, |
| "learning_rate": 1.740961182525077e-05, |
| "loss": 0.4132, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.8162282549810389, |
| "grad_norm": 2.476473569869995, |
| "learning_rate": 1.7321764758494252e-05, |
| "loss": 0.8872, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.8167098055739481, |
| "grad_norm": 2.487661361694336, |
| "learning_rate": 1.7234118865770987e-05, |
| "loss": 0.685, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.8171913561668572, |
| "grad_norm": 1.8796521425247192, |
| "learning_rate": 1.7146674360342373e-05, |
| "loss": 0.6886, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.8176729067597664, |
| "grad_norm": 1.8261488676071167, |
| "learning_rate": 1.7059431454979824e-05, |
| "loss": 0.8278, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.8181544573526757, |
| "grad_norm": 4.842952728271484, |
| "learning_rate": 1.6972390361964195e-05, |
| "loss": 1.1315, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.8186360079455848, |
| "grad_norm": 1.3907809257507324, |
| "learning_rate": 1.688555129308531e-05, |
| "loss": 0.5094, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.819117558538494, |
| "grad_norm": 2.8094334602355957, |
| "learning_rate": 1.6798914459641434e-05, |
| "loss": 0.6765, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.8195991091314031, |
| "grad_norm": 1.7565284967422485, |
| "learning_rate": 1.6712480072438662e-05, |
| "loss": 0.8474, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.8200806597243123, |
| "grad_norm": 3.5661885738372803, |
| "learning_rate": 1.6626248341790596e-05, |
| "loss": 1.1639, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.8205622103172214, |
| "grad_norm": 2.280489683151245, |
| "learning_rate": 1.6540219477517684e-05, |
| "loss": 0.6265, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.8210437609101306, |
| "grad_norm": 1.485849380493164, |
| "learning_rate": 1.6454393688946767e-05, |
| "loss": 0.4715, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.8215253115030398, |
| "grad_norm": 2.863246202468872, |
| "learning_rate": 1.6368771184910557e-05, |
| "loss": 0.4076, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.822006862095949, |
| "grad_norm": 1.4689639806747437, |
| "learning_rate": 1.6283352173747145e-05, |
| "loss": 0.6784, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.8224884126888581, |
| "grad_norm": 1.777126431465149, |
| "learning_rate": 1.619813686329946e-05, |
| "loss": 0.6577, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.8229699632817673, |
| "grad_norm": 2.7305054664611816, |
| "learning_rate": 1.611312546091476e-05, |
| "loss": 0.8134, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.8234515138746764, |
| "grad_norm": 1.6367037296295166, |
| "learning_rate": 1.6028318173444202e-05, |
| "loss": 0.7774, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.8239330644675856, |
| "grad_norm": 1.054276943206787, |
| "learning_rate": 1.594371520724226e-05, |
| "loss": 0.6494, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.8244146150604948, |
| "grad_norm": 1.8237320184707642, |
| "learning_rate": 1.5859316768166244e-05, |
| "loss": 0.784, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.824896165653404, |
| "grad_norm": 2.3219659328460693, |
| "learning_rate": 1.5775123061575836e-05, |
| "loss": 0.8381, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.8253777162463132, |
| "grad_norm": 2.7865233421325684, |
| "learning_rate": 1.569113429233252e-05, |
| "loss": 0.4768, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.8258592668392223, |
| "grad_norm": 2.3359994888305664, |
| "learning_rate": 1.5607350664799157e-05, |
| "loss": 0.7649, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.8263408174321315, |
| "grad_norm": 4.821800708770752, |
| "learning_rate": 1.552377238283943e-05, |
| "loss": 0.5414, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.8268223680250406, |
| "grad_norm": 1.802925944328308, |
| "learning_rate": 1.5440399649817385e-05, |
| "loss": 0.2233, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.8273039186179498, |
| "grad_norm": 3.2591664791107178, |
| "learning_rate": 1.5357232668596933e-05, |
| "loss": 1.2976, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.8277854692108589, |
| "grad_norm": 2.02854585647583, |
| "learning_rate": 1.5274271641541295e-05, |
| "loss": 0.5666, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.8282670198037682, |
| "grad_norm": 2.4530251026153564, |
| "learning_rate": 1.5191516770512649e-05, |
| "loss": 0.7718, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.8287485703966773, |
| "grad_norm": 2.288060188293457, |
| "learning_rate": 1.5108968256871437e-05, |
| "loss": 0.333, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.8292301209895865, |
| "grad_norm": 1.380737543106079, |
| "learning_rate": 1.5026626301476087e-05, |
| "loss": 0.4997, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.8297116715824956, |
| "grad_norm": 3.533025026321411, |
| "learning_rate": 1.4944491104682379e-05, |
| "loss": 0.7909, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.8301932221754048, |
| "grad_norm": 2.4573423862457275, |
| "learning_rate": 1.4862562866343034e-05, |
| "loss": 0.6396, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.8306747727683139, |
| "grad_norm": 2.839277744293213, |
| "learning_rate": 1.4780841785807164e-05, |
| "loss": 0.6966, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.8311563233612231, |
| "grad_norm": 1.8692930936813354, |
| "learning_rate": 1.4699328061919848e-05, |
| "loss": 0.5262, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.8316378739541324, |
| "grad_norm": 2.2407124042510986, |
| "learning_rate": 1.4618021893021605e-05, |
| "loss": 0.3409, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.8321194245470415, |
| "grad_norm": 1.3630995750427246, |
| "learning_rate": 1.453692347694794e-05, |
| "loss": 0.3524, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.8326009751399507, |
| "grad_norm": 2.1543772220611572, |
| "learning_rate": 1.4456033011028835e-05, |
| "loss": 0.442, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.8330825257328598, |
| "grad_norm": 3.9747824668884277, |
| "learning_rate": 1.437535069208833e-05, |
| "loss": 0.8306, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.833564076325769, |
| "grad_norm": 2.9707400798797607, |
| "learning_rate": 1.4294876716443906e-05, |
| "loss": 0.3712, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.8340456269186781, |
| "grad_norm": 1.9354028701782227, |
| "learning_rate": 1.4214611279906187e-05, |
| "loss": 0.2021, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.8345271775115873, |
| "grad_norm": 2.2027204036712646, |
| "learning_rate": 1.4134554577778337e-05, |
| "loss": 0.7172, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.8350087281044964, |
| "grad_norm": 1.4204658269882202, |
| "learning_rate": 1.4054706804855634e-05, |
| "loss": 0.8734, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.8354902786974057, |
| "grad_norm": 2.0968925952911377, |
| "learning_rate": 1.3975068155424976e-05, |
| "loss": 0.9967, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.8359718292903148, |
| "grad_norm": 2.840298891067505, |
| "learning_rate": 1.3895638823264446e-05, |
| "loss": 0.6381, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.836453379883224, |
| "grad_norm": 1.5930904150009155, |
| "learning_rate": 1.3816419001642777e-05, |
| "loss": 0.5605, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.8369349304761331, |
| "grad_norm": 4.177980899810791, |
| "learning_rate": 1.3737408883318948e-05, |
| "loss": 0.5922, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.8374164810690423, |
| "grad_norm": 1.7408493757247925, |
| "learning_rate": 1.365860866054165e-05, |
| "loss": 0.4055, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.8378980316619515, |
| "grad_norm": 1.257311224937439, |
| "learning_rate": 1.358001852504891e-05, |
| "loss": 0.2734, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.8383795822548606, |
| "grad_norm": 1.8963124752044678, |
| "learning_rate": 1.3501638668067485e-05, |
| "loss": 0.7453, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.8388611328477699, |
| "grad_norm": 1.7414535284042358, |
| "learning_rate": 1.3423469280312562e-05, |
| "loss": 0.6258, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.839342683440679, |
| "grad_norm": 1.7837656736373901, |
| "learning_rate": 1.3345510551987128e-05, |
| "loss": 0.3573, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.8398242340335882, |
| "grad_norm": 2.218170404434204, |
| "learning_rate": 1.326776267278167e-05, |
| "loss": 0.6641, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.8403057846264973, |
| "grad_norm": 4.239348411560059, |
| "learning_rate": 1.3190225831873581e-05, |
| "loss": 0.7345, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.8407873352194065, |
| "grad_norm": 1.7612202167510986, |
| "learning_rate": 1.3112900217926782e-05, |
| "loss": 0.6602, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.8412688858123156, |
| "grad_norm": 5.180617332458496, |
| "learning_rate": 1.3035786019091223e-05, |
| "loss": 0.7354, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.8417504364052248, |
| "grad_norm": 2.2071621417999268, |
| "learning_rate": 1.2958883423002422e-05, |
| "loss": 0.93, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.842231986998134, |
| "grad_norm": 2.929159164428711, |
| "learning_rate": 1.288219261678103e-05, |
| "loss": 0.7676, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.8427135375910432, |
| "grad_norm": 1.524143934249878, |
| "learning_rate": 1.2805713787032381e-05, |
| "loss": 0.269, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.8431950881839523, |
| "grad_norm": 1.5855472087860107, |
| "learning_rate": 1.2729447119846016e-05, |
| "loss": 0.4037, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.8436766387768615, |
| "grad_norm": 2.4679388999938965, |
| "learning_rate": 1.265339280079525e-05, |
| "loss": 0.7235, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.8441581893697706, |
| "grad_norm": 3.3254940509796143, |
| "learning_rate": 1.257755101493665e-05, |
| "loss": 0.6291, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.8446397399626798, |
| "grad_norm": 1.8408324718475342, |
| "learning_rate": 1.2501921946809714e-05, |
| "loss": 0.3552, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.845121290555589, |
| "grad_norm": 2.5367562770843506, |
| "learning_rate": 1.2426505780436326e-05, |
| "loss": 0.8439, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.8456028411484982, |
| "grad_norm": 2.7886507511138916, |
| "learning_rate": 1.2351302699320332e-05, |
| "loss": 0.7676, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.8460843917414074, |
| "grad_norm": 4.4448628425598145, |
| "learning_rate": 1.2276312886447106e-05, |
| "loss": 0.8199, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.8465659423343165, |
| "grad_norm": 2.484957218170166, |
| "learning_rate": 1.2201536524283074e-05, |
| "loss": 0.6567, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.8470474929272257, |
| "grad_norm": 1.730948805809021, |
| "learning_rate": 1.2126973794775343e-05, |
| "loss": 0.4775, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.8475290435201348, |
| "grad_norm": 3.7568891048431396, |
| "learning_rate": 1.2052624879351104e-05, |
| "loss": 0.8885, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.848010594113044, |
| "grad_norm": 2.2398736476898193, |
| "learning_rate": 1.1978489958917382e-05, |
| "loss": 0.6513, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.8484921447059531, |
| "grad_norm": 2.391688346862793, |
| "learning_rate": 1.1904569213860472e-05, |
| "loss": 0.7705, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.8489736952988624, |
| "grad_norm": 1.2668508291244507, |
| "learning_rate": 1.1830862824045552e-05, |
| "loss": 0.7412, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.8494552458917715, |
| "grad_norm": 2.4677586555480957, |
| "learning_rate": 1.1757370968816217e-05, |
| "loss": 0.4662, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.8499367964846807, |
| "grad_norm": 1.866142988204956, |
| "learning_rate": 1.1684093826994024e-05, |
| "loss": 0.4521, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.8504183470775898, |
| "grad_norm": 2.4763362407684326, |
| "learning_rate": 1.1611031576878117e-05, |
| "loss": 0.601, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.850899897670499, |
| "grad_norm": 1.6049933433532715, |
| "learning_rate": 1.1538184396244778e-05, |
| "loss": 0.2667, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.8513814482634082, |
| "grad_norm": 2.2135348320007324, |
| "learning_rate": 1.146555246234694e-05, |
| "loss": 0.6749, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.8518629988563173, |
| "grad_norm": 3.2478649616241455, |
| "learning_rate": 1.1393135951913824e-05, |
| "loss": 0.6464, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.8523445494492266, |
| "grad_norm": 1.7736784219741821, |
| "learning_rate": 1.132093504115046e-05, |
| "loss": 0.5814, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.8528261000421357, |
| "grad_norm": 4.978511333465576, |
| "learning_rate": 1.1248949905737283e-05, |
| "loss": 0.5157, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.8533076506350449, |
| "grad_norm": 0.9166672229766846, |
| "learning_rate": 1.1177180720829694e-05, |
| "loss": 0.1709, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.853789201227954, |
| "grad_norm": 1.008035659790039, |
| "learning_rate": 1.1105627661057671e-05, |
| "loss": 0.4628, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.8542707518208632, |
| "grad_norm": 2.2385506629943848, |
| "learning_rate": 1.103429090052528e-05, |
| "loss": 1.1356, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.8547523024137723, |
| "grad_norm": 3.872480630874634, |
| "learning_rate": 1.096317061281027e-05, |
| "loss": 0.5905, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.8552338530066815, |
| "grad_norm": 1.684135913848877, |
| "learning_rate": 1.0892266970963704e-05, |
| "loss": 0.5081, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.8557154035995906, |
| "grad_norm": 3.901571035385132, |
| "learning_rate": 1.082158014750948e-05, |
| "loss": 0.9633, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.8561969541924999, |
| "grad_norm": 2.208216905593872, |
| "learning_rate": 1.0751110314443958e-05, |
| "loss": 0.7731, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.856678504785409, |
| "grad_norm": 2.4418656826019287, |
| "learning_rate": 1.0680857643235431e-05, |
| "loss": 0.6918, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.8571600553783182, |
| "grad_norm": 1.6305257081985474, |
| "learning_rate": 1.0610822304823887e-05, |
| "loss": 0.5908, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.8576416059712274, |
| "grad_norm": 1.6165392398834229, |
| "learning_rate": 1.0541004469620452e-05, |
| "loss": 0.6767, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.8581231565641365, |
| "grad_norm": 2.7876946926116943, |
| "learning_rate": 1.0471404307507016e-05, |
| "loss": 1.0515, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.8586047071570457, |
| "grad_norm": 3.3915517330169678, |
| "learning_rate": 1.0402021987835831e-05, |
| "loss": 0.8213, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.8590862577499548, |
| "grad_norm": 3.31449031829834, |
| "learning_rate": 1.0332857679429098e-05, |
| "loss": 0.4672, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.8595678083428641, |
| "grad_norm": 3.6324501037597656, |
| "learning_rate": 1.0263911550578531e-05, |
| "loss": 1.0045, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.8600493589357732, |
| "grad_norm": 1.995388150215149, |
| "learning_rate": 1.0195183769045013e-05, |
| "loss": 0.3615, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.8605309095286824, |
| "grad_norm": 3.063302993774414, |
| "learning_rate": 1.0126674502058054e-05, |
| "loss": 0.8128, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.8610124601215915, |
| "grad_norm": 1.1554100513458252, |
| "learning_rate": 1.005838391631555e-05, |
| "loss": 0.6989, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.8614940107145007, |
| "grad_norm": 1.5388157367706299, |
| "learning_rate": 9.990312177983263e-06, |
| "loss": 0.6449, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.8619755613074098, |
| "grad_norm": 1.3487037420272827, |
| "learning_rate": 9.922459452694466e-06, |
| "loss": 0.7874, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.862457111900319, |
| "grad_norm": 1.2846475839614868, |
| "learning_rate": 9.854825905549503e-06, |
| "loss": 0.6292, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.8629386624932281, |
| "grad_norm": 2.74332594871521, |
| "learning_rate": 9.787411701115456e-06, |
| "loss": 0.5169, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.8634202130861374, |
| "grad_norm": 1.1050007343292236, |
| "learning_rate": 9.720217003425647e-06, |
| "loss": 0.6624, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.8639017636790465, |
| "grad_norm": 2.6142866611480713, |
| "learning_rate": 9.65324197597931e-06, |
| "loss": 0.5766, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.8643833142719557, |
| "grad_norm": 1.3400239944458008, |
| "learning_rate": 9.58648678174121e-06, |
| "loss": 0.8115, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.8648648648648649, |
| "grad_norm": 1.4871549606323242, |
| "learning_rate": 9.51995158314113e-06, |
| "loss": 0.7247, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.865346415457774, |
| "grad_norm": 3.412703037261963, |
| "learning_rate": 9.45363654207363e-06, |
| "loss": 0.4651, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.8658279660506832, |
| "grad_norm": 1.186317801475525, |
| "learning_rate": 9.387541819897549e-06, |
| "loss": 0.504, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.8663095166435923, |
| "grad_norm": 3.1554412841796875, |
| "learning_rate": 9.321667577435634e-06, |
| "loss": 0.6253, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.8667910672365016, |
| "grad_norm": 2.272794246673584, |
| "learning_rate": 9.256013974974175e-06, |
| "loss": 0.5426, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8672726178294107, |
| "grad_norm": 1.4032080173492432, |
| "learning_rate": 9.19058117226258e-06, |
| "loss": 0.4761, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.8677541684223199, |
| "grad_norm": 2.6849613189697266, |
| "learning_rate": 9.125369328513034e-06, |
| "loss": 0.7514, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.868235719015229, |
| "grad_norm": 4.494041442871094, |
| "learning_rate": 9.060378602400054e-06, |
| "loss": 0.5857, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.8687172696081382, |
| "grad_norm": 1.7596466541290283, |
| "learning_rate": 8.995609152060136e-06, |
| "loss": 0.7958, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.8691988202010473, |
| "grad_norm": 1.3782743215560913, |
| "learning_rate": 8.931061135091357e-06, |
| "loss": 0.7378, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.8696803707939565, |
| "grad_norm": 2.1466805934906006, |
| "learning_rate": 8.866734708553015e-06, |
| "loss": 1.0608, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.8701619213868657, |
| "grad_norm": 3.3685004711151123, |
| "learning_rate": 8.802630028965242e-06, |
| "loss": 0.4598, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.8706434719797749, |
| "grad_norm": 2.506319999694824, |
| "learning_rate": 8.738747252308555e-06, |
| "loss": 0.5106, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.8711250225726841, |
| "grad_norm": 0.8574779629707336, |
| "learning_rate": 8.675086534023591e-06, |
| "loss": 0.3669, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.8716065731655932, |
| "grad_norm": 2.1670174598693848, |
| "learning_rate": 8.611648029010643e-06, |
| "loss": 0.33, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.8720881237585024, |
| "grad_norm": 3.5678937435150146, |
| "learning_rate": 8.548431891629316e-06, |
| "loss": 0.7334, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.8725696743514115, |
| "grad_norm": 2.3840737342834473, |
| "learning_rate": 8.485438275698154e-06, |
| "loss": 0.3852, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.8730512249443207, |
| "grad_norm": 7.21331262588501, |
| "learning_rate": 8.422667334494249e-06, |
| "loss": 0.5615, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.8735327755372299, |
| "grad_norm": 5.849119186401367, |
| "learning_rate": 8.360119220752893e-06, |
| "loss": 0.4217, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.8740143261301391, |
| "grad_norm": 2.1622002124786377, |
| "learning_rate": 8.297794086667165e-06, |
| "loss": 0.8654, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.8744958767230482, |
| "grad_norm": 1.9863747358322144, |
| "learning_rate": 8.235692083887613e-06, |
| "loss": 0.5413, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.8749774273159574, |
| "grad_norm": 2.883000135421753, |
| "learning_rate": 8.173813363521843e-06, |
| "loss": 1.2884, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.8754589779088665, |
| "grad_norm": 2.683244466781616, |
| "learning_rate": 8.112158076134157e-06, |
| "loss": 0.6079, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.8759405285017757, |
| "grad_norm": 1.6069995164871216, |
| "learning_rate": 8.05072637174522e-06, |
| "loss": 0.56, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.8764220790946848, |
| "grad_norm": 3.1434903144836426, |
| "learning_rate": 7.989518399831641e-06, |
| "loss": 0.5649, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.876903629687594, |
| "grad_norm": 3.7238409519195557, |
| "learning_rate": 7.928534309325675e-06, |
| "loss": 0.729, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.8773851802805033, |
| "grad_norm": 1.1204873323440552, |
| "learning_rate": 7.8677742486148e-06, |
| "loss": 0.4625, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.8778667308734124, |
| "grad_norm": 2.0625314712524414, |
| "learning_rate": 7.807238365541391e-06, |
| "loss": 0.4157, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.8783482814663216, |
| "grad_norm": 2.398089647293091, |
| "learning_rate": 7.746926807402344e-06, |
| "loss": 0.611, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.8788298320592307, |
| "grad_norm": 4.007481098175049, |
| "learning_rate": 7.686839720948736e-06, |
| "loss": 1.2355, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.8793113826521399, |
| "grad_norm": 3.5721206665039062, |
| "learning_rate": 7.6269772523854365e-06, |
| "loss": 0.4283, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.879792933245049, |
| "grad_norm": 2.9199283123016357, |
| "learning_rate": 7.567339547370789e-06, |
| "loss": 0.4685, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.8802744838379583, |
| "grad_norm": 1.7368232011795044, |
| "learning_rate": 7.507926751016248e-06, |
| "loss": 0.5865, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.8807560344308674, |
| "grad_norm": 2.2206578254699707, |
| "learning_rate": 7.4487390078859855e-06, |
| "loss": 0.7996, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.8812375850237766, |
| "grad_norm": 1.2094279527664185, |
| "learning_rate": 7.389776461996578e-06, |
| "loss": 0.5491, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.8817191356166857, |
| "grad_norm": 1.2842280864715576, |
| "learning_rate": 7.331039256816663e-06, |
| "loss": 0.9658, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.8822006862095949, |
| "grad_norm": 3.4786460399627686, |
| "learning_rate": 7.27252753526656e-06, |
| "loss": 1.0196, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.882682236802504, |
| "grad_norm": 3.5382659435272217, |
| "learning_rate": 7.214241439717962e-06, |
| "loss": 1.0331, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.8831637873954132, |
| "grad_norm": 1.174157738685608, |
| "learning_rate": 7.1561811119935425e-06, |
| "loss": 0.4535, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.8836453379883223, |
| "grad_norm": 2.7539329528808594, |
| "learning_rate": 7.098346693366642e-06, |
| "loss": 0.5103, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.8841268885812316, |
| "grad_norm": 2.5821847915649414, |
| "learning_rate": 7.0407383245609136e-06, |
| "loss": 0.4046, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.8846084391741408, |
| "grad_norm": 1.5927815437316895, |
| "learning_rate": 6.983356145749975e-06, |
| "loss": 0.3623, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.8850899897670499, |
| "grad_norm": 2.2626142501831055, |
| "learning_rate": 6.9262002965570835e-06, |
| "loss": 0.6639, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.8855715403599591, |
| "grad_norm": 3.217414617538452, |
| "learning_rate": 6.869270916054782e-06, |
| "loss": 0.519, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.8860530909528682, |
| "grad_norm": 1.9122174978256226, |
| "learning_rate": 6.812568142764575e-06, |
| "loss": 0.5984, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.8865346415457774, |
| "grad_norm": 2.481517791748047, |
| "learning_rate": 6.756092114656587e-06, |
| "loss": 0.8017, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.8870161921386865, |
| "grad_norm": 2.714883327484131, |
| "learning_rate": 6.699842969149195e-06, |
| "loss": 0.5422, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.8874977427315958, |
| "grad_norm": 2.3089113235473633, |
| "learning_rate": 6.64382084310875e-06, |
| "loss": 0.5783, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.8879792933245049, |
| "grad_norm": 2.165722608566284, |
| "learning_rate": 6.5880258728491905e-06, |
| "loss": 0.354, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.8884608439174141, |
| "grad_norm": 4.5781426429748535, |
| "learning_rate": 6.532458194131763e-06, |
| "loss": 0.8101, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.8889423945103232, |
| "grad_norm": 3.533600091934204, |
| "learning_rate": 6.477117942164657e-06, |
| "loss": 0.9167, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.8894239451032324, |
| "grad_norm": 1.031320333480835, |
| "learning_rate": 6.422005251602658e-06, |
| "loss": 0.4298, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.8899054956961415, |
| "grad_norm": 2.306194543838501, |
| "learning_rate": 6.367120256546888e-06, |
| "loss": 0.4655, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.8903870462890507, |
| "grad_norm": 1.633102297782898, |
| "learning_rate": 6.312463090544396e-06, |
| "loss": 0.4393, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.89086859688196, |
| "grad_norm": 1.7181764841079712, |
| "learning_rate": 6.258033886587911e-06, |
| "loss": 0.8858, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.8913501474748691, |
| "grad_norm": 2.9164364337921143, |
| "learning_rate": 6.2038327771154485e-06, |
| "loss": 0.4769, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.8918316980677783, |
| "grad_norm": 3.113100290298462, |
| "learning_rate": 6.1498598940100346e-06, |
| "loss": 0.7217, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.8923132486606874, |
| "grad_norm": 2.327969551086426, |
| "learning_rate": 6.0961153685993646e-06, |
| "loss": 0.5315, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.8927947992535966, |
| "grad_norm": 2.7124183177948, |
| "learning_rate": 6.0425993316554965e-06, |
| "loss": 0.5386, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.8932763498465057, |
| "grad_norm": 2.0107133388519287, |
| "learning_rate": 5.989311913394546e-06, |
| "loss": 0.4306, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.8937579004394149, |
| "grad_norm": 1.8808348178863525, |
| "learning_rate": 5.93625324347632e-06, |
| "loss": 0.4791, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.894239451032324, |
| "grad_norm": 3.451119899749756, |
| "learning_rate": 5.8834234510040335e-06, |
| "loss": 0.8056, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.8947210016252333, |
| "grad_norm": 3.1976735591888428, |
| "learning_rate": 5.830822664523994e-06, |
| "loss": 0.3994, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.8952025522181424, |
| "grad_norm": 2.1719510555267334, |
| "learning_rate": 5.77845101202531e-06, |
| "loss": 0.738, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.8956841028110516, |
| "grad_norm": 4.383519172668457, |
| "learning_rate": 5.726308620939536e-06, |
| "loss": 0.5899, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8961656534039607, |
| "grad_norm": 2.9527339935302734, |
| "learning_rate": 5.674395618140393e-06, |
| "loss": 0.3893, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.8966472039968699, |
| "grad_norm": 4.863363742828369, |
| "learning_rate": 5.622712129943453e-06, |
| "loss": 0.6031, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.8971287545897791, |
| "grad_norm": 2.3513472080230713, |
| "learning_rate": 5.571258282105829e-06, |
| "loss": 0.8987, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.8976103051826883, |
| "grad_norm": 4.060399055480957, |
| "learning_rate": 5.520034199825841e-06, |
| "loss": 0.9195, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.8980918557755975, |
| "grad_norm": 3.192730188369751, |
| "learning_rate": 5.469040007742776e-06, |
| "loss": 0.6854, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.8985734063685066, |
| "grad_norm": 3.811521530151367, |
| "learning_rate": 5.418275829936537e-06, |
| "loss": 1.2028, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.8990549569614158, |
| "grad_norm": 2.9288651943206787, |
| "learning_rate": 5.36774178992735e-06, |
| "loss": 0.6378, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.8995365075543249, |
| "grad_norm": 2.91579008102417, |
| "learning_rate": 5.317438010675469e-06, |
| "loss": 0.5374, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.9000180581472341, |
| "grad_norm": 2.687274217605591, |
| "learning_rate": 5.267364614580861e-06, |
| "loss": 0.4775, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.9004996087401432, |
| "grad_norm": 6.526017189025879, |
| "learning_rate": 5.217521723482943e-06, |
| "loss": 0.6156, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.9009811593330525, |
| "grad_norm": 2.754613161087036, |
| "learning_rate": 5.167909458660258e-06, |
| "loss": 0.9845, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.9014627099259616, |
| "grad_norm": 3.1940438747406006, |
| "learning_rate": 5.118527940830165e-06, |
| "loss": 1.0082, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.9019442605188708, |
| "grad_norm": 1.7706068754196167, |
| "learning_rate": 5.069377290148602e-06, |
| "loss": 0.3283, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.9024258111117799, |
| "grad_norm": 1.1937077045440674, |
| "learning_rate": 5.020457626209707e-06, |
| "loss": 0.252, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.9029073617046891, |
| "grad_norm": 1.5468496084213257, |
| "learning_rate": 4.971769068045628e-06, |
| "loss": 0.6309, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.9033889122975982, |
| "grad_norm": 2.2661030292510986, |
| "learning_rate": 4.923311734126135e-06, |
| "loss": 0.6594, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.9038704628905074, |
| "grad_norm": 1.7421146631240845, |
| "learning_rate": 4.875085742358432e-06, |
| "loss": 0.6087, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.9043520134834167, |
| "grad_norm": 2.4468908309936523, |
| "learning_rate": 4.827091210086776e-06, |
| "loss": 0.7757, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.9048335640763258, |
| "grad_norm": 2.273754596710205, |
| "learning_rate": 4.779328254092252e-06, |
| "loss": 1.082, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.905315114669235, |
| "grad_norm": 2.547562837600708, |
| "learning_rate": 4.731796990592452e-06, |
| "loss": 0.5908, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.9057966652621441, |
| "grad_norm": 1.9437663555145264, |
| "learning_rate": 4.68449753524125e-06, |
| "loss": 0.9443, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.9062782158550533, |
| "grad_norm": 1.4457284212112427, |
| "learning_rate": 4.637430003128429e-06, |
| "loss": 0.5939, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.9067597664479624, |
| "grad_norm": 4.274806976318359, |
| "learning_rate": 4.5905945087794996e-06, |
| "loss": 0.756, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.9072413170408716, |
| "grad_norm": 1.607393503189087, |
| "learning_rate": 4.543991166155337e-06, |
| "loss": 0.6922, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.9077228676337807, |
| "grad_norm": 3.3161227703094482, |
| "learning_rate": 4.497620088651966e-06, |
| "loss": 0.7247, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.90820441822669, |
| "grad_norm": 3.3778584003448486, |
| "learning_rate": 4.451481389100232e-06, |
| "loss": 0.5756, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.9086859688195991, |
| "grad_norm": 4.653063774108887, |
| "learning_rate": 4.405575179765586e-06, |
| "loss": 0.6268, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.9091675194125083, |
| "grad_norm": 1.695256233215332, |
| "learning_rate": 4.359901572347758e-06, |
| "loss": 0.8092, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.9096490700054174, |
| "grad_norm": 2.5265443325042725, |
| "learning_rate": 4.314460677980537e-06, |
| "loss": 0.5014, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.9101306205983266, |
| "grad_norm": 1.132360816001892, |
| "learning_rate": 4.269252607231422e-06, |
| "loss": 0.418, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.9106121711912358, |
| "grad_norm": 1.7842097282409668, |
| "learning_rate": 4.224277470101445e-06, |
| "loss": 0.8378, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.9110937217841449, |
| "grad_norm": 1.7560157775878906, |
| "learning_rate": 4.179535376024857e-06, |
| "loss": 0.7296, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.9115752723770542, |
| "grad_norm": 1.5116153955459595, |
| "learning_rate": 4.135026433868827e-06, |
| "loss": 0.7794, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.9120568229699633, |
| "grad_norm": 2.2078778743743896, |
| "learning_rate": 4.090750751933248e-06, |
| "loss": 0.9489, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.9125383735628725, |
| "grad_norm": 2.9103267192840576, |
| "learning_rate": 4.046708437950464e-06, |
| "loss": 0.733, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.9130199241557816, |
| "grad_norm": 2.259371280670166, |
| "learning_rate": 4.0028995990849084e-06, |
| "loss": 0.476, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.9135014747486908, |
| "grad_norm": 2.4890007972717285, |
| "learning_rate": 3.95932434193299e-06, |
| "loss": 0.4656, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.9139830253415999, |
| "grad_norm": 3.960632562637329, |
| "learning_rate": 3.915982772522719e-06, |
| "loss": 0.74, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.9144645759345091, |
| "grad_norm": 1.7056382894515991, |
| "learning_rate": 3.872874996313513e-06, |
| "loss": 0.5293, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.9149461265274182, |
| "grad_norm": 2.551649332046509, |
| "learning_rate": 3.830001118195936e-06, |
| "loss": 0.5079, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.9154276771203275, |
| "grad_norm": 1.9389688968658447, |
| "learning_rate": 3.787361242491394e-06, |
| "loss": 0.3823, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.9159092277132366, |
| "grad_norm": 1.6648590564727783, |
| "learning_rate": 3.744955472951928e-06, |
| "loss": 0.3093, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.9163907783061458, |
| "grad_norm": 3.3412230014801025, |
| "learning_rate": 3.702783912759955e-06, |
| "loss": 0.8416, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.916872328899055, |
| "grad_norm": 1.401397943496704, |
| "learning_rate": 3.660846664528006e-06, |
| "loss": 0.544, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.9173538794919641, |
| "grad_norm": 3.2457292079925537, |
| "learning_rate": 3.6191438302984772e-06, |
| "loss": 0.7385, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.9178354300848733, |
| "grad_norm": 2.4073991775512695, |
| "learning_rate": 3.577675511543388e-06, |
| "loss": 0.5313, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.9183169806777824, |
| "grad_norm": 1.6431150436401367, |
| "learning_rate": 3.5364418091641373e-06, |
| "loss": 0.9428, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.9187985312706917, |
| "grad_norm": 2.662550926208496, |
| "learning_rate": 3.495442823491224e-06, |
| "loss": 0.8064, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.9192800818636008, |
| "grad_norm": 2.9634780883789062, |
| "learning_rate": 3.4546786542840605e-06, |
| "loss": 0.646, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.91976163245651, |
| "grad_norm": 2.133837938308716, |
| "learning_rate": 3.4141494007306816e-06, |
| "loss": 0.419, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.9202431830494191, |
| "grad_norm": 1.918086051940918, |
| "learning_rate": 3.373855161447548e-06, |
| "loss": 0.8063, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.9207247336423283, |
| "grad_norm": 1.7618401050567627, |
| "learning_rate": 3.333796034479242e-06, |
| "loss": 0.5835, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.9212062842352374, |
| "grad_norm": 3.4800868034362793, |
| "learning_rate": 3.293972117298294e-06, |
| "loss": 0.7598, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.9216878348281466, |
| "grad_norm": 2.787062168121338, |
| "learning_rate": 3.2543835068049255e-06, |
| "loss": 0.4116, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.9221693854210558, |
| "grad_norm": 2.82818341255188, |
| "learning_rate": 3.21503029932676e-06, |
| "loss": 0.7228, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.922650936013965, |
| "grad_norm": 1.5212979316711426, |
| "learning_rate": 3.1759125906186793e-06, |
| "loss": 0.4513, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.9231324866068741, |
| "grad_norm": 3.6214406490325928, |
| "learning_rate": 3.137030475862535e-06, |
| "loss": 1.0936, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.9236140371997833, |
| "grad_norm": 2.9912490844726562, |
| "learning_rate": 3.098384049666925e-06, |
| "loss": 0.3953, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.9240955877926925, |
| "grad_norm": 1.5457743406295776, |
| "learning_rate": 3.059973406066963e-06, |
| "loss": 0.4376, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.9245771383856016, |
| "grad_norm": 3.1872262954711914, |
| "learning_rate": 3.0217986385240537e-06, |
| "loss": 0.5669, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.9250586889785108, |
| "grad_norm": 2.594231605529785, |
| "learning_rate": 2.983859839925662e-06, |
| "loss": 0.6722, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.92554023957142, |
| "grad_norm": 2.3845884799957275, |
| "learning_rate": 2.94615710258509e-06, |
| "loss": 0.8501, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.9260217901643292, |
| "grad_norm": 3.532987117767334, |
| "learning_rate": 2.908690518241275e-06, |
| "loss": 0.544, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.9265033407572383, |
| "grad_norm": 2.7089667320251465, |
| "learning_rate": 2.8714601780584937e-06, |
| "loss": 0.2766, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.9269848913501475, |
| "grad_norm": 2.490257740020752, |
| "learning_rate": 2.834466172626238e-06, |
| "loss": 0.2841, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.9274664419430566, |
| "grad_norm": 1.7176368236541748, |
| "learning_rate": 2.7977085919589254e-06, |
| "loss": 0.3851, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.9279479925359658, |
| "grad_norm": 2.444751024246216, |
| "learning_rate": 2.76118752549569e-06, |
| "loss": 0.4477, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.9284295431288749, |
| "grad_norm": 1.8582123517990112, |
| "learning_rate": 2.7249030621001924e-06, |
| "loss": 0.6806, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.9289110937217842, |
| "grad_norm": 1.904004454612732, |
| "learning_rate": 2.688855290060399e-06, |
| "loss": 0.619, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.9293926443146933, |
| "grad_norm": 2.0747194290161133, |
| "learning_rate": 2.653044297088314e-06, |
| "loss": 0.7597, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.9298741949076025, |
| "grad_norm": 2.5333268642425537, |
| "learning_rate": 2.6174701703198468e-06, |
| "loss": 0.792, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.9303557455005117, |
| "grad_norm": 2.685360908508301, |
| "learning_rate": 2.5821329963145347e-06, |
| "loss": 0.7001, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.9308372960934208, |
| "grad_norm": 3.6770102977752686, |
| "learning_rate": 2.547032861055376e-06, |
| "loss": 0.6245, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.93131884668633, |
| "grad_norm": 1.6112256050109863, |
| "learning_rate": 2.5121698499485757e-06, |
| "loss": 0.6619, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.9318003972792391, |
| "grad_norm": 2.173598289489746, |
| "learning_rate": 2.4775440478233993e-06, |
| "loss": 0.7539, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.9322819478721484, |
| "grad_norm": 2.8748226165771484, |
| "learning_rate": 2.4431555389319074e-06, |
| "loss": 0.6625, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.9327634984650575, |
| "grad_norm": 2.1532397270202637, |
| "learning_rate": 2.4090044069487784e-06, |
| "loss": 0.9355, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.9332450490579667, |
| "grad_norm": 4.027218341827393, |
| "learning_rate": 2.3750907349711084e-06, |
| "loss": 0.5421, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.9337265996508758, |
| "grad_norm": 2.222975015640259, |
| "learning_rate": 2.3414146055182106e-06, |
| "loss": 0.8391, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.934208150243785, |
| "grad_norm": 3.7854230403900146, |
| "learning_rate": 2.307976100531384e-06, |
| "loss": 0.4316, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.9346897008366941, |
| "grad_norm": 1.5810474157333374, |
| "learning_rate": 2.274775301373744e-06, |
| "loss": 0.5887, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.9351712514296033, |
| "grad_norm": 1.0357911586761475, |
| "learning_rate": 2.241812288830003e-06, |
| "loss": 0.594, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.9356528020225124, |
| "grad_norm": 3.268486261367798, |
| "learning_rate": 2.2090871431063253e-06, |
| "loss": 0.4582, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.9361343526154217, |
| "grad_norm": 2.9726314544677734, |
| "learning_rate": 2.176599943830071e-06, |
| "loss": 0.8042, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.9366159032083309, |
| "grad_norm": 1.949102520942688, |
| "learning_rate": 2.144350770049597e-06, |
| "loss": 0.4591, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.93709745380124, |
| "grad_norm": 2.1186540126800537, |
| "learning_rate": 2.112339700234156e-06, |
| "loss": 0.8347, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.9375790043941492, |
| "grad_norm": 4.00067138671875, |
| "learning_rate": 2.0805668122735767e-06, |
| "loss": 0.5937, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.9380605549870583, |
| "grad_norm": 2.5911052227020264, |
| "learning_rate": 2.0490321834781833e-06, |
| "loss": 0.5266, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.9385421055799675, |
| "grad_norm": 1.883213758468628, |
| "learning_rate": 2.0177358905785537e-06, |
| "loss": 0.6082, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.9390236561728766, |
| "grad_norm": 1.9823429584503174, |
| "learning_rate": 1.986678009725329e-06, |
| "loss": 0.5017, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.9395052067657859, |
| "grad_norm": 1.3896251916885376, |
| "learning_rate": 1.955858616489059e-06, |
| "loss": 0.8347, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.939986757358695, |
| "grad_norm": 6.104365348815918, |
| "learning_rate": 1.9252777858599915e-06, |
| "loss": 0.7993, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.9404683079516042, |
| "grad_norm": 2.0376524925231934, |
| "learning_rate": 1.8949355922479151e-06, |
| "loss": 0.4812, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.9409498585445133, |
| "grad_norm": 4.846323013305664, |
| "learning_rate": 1.8648321094819287e-06, |
| "loss": 0.5424, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.9414314091374225, |
| "grad_norm": 1.7472106218338013, |
| "learning_rate": 1.8349674108103288e-06, |
| "loss": 0.56, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.9419129597303316, |
| "grad_norm": 1.900530219078064, |
| "learning_rate": 1.8053415689003872e-06, |
| "loss": 0.6446, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.9423945103232408, |
| "grad_norm": 4.014410972595215, |
| "learning_rate": 1.7759546558381967e-06, |
| "loss": 0.4733, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.94287606091615, |
| "grad_norm": 1.2827335596084595, |
| "learning_rate": 1.7468067431284707e-06, |
| "loss": 0.4226, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.9433576115090592, |
| "grad_norm": 1.8222554922103882, |
| "learning_rate": 1.7178979016943764e-06, |
| "loss": 0.3871, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.9438391621019684, |
| "grad_norm": 2.0032083988189697, |
| "learning_rate": 1.6892282018773908e-06, |
| "loss": 0.6295, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.9443207126948775, |
| "grad_norm": 3.710329055786133, |
| "learning_rate": 1.6607977134370789e-06, |
| "loss": 1.0069, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.9448022632877867, |
| "grad_norm": 1.8269236087799072, |
| "learning_rate": 1.6326065055510043e-06, |
| "loss": 0.8347, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.9452838138806958, |
| "grad_norm": 2.4813623428344727, |
| "learning_rate": 1.6046546468144407e-06, |
| "loss": 0.7641, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.945765364473605, |
| "grad_norm": 3.3731300830841064, |
| "learning_rate": 1.576942205240317e-06, |
| "loss": 0.5967, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.9462469150665141, |
| "grad_norm": 2.570126533508301, |
| "learning_rate": 1.5494692482590057e-06, |
| "loss": 0.5784, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.9467284656594234, |
| "grad_norm": 2.100484609603882, |
| "learning_rate": 1.522235842718156e-06, |
| "loss": 0.4698, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.9472100162523325, |
| "grad_norm": 2.475597620010376, |
| "learning_rate": 1.4952420548825285e-06, |
| "loss": 0.4489, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.9476915668452417, |
| "grad_norm": 2.690720796585083, |
| "learning_rate": 1.468487950433839e-06, |
| "loss": 0.7515, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.9481731174381508, |
| "grad_norm": 1.925948977470398, |
| "learning_rate": 1.441973594470636e-06, |
| "loss": 0.5974, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.94865466803106, |
| "grad_norm": 3.1811184883117676, |
| "learning_rate": 1.415699051508068e-06, |
| "loss": 0.4421, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.9491362186239691, |
| "grad_norm": 1.8990108966827393, |
| "learning_rate": 1.3896643854777847e-06, |
| "loss": 0.641, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.9496177692168783, |
| "grad_norm": 0.7047016024589539, |
| "learning_rate": 1.3638696597277679e-06, |
| "loss": 0.273, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.9500993198097876, |
| "grad_norm": 1.2902841567993164, |
| "learning_rate": 1.3383149370221449e-06, |
| "loss": 0.382, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.9505808704026967, |
| "grad_norm": 2.200690984725952, |
| "learning_rate": 1.313000279541121e-06, |
| "loss": 0.677, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.9510624209956059, |
| "grad_norm": 1.2776658535003662, |
| "learning_rate": 1.287925748880703e-06, |
| "loss": 0.4875, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.951543971588515, |
| "grad_norm": 3.179433822631836, |
| "learning_rate": 1.2630914060526522e-06, |
| "loss": 0.7287, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.9520255221814242, |
| "grad_norm": 1.7882678508758545, |
| "learning_rate": 1.2384973114843101e-06, |
| "loss": 0.6366, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.9525070727743333, |
| "grad_norm": 2.128645420074463, |
| "learning_rate": 1.2141435250184185e-06, |
| "loss": 0.3159, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.9529886233672425, |
| "grad_norm": 2.0305099487304688, |
| "learning_rate": 1.1900301059130093e-06, |
| "loss": 0.7256, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.9534701739601517, |
| "grad_norm": 3.4359750747680664, |
| "learning_rate": 1.1661571128412596e-06, |
| "loss": 0.5136, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.9539517245530609, |
| "grad_norm": 2.7383172512054443, |
| "learning_rate": 1.142524603891315e-06, |
| "loss": 1.0318, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.95443327514597, |
| "grad_norm": 2.489351511001587, |
| "learning_rate": 1.1191326365661892e-06, |
| "loss": 0.6141, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.9549148257388792, |
| "grad_norm": 2.3964600563049316, |
| "learning_rate": 1.0959812677835968e-06, |
| "loss": 0.4736, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.9553963763317883, |
| "grad_norm": 5.277529716491699, |
| "learning_rate": 1.0730705538758322e-06, |
| "loss": 0.8058, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.9558779269246975, |
| "grad_norm": 2.238236904144287, |
| "learning_rate": 1.0504005505896141e-06, |
| "loss": 0.5026, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.9563594775176067, |
| "grad_norm": 1.928312063217163, |
| "learning_rate": 1.0279713130859514e-06, |
| "loss": 0.8141, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.9568410281105159, |
| "grad_norm": 2.1432981491088867, |
| "learning_rate": 1.005782895940055e-06, |
| "loss": 1.2314, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.9573225787034251, |
| "grad_norm": 5.187903881072998, |
| "learning_rate": 9.838353531411272e-07, |
| "loss": 0.5629, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.9578041292963342, |
| "grad_norm": 2.8405728340148926, |
| "learning_rate": 9.62128738092294e-07, |
| "loss": 0.5505, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.9582856798892434, |
| "grad_norm": 2.8619463443756104, |
| "learning_rate": 9.406631036104508e-07, |
| "loss": 0.3591, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.9587672304821525, |
| "grad_norm": 2.144484758377075, |
| "learning_rate": 9.194385019261287e-07, |
| "loss": 0.7098, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.9592487810750617, |
| "grad_norm": 1.5289490222930908, |
| "learning_rate": 8.984549846833612e-07, |
| "loss": 0.5621, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.9597303316679708, |
| "grad_norm": 1.5724802017211914, |
| "learning_rate": 8.777126029396065e-07, |
| "loss": 0.3373, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.9602118822608801, |
| "grad_norm": 3.027939796447754, |
| "learning_rate": 8.572114071655479e-07, |
| "loss": 0.4875, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.9606934328537892, |
| "grad_norm": 2.6502504348754883, |
| "learning_rate": 8.369514472450379e-07, |
| "loss": 0.5928, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.9611749834466984, |
| "grad_norm": 1.704206943511963, |
| "learning_rate": 8.169327724749543e-07, |
| "loss": 0.642, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.9616565340396075, |
| "grad_norm": 2.1913046836853027, |
| "learning_rate": 7.971554315650442e-07, |
| "loss": 0.6125, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.9621380846325167, |
| "grad_norm": 2.5087268352508545, |
| "learning_rate": 7.776194726378583e-07, |
| "loss": 0.7641, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.9626196352254258, |
| "grad_norm": 2.2763671875, |
| "learning_rate": 7.583249432286277e-07, |
| "loss": 0.7061, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.963101185818335, |
| "grad_norm": 3.4482977390289307, |
| "learning_rate": 7.392718902850981e-07, |
| "loss": 0.7786, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9635827364112443, |
| "grad_norm": 3.177415132522583, |
| "learning_rate": 7.204603601674853e-07, |
| "loss": 0.6386, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.9640642870041534, |
| "grad_norm": 3.915245771408081, |
| "learning_rate": 7.018903986483083e-07, |
| "loss": 0.5658, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.9645458375970626, |
| "grad_norm": 1.8997553586959839, |
| "learning_rate": 6.835620509122897e-07, |
| "loss": 0.571, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.9650273881899717, |
| "grad_norm": 3.1082587242126465, |
| "learning_rate": 6.65475361556267e-07, |
| "loss": 0.6074, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.9655089387828809, |
| "grad_norm": 2.994706153869629, |
| "learning_rate": 6.47630374589081e-07, |
| "loss": 0.7855, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.96599048937579, |
| "grad_norm": 1.8663792610168457, |
| "learning_rate": 6.300271334314434e-07, |
| "loss": 0.2545, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.9664720399686992, |
| "grad_norm": 1.586185097694397, |
| "learning_rate": 6.126656809158359e-07, |
| "loss": 0.8023, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.9669535905616083, |
| "grad_norm": 1.375166416168213, |
| "learning_rate": 5.955460592864337e-07, |
| "loss": 0.4675, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.9674351411545176, |
| "grad_norm": 2.125596761703491, |
| "learning_rate": 5.78668310198982e-07, |
| "loss": 0.7644, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.9679166917474267, |
| "grad_norm": 1.8324317932128906, |
| "learning_rate": 5.620324747207084e-07, |
| "loss": 0.4852, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.9683982423403359, |
| "grad_norm": 1.296980619430542, |
| "learning_rate": 5.456385933301777e-07, |
| "loss": 0.1305, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.968879792933245, |
| "grad_norm": 3.3267123699188232, |
| "learning_rate": 5.294867059172592e-07, |
| "loss": 0.744, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.9693613435261542, |
| "grad_norm": 1.4626914262771606, |
| "learning_rate": 5.135768517829819e-07, |
| "loss": 0.6947, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.9698428941190634, |
| "grad_norm": 2.6075901985168457, |
| "learning_rate": 4.979090696394795e-07, |
| "loss": 0.4833, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.9703244447119725, |
| "grad_norm": 1.679829478263855, |
| "learning_rate": 4.824833976098453e-07, |
| "loss": 0.6612, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.9708059953048818, |
| "grad_norm": 3.0018815994262695, |
| "learning_rate": 4.6729987322807757e-07, |
| "loss": 0.8277, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.9712875458977909, |
| "grad_norm": 1.4263994693756104, |
| "learning_rate": 4.523585334389679e-07, |
| "loss": 0.6912, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.9717690964907001, |
| "grad_norm": 3.329765558242798, |
| "learning_rate": 4.3765941459804614e-07, |
| "loss": 0.7018, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.9722506470836092, |
| "grad_norm": 3.2026407718658447, |
| "learning_rate": 4.232025524714356e-07, |
| "loss": 0.7325, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.9727321976765184, |
| "grad_norm": 2.716898202896118, |
| "learning_rate": 4.0898798223582e-07, |
| "loss": 0.6388, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.9732137482694275, |
| "grad_norm": 1.2587398290634155, |
| "learning_rate": 3.950157384783104e-07, |
| "loss": 0.7916, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.9736952988623367, |
| "grad_norm": 1.3127981424331665, |
| "learning_rate": 3.8128585519640046e-07, |
| "loss": 0.4851, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.9741768494552459, |
| "grad_norm": 2.5416173934936523, |
| "learning_rate": 3.677983657978779e-07, |
| "loss": 0.6746, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.9746584000481551, |
| "grad_norm": 2.9663803577423096, |
| "learning_rate": 3.545533031007131e-07, |
| "loss": 0.7857, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.9751399506410642, |
| "grad_norm": 4.189087867736816, |
| "learning_rate": 3.415506993330153e-07, |
| "loss": 0.4633, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.9756215012339734, |
| "grad_norm": 3.007153034210205, |
| "learning_rate": 3.2879058613292105e-07, |
| "loss": 0.64, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.9761030518268826, |
| "grad_norm": 1.7615995407104492, |
| "learning_rate": 3.1627299454856095e-07, |
| "loss": 0.6468, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.9765846024197917, |
| "grad_norm": 3.9861655235290527, |
| "learning_rate": 3.0399795503793793e-07, |
| "loss": 1.0444, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.9770661530127009, |
| "grad_norm": 1.9470893144607544, |
| "learning_rate": 2.9196549746888235e-07, |
| "loss": 0.516, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.97754770360561, |
| "grad_norm": 1.5506056547164917, |
| "learning_rate": 2.801756511189524e-07, |
| "loss": 0.2129, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.9780292541985193, |
| "grad_norm": 4.4033331871032715, |
| "learning_rate": 2.686284446754006e-07, |
| "loss": 0.7624, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.9785108047914284, |
| "grad_norm": 2.394306182861328, |
| "learning_rate": 2.573239062350963e-07, |
| "loss": 0.8467, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.9789923553843376, |
| "grad_norm": 2.18649959564209, |
| "learning_rate": 2.4626206330440326e-07, |
| "loss": 0.9826, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.9794739059772467, |
| "grad_norm": 2.4696927070617676, |
| "learning_rate": 2.3544294279918e-07, |
| "loss": 0.6252, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.9799554565701559, |
| "grad_norm": 0.9971011877059937, |
| "learning_rate": 2.2486657104471286e-07, |
| "loss": 0.6913, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.980437007163065, |
| "grad_norm": 2.627206563949585, |
| "learning_rate": 2.1453297377557191e-07, |
| "loss": 0.3573, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.9809185577559743, |
| "grad_norm": 2.772587299346924, |
| "learning_rate": 2.044421761356552e-07, |
| "loss": 0.8796, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.9814001083488834, |
| "grad_norm": 1.3121188879013062, |
| "learning_rate": 1.9459420267804452e-07, |
| "loss": 0.7171, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.9818816589417926, |
| "grad_norm": 3.698637008666992, |
| "learning_rate": 1.8498907736499426e-07, |
| "loss": 0.6321, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.9823632095347017, |
| "grad_norm": 2.19526743888855, |
| "learning_rate": 1.7562682356786487e-07, |
| "loss": 0.961, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.9828447601276109, |
| "grad_norm": 3.142582893371582, |
| "learning_rate": 1.665074640670228e-07, |
| "loss": 0.9126, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.9833263107205201, |
| "grad_norm": 2.962191343307495, |
| "learning_rate": 1.576310210518517e-07, |
| "loss": 0.566, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.9838078613134292, |
| "grad_norm": 2.7862355709075928, |
| "learning_rate": 1.489975161206636e-07, |
| "loss": 0.5524, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.9842894119063385, |
| "grad_norm": 2.1677451133728027, |
| "learning_rate": 1.406069702806323e-07, |
| "loss": 0.4326, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.9847709624992476, |
| "grad_norm": 2.6661462783813477, |
| "learning_rate": 1.324594039477822e-07, |
| "loss": 1.088, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.9852525130921568, |
| "grad_norm": 1.7038089036941528, |
| "learning_rate": 1.2455483694689962e-07, |
| "loss": 0.8435, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.9857340636850659, |
| "grad_norm": 2.608567237854004, |
| "learning_rate": 1.1689328851151038e-07, |
| "loss": 0.7363, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.9862156142779751, |
| "grad_norm": 1.127517580986023, |
| "learning_rate": 1.0947477728381339e-07, |
| "loss": 0.9246, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.9866971648708842, |
| "grad_norm": 3.015097141265869, |
| "learning_rate": 1.0229932131465836e-07, |
| "loss": 0.8423, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.9871787154637934, |
| "grad_norm": 2.7446672916412354, |
| "learning_rate": 9.536693806347919e-08, |
| "loss": 1.2947, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.9876602660567025, |
| "grad_norm": 2.5686793327331543, |
| "learning_rate": 8.867764439826065e-08, |
| "loss": 0.6687, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.9881418166496118, |
| "grad_norm": 1.1108747720718384, |
| "learning_rate": 8.223145659550513e-08, |
| "loss": 0.7631, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.9886233672425209, |
| "grad_norm": 2.3981330394744873, |
| "learning_rate": 7.602839034017706e-08, |
| "loss": 0.6197, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.9891049178354301, |
| "grad_norm": 3.0359103679656982, |
| "learning_rate": 7.006846072568074e-08, |
| "loss": 0.5404, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.9895864684283393, |
| "grad_norm": 2.437429189682007, |
| "learning_rate": 6.435168225381594e-08, |
| "loss": 0.5185, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.9900680190212484, |
| "grad_norm": 1.1326676607131958, |
| "learning_rate": 5.887806883474456e-08, |
| "loss": 0.2804, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.9905495696141576, |
| "grad_norm": 3.8812267780303955, |
| "learning_rate": 5.364763378694626e-08, |
| "loss": 0.6195, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.9910311202070667, |
| "grad_norm": 2.3206794261932373, |
| "learning_rate": 4.8660389837207334e-08, |
| "loss": 0.9294, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.991512670799976, |
| "grad_norm": 1.1168718338012695, |
| "learning_rate": 4.391634912056519e-08, |
| "loss": 0.8106, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.9919942213928851, |
| "grad_norm": 1.8295259475708008, |
| "learning_rate": 3.9415523180297286e-08, |
| "loss": 0.4507, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.9924757719857943, |
| "grad_norm": 2.970205068588257, |
| "learning_rate": 3.515792296789888e-08, |
| "loss": 0.3523, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.9929573225787034, |
| "grad_norm": 1.4164469242095947, |
| "learning_rate": 3.114355884301645e-08, |
| "loss": 0.7157, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.9934388731716126, |
| "grad_norm": 2.132336378097534, |
| "learning_rate": 2.7372440573469883e-08, |
| "loss": 0.5431, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.9939204237645217, |
| "grad_norm": 1.5810383558273315, |
| "learning_rate": 2.384457733520806e-08, |
| "loss": 0.2308, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.9944019743574309, |
| "grad_norm": 1.8615809679031372, |
| "learning_rate": 2.0559977712297785e-08, |
| "loss": 0.3219, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.99488352495034, |
| "grad_norm": 3.2156498432159424, |
| "learning_rate": 1.7518649696857126e-08, |
| "loss": 0.835, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.9953650755432493, |
| "grad_norm": 1.9598007202148438, |
| "learning_rate": 1.4720600689110963e-08, |
| "loss": 0.536, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.9958466261361584, |
| "grad_norm": 0.9099422097206116, |
| "learning_rate": 1.216583749731326e-08, |
| "loss": 0.3856, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.9963281767290676, |
| "grad_norm": 2.4464566707611084, |
| "learning_rate": 9.854366337758159e-09, |
| "loss": 0.9042, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.9968097273219768, |
| "grad_norm": 1.8802378177642822, |
| "learning_rate": 7.786192834746686e-09, |
| "loss": 0.8939, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.9972912779148859, |
| "grad_norm": 2.2916386127471924, |
| "learning_rate": 5.961322020608951e-09, |
| "loss": 0.7196, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.9977728285077951, |
| "grad_norm": 0.9568644762039185, |
| "learning_rate": 4.3797583356264275e-09, |
| "loss": 0.1672, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.9982543791007042, |
| "grad_norm": 1.4971035718917847, |
| "learning_rate": 3.0415056281096755e-09, |
| "loss": 0.4794, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.9987359296936135, |
| "grad_norm": 3.3616390228271484, |
| "learning_rate": 1.9465671543095197e-09, |
| "loss": 1.0098, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.9992174802865226, |
| "grad_norm": 3.415937662124634, |
| "learning_rate": 1.094945578439255e-09, |
| "loss": 1.0445, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.9996990308794318, |
| "grad_norm": 3.9460153579711914, |
| "learning_rate": 4.866429726857469e-10, |
| "loss": 1.0316, |
| "step": 2076 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.570456027984619, |
| "learning_rate": 1.2166081717612797e-10, |
| "loss": 0.9203, |
| "step": 2077 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 2077, |
| "total_flos": 6.565805779711427e+17, |
| "train_loss": 0.7756532824952038, |
| "train_runtime": 4587.6806, |
| "train_samples_per_second": 7.242, |
| "train_steps_per_second": 0.453 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2077, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 2400000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.565805779711427e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|