| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 1308, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0061162079510703364, |
| "grad_norm": 0.796875, |
| "learning_rate": 1.5151515151515152e-07, |
| "loss": 1.9698597192764282, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.012232415902140673, |
| "grad_norm": 1.625, |
| "learning_rate": 4.5454545454545457e-07, |
| "loss": 2.038839101791382, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01834862385321101, |
| "grad_norm": 0.70703125, |
| "learning_rate": 7.575757575757576e-07, |
| "loss": 2.013974189758301, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.024464831804281346, |
| "grad_norm": 0.83984375, |
| "learning_rate": 1.0606060606060608e-06, |
| "loss": 2.130162239074707, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03058103975535168, |
| "grad_norm": 0.7890625, |
| "learning_rate": 1.3636363636363636e-06, |
| "loss": 2.004484176635742, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03669724770642202, |
| "grad_norm": 0.8671875, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 2.008946180343628, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.04281345565749235, |
| "grad_norm": 0.74609375, |
| "learning_rate": 1.96969696969697e-06, |
| "loss": 2.0183446407318115, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.04892966360856269, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.2727272727272728e-06, |
| "loss": 1.9834390878677368, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.05504587155963303, |
| "grad_norm": 0.75390625, |
| "learning_rate": 2.575757575757576e-06, |
| "loss": 2.044725179672241, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.06116207951070336, |
| "grad_norm": 0.8828125, |
| "learning_rate": 2.8787878787878793e-06, |
| "loss": 2.3765246868133545, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0672782874617737, |
| "grad_norm": 0.62890625, |
| "learning_rate": 3.181818181818182e-06, |
| "loss": 2.135927438735962, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.07339449541284404, |
| "grad_norm": 1.1484375, |
| "learning_rate": 3.4848484848484854e-06, |
| "loss": 2.1705098152160645, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.07951070336391437, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.7878787878787882e-06, |
| "loss": 2.0761096477508545, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0856269113149847, |
| "grad_norm": 2.03125, |
| "learning_rate": 4.0909090909090915e-06, |
| "loss": 2.0886521339416504, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.09174311926605505, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.393939393939394e-06, |
| "loss": 2.201143980026245, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09785932721712538, |
| "grad_norm": 8.625, |
| "learning_rate": 4.696969696969698e-06, |
| "loss": 2.045245885848999, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.10397553516819572, |
| "grad_norm": 0.65234375, |
| "learning_rate": 5e-06, |
| "loss": 1.907004475593567, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.11009174311926606, |
| "grad_norm": 1.2265625, |
| "learning_rate": 5.303030303030303e-06, |
| "loss": 1.9484505653381348, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1162079510703364, |
| "grad_norm": 0.77734375, |
| "learning_rate": 5.606060606060606e-06, |
| "loss": 1.963153600692749, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.12232415902140673, |
| "grad_norm": 0.51953125, |
| "learning_rate": 5.90909090909091e-06, |
| "loss": 2.0666375160217285, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.12844036697247707, |
| "grad_norm": 0.4453125, |
| "learning_rate": 6.212121212121213e-06, |
| "loss": 1.9147648811340332, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1345565749235474, |
| "grad_norm": 0.59765625, |
| "learning_rate": 6.515151515151516e-06, |
| "loss": 1.8978981971740723, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.14067278287461774, |
| "grad_norm": 0.61328125, |
| "learning_rate": 6.818181818181818e-06, |
| "loss": 1.8800417184829712, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.14678899082568808, |
| "grad_norm": 0.78125, |
| "learning_rate": 7.121212121212122e-06, |
| "loss": 2.1724555492401123, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1529051987767584, |
| "grad_norm": 0.8046875, |
| "learning_rate": 7.424242424242425e-06, |
| "loss": 2.0132710933685303, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.15902140672782875, |
| "grad_norm": 0.53515625, |
| "learning_rate": 7.727272727272727e-06, |
| "loss": 1.9249347448349, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1651376146788991, |
| "grad_norm": 0.5625, |
| "learning_rate": 8.03030303030303e-06, |
| "loss": 2.0230674743652344, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1712538226299694, |
| "grad_norm": 0.46484375, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 1.8684461116790771, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.17737003058103976, |
| "grad_norm": 0.68359375, |
| "learning_rate": 8.636363636363637e-06, |
| "loss": 1.9816838502883911, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1834862385321101, |
| "grad_norm": 0.70703125, |
| "learning_rate": 8.93939393939394e-06, |
| "loss": 1.8070955276489258, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.18960244648318042, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.242424242424244e-06, |
| "loss": 1.9102303981781006, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.19571865443425077, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.545454545454547e-06, |
| "loss": 1.761095643043518, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.2018348623853211, |
| "grad_norm": 0.9765625, |
| "learning_rate": 9.84848484848485e-06, |
| "loss": 1.8172270059585571, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.20795107033639143, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.99998560409937e-06, |
| "loss": 1.7951654195785522, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.21406727828746178, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.999870437446959e-06, |
| "loss": 1.7246266603469849, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.22018348623853212, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.99964010708956e-06, |
| "loss": 1.7382261753082275, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.22629969418960244, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.999294618921943e-06, |
| "loss": 1.8094028234481812, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.2324159021406728, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.998833981786072e-06, |
| "loss": 1.7889823913574219, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.23853211009174313, |
| "grad_norm": 0.515625, |
| "learning_rate": 9.998258207470882e-06, |
| "loss": 1.7645984888076782, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.24464831804281345, |
| "grad_norm": 1.6328125, |
| "learning_rate": 9.997567310711977e-06, |
| "loss": 1.692162275314331, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.25076452599388377, |
| "grad_norm": 0.38671875, |
| "learning_rate": 9.996761309191248e-06, |
| "loss": 1.6656694412231445, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.25688073394495414, |
| "grad_norm": 1.0859375, |
| "learning_rate": 9.995840223536428e-06, |
| "loss": 1.69821035861969, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.26299694189602446, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.99480407732056e-06, |
| "loss": 1.693019986152649, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.2691131498470948, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.993652897061394e-06, |
| "loss": 1.585938572883606, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.27522935779816515, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.99238671222071e-06, |
| "loss": 1.5834678411483765, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.28134556574923547, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.991005555203553e-06, |
| "loss": 1.5904253721237183, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2874617737003058, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.989509461357428e-06, |
| "loss": 1.7213293313980103, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.29357798165137616, |
| "grad_norm": 0.96875, |
| "learning_rate": 9.98789846897137e-06, |
| "loss": 1.59124755859375, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.2996941896024465, |
| "grad_norm": 0.328125, |
| "learning_rate": 9.986172619274977e-06, |
| "loss": 1.4882735013961792, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.3058103975535168, |
| "grad_norm": 0.490234375, |
| "learning_rate": 9.984331956437354e-06, |
| "loss": 1.6401163339614868, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3119266055045872, |
| "grad_norm": 0.484375, |
| "learning_rate": 9.982376527565981e-06, |
| "loss": 1.6229268312454224, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.3180428134556575, |
| "grad_norm": 0.6953125, |
| "learning_rate": 9.980306382705504e-06, |
| "loss": 1.6486362218856812, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.3241590214067278, |
| "grad_norm": 0.4921875, |
| "learning_rate": 9.978121574836463e-06, |
| "loss": 1.7563343048095703, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3302752293577982, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.975822159873925e-06, |
| "loss": 1.5931520462036133, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3363914373088685, |
| "grad_norm": 0.490234375, |
| "learning_rate": 9.973408196666062e-06, |
| "loss": 1.6376924514770508, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3425076452599388, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.970879746992641e-06, |
| "loss": 1.6083383560180664, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3486238532110092, |
| "grad_norm": 0.408203125, |
| "learning_rate": 9.968236875563444e-06, |
| "loss": 1.5672008991241455, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3547400611620795, |
| "grad_norm": 0.400390625, |
| "learning_rate": 9.965479650016611e-06, |
| "loss": 1.5744966268539429, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.36085626911314983, |
| "grad_norm": 0.51171875, |
| "learning_rate": 9.962608140916906e-06, |
| "loss": 1.6350196599960327, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.3669724770642202, |
| "grad_norm": 0.4609375, |
| "learning_rate": 9.959622421753922e-06, |
| "loss": 1.4963032007217407, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.3730886850152905, |
| "grad_norm": 0.486328125, |
| "learning_rate": 9.956522568940185e-06, |
| "loss": 1.5451488494873047, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.37920489296636084, |
| "grad_norm": 0.439453125, |
| "learning_rate": 9.953308661809209e-06, |
| "loss": 1.599358320236206, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.3853211009174312, |
| "grad_norm": 0.37890625, |
| "learning_rate": 9.949980782613466e-06, |
| "loss": 1.5644880533218384, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.39143730886850153, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.94653901652227e-06, |
| "loss": 1.6034414768218994, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.39755351681957185, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.942983451619614e-06, |
| "loss": 1.6047066450119019, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4036697247706422, |
| "grad_norm": 0.4140625, |
| "learning_rate": 9.939314178901898e-06, |
| "loss": 1.5338762998580933, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.40978593272171254, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.935531292275615e-06, |
| "loss": 1.5983346700668335, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.41590214067278286, |
| "grad_norm": 0.390625, |
| "learning_rate": 9.931634888554937e-06, |
| "loss": 1.4490175247192383, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.42201834862385323, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.927625067459245e-06, |
| "loss": 1.43030846118927, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.42813455657492355, |
| "grad_norm": 0.369140625, |
| "learning_rate": 9.923501931610571e-06, |
| "loss": 1.5441913604736328, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.43425076452599387, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.919265586530977e-06, |
| "loss": 1.5886114835739136, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.44036697247706424, |
| "grad_norm": 0.8203125, |
| "learning_rate": 9.914916140639849e-06, |
| "loss": 1.5252549648284912, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.44648318042813456, |
| "grad_norm": 0.353515625, |
| "learning_rate": 9.910453705251127e-06, |
| "loss": 1.4197413921356201, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.4525993883792049, |
| "grad_norm": 0.5390625, |
| "learning_rate": 9.905878394570453e-06, |
| "loss": 1.5738030672073364, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.45871559633027525, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.90119032569225e-06, |
| "loss": 1.595241904258728, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4648318042813456, |
| "grad_norm": 0.515625, |
| "learning_rate": 9.89638961859672e-06, |
| "loss": 1.5898534059524536, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.4709480122324159, |
| "grad_norm": 0.5078125, |
| "learning_rate": 9.891476396146785e-06, |
| "loss": 1.5508402585983276, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.47706422018348627, |
| "grad_norm": 0.404296875, |
| "learning_rate": 9.886450784084934e-06, |
| "loss": 1.5691711902618408, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.4831804281345566, |
| "grad_norm": 0.85546875, |
| "learning_rate": 9.88131291103e-06, |
| "loss": 1.5895097255706787, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.4892966360856269, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.876062908473883e-06, |
| "loss": 1.5543285608291626, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4954128440366973, |
| "grad_norm": 0.375, |
| "learning_rate": 9.870700910778169e-06, |
| "loss": 1.4683598279953003, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5015290519877675, |
| "grad_norm": 1.328125, |
| "learning_rate": 9.865227055170706e-06, |
| "loss": 1.4957642555236816, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.5076452599388379, |
| "grad_norm": 0.54296875, |
| "learning_rate": 9.85964148174208e-06, |
| "loss": 1.448598027229309, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.5137614678899083, |
| "grad_norm": 0.54296875, |
| "learning_rate": 9.853944333442036e-06, |
| "loss": 1.4433187246322632, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5198776758409785, |
| "grad_norm": 5.46875, |
| "learning_rate": 9.848135756075816e-06, |
| "loss": 1.500611424446106, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5259938837920489, |
| "grad_norm": 0.435546875, |
| "learning_rate": 9.842215898300434e-06, |
| "loss": 1.4782170057296753, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.5321100917431193, |
| "grad_norm": 0.35546875, |
| "learning_rate": 9.836184911620863e-06, |
| "loss": 1.485479712486267, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5382262996941896, |
| "grad_norm": 1.53125, |
| "learning_rate": 9.830042950386162e-06, |
| "loss": 1.5060051679611206, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5443425076452599, |
| "grad_norm": 0.3671875, |
| "learning_rate": 9.823790171785527e-06, |
| "loss": 1.4704962968826294, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5504587155963303, |
| "grad_norm": 0.4765625, |
| "learning_rate": 9.817426735844265e-06, |
| "loss": 1.4355278015136719, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5565749235474006, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.810952805419701e-06, |
| "loss": 1.5194344520568848, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5626911314984709, |
| "grad_norm": 0.455078125, |
| "learning_rate": 9.804368546197007e-06, |
| "loss": 1.5073320865631104, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5688073394495413, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.797674126684967e-06, |
| "loss": 1.522252082824707, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5749235474006116, |
| "grad_norm": 0.5390625, |
| "learning_rate": 9.790869718211657e-06, |
| "loss": 1.6073163747787476, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.581039755351682, |
| "grad_norm": 0.416015625, |
| "learning_rate": 9.783955494920067e-06, |
| "loss": 1.4052844047546387, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5871559633027523, |
| "grad_norm": 0.3515625, |
| "learning_rate": 9.77693163376364e-06, |
| "loss": 1.4193068742752075, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5932721712538226, |
| "grad_norm": 0.4609375, |
| "learning_rate": 9.76979831450175e-06, |
| "loss": 1.5307352542877197, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.599388379204893, |
| "grad_norm": 0.5625, |
| "learning_rate": 9.76255571969509e-06, |
| "loss": 1.424899697303772, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.6055045871559633, |
| "grad_norm": 0.52734375, |
| "learning_rate": 9.755204034701004e-06, |
| "loss": 1.359844446182251, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.6116207951070336, |
| "grad_norm": 0.5078125, |
| "learning_rate": 9.747743447668755e-06, |
| "loss": 1.582168459892273, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.617737003058104, |
| "grad_norm": 0.470703125, |
| "learning_rate": 9.740174149534694e-06, |
| "loss": 1.488830327987671, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.6238532110091743, |
| "grad_norm": 0.5, |
| "learning_rate": 9.732496334017376e-06, |
| "loss": 1.4927191734313965, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.6299694189602446, |
| "grad_norm": 0.45703125, |
| "learning_rate": 9.724710197612615e-06, |
| "loss": 1.4716768264770508, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.636085626911315, |
| "grad_norm": 0.408203125, |
| "learning_rate": 9.716815939588437e-06, |
| "loss": 1.3903311491012573, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.6422018348623854, |
| "grad_norm": 0.341796875, |
| "learning_rate": 9.708813761979992e-06, |
| "loss": 1.5344760417938232, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6483180428134556, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.700703869584386e-06, |
| "loss": 1.4522379636764526, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.654434250764526, |
| "grad_norm": 0.35546875, |
| "learning_rate": 9.692486469955425e-06, |
| "loss": 1.3874422311782837, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.6605504587155964, |
| "grad_norm": 0.2734375, |
| "learning_rate": 9.684161773398321e-06, |
| "loss": 1.3861643075942993, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.675729992964292e-06, |
| "loss": 1.5152150392532349, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.672782874617737, |
| "grad_norm": 0.98828125, |
| "learning_rate": 9.667191344445123e-06, |
| "loss": 1.3514238595962524, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6788990825688074, |
| "grad_norm": 0.70703125, |
| "learning_rate": 9.658546046367646e-06, |
| "loss": 1.39436936378479, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.6850152905198776, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.649794319988121e-06, |
| "loss": 1.4995126724243164, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.691131498470948, |
| "grad_norm": 0.490234375, |
| "learning_rate": 9.640936389286617e-06, |
| "loss": 1.4583836793899536, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.6972477064220184, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.631972480961235e-06, |
| "loss": 1.4303733110427856, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.7033639143730887, |
| "grad_norm": 0.421875, |
| "learning_rate": 9.622902824422336e-06, |
| "loss": 1.393810749053955, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.709480122324159, |
| "grad_norm": 0.453125, |
| "learning_rate": 9.613727651786659e-06, |
| "loss": 1.51703679561615, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.7155963302752294, |
| "grad_norm": 0.5, |
| "learning_rate": 9.604447197871382e-06, |
| "loss": 1.373485803604126, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.7217125382262997, |
| "grad_norm": 0.384765625, |
| "learning_rate": 9.59506170018811e-06, |
| "loss": 1.4396356344223022, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.72782874617737, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.5855713989368e-06, |
| "loss": 1.5568106174468994, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.7339449541284404, |
| "grad_norm": 0.94140625, |
| "learning_rate": 9.575976536999616e-06, |
| "loss": 1.4187113046646118, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7400611620795107, |
| "grad_norm": 0.423828125, |
| "learning_rate": 9.566277359934703e-06, |
| "loss": 1.4353150129318237, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.746177370030581, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.556474115969911e-06, |
| "loss": 1.5181076526641846, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.7522935779816514, |
| "grad_norm": 0.408203125, |
| "learning_rate": 9.546567055996441e-06, |
| "loss": 1.4269428253173828, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.7584097859327217, |
| "grad_norm": 0.4765625, |
| "learning_rate": 9.536556433562422e-06, |
| "loss": 1.4407360553741455, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.764525993883792, |
| "grad_norm": 0.373046875, |
| "learning_rate": 9.526442504866427e-06, |
| "loss": 1.3571839332580566, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7706422018348624, |
| "grad_norm": 0.41796875, |
| "learning_rate": 9.516225528750904e-06, |
| "loss": 1.4300589561462402, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.7767584097859327, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.505905766695564e-06, |
| "loss": 1.5078905820846558, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.7828746177370031, |
| "grad_norm": 0.64453125, |
| "learning_rate": 9.495483482810688e-06, |
| "loss": 1.456427812576294, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.7889908256880734, |
| "grad_norm": 0.546875, |
| "learning_rate": 9.484958943830363e-06, |
| "loss": 1.4158270359039307, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.7951070336391437, |
| "grad_norm": 0.3984375, |
| "learning_rate": 9.474332419105652e-06, |
| "loss": 1.3977278470993042, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8012232415902141, |
| "grad_norm": 0.51953125, |
| "learning_rate": 9.463604180597712e-06, |
| "loss": 1.3898099660873413, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.8073394495412844, |
| "grad_norm": 0.58203125, |
| "learning_rate": 9.452774502870822e-06, |
| "loss": 1.4355534315109253, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.8134556574923547, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.441843663085368e-06, |
| "loss": 1.454459309577942, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.8195718654434251, |
| "grad_norm": 0.4453125, |
| "learning_rate": 9.430811940990736e-06, |
| "loss": 1.4455972909927368, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.8256880733944955, |
| "grad_norm": 0.57421875, |
| "learning_rate": 9.419679618918164e-06, |
| "loss": 1.381105661392212, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8318042813455657, |
| "grad_norm": 0.46484375, |
| "learning_rate": 9.408446981773514e-06, |
| "loss": 1.4196290969848633, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.8379204892966361, |
| "grad_norm": 1.1328125, |
| "learning_rate": 9.397114317029975e-06, |
| "loss": 1.4939439296722412, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.8440366972477065, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.38568191472071e-06, |
| "loss": 1.4450997114181519, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.8501529051987767, |
| "grad_norm": 0.6640625, |
| "learning_rate": 9.374150067431433e-06, |
| "loss": 1.4556881189346313, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.8562691131498471, |
| "grad_norm": 0.36328125, |
| "learning_rate": 9.362519070292924e-06, |
| "loss": 1.3958441019058228, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8623853211009175, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.350789220973468e-06, |
| "loss": 1.492562174797058, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.8685015290519877, |
| "grad_norm": 0.33203125, |
| "learning_rate": 9.33896081967124e-06, |
| "loss": 1.3768280744552612, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.8746177370030581, |
| "grad_norm": 0.94140625, |
| "learning_rate": 9.32703416910663e-06, |
| "loss": 1.3635163307189941, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.8807339449541285, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.315009574514487e-06, |
| "loss": 1.3402776718139648, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.8868501529051988, |
| "grad_norm": 0.328125, |
| "learning_rate": 9.302887343636305e-06, |
| "loss": 1.4155652523040771, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8929663608562691, |
| "grad_norm": 0.453125, |
| "learning_rate": 9.290667786712354e-06, |
| "loss": 1.5360904932022095, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.8990825688073395, |
| "grad_norm": 0.447265625, |
| "learning_rate": 9.278351216473737e-06, |
| "loss": 1.4269368648529053, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.9051987767584098, |
| "grad_norm": 2.40625, |
| "learning_rate": 9.265937948134393e-06, |
| "loss": 1.4990252256393433, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.9113149847094801, |
| "grad_norm": 0.37890625, |
| "learning_rate": 9.253428299383013e-06, |
| "loss": 1.4629848003387451, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.9174311926605505, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.240822590374927e-06, |
| "loss": 1.3986918926239014, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9235474006116208, |
| "grad_norm": 0.40234375, |
| "learning_rate": 9.228121143723901e-06, |
| "loss": 1.374011754989624, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.9296636085626911, |
| "grad_norm": 0.33203125, |
| "learning_rate": 9.215324284493888e-06, |
| "loss": 1.3964948654174805, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.9357798165137615, |
| "grad_norm": 0.43359375, |
| "learning_rate": 9.202432340190696e-06, |
| "loss": 1.3667786121368408, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.9418960244648318, |
| "grad_norm": 0.380859375, |
| "learning_rate": 9.18944564075362e-06, |
| "loss": 1.3175499439239502, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.9480122324159022, |
| "grad_norm": 0.376953125, |
| "learning_rate": 9.17636451854699e-06, |
| "loss": 1.3974062204360962, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9541284403669725, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.163189308351666e-06, |
| "loss": 1.405277132987976, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.9602446483180428, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.149920347356472e-06, |
| "loss": 1.4029018878936768, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.9663608562691132, |
| "grad_norm": 0.41015625, |
| "learning_rate": 9.136557975149563e-06, |
| "loss": 1.3701725006103516, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.9724770642201835, |
| "grad_norm": 0.5859375, |
| "learning_rate": 9.12310253370974e-06, |
| "loss": 1.4639108180999756, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.9785932721712538, |
| "grad_norm": 1.140625, |
| "learning_rate": 9.109554367397699e-06, |
| "loss": 1.3428951501846313, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9847094801223242, |
| "grad_norm": 0.9296875, |
| "learning_rate": 9.095913822947197e-06, |
| "loss": 1.2543865442276, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.9908256880733946, |
| "grad_norm": 0.4296875, |
| "learning_rate": 9.082181249456211e-06, |
| "loss": 1.287245512008667, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.9969418960244648, |
| "grad_norm": 0.404296875, |
| "learning_rate": 9.06835699837798e-06, |
| "loss": 1.3998972177505493, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.003058103975535, |
| "grad_norm": 0.4921875, |
| "learning_rate": 9.054441423512015e-06, |
| "loss": 1.381530523300171, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.0091743119266054, |
| "grad_norm": 0.380859375, |
| "learning_rate": 9.040434880995052e-06, |
| "loss": 1.388320803642273, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.0152905198776758, |
| "grad_norm": 0.478515625, |
| "learning_rate": 9.026337729291927e-06, |
| "loss": 1.3628325462341309, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.0214067278287462, |
| "grad_norm": 0.447265625, |
| "learning_rate": 9.012150329186412e-06, |
| "loss": 1.3321391344070435, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.0275229357798166, |
| "grad_norm": 0.53515625, |
| "learning_rate": 8.997873043771974e-06, |
| "loss": 1.4002933502197266, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.033639143730887, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.983506238442486e-06, |
| "loss": 1.3850795030593872, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.039755351681957, |
| "grad_norm": 0.8984375, |
| "learning_rate": 8.969050280882873e-06, |
| "loss": 1.3575987815856934, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0458715596330275, |
| "grad_norm": 1.671875, |
| "learning_rate": 8.954505541059707e-06, |
| "loss": 1.448436975479126, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.0519877675840978, |
| "grad_norm": 0.51953125, |
| "learning_rate": 8.939872391211732e-06, |
| "loss": 1.3202804327011108, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.0581039755351682, |
| "grad_norm": 0.5703125, |
| "learning_rate": 8.925151205840343e-06, |
| "loss": 1.335026741027832, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.0642201834862386, |
| "grad_norm": 0.412109375, |
| "learning_rate": 8.910342361699996e-06, |
| "loss": 1.3237738609313965, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.070336391437309, |
| "grad_norm": 0.51171875, |
| "learning_rate": 8.895446237788574e-06, |
| "loss": 1.3708387613296509, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0764525993883791, |
| "grad_norm": 0.400390625, |
| "learning_rate": 8.88046321533768e-06, |
| "loss": 1.3443958759307861, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.0825688073394495, |
| "grad_norm": 1.2578125, |
| "learning_rate": 8.865393677802882e-06, |
| "loss": 1.231789231300354, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.0886850152905199, |
| "grad_norm": 0.455078125, |
| "learning_rate": 8.850238010853902e-06, |
| "loss": 1.3118000030517578, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.0948012232415902, |
| "grad_norm": 0.46484375, |
| "learning_rate": 8.834996602364738e-06, |
| "loss": 1.449766993522644, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.1009174311926606, |
| "grad_norm": 0.53515625, |
| "learning_rate": 8.81966984240375e-06, |
| "loss": 1.3435068130493164, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.107033639143731, |
| "grad_norm": 5.9375, |
| "learning_rate": 8.80425812322367e-06, |
| "loss": 1.2645937204360962, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.1131498470948011, |
| "grad_norm": 0.64453125, |
| "learning_rate": 8.78876183925156e-06, |
| "loss": 1.2984048128128052, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.1192660550458715, |
| "grad_norm": 0.5546875, |
| "learning_rate": 8.77318138707872e-06, |
| "loss": 1.3319660425186157, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.1253822629969419, |
| "grad_norm": 0.97265625, |
| "learning_rate": 8.757517165450543e-06, |
| "loss": 1.3149017095565796, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.1314984709480123, |
| "grad_norm": 0.4375, |
| "learning_rate": 8.741769575256306e-06, |
| "loss": 1.3030190467834473, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.1376146788990826, |
| "grad_norm": 0.45703125, |
| "learning_rate": 8.725939019518902e-06, |
| "loss": 1.2816126346588135, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.143730886850153, |
| "grad_norm": 0.640625, |
| "learning_rate": 8.710025903384548e-06, |
| "loss": 1.3657718896865845, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.1498470948012232, |
| "grad_norm": 0.408203125, |
| "learning_rate": 8.69403063411239e-06, |
| "loss": 1.2346255779266357, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.1559633027522935, |
| "grad_norm": 0.35546875, |
| "learning_rate": 8.6779536210641e-06, |
| "loss": 1.2943626642227173, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.162079510703364, |
| "grad_norm": 0.515625, |
| "learning_rate": 8.661795275693385e-06, |
| "loss": 1.3616715669631958, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.1681957186544343, |
| "grad_norm": 0.447265625, |
| "learning_rate": 8.64555601153547e-06, |
| "loss": 1.2533824443817139, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.1743119266055047, |
| "grad_norm": 0.56640625, |
| "learning_rate": 8.629236244196502e-06, |
| "loss": 1.287404179573059, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.1804281345565748, |
| "grad_norm": 0.62890625, |
| "learning_rate": 8.612836391342925e-06, |
| "loss": 1.3631038665771484, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.1865443425076452, |
| "grad_norm": 0.41015625, |
| "learning_rate": 8.596356872690779e-06, |
| "loss": 1.3277571201324463, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.1926605504587156, |
| "grad_norm": 0.50390625, |
| "learning_rate": 8.579798109994968e-06, |
| "loss": 1.3345115184783936, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.198776758409786, |
| "grad_norm": 0.458984375, |
| "learning_rate": 8.563160527038467e-06, |
| "loss": 1.2454558610916138, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.2048929663608563, |
| "grad_norm": 0.53125, |
| "learning_rate": 8.546444549621467e-06, |
| "loss": 1.3097434043884277, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.2110091743119267, |
| "grad_norm": 0.470703125, |
| "learning_rate": 8.529650605550478e-06, |
| "loss": 1.2673131227493286, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.217125382262997, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.512779124627395e-06, |
| "loss": 1.4371856451034546, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.2232415902140672, |
| "grad_norm": 0.392578125, |
| "learning_rate": 8.495830538638481e-06, |
| "loss": 1.2818241119384766, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.2293577981651376, |
| "grad_norm": 0.48046875, |
| "learning_rate": 8.478805281343335e-06, |
| "loss": 1.215641736984253, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.235474006116208, |
| "grad_norm": 2.296875, |
| "learning_rate": 8.461703788463757e-06, |
| "loss": 1.2823781967163086, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.2415902140672783, |
| "grad_norm": 0.671875, |
| "learning_rate": 8.44452649767264e-06, |
| "loss": 1.3114620447158813, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.2477064220183487, |
| "grad_norm": 0.62890625, |
| "learning_rate": 8.427273848582744e-06, |
| "loss": 1.2511239051818848, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.2538226299694188, |
| "grad_norm": 0.5234375, |
| "learning_rate": 8.40994628273544e-06, |
| "loss": 1.2478758096694946, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.2599388379204892, |
| "grad_norm": 0.5390625, |
| "learning_rate": 8.392544243589428e-06, |
| "loss": 1.3285698890686035, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.2660550458715596, |
| "grad_norm": 0.74609375, |
| "learning_rate": 8.375068176509375e-06, |
| "loss": 1.3709665536880493, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.27217125382263, |
| "grad_norm": 0.703125, |
| "learning_rate": 8.357518528754524e-06, |
| "loss": 1.3329336643218994, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.2782874617737003, |
| "grad_norm": 0.53515625, |
| "learning_rate": 8.339895749467238e-06, |
| "loss": 1.2674789428710938, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.2844036697247707, |
| "grad_norm": 0.80859375, |
| "learning_rate": 8.322200289661517e-06, |
| "loss": 1.152662992477417, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.290519877675841, |
| "grad_norm": 0.45703125, |
| "learning_rate": 8.304432602211446e-06, |
| "loss": 1.3445444107055664, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.2966360856269112, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.28659314183961e-06, |
| "loss": 1.3826080560684204, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.3027522935779816, |
| "grad_norm": 0.3828125, |
| "learning_rate": 8.268682365105453e-06, |
| "loss": 1.3560914993286133, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.308868501529052, |
| "grad_norm": 0.62890625, |
| "learning_rate": 8.250700730393599e-06, |
| "loss": 1.2076865434646606, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.3149847094801224, |
| "grad_norm": 0.408203125, |
| "learning_rate": 8.232648697902113e-06, |
| "loss": 1.3048980236053467, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.3211009174311927, |
| "grad_norm": 0.4921875, |
| "learning_rate": 8.21452672963073e-06, |
| "loss": 1.352384328842163, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.3272171253822629, |
| "grad_norm": 0.3984375, |
| "learning_rate": 8.196335289369027e-06, |
| "loss": 1.390981674194336, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 1.0625, |
| "learning_rate": 8.178074842684554e-06, |
| "loss": 1.32779860496521, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.3394495412844036, |
| "grad_norm": 1.0390625, |
| "learning_rate": 8.159745856910922e-06, |
| "loss": 1.2868674993515015, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.345565749235474, |
| "grad_norm": 0.419921875, |
| "learning_rate": 8.14134880113584e-06, |
| "loss": 1.305415153503418, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.3516819571865444, |
| "grad_norm": 0.53125, |
| "learning_rate": 8.122884146189104e-06, |
| "loss": 1.3808095455169678, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.3577981651376148, |
| "grad_norm": 0.4921875, |
| "learning_rate": 8.104352364630565e-06, |
| "loss": 1.2937378883361816, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.3639143730886851, |
| "grad_norm": 0.32421875, |
| "learning_rate": 8.085753930738013e-06, |
| "loss": 1.2958605289459229, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.3700305810397553, |
| "grad_norm": 0.60546875, |
| "learning_rate": 8.067089320495057e-06, |
| "loss": 1.3038794994354248, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.3761467889908257, |
| "grad_norm": 0.52734375, |
| "learning_rate": 8.048359011578927e-06, |
| "loss": 1.2670778036117554, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.382262996941896, |
| "grad_norm": 1.390625, |
| "learning_rate": 8.029563483348268e-06, |
| "loss": 1.3002293109893799, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.3883792048929664, |
| "grad_norm": 0.73046875, |
| "learning_rate": 8.010703216830852e-06, |
| "loss": 1.3091164827346802, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.3944954128440368, |
| "grad_norm": 0.4921875, |
| "learning_rate": 7.991778694711278e-06, |
| "loss": 1.2860240936279297, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.400611620795107, |
| "grad_norm": 0.421875, |
| "learning_rate": 7.972790401318627e-06, |
| "loss": 1.2974958419799805, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.4067278287461773, |
| "grad_norm": 1.4609375, |
| "learning_rate": 7.953738822614048e-06, |
| "loss": 1.3687572479248047, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.4128440366972477, |
| "grad_norm": 0.447265625, |
| "learning_rate": 7.934624446178328e-06, |
| "loss": 1.2588635683059692, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.418960244648318, |
| "grad_norm": 0.427734375, |
| "learning_rate": 7.915447761199427e-06, |
| "loss": 1.3145904541015625, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.4250764525993884, |
| "grad_norm": 0.318359375, |
| "learning_rate": 7.896209258459934e-06, |
| "loss": 1.2143771648406982, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.4311926605504588, |
| "grad_norm": 0.40625, |
| "learning_rate": 7.876909430324527e-06, |
| "loss": 1.2713569402694702, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.4373088685015292, |
| "grad_norm": 0.64453125, |
| "learning_rate": 7.85754877072737e-06, |
| "loss": 1.3136000633239746, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.4434250764525993, |
| "grad_norm": 0.46484375, |
| "learning_rate": 7.838127775159451e-06, |
| "loss": 1.2473974227905273, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.4495412844036697, |
| "grad_norm": 0.5078125, |
| "learning_rate": 7.818646940655933e-06, |
| "loss": 1.3004451990127563, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.45565749235474, |
| "grad_norm": 1.4296875, |
| "learning_rate": 7.799106765783407e-06, |
| "loss": 1.3775520324707031, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.4617737003058104, |
| "grad_norm": 0.515625, |
| "learning_rate": 7.779507750627145e-06, |
| "loss": 1.409247875213623, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.4678899082568808, |
| "grad_norm": 3.953125, |
| "learning_rate": 7.7598503967783e-06, |
| "loss": 1.282897710800171, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.474006116207951, |
| "grad_norm": 0.4453125, |
| "learning_rate": 7.74013520732107e-06, |
| "loss": 1.2685235738754272, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.4801223241590213, |
| "grad_norm": 0.54296875, |
| "learning_rate": 7.720362686819814e-06, |
| "loss": 1.202805995941162, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.4862385321100917, |
| "grad_norm": 0.359375, |
| "learning_rate": 7.700533341306155e-06, |
| "loss": 1.3179457187652588, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.492354740061162, |
| "grad_norm": 0.4140625, |
| "learning_rate": 7.680647678266011e-06, |
| "loss": 1.3416056632995605, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.4984709480122325, |
| "grad_norm": 0.609375, |
| "learning_rate": 7.66070620662662e-06, |
| "loss": 1.2907155752182007, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.5045871559633026, |
| "grad_norm": 0.6640625, |
| "learning_rate": 7.640709436743512e-06, |
| "loss": 1.2985384464263916, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.5107033639143732, |
| "grad_norm": 0.54296875, |
| "learning_rate": 7.620657880387448e-06, |
| "loss": 1.2733287811279297, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.5168195718654434, |
| "grad_norm": 0.55859375, |
| "learning_rate": 7.600552050731315e-06, |
| "loss": 1.2120338678359985, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.5229357798165137, |
| "grad_norm": 0.859375, |
| "learning_rate": 7.5803924623370025e-06, |
| "loss": 1.2848923206329346, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.529051987767584, |
| "grad_norm": 0.69140625, |
| "learning_rate": 7.5601796311422325e-06, |
| "loss": 1.3336488008499146, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.5351681957186545, |
| "grad_norm": 0.45703125, |
| "learning_rate": 7.539914074447349e-06, |
| "loss": 1.2442420721054077, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.5412844036697249, |
| "grad_norm": 0.68359375, |
| "learning_rate": 7.519596310902081e-06, |
| "loss": 1.266619324684143, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.547400611620795, |
| "grad_norm": 0.65625, |
| "learning_rate": 7.499226860492273e-06, |
| "loss": 1.374267816543579, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.5535168195718656, |
| "grad_norm": 0.46484375, |
| "learning_rate": 7.478806244526576e-06, |
| "loss": 1.3529757261276245, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.5596330275229358, |
| "grad_norm": 0.455078125, |
| "learning_rate": 7.458334985623102e-06, |
| "loss": 1.2986624240875244, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.5657492354740061, |
| "grad_norm": 0.37109375, |
| "learning_rate": 7.437813607696049e-06, |
| "loss": 1.2934763431549072, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.5718654434250765, |
| "grad_norm": 0.90234375, |
| "learning_rate": 7.4172426359422976e-06, |
| "loss": 1.3502346277236938, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.5779816513761467, |
| "grad_norm": 0.58203125, |
| "learning_rate": 7.396622596827967e-06, |
| "loss": 1.2319389581680298, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.5840978593272173, |
| "grad_norm": 0.625, |
| "learning_rate": 7.375954018074941e-06, |
| "loss": 1.3282928466796875, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.5902140672782874, |
| "grad_norm": 0.6875, |
| "learning_rate": 7.3552374286473595e-06, |
| "loss": 1.3678048849105835, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.5963302752293578, |
| "grad_norm": 0.30859375, |
| "learning_rate": 7.3344733587380875e-06, |
| "loss": 1.2744084596633911, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.6024464831804281, |
| "grad_norm": 0.84375, |
| "learning_rate": 7.31366233975514e-06, |
| "loss": 1.281977891921997, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.6085626911314985, |
| "grad_norm": 0.61328125, |
| "learning_rate": 7.292804904308087e-06, |
| "loss": 1.2926934957504272, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.614678899082569, |
| "grad_norm": 0.484375, |
| "learning_rate": 7.271901586194417e-06, |
| "loss": 1.3355308771133423, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.620795107033639, |
| "grad_norm": 0.62890625, |
| "learning_rate": 7.2509529203858794e-06, |
| "loss": 1.2734055519104004, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.6269113149847096, |
| "grad_norm": 0.412109375, |
| "learning_rate": 7.229959443014793e-06, |
| "loss": 1.2471139430999756, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.6330275229357798, |
| "grad_norm": 0.376953125, |
| "learning_rate": 7.208921691360323e-06, |
| "loss": 1.3476160764694214, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.6391437308868502, |
| "grad_norm": 0.408203125, |
| "learning_rate": 7.187840203834732e-06, |
| "loss": 1.2233093976974487, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.6452599388379205, |
| "grad_norm": 0.66015625, |
| "learning_rate": 7.166715519969601e-06, |
| "loss": 1.2761595249176025, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.6513761467889907, |
| "grad_norm": 1.84375, |
| "learning_rate": 7.145548180402021e-06, |
| "loss": 1.3554096221923828, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.6574923547400613, |
| "grad_norm": 0.474609375, |
| "learning_rate": 7.124338726860755e-06, |
| "loss": 1.3470004796981812, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.6636085626911314, |
| "grad_norm": 0.796875, |
| "learning_rate": 7.103087702152377e-06, |
| "loss": 1.312508225440979, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.6697247706422018, |
| "grad_norm": 2.484375, |
| "learning_rate": 7.081795650147375e-06, |
| "loss": 1.2889965772628784, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.6758409785932722, |
| "grad_norm": 0.75, |
| "learning_rate": 7.060463115766239e-06, |
| "loss": 1.3792515993118286, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.6819571865443423, |
| "grad_norm": 1.96875, |
| "learning_rate": 7.0390906449655104e-06, |
| "loss": 1.321378469467163, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.688073394495413, |
| "grad_norm": 0.515625, |
| "learning_rate": 7.017678784723806e-06, |
| "loss": 1.3485661745071411, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.694189602446483, |
| "grad_norm": 0.5546875, |
| "learning_rate": 6.99622808302783e-06, |
| "loss": 1.3221888542175293, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.7003058103975535, |
| "grad_norm": 0.46875, |
| "learning_rate": 6.974739088858338e-06, |
| "loss": 1.3821053504943848, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.7064220183486238, |
| "grad_norm": 0.466796875, |
| "learning_rate": 6.9532123521760944e-06, |
| "loss": 1.272276759147644, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.7125382262996942, |
| "grad_norm": 0.4375, |
| "learning_rate": 6.931648423907796e-06, |
| "loss": 1.2930102348327637, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.7186544342507646, |
| "grad_norm": 0.361328125, |
| "learning_rate": 6.91004785593197e-06, |
| "loss": 1.2617864608764648, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.7247706422018347, |
| "grad_norm": 0.640625, |
| "learning_rate": 6.888411201064854e-06, |
| "loss": 1.3153817653656006, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.7308868501529053, |
| "grad_norm": 0.412109375, |
| "learning_rate": 6.866739013046243e-06, |
| "loss": 1.2653061151504517, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.7370030581039755, |
| "grad_norm": 0.5078125, |
| "learning_rate": 6.845031846525322e-06, |
| "loss": 1.2796239852905273, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.7431192660550459, |
| "grad_norm": 0.328125, |
| "learning_rate": 6.823290257046467e-06, |
| "loss": 1.2797678709030151, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.7492354740061162, |
| "grad_norm": 0.671875, |
| "learning_rate": 6.801514801035031e-06, |
| "loss": 1.2564300298690796, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.7553516819571864, |
| "grad_norm": 0.53515625, |
| "learning_rate": 6.7797060357831045e-06, |
| "loss": 1.3716152906417847, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.761467889908257, |
| "grad_norm": 0.64453125, |
| "learning_rate": 6.757864519435245e-06, |
| "loss": 1.3831623792648315, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.7675840978593271, |
| "grad_norm": 1.4765625, |
| "learning_rate": 6.735990810974205e-06, |
| "loss": 1.3119230270385742, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.7737003058103975, |
| "grad_norm": 0.69140625, |
| "learning_rate": 6.71408547020661e-06, |
| "loss": 1.2804102897644043, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.7798165137614679, |
| "grad_norm": 0.578125, |
| "learning_rate": 6.6921490577486495e-06, |
| "loss": 1.403084635734558, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.7859327217125383, |
| "grad_norm": 0.78125, |
| "learning_rate": 6.6701821350117155e-06, |
| "loss": 1.2526099681854248, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.7920489296636086, |
| "grad_norm": 0.7890625, |
| "learning_rate": 6.648185264188043e-06, |
| "loss": 1.2811146974563599, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.7981651376146788, |
| "grad_norm": 0.6171875, |
| "learning_rate": 6.626159008236316e-06, |
| "loss": 1.2454664707183838, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.8042813455657494, |
| "grad_norm": 0.71875, |
| "learning_rate": 6.60410393086726e-06, |
| "loss": 1.2602325677871704, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.8103975535168195, |
| "grad_norm": 0.421875, |
| "learning_rate": 6.582020596529224e-06, |
| "loss": 1.2364270687103271, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.81651376146789, |
| "grad_norm": 0.462890625, |
| "learning_rate": 6.559909570393723e-06, |
| "loss": 1.2236618995666504, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.8226299694189603, |
| "grad_norm": 1.109375, |
| "learning_rate": 6.537771418340981e-06, |
| "loss": 1.3950483798980713, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.8287461773700304, |
| "grad_norm": 0.578125, |
| "learning_rate": 6.515606706945448e-06, |
| "loss": 1.2344207763671875, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.834862385321101, |
| "grad_norm": 0.62890625, |
| "learning_rate": 6.493416003461296e-06, |
| "loss": 1.335288643836975, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.8409785932721712, |
| "grad_norm": 0.515625, |
| "learning_rate": 6.4711998758079064e-06, |
| "loss": 1.255522608757019, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.8470948012232415, |
| "grad_norm": 0.392578125, |
| "learning_rate": 6.448958892555332e-06, |
| "loss": 1.2738847732543945, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.853211009174312, |
| "grad_norm": 0.37890625, |
| "learning_rate": 6.426693622909742e-06, |
| "loss": 1.2251421213150024, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.8593272171253823, |
| "grad_norm": 0.58203125, |
| "learning_rate": 6.404404636698869e-06, |
| "loss": 1.1613845825195312, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.8654434250764527, |
| "grad_norm": 0.47265625, |
| "learning_rate": 6.3820925043574074e-06, |
| "loss": 1.288172721862793, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.8715596330275228, |
| "grad_norm": 1.2109375, |
| "learning_rate": 6.35975779691243e-06, |
| "loss": 1.2886998653411865, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.8776758409785934, |
| "grad_norm": 0.72265625, |
| "learning_rate": 6.337401085968759e-06, |
| "loss": 1.286860466003418, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.8837920489296636, |
| "grad_norm": 0.46484375, |
| "learning_rate": 6.3150229436943514e-06, |
| "loss": 1.2472259998321533, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.889908256880734, |
| "grad_norm": 0.796875, |
| "learning_rate": 6.2926239428056456e-06, |
| "loss": 1.309545874595642, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.8960244648318043, |
| "grad_norm": 0.546875, |
| "learning_rate": 6.270204656552908e-06, |
| "loss": 1.2884358167648315, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.9021406727828745, |
| "grad_norm": 0.61328125, |
| "learning_rate": 6.247765658705564e-06, |
| "loss": 1.2543675899505615, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.908256880733945, |
| "grad_norm": 0.3515625, |
| "learning_rate": 6.225307523537509e-06, |
| "loss": 1.1704795360565186, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.9143730886850152, |
| "grad_norm": 0.921875, |
| "learning_rate": 6.2028308258124135e-06, |
| "loss": 1.362220048904419, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.9204892966360856, |
| "grad_norm": 1.078125, |
| "learning_rate": 6.180336140769015e-06, |
| "loss": 1.3805466890335083, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.926605504587156, |
| "grad_norm": 0.60546875, |
| "learning_rate": 6.157824044106394e-06, |
| "loss": 1.3186891078948975, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.9327217125382263, |
| "grad_norm": 1.0625, |
| "learning_rate": 6.13529511196924e-06, |
| "loss": 1.1534855365753174, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.9388379204892967, |
| "grad_norm": 0.80859375, |
| "learning_rate": 6.112749920933111e-06, |
| "loss": 1.2515051364898682, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.9449541284403669, |
| "grad_norm": 1.4140625, |
| "learning_rate": 6.090189047989665e-06, |
| "loss": 1.4018653631210327, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.9510703363914375, |
| "grad_norm": 0.5625, |
| "learning_rate": 6.067613070531912e-06, |
| "loss": 1.300402283668518, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.9571865443425076, |
| "grad_norm": 0.6015625, |
| "learning_rate": 6.045022566339419e-06, |
| "loss": 1.3779313564300537, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.963302752293578, |
| "grad_norm": 0.439453125, |
| "learning_rate": 6.022418113563536e-06, |
| "loss": 1.2664169073104858, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.9694189602446484, |
| "grad_norm": 0.72265625, |
| "learning_rate": 5.999800290712594e-06, |
| "loss": 1.2255876064300537, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.9755351681957185, |
| "grad_norm": 0.34375, |
| "learning_rate": 5.9771696766370965e-06, |
| "loss": 1.3016749620437622, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.981651376146789, |
| "grad_norm": 0.443359375, |
| "learning_rate": 5.9545268505149114e-06, |
| "loss": 1.2409298419952393, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.9877675840978593, |
| "grad_norm": 0.85546875, |
| "learning_rate": 5.931872391836446e-06, |
| "loss": 1.3296973705291748, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.9938837920489296, |
| "grad_norm": 0.5703125, |
| "learning_rate": 5.909206880389813e-06, |
| "loss": 1.376185655593872, |
| "step": 652 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.80078125, |
| "learning_rate": 5.8865308962459976e-06, |
| "loss": 1.2528204917907715, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.00611620795107, |
| "grad_norm": 0.45703125, |
| "learning_rate": 5.863845019744007e-06, |
| "loss": 1.1687815189361572, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.0122324159021407, |
| "grad_norm": 0.7734375, |
| "learning_rate": 5.841149831476024e-06, |
| "loss": 1.2196176052093506, |
| "step": 658 |
| }, |
| { |
| "epoch": 2.018348623853211, |
| "grad_norm": 0.4453125, |
| "learning_rate": 5.81844591227254e-06, |
| "loss": 1.261337399482727, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.0244648318042815, |
| "grad_norm": 0.490234375, |
| "learning_rate": 5.795733843187496e-06, |
| "loss": 1.2313090562820435, |
| "step": 662 |
| }, |
| { |
| "epoch": 2.0305810397553516, |
| "grad_norm": 1.1640625, |
| "learning_rate": 5.773014205483414e-06, |
| "loss": 1.2076407670974731, |
| "step": 664 |
| }, |
| { |
| "epoch": 2.036697247706422, |
| "grad_norm": 0.5859375, |
| "learning_rate": 5.750287580616511e-06, |
| "loss": 1.1940546035766602, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.0428134556574924, |
| "grad_norm": 1.3203125, |
| "learning_rate": 5.7275545502218274e-06, |
| "loss": 1.0421754121780396, |
| "step": 668 |
| }, |
| { |
| "epoch": 2.0489296636085625, |
| "grad_norm": 0.54296875, |
| "learning_rate": 5.704815696098337e-06, |
| "loss": 1.2445980310440063, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.055045871559633, |
| "grad_norm": 0.50390625, |
| "learning_rate": 5.68207160019406e-06, |
| "loss": 1.2573648691177368, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.0611620795107033, |
| "grad_norm": 0.498046875, |
| "learning_rate": 5.659322844591166e-06, |
| "loss": 1.3194655179977417, |
| "step": 674 |
| }, |
| { |
| "epoch": 2.067278287461774, |
| "grad_norm": 0.51171875, |
| "learning_rate": 5.636570011491082e-06, |
| "loss": 1.2315115928649902, |
| "step": 676 |
| }, |
| { |
| "epoch": 2.073394495412844, |
| "grad_norm": 0.486328125, |
| "learning_rate": 5.613813683199582e-06, |
| "loss": 1.1668107509613037, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.079510703363914, |
| "grad_norm": 0.51953125, |
| "learning_rate": 5.591054442111901e-06, |
| "loss": 1.1951708793640137, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.085626911314985, |
| "grad_norm": 0.384765625, |
| "learning_rate": 5.568292870697812e-06, |
| "loss": 1.1300991773605347, |
| "step": 682 |
| }, |
| { |
| "epoch": 2.091743119266055, |
| "grad_norm": 0.71484375, |
| "learning_rate": 5.545529551486731e-06, |
| "loss": 1.269416332244873, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.0978593272171255, |
| "grad_norm": 0.81640625, |
| "learning_rate": 5.522765067052805e-06, |
| "loss": 1.1883726119995117, |
| "step": 686 |
| }, |
| { |
| "epoch": 2.1039755351681957, |
| "grad_norm": 0.482421875, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 1.172653317451477, |
| "step": 688 |
| }, |
| { |
| "epoch": 2.1100917431192663, |
| "grad_norm": 0.6796875, |
| "learning_rate": 5.477234932947196e-06, |
| "loss": 1.2290334701538086, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.1162079510703364, |
| "grad_norm": 1.4453125, |
| "learning_rate": 5.45447044851327e-06, |
| "loss": 1.1191812753677368, |
| "step": 692 |
| }, |
| { |
| "epoch": 2.1223241590214066, |
| "grad_norm": 0.43359375, |
| "learning_rate": 5.431707129302188e-06, |
| "loss": 1.3137654066085815, |
| "step": 694 |
| }, |
| { |
| "epoch": 2.128440366972477, |
| "grad_norm": 0.734375, |
| "learning_rate": 5.4089455578881005e-06, |
| "loss": 1.1786179542541504, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.1345565749235473, |
| "grad_norm": 0.419921875, |
| "learning_rate": 5.386186316800418e-06, |
| "loss": 1.1776201725006104, |
| "step": 698 |
| }, |
| { |
| "epoch": 2.140672782874618, |
| "grad_norm": 0.482421875, |
| "learning_rate": 5.36342998850892e-06, |
| "loss": 1.18330979347229, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.146788990825688, |
| "grad_norm": 0.68359375, |
| "learning_rate": 5.340677155408835e-06, |
| "loss": 1.2524994611740112, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.1529051987767582, |
| "grad_norm": 0.62109375, |
| "learning_rate": 5.317928399805943e-06, |
| "loss": 1.2536473274230957, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.159021406727829, |
| "grad_norm": 0.46875, |
| "learning_rate": 5.295184303901665e-06, |
| "loss": 1.1864341497421265, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.165137614678899, |
| "grad_norm": 0.439453125, |
| "learning_rate": 5.272445449778175e-06, |
| "loss": 1.2302113771438599, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.1712538226299696, |
| "grad_norm": 0.447265625, |
| "learning_rate": 5.249712419383492e-06, |
| "loss": 1.153498888015747, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.1773700305810397, |
| "grad_norm": 0.4765625, |
| "learning_rate": 5.226985794516587e-06, |
| "loss": 1.2334654331207275, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.18348623853211, |
| "grad_norm": 0.4453125, |
| "learning_rate": 5.204266156812504e-06, |
| "loss": 1.1027376651763916, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.1896024464831805, |
| "grad_norm": 4.0625, |
| "learning_rate": 5.181554087727462e-06, |
| "loss": 1.2752158641815186, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.1957186544342506, |
| "grad_norm": 0.5625, |
| "learning_rate": 5.158850168523979e-06, |
| "loss": 1.2342238426208496, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.2018348623853212, |
| "grad_norm": 0.7421875, |
| "learning_rate": 5.136154980255995e-06, |
| "loss": 1.2153668403625488, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.2079510703363914, |
| "grad_norm": 0.6796875, |
| "learning_rate": 5.1134691037540055e-06, |
| "loss": 1.2085171937942505, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.214067278287462, |
| "grad_norm": 0.80859375, |
| "learning_rate": 5.090793119610189e-06, |
| "loss": 1.230190634727478, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.220183486238532, |
| "grad_norm": 0.65625, |
| "learning_rate": 5.068127608163557e-06, |
| "loss": 1.1547964811325073, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.2262996941896023, |
| "grad_norm": 0.57421875, |
| "learning_rate": 5.045473149485091e-06, |
| "loss": 1.2784456014633179, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.232415902140673, |
| "grad_norm": 0.734375, |
| "learning_rate": 5.022830323362905e-06, |
| "loss": 1.1994041204452515, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.238532110091743, |
| "grad_norm": 0.59375, |
| "learning_rate": 5.000199709287408e-06, |
| "loss": 1.1957271099090576, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.2446483180428136, |
| "grad_norm": 0.70703125, |
| "learning_rate": 4.9775818864364635e-06, |
| "loss": 1.2446789741516113, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.2507645259938838, |
| "grad_norm": 0.486328125, |
| "learning_rate": 4.954977433660583e-06, |
| "loss": 1.1822783946990967, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.2568807339449544, |
| "grad_norm": 0.60546875, |
| "learning_rate": 4.9323869294680915e-06, |
| "loss": 1.1413577795028687, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.2629969418960245, |
| "grad_norm": 0.49609375, |
| "learning_rate": 4.909810952010336e-06, |
| "loss": 1.1892144680023193, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.2691131498470947, |
| "grad_norm": 0.498046875, |
| "learning_rate": 4.887250079066892e-06, |
| "loss": 1.2589919567108154, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.2752293577981653, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.86470488803076e-06, |
| "loss": 1.2584980726242065, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.2813455657492354, |
| "grad_norm": 0.435546875, |
| "learning_rate": 4.842175955893608e-06, |
| "loss": 1.1710209846496582, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.287461773700306, |
| "grad_norm": 0.59765625, |
| "learning_rate": 4.819663859230986e-06, |
| "loss": 1.2968641519546509, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.293577981651376, |
| "grad_norm": 1.2109375, |
| "learning_rate": 4.797169174187588e-06, |
| "loss": 1.198433756828308, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.2996941896024463, |
| "grad_norm": 0.6875, |
| "learning_rate": 4.774692476462493e-06, |
| "loss": 1.296976089477539, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.305810397553517, |
| "grad_norm": 0.63671875, |
| "learning_rate": 4.752234341294438e-06, |
| "loss": 1.2286152839660645, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.311926605504587, |
| "grad_norm": 0.87890625, |
| "learning_rate": 4.729795343447093e-06, |
| "loss": 1.2850275039672852, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.3180428134556577, |
| "grad_norm": 0.578125, |
| "learning_rate": 4.707376057194356e-06, |
| "loss": 1.2537508010864258, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.324159021406728, |
| "grad_norm": 0.58984375, |
| "learning_rate": 4.68497705630565e-06, |
| "loss": 1.1948941946029663, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.330275229357798, |
| "grad_norm": 0.51171875, |
| "learning_rate": 4.662598914031241e-06, |
| "loss": 1.2438340187072754, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.3363914373088686, |
| "grad_norm": 0.62890625, |
| "learning_rate": 4.6402422030875704e-06, |
| "loss": 1.3103235960006714, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.3425076452599387, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.617907495642594e-06, |
| "loss": 1.1827704906463623, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.3486238532110093, |
| "grad_norm": 0.515625, |
| "learning_rate": 4.595595363301133e-06, |
| "loss": 1.1387625932693481, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.3547400611620795, |
| "grad_norm": 0.68359375, |
| "learning_rate": 4.5733063770902595e-06, |
| "loss": 1.2371636629104614, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.3608562691131496, |
| "grad_norm": 0.54296875, |
| "learning_rate": 4.551041107444671e-06, |
| "loss": 1.1606448888778687, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.36697247706422, |
| "grad_norm": 0.60546875, |
| "learning_rate": 4.528800124192095e-06, |
| "loss": 1.3499796390533447, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.3730886850152904, |
| "grad_norm": 0.388671875, |
| "learning_rate": 4.506583996538705e-06, |
| "loss": 1.1447316408157349, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.379204892966361, |
| "grad_norm": 0.6640625, |
| "learning_rate": 4.484393293054553e-06, |
| "loss": 1.190900444984436, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.385321100917431, |
| "grad_norm": 0.54296875, |
| "learning_rate": 4.462228581659019e-06, |
| "loss": 1.2503337860107422, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.3914373088685017, |
| "grad_norm": 0.609375, |
| "learning_rate": 4.440090429606278e-06, |
| "loss": 1.1737557649612427, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.397553516819572, |
| "grad_norm": 0.75390625, |
| "learning_rate": 4.417979403470778e-06, |
| "loss": 1.239940881729126, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.4036697247706424, |
| "grad_norm": 0.419921875, |
| "learning_rate": 4.3958960691327425e-06, |
| "loss": 1.1777243614196777, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.4097859327217126, |
| "grad_norm": 0.5859375, |
| "learning_rate": 4.373840991763686e-06, |
| "loss": 1.1661309003829956, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.4159021406727827, |
| "grad_norm": 0.466796875, |
| "learning_rate": 4.3518147358119575e-06, |
| "loss": 1.2908847332000732, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.4220183486238533, |
| "grad_norm": 0.58984375, |
| "learning_rate": 4.329817864988285e-06, |
| "loss": 1.261257290840149, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.4281345565749235, |
| "grad_norm": 0.486328125, |
| "learning_rate": 4.307850942251351e-06, |
| "loss": 1.0505046844482422, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.434250764525994, |
| "grad_norm": 0.68359375, |
| "learning_rate": 4.285914529793392e-06, |
| "loss": 1.260128378868103, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.4403669724770642, |
| "grad_norm": 0.72265625, |
| "learning_rate": 4.2640091890257984e-06, |
| "loss": 1.336702823638916, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.4464831804281344, |
| "grad_norm": 0.50390625, |
| "learning_rate": 4.242135480564756e-06, |
| "loss": 1.2336891889572144, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.452599388379205, |
| "grad_norm": 0.75390625, |
| "learning_rate": 4.220293964216899e-06, |
| "loss": 1.1661975383758545, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.458715596330275, |
| "grad_norm": 0.59765625, |
| "learning_rate": 4.198485198964971e-06, |
| "loss": 1.2408455610275269, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.4648318042813457, |
| "grad_norm": 0.48828125, |
| "learning_rate": 4.176709742953536e-06, |
| "loss": 1.1344859600067139, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.470948012232416, |
| "grad_norm": 5.84375, |
| "learning_rate": 4.15496815347468e-06, |
| "loss": 1.1564085483551025, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.477064220183486, |
| "grad_norm": 0.69921875, |
| "learning_rate": 4.133260986953759e-06, |
| "loss": 1.3386648893356323, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.4831804281345566, |
| "grad_norm": 0.423828125, |
| "learning_rate": 4.111588798935146e-06, |
| "loss": 1.1828325986862183, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.489296636085627, |
| "grad_norm": 0.703125, |
| "learning_rate": 4.089952144068031e-06, |
| "loss": 1.1244158744812012, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.4954128440366974, |
| "grad_norm": 0.68359375, |
| "learning_rate": 4.068351576092204e-06, |
| "loss": 1.2698228359222412, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.5015290519877675, |
| "grad_norm": 0.74609375, |
| "learning_rate": 4.046787647823906e-06, |
| "loss": 1.2006717920303345, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.5076452599388377, |
| "grad_norm": 1.375, |
| "learning_rate": 4.025260911141664e-06, |
| "loss": 1.217053771018982, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.5137614678899083, |
| "grad_norm": 0.53125, |
| "learning_rate": 4.003771916972171e-06, |
| "loss": 1.2399015426635742, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.5198776758409784, |
| "grad_norm": 0.59765625, |
| "learning_rate": 3.982321215276195e-06, |
| "loss": 1.1872673034667969, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.525993883792049, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.960909355034491e-06, |
| "loss": 1.2071783542633057, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.532110091743119, |
| "grad_norm": 0.58984375, |
| "learning_rate": 3.939536884233762e-06, |
| "loss": 1.2099813222885132, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.5382262996941893, |
| "grad_norm": 0.8203125, |
| "learning_rate": 3.918204349852626e-06, |
| "loss": 1.2038205862045288, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.54434250764526, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.896912297847626e-06, |
| "loss": 1.1809529066085815, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.5504587155963305, |
| "grad_norm": 0.57421875, |
| "learning_rate": 3.875661273139246e-06, |
| "loss": 1.1591264009475708, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.5565749235474007, |
| "grad_norm": 0.8359375, |
| "learning_rate": 3.854451819597981e-06, |
| "loss": 1.0593103170394897, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.562691131498471, |
| "grad_norm": 0.466796875, |
| "learning_rate": 3.833284480030401e-06, |
| "loss": 1.2778112888336182, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.5688073394495414, |
| "grad_norm": 0.6015625, |
| "learning_rate": 3.81215979616527e-06, |
| "loss": 1.153441309928894, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.5749235474006116, |
| "grad_norm": 0.71875, |
| "learning_rate": 3.79107830863968e-06, |
| "loss": 1.251842975616455, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.581039755351682, |
| "grad_norm": 0.77734375, |
| "learning_rate": 3.7700405569852082e-06, |
| "loss": 1.1608760356903076, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.5871559633027523, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.749047079614121e-06, |
| "loss": 1.1455830335617065, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.5932721712538225, |
| "grad_norm": 0.390625, |
| "learning_rate": 3.7280984138055842e-06, |
| "loss": 1.201966643333435, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.599388379204893, |
| "grad_norm": 0.62890625, |
| "learning_rate": 3.707195095691913e-06, |
| "loss": 1.232427954673767, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.6055045871559632, |
| "grad_norm": 2.890625, |
| "learning_rate": 3.6863376602448607e-06, |
| "loss": 1.257423758506775, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.611620795107034, |
| "grad_norm": 0.50390625, |
| "learning_rate": 3.665526641261914e-06, |
| "loss": 1.154307246208191, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.617737003058104, |
| "grad_norm": 0.56640625, |
| "learning_rate": 3.6447625713526415e-06, |
| "loss": 1.3352923393249512, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.623853211009174, |
| "grad_norm": 0.59765625, |
| "learning_rate": 3.6240459819250605e-06, |
| "loss": 1.2940092086791992, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.6299694189602447, |
| "grad_norm": 1.984375, |
| "learning_rate": 3.603377403172035e-06, |
| "loss": 1.3129587173461914, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.636085626911315, |
| "grad_norm": 0.66015625, |
| "learning_rate": 3.582757364057704e-06, |
| "loss": 1.1294050216674805, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.6422018348623855, |
| "grad_norm": 1.0625, |
| "learning_rate": 3.5621863923039533e-06, |
| "loss": 1.1990245580673218, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.6483180428134556, |
| "grad_norm": 0.609375, |
| "learning_rate": 3.5416650143768994e-06, |
| "loss": 1.1870311498641968, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.6544342507645258, |
| "grad_norm": 0.44921875, |
| "learning_rate": 3.5211937554734234e-06, |
| "loss": 1.199330449104309, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.6605504587155964, |
| "grad_norm": 0.609375, |
| "learning_rate": 3.5007731395077273e-06, |
| "loss": 1.247740387916565, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.447265625, |
| "learning_rate": 3.4804036890979207e-06, |
| "loss": 1.1606550216674805, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.672782874617737, |
| "grad_norm": 0.48046875, |
| "learning_rate": 3.460085925552653e-06, |
| "loss": 1.1833080053329468, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.6788990825688073, |
| "grad_norm": 0.8984375, |
| "learning_rate": 3.439820368857768e-06, |
| "loss": 1.199750304222107, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.6850152905198774, |
| "grad_norm": 0.69140625, |
| "learning_rate": 3.4196075376629976e-06, |
| "loss": 1.1525050401687622, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.691131498470948, |
| "grad_norm": 1.7578125, |
| "learning_rate": 3.3994479492686867e-06, |
| "loss": 1.251511812210083, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.6972477064220186, |
| "grad_norm": 1.1640625, |
| "learning_rate": 3.379342119612553e-06, |
| "loss": 1.2259825468063354, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.7033639143730888, |
| "grad_norm": 0.39453125, |
| "learning_rate": 3.3592905632564874e-06, |
| "loss": 1.2564154863357544, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.709480122324159, |
| "grad_norm": 0.75, |
| "learning_rate": 3.3392937933733804e-06, |
| "loss": 1.1841342449188232, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.7155963302752295, |
| "grad_norm": 0.76953125, |
| "learning_rate": 3.319352321733989e-06, |
| "loss": 1.194476842880249, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.7217125382262997, |
| "grad_norm": 0.59765625, |
| "learning_rate": 3.2994666586938473e-06, |
| "loss": 1.254859209060669, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.7278287461773703, |
| "grad_norm": 0.58203125, |
| "learning_rate": 3.2796373131801873e-06, |
| "loss": 1.255743384361267, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.7339449541284404, |
| "grad_norm": 0.86328125, |
| "learning_rate": 3.259864792678933e-06, |
| "loss": 1.2186676263809204, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.7400611620795106, |
| "grad_norm": 0.6015625, |
| "learning_rate": 3.2401496032217017e-06, |
| "loss": 1.3046661615371704, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.746177370030581, |
| "grad_norm": 0.50390625, |
| "learning_rate": 3.2204922493728576e-06, |
| "loss": 1.3042587041854858, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.7522935779816513, |
| "grad_norm": 0.5234375, |
| "learning_rate": 3.200893234216596e-06, |
| "loss": 1.179953694343567, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.758409785932722, |
| "grad_norm": 0.515625, |
| "learning_rate": 3.1813530593440693e-06, |
| "loss": 1.2110344171524048, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.764525993883792, |
| "grad_norm": 0.8984375, |
| "learning_rate": 3.1618722248405504e-06, |
| "loss": 1.155335783958435, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.770642201834862, |
| "grad_norm": 0.578125, |
| "learning_rate": 3.1424512292726315e-06, |
| "loss": 1.1856063604354858, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.776758409785933, |
| "grad_norm": 1.1796875, |
| "learning_rate": 3.123090569675472e-06, |
| "loss": 1.142336368560791, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.782874617737003, |
| "grad_norm": 0.93359375, |
| "learning_rate": 3.1037907415400674e-06, |
| "loss": 1.143799066543579, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.7889908256880735, |
| "grad_norm": 1.0, |
| "learning_rate": 3.0845522388005756e-06, |
| "loss": 1.2811185121536255, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.7951070336391437, |
| "grad_norm": 0.81640625, |
| "learning_rate": 3.0653755538216724e-06, |
| "loss": 1.099307894706726, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.801223241590214, |
| "grad_norm": 0.58984375, |
| "learning_rate": 3.046261177385954e-06, |
| "loss": 1.1932672262191772, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.8073394495412844, |
| "grad_norm": 0.5859375, |
| "learning_rate": 3.027209598681373e-06, |
| "loss": 1.1614950895309448, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.8134556574923546, |
| "grad_norm": 1.625, |
| "learning_rate": 3.008221305288722e-06, |
| "loss": 1.3066401481628418, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.819571865443425, |
| "grad_norm": 0.71484375, |
| "learning_rate": 2.9892967831691506e-06, |
| "loss": 1.261734962463379, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.8256880733944953, |
| "grad_norm": 0.87109375, |
| "learning_rate": 2.9704365166517337e-06, |
| "loss": 1.2576831579208374, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.8318042813455655, |
| "grad_norm": 0.578125, |
| "learning_rate": 2.9516409884210726e-06, |
| "loss": 1.1941940784454346, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.837920489296636, |
| "grad_norm": 0.78515625, |
| "learning_rate": 2.9329106795049445e-06, |
| "loss": 1.2333204746246338, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.8440366972477067, |
| "grad_norm": 0.5234375, |
| "learning_rate": 2.914246069261988e-06, |
| "loss": 1.2176916599273682, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.850152905198777, |
| "grad_norm": 0.75390625, |
| "learning_rate": 2.8956476353694368e-06, |
| "loss": 1.2780966758728027, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.856269113149847, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.877115853810898e-06, |
| "loss": 1.2115226984024048, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.8623853211009176, |
| "grad_norm": 0.65625, |
| "learning_rate": 2.8586511988641634e-06, |
| "loss": 1.189244031906128, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.8685015290519877, |
| "grad_norm": 0.56640625, |
| "learning_rate": 2.8402541430890794e-06, |
| "loss": 1.2004551887512207, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.8746177370030583, |
| "grad_norm": 0.84375, |
| "learning_rate": 2.821925157315447e-06, |
| "loss": 1.228663682937622, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.8807339449541285, |
| "grad_norm": 0.5703125, |
| "learning_rate": 2.8036647106309744e-06, |
| "loss": 1.2045689821243286, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.8868501529051986, |
| "grad_norm": 0.5859375, |
| "learning_rate": 2.78547327036927e-06, |
| "loss": 1.2150650024414062, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.8929663608562692, |
| "grad_norm": 0.65625, |
| "learning_rate": 2.767351302097887e-06, |
| "loss": 1.199387788772583, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.8990825688073394, |
| "grad_norm": 0.62109375, |
| "learning_rate": 2.7492992696064013e-06, |
| "loss": 1.2142434120178223, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.90519877675841, |
| "grad_norm": 0.76953125, |
| "learning_rate": 2.731317634894548e-06, |
| "loss": 1.2693067789077759, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.91131498470948, |
| "grad_norm": 1.4921875, |
| "learning_rate": 2.7134068581603936e-06, |
| "loss": 1.2424131631851196, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.9174311926605503, |
| "grad_norm": 0.625, |
| "learning_rate": 2.6955673977885566e-06, |
| "loss": 1.2381134033203125, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.923547400611621, |
| "grad_norm": 0.8046875, |
| "learning_rate": 2.677799710338486e-06, |
| "loss": 1.2375258207321167, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.929663608562691, |
| "grad_norm": 1.125, |
| "learning_rate": 2.660104250532764e-06, |
| "loss": 1.129172921180725, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.9357798165137616, |
| "grad_norm": 0.5, |
| "learning_rate": 2.6424814712454773e-06, |
| "loss": 1.1203192472457886, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.941896024464832, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.624931823490625e-06, |
| "loss": 1.2383675575256348, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.948012232415902, |
| "grad_norm": 0.6171875, |
| "learning_rate": 2.607455756410573e-06, |
| "loss": 1.1556285619735718, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.9541284403669725, |
| "grad_norm": 0.90625, |
| "learning_rate": 2.5900537172645624e-06, |
| "loss": 1.211835503578186, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.9602446483180427, |
| "grad_norm": 0.8046875, |
| "learning_rate": 2.5727261514172586e-06, |
| "loss": 1.1909599304199219, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.9663608562691133, |
| "grad_norm": 0.7265625, |
| "learning_rate": 2.55547350232736e-06, |
| "loss": 1.2073407173156738, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.9724770642201834, |
| "grad_norm": 0.498046875, |
| "learning_rate": 2.5382962115362454e-06, |
| "loss": 1.202832818031311, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.9785932721712536, |
| "grad_norm": 0.62109375, |
| "learning_rate": 2.521194718656669e-06, |
| "loss": 1.2078254222869873, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.984709480122324, |
| "grad_norm": 0.498046875, |
| "learning_rate": 2.504169461361518e-06, |
| "loss": 1.1780730485916138, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.9908256880733948, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.487220875372606e-06, |
| "loss": 1.1677711009979248, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.996941896024465, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.470349394449524e-06, |
| "loss": 1.2224700450897217, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.003058103975535, |
| "grad_norm": 0.5859375, |
| "learning_rate": 2.453555450378535e-06, |
| "loss": 1.2254760265350342, |
| "step": 982 |
| }, |
| { |
| "epoch": 3.0091743119266057, |
| "grad_norm": 1.359375, |
| "learning_rate": 2.436839472961534e-06, |
| "loss": 1.235056757926941, |
| "step": 984 |
| }, |
| { |
| "epoch": 3.015290519877676, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.4202018900050327e-06, |
| "loss": 1.2022202014923096, |
| "step": 986 |
| }, |
| { |
| "epoch": 3.021406727828746, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.4036431273092238e-06, |
| "loss": 1.2913790941238403, |
| "step": 988 |
| }, |
| { |
| "epoch": 3.0275229357798166, |
| "grad_norm": 0.6640625, |
| "learning_rate": 2.387163608657078e-06, |
| "loss": 1.2257859706878662, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.0336391437308867, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.3707637558034994e-06, |
| "loss": 1.173649787902832, |
| "step": 992 |
| }, |
| { |
| "epoch": 3.0397553516819573, |
| "grad_norm": 0.89453125, |
| "learning_rate": 2.3544439884645317e-06, |
| "loss": 1.2261406183242798, |
| "step": 994 |
| }, |
| { |
| "epoch": 3.0458715596330275, |
| "grad_norm": 1.421875, |
| "learning_rate": 2.3382047243066163e-06, |
| "loss": 1.132150650024414, |
| "step": 996 |
| }, |
| { |
| "epoch": 3.051987767584098, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.3220463789359014e-06, |
| "loss": 1.1366033554077148, |
| "step": 998 |
| }, |
| { |
| "epoch": 3.058103975535168, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.30596936588761e-06, |
| "loss": 1.1769903898239136, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.0642201834862384, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.2899740966154526e-06, |
| "loss": 1.2203010320663452, |
| "step": 1002 |
| }, |
| { |
| "epoch": 3.070336391437309, |
| "grad_norm": 0.578125, |
| "learning_rate": 2.274060980481098e-06, |
| "loss": 1.077088475227356, |
| "step": 1004 |
| }, |
| { |
| "epoch": 3.076452599388379, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.2582304247436963e-06, |
| "loss": 1.177517056465149, |
| "step": 1006 |
| }, |
| { |
| "epoch": 3.0825688073394497, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.2424828345494575e-06, |
| "loss": 1.0615501403808594, |
| "step": 1008 |
| }, |
| { |
| "epoch": 3.08868501529052, |
| "grad_norm": 0.51953125, |
| "learning_rate": 2.226818612921281e-06, |
| "loss": 1.257022738456726, |
| "step": 1010 |
| }, |
| { |
| "epoch": 3.09480122324159, |
| "grad_norm": 0.57421875, |
| "learning_rate": 2.2112381607484417e-06, |
| "loss": 1.3333863019943237, |
| "step": 1012 |
| }, |
| { |
| "epoch": 3.1009174311926606, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.195741876776331e-06, |
| "loss": 1.116982102394104, |
| "step": 1014 |
| }, |
| { |
| "epoch": 3.1070336391437308, |
| "grad_norm": 0.484375, |
| "learning_rate": 2.180330157596251e-06, |
| "loss": 1.1025663614273071, |
| "step": 1016 |
| }, |
| { |
| "epoch": 3.1131498470948014, |
| "grad_norm": 0.84375, |
| "learning_rate": 2.1650033976352645e-06, |
| "loss": 1.1931098699569702, |
| "step": 1018 |
| }, |
| { |
| "epoch": 3.1192660550458715, |
| "grad_norm": 0.58984375, |
| "learning_rate": 2.1497619891461016e-06, |
| "loss": 1.2750816345214844, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.1253822629969417, |
| "grad_norm": 0.578125, |
| "learning_rate": 2.134606322197119e-06, |
| "loss": 1.200748324394226, |
| "step": 1022 |
| }, |
| { |
| "epoch": 3.1314984709480123, |
| "grad_norm": 0.9375, |
| "learning_rate": 2.119536784662321e-06, |
| "loss": 1.1820026636123657, |
| "step": 1024 |
| }, |
| { |
| "epoch": 3.1376146788990824, |
| "grad_norm": 0.50390625, |
| "learning_rate": 2.1045537622114265e-06, |
| "loss": 1.072840929031372, |
| "step": 1026 |
| }, |
| { |
| "epoch": 3.143730886850153, |
| "grad_norm": 0.7265625, |
| "learning_rate": 2.089657638300005e-06, |
| "loss": 1.1731314659118652, |
| "step": 1028 |
| }, |
| { |
| "epoch": 3.149847094801223, |
| "grad_norm": 0.55859375, |
| "learning_rate": 2.0748487941596596e-06, |
| "loss": 1.1329575777053833, |
| "step": 1030 |
| }, |
| { |
| "epoch": 3.1559633027522938, |
| "grad_norm": 0.6328125, |
| "learning_rate": 2.06012760878827e-06, |
| "loss": 1.2068923711776733, |
| "step": 1032 |
| }, |
| { |
| "epoch": 3.162079510703364, |
| "grad_norm": 0.5703125, |
| "learning_rate": 2.045494458940295e-06, |
| "loss": 1.1488394737243652, |
| "step": 1034 |
| }, |
| { |
| "epoch": 3.168195718654434, |
| "grad_norm": 0.65625, |
| "learning_rate": 2.0309497191171285e-06, |
| "loss": 1.1287355422973633, |
| "step": 1036 |
| }, |
| { |
| "epoch": 3.1743119266055047, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.0164937615575148e-06, |
| "loss": 1.182981014251709, |
| "step": 1038 |
| }, |
| { |
| "epoch": 3.180428134556575, |
| "grad_norm": 0.66015625, |
| "learning_rate": 2.002126956228026e-06, |
| "loss": 1.159349799156189, |
| "step": 1040 |
| }, |
| { |
| "epoch": 3.1865443425076454, |
| "grad_norm": 0.89453125, |
| "learning_rate": 1.9878496708135885e-06, |
| "loss": 1.1993876695632935, |
| "step": 1042 |
| }, |
| { |
| "epoch": 3.1926605504587156, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.973662270708074e-06, |
| "loss": 1.1298656463623047, |
| "step": 1044 |
| }, |
| { |
| "epoch": 3.198776758409786, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.959565119004951e-06, |
| "loss": 1.1985409259796143, |
| "step": 1046 |
| }, |
| { |
| "epoch": 3.2048929663608563, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.9455585764879877e-06, |
| "loss": 1.1955678462982178, |
| "step": 1048 |
| }, |
| { |
| "epoch": 3.2110091743119265, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.9316430016220223e-06, |
| "loss": 1.1202224493026733, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.217125382262997, |
| "grad_norm": 0.72265625, |
| "learning_rate": 1.91781875054379e-06, |
| "loss": 1.157238245010376, |
| "step": 1052 |
| }, |
| { |
| "epoch": 3.223241590214067, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.9040861770528047e-06, |
| "loss": 1.1316120624542236, |
| "step": 1054 |
| }, |
| { |
| "epoch": 3.229357798165138, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.890445632602303e-06, |
| "loss": 1.1989833116531372, |
| "step": 1056 |
| }, |
| { |
| "epoch": 3.235474006116208, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.876897466290259e-06, |
| "loss": 1.222222089767456, |
| "step": 1058 |
| }, |
| { |
| "epoch": 3.241590214067278, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.8634420248504382e-06, |
| "loss": 1.2111024856567383, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.2477064220183487, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.8500796526435305e-06, |
| "loss": 1.172393560409546, |
| "step": 1062 |
| }, |
| { |
| "epoch": 3.253822629969419, |
| "grad_norm": 2.328125, |
| "learning_rate": 1.8368106916483358e-06, |
| "loss": 1.1863235235214233, |
| "step": 1064 |
| }, |
| { |
| "epoch": 3.2599388379204894, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.8236354814530113e-06, |
| "loss": 1.29865562915802, |
| "step": 1066 |
| }, |
| { |
| "epoch": 3.2660550458715596, |
| "grad_norm": 0.5, |
| "learning_rate": 1.8105543592463803e-06, |
| "loss": 1.261027455329895, |
| "step": 1068 |
| }, |
| { |
| "epoch": 3.2721712538226297, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.7975676598093042e-06, |
| "loss": 1.2421050071716309, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.2782874617737003, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.784675715506113e-06, |
| "loss": 1.274834394454956, |
| "step": 1072 |
| }, |
| { |
| "epoch": 3.2844036697247705, |
| "grad_norm": 0.87890625, |
| "learning_rate": 1.7718788562760992e-06, |
| "loss": 1.2069604396820068, |
| "step": 1074 |
| }, |
| { |
| "epoch": 3.290519877675841, |
| "grad_norm": 0.63671875, |
| "learning_rate": 1.7591774096250736e-06, |
| "loss": 1.1289021968841553, |
| "step": 1076 |
| }, |
| { |
| "epoch": 3.2966360856269112, |
| "grad_norm": 0.79296875, |
| "learning_rate": 1.7465717006169887e-06, |
| "loss": 1.2350070476531982, |
| "step": 1078 |
| }, |
| { |
| "epoch": 3.302752293577982, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.734062051865609e-06, |
| "loss": 1.1916759014129639, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.308868501529052, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.7216487835262635e-06, |
| "loss": 1.1767183542251587, |
| "step": 1082 |
| }, |
| { |
| "epoch": 3.314984709480122, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.7093322132876485e-06, |
| "loss": 1.1724700927734375, |
| "step": 1084 |
| }, |
| { |
| "epoch": 3.3211009174311927, |
| "grad_norm": 0.494140625, |
| "learning_rate": 1.6971126563636977e-06, |
| "loss": 1.1266517639160156, |
| "step": 1086 |
| }, |
| { |
| "epoch": 3.327217125382263, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.6849904254855151e-06, |
| "loss": 1.211061716079712, |
| "step": 1088 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 1.609375, |
| "learning_rate": 1.6729658308933706e-06, |
| "loss": 1.2213722467422485, |
| "step": 1090 |
| }, |
| { |
| "epoch": 3.3394495412844036, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.6610391803287611e-06, |
| "loss": 1.1516450643539429, |
| "step": 1092 |
| }, |
| { |
| "epoch": 3.3455657492354742, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.6492107790265338e-06, |
| "loss": 1.1679214239120483, |
| "step": 1094 |
| }, |
| { |
| "epoch": 3.3516819571865444, |
| "grad_norm": 0.796875, |
| "learning_rate": 1.6374809297070766e-06, |
| "loss": 1.2308049201965332, |
| "step": 1096 |
| }, |
| { |
| "epoch": 3.3577981651376145, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.6258499325685673e-06, |
| "loss": 1.181188941001892, |
| "step": 1098 |
| }, |
| { |
| "epoch": 3.363914373088685, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.6143180852792911e-06, |
| "loss": 1.2255089282989502, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.3700305810397553, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.602885682970026e-06, |
| "loss": 1.2569115161895752, |
| "step": 1102 |
| }, |
| { |
| "epoch": 3.376146788990826, |
| "grad_norm": 0.92578125, |
| "learning_rate": 1.5915530182264868e-06, |
| "loss": 1.2300969362258911, |
| "step": 1104 |
| }, |
| { |
| "epoch": 3.382262996941896, |
| "grad_norm": 2.84375, |
| "learning_rate": 1.5803203810818366e-06, |
| "loss": 1.2431167364120483, |
| "step": 1106 |
| }, |
| { |
| "epoch": 3.388379204892966, |
| "grad_norm": 0.7890625, |
| "learning_rate": 1.5691880590092671e-06, |
| "loss": 1.2489876747131348, |
| "step": 1108 |
| }, |
| { |
| "epoch": 3.3944954128440368, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.558156336914634e-06, |
| "loss": 1.1550531387329102, |
| "step": 1110 |
| }, |
| { |
| "epoch": 3.400611620795107, |
| "grad_norm": 0.62890625, |
| "learning_rate": 1.547225497129179e-06, |
| "loss": 1.1249154806137085, |
| "step": 1112 |
| }, |
| { |
| "epoch": 3.4067278287461775, |
| "grad_norm": 1.84375, |
| "learning_rate": 1.5363958194022896e-06, |
| "loss": 1.1979522705078125, |
| "step": 1114 |
| }, |
| { |
| "epoch": 3.4128440366972477, |
| "grad_norm": 0.89453125, |
| "learning_rate": 1.5256675808943488e-06, |
| "loss": 1.1001931428909302, |
| "step": 1116 |
| }, |
| { |
| "epoch": 3.418960244648318, |
| "grad_norm": 0.6484375, |
| "learning_rate": 1.5150410561696382e-06, |
| "loss": 1.1855971813201904, |
| "step": 1118 |
| }, |
| { |
| "epoch": 3.4250764525993884, |
| "grad_norm": 0.69140625, |
| "learning_rate": 1.5045165171893117e-06, |
| "loss": 1.197637677192688, |
| "step": 1120 |
| }, |
| { |
| "epoch": 3.4311926605504586, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.4940942333044367e-06, |
| "loss": 1.1402236223220825, |
| "step": 1122 |
| }, |
| { |
| "epoch": 3.437308868501529, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.4837744712490983e-06, |
| "loss": 1.1059956550598145, |
| "step": 1124 |
| }, |
| { |
| "epoch": 3.4434250764525993, |
| "grad_norm": 0.7734375, |
| "learning_rate": 1.4735574951335752e-06, |
| "loss": 1.1585502624511719, |
| "step": 1126 |
| }, |
| { |
| "epoch": 3.44954128440367, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.4634435664375784e-06, |
| "loss": 1.2298681735992432, |
| "step": 1128 |
| }, |
| { |
| "epoch": 3.45565749235474, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.4534329440035599e-06, |
| "loss": 1.1276212930679321, |
| "step": 1130 |
| }, |
| { |
| "epoch": 3.46177370030581, |
| "grad_norm": 0.7421875, |
| "learning_rate": 1.4435258840300897e-06, |
| "loss": 1.1073015928268433, |
| "step": 1132 |
| }, |
| { |
| "epoch": 3.467889908256881, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.4337226400652977e-06, |
| "loss": 1.1824053525924683, |
| "step": 1134 |
| }, |
| { |
| "epoch": 3.474006116207951, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.424023463000384e-06, |
| "loss": 1.2478643655776978, |
| "step": 1136 |
| }, |
| { |
| "epoch": 3.4801223241590216, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.4144286010631993e-06, |
| "loss": 1.2114766836166382, |
| "step": 1138 |
| }, |
| { |
| "epoch": 3.4862385321100917, |
| "grad_norm": 0.57421875, |
| "learning_rate": 1.4049382998118919e-06, |
| "loss": 1.2164137363433838, |
| "step": 1140 |
| }, |
| { |
| "epoch": 3.4923547400611623, |
| "grad_norm": 0.8046875, |
| "learning_rate": 1.3955528021286208e-06, |
| "loss": 1.115936517715454, |
| "step": 1142 |
| }, |
| { |
| "epoch": 3.4984709480122325, |
| "grad_norm": 0.7421875, |
| "learning_rate": 1.3862723482133437e-06, |
| "loss": 1.1582000255584717, |
| "step": 1144 |
| }, |
| { |
| "epoch": 3.5045871559633026, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.3770971755776667e-06, |
| "loss": 1.1616395711898804, |
| "step": 1146 |
| }, |
| { |
| "epoch": 3.510703363914373, |
| "grad_norm": 0.73828125, |
| "learning_rate": 1.3680275190387677e-06, |
| "loss": 1.20869779586792, |
| "step": 1148 |
| }, |
| { |
| "epoch": 3.5168195718654434, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.3590636107133849e-06, |
| "loss": 1.2474617958068848, |
| "step": 1150 |
| }, |
| { |
| "epoch": 3.522935779816514, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.3502056800118784e-06, |
| "loss": 1.2327600717544556, |
| "step": 1152 |
| }, |
| { |
| "epoch": 3.529051987767584, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.3414539536323568e-06, |
| "loss": 1.1355574131011963, |
| "step": 1154 |
| }, |
| { |
| "epoch": 3.5351681957186543, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.3328086555548764e-06, |
| "loss": 1.1376428604125977, |
| "step": 1156 |
| }, |
| { |
| "epoch": 3.541284403669725, |
| "grad_norm": 0.48828125, |
| "learning_rate": 1.3242700070357098e-06, |
| "loss": 1.128600001335144, |
| "step": 1158 |
| }, |
| { |
| "epoch": 3.547400611620795, |
| "grad_norm": 1.65625, |
| "learning_rate": 1.3158382266016803e-06, |
| "loss": 1.2273775339126587, |
| "step": 1160 |
| }, |
| { |
| "epoch": 3.5535168195718656, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.3075135300445746e-06, |
| "loss": 1.1972393989562988, |
| "step": 1162 |
| }, |
| { |
| "epoch": 3.5596330275229358, |
| "grad_norm": 0.62890625, |
| "learning_rate": 1.2992961304156146e-06, |
| "loss": 1.2698583602905273, |
| "step": 1164 |
| }, |
| { |
| "epoch": 3.565749235474006, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.2911862380200076e-06, |
| "loss": 1.215325117111206, |
| "step": 1166 |
| }, |
| { |
| "epoch": 3.5718654434250765, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.2831840604115647e-06, |
| "loss": 1.1836117506027222, |
| "step": 1168 |
| }, |
| { |
| "epoch": 3.5779816513761467, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.2752898023873873e-06, |
| "loss": 1.1673725843429565, |
| "step": 1170 |
| }, |
| { |
| "epoch": 3.5840978593272173, |
| "grad_norm": 0.6171875, |
| "learning_rate": 1.2675036659826251e-06, |
| "loss": 1.1013611555099487, |
| "step": 1172 |
| }, |
| { |
| "epoch": 3.5902140672782874, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.2598258504653082e-06, |
| "loss": 1.2070239782333374, |
| "step": 1174 |
| }, |
| { |
| "epoch": 3.5963302752293576, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.2522565523312456e-06, |
| "loss": 1.1760621070861816, |
| "step": 1176 |
| }, |
| { |
| "epoch": 3.602446483180428, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.2447959652989963e-06, |
| "loss": 1.2546082735061646, |
| "step": 1178 |
| }, |
| { |
| "epoch": 3.6085626911314987, |
| "grad_norm": 1.40625, |
| "learning_rate": 1.2374442803049125e-06, |
| "loss": 1.11211359500885, |
| "step": 1180 |
| }, |
| { |
| "epoch": 3.614678899082569, |
| "grad_norm": 0.67578125, |
| "learning_rate": 1.2302016854982504e-06, |
| "loss": 1.1653016805648804, |
| "step": 1182 |
| }, |
| { |
| "epoch": 3.620795107033639, |
| "grad_norm": 0.76953125, |
| "learning_rate": 1.2230683662363599e-06, |
| "loss": 1.0931107997894287, |
| "step": 1184 |
| }, |
| { |
| "epoch": 3.6269113149847096, |
| "grad_norm": 0.53125, |
| "learning_rate": 1.2160445050799346e-06, |
| "loss": 1.1593706607818604, |
| "step": 1186 |
| }, |
| { |
| "epoch": 3.63302752293578, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.2091302817883444e-06, |
| "loss": 1.2466744184494019, |
| "step": 1188 |
| }, |
| { |
| "epoch": 3.6391437308868504, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.2023258733150345e-06, |
| "loss": 1.1520183086395264, |
| "step": 1190 |
| }, |
| { |
| "epoch": 3.6452599388379205, |
| "grad_norm": 0.6875, |
| "learning_rate": 1.195631453802994e-06, |
| "loss": 1.1501617431640625, |
| "step": 1192 |
| }, |
| { |
| "epoch": 3.6513761467889907, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.1890471945803e-06, |
| "loss": 1.1947115659713745, |
| "step": 1194 |
| }, |
| { |
| "epoch": 3.6574923547400613, |
| "grad_norm": 0.82421875, |
| "learning_rate": 1.1825732641557358e-06, |
| "loss": 1.090171217918396, |
| "step": 1196 |
| }, |
| { |
| "epoch": 3.6636085626911314, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.1762098282144735e-06, |
| "loss": 1.231759786605835, |
| "step": 1198 |
| }, |
| { |
| "epoch": 3.669724770642202, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.169957049613839e-06, |
| "loss": 1.2382960319519043, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.675840978593272, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.1638150883791386e-06, |
| "loss": 1.1713348627090454, |
| "step": 1202 |
| }, |
| { |
| "epoch": 3.6819571865443423, |
| "grad_norm": 0.625, |
| "learning_rate": 1.157784101699567e-06, |
| "loss": 1.1755608320236206, |
| "step": 1204 |
| }, |
| { |
| "epoch": 3.688073394495413, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.1518642439241849e-06, |
| "loss": 1.2025344371795654, |
| "step": 1206 |
| }, |
| { |
| "epoch": 3.694189602446483, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.146055666557966e-06, |
| "loss": 1.2071685791015625, |
| "step": 1208 |
| }, |
| { |
| "epoch": 3.7003058103975537, |
| "grad_norm": 0.703125, |
| "learning_rate": 1.140358518257922e-06, |
| "loss": 1.1952728033065796, |
| "step": 1210 |
| }, |
| { |
| "epoch": 3.706422018348624, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.1347729448292953e-06, |
| "loss": 1.21987783908844, |
| "step": 1212 |
| }, |
| { |
| "epoch": 3.712538226299694, |
| "grad_norm": 0.7421875, |
| "learning_rate": 1.129299089221832e-06, |
| "loss": 1.2178161144256592, |
| "step": 1214 |
| }, |
| { |
| "epoch": 3.7186544342507646, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.1239370915261196e-06, |
| "loss": 1.1406751871109009, |
| "step": 1216 |
| }, |
| { |
| "epoch": 3.7247706422018347, |
| "grad_norm": 1.6484375, |
| "learning_rate": 1.1186870889700013e-06, |
| "loss": 1.1654596328735352, |
| "step": 1218 |
| }, |
| { |
| "epoch": 3.7308868501529053, |
| "grad_norm": 2.328125, |
| "learning_rate": 1.1135492159150676e-06, |
| "loss": 1.2073957920074463, |
| "step": 1220 |
| }, |
| { |
| "epoch": 3.7370030581039755, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.108523603853215e-06, |
| "loss": 1.1250100135803223, |
| "step": 1222 |
| }, |
| { |
| "epoch": 3.7431192660550456, |
| "grad_norm": 0.60546875, |
| "learning_rate": 1.1036103814032804e-06, |
| "loss": 1.2246984243392944, |
| "step": 1224 |
| }, |
| { |
| "epoch": 3.7492354740061162, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.0988096743077513e-06, |
| "loss": 1.1390925645828247, |
| "step": 1226 |
| }, |
| { |
| "epoch": 3.7553516819571864, |
| "grad_norm": 0.58203125, |
| "learning_rate": 1.094121605429547e-06, |
| "loss": 1.1992175579071045, |
| "step": 1228 |
| }, |
| { |
| "epoch": 3.761467889908257, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.089546294748873e-06, |
| "loss": 1.0999352931976318, |
| "step": 1230 |
| }, |
| { |
| "epoch": 3.767584097859327, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.085083859360151e-06, |
| "loss": 1.1122483015060425, |
| "step": 1232 |
| }, |
| { |
| "epoch": 3.7737003058103973, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.0807344134690236e-06, |
| "loss": 1.1888892650604248, |
| "step": 1234 |
| }, |
| { |
| "epoch": 3.779816513761468, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.0764980683894297e-06, |
| "loss": 1.1580041646957397, |
| "step": 1236 |
| }, |
| { |
| "epoch": 3.7859327217125385, |
| "grad_norm": 0.76171875, |
| "learning_rate": 1.0723749325407564e-06, |
| "loss": 1.176745891571045, |
| "step": 1238 |
| }, |
| { |
| "epoch": 3.7920489296636086, |
| "grad_norm": 0.65234375, |
| "learning_rate": 1.0683651114450641e-06, |
| "loss": 1.1710706949234009, |
| "step": 1240 |
| }, |
| { |
| "epoch": 3.7981651376146788, |
| "grad_norm": 2.578125, |
| "learning_rate": 1.0644687077243864e-06, |
| "loss": 1.1870887279510498, |
| "step": 1242 |
| }, |
| { |
| "epoch": 3.8042813455657494, |
| "grad_norm": 0.76953125, |
| "learning_rate": 1.0606858210981025e-06, |
| "loss": 1.169495940208435, |
| "step": 1244 |
| }, |
| { |
| "epoch": 3.8103975535168195, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.0570165483803867e-06, |
| "loss": 1.1190178394317627, |
| "step": 1246 |
| }, |
| { |
| "epoch": 3.81651376146789, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.05346098347773e-06, |
| "loss": 1.1436067819595337, |
| "step": 1248 |
| }, |
| { |
| "epoch": 3.8226299694189603, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1.050019217386535e-06, |
| "loss": 1.2288410663604736, |
| "step": 1250 |
| }, |
| { |
| "epoch": 3.8287461773700304, |
| "grad_norm": 1.25, |
| "learning_rate": 1.0466913381907914e-06, |
| "loss": 1.2218413352966309, |
| "step": 1252 |
| }, |
| { |
| "epoch": 3.834862385321101, |
| "grad_norm": 0.53515625, |
| "learning_rate": 1.0434774310598166e-06, |
| "loss": 1.208377480506897, |
| "step": 1254 |
| }, |
| { |
| "epoch": 3.840978593272171, |
| "grad_norm": 0.5859375, |
| "learning_rate": 1.04037757824608e-06, |
| "loss": 1.1784342527389526, |
| "step": 1256 |
| }, |
| { |
| "epoch": 3.8470948012232418, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.0373918590830952e-06, |
| "loss": 1.2136183977127075, |
| "step": 1258 |
| }, |
| { |
| "epoch": 3.853211009174312, |
| "grad_norm": 0.5390625, |
| "learning_rate": 1.0345203499833913e-06, |
| "loss": 1.2747994661331177, |
| "step": 1260 |
| }, |
| { |
| "epoch": 3.859327217125382, |
| "grad_norm": 0.73828125, |
| "learning_rate": 1.0317631244365575e-06, |
| "loss": 1.1638200283050537, |
| "step": 1262 |
| }, |
| { |
| "epoch": 3.8654434250764527, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.0291202530073602e-06, |
| "loss": 1.2454450130462646, |
| "step": 1264 |
| }, |
| { |
| "epoch": 3.871559633027523, |
| "grad_norm": 0.64453125, |
| "learning_rate": 1.0265918033339392e-06, |
| "loss": 1.1502002477645874, |
| "step": 1266 |
| }, |
| { |
| "epoch": 3.8776758409785934, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.0241778401260764e-06, |
| "loss": 1.1322892904281616, |
| "step": 1268 |
| }, |
| { |
| "epoch": 3.8837920489296636, |
| "grad_norm": 0.61328125, |
| "learning_rate": 1.0218784251635382e-06, |
| "loss": 1.1245934963226318, |
| "step": 1270 |
| }, |
| { |
| "epoch": 3.8899082568807337, |
| "grad_norm": 0.82421875, |
| "learning_rate": 1.0196936172944962e-06, |
| "loss": 1.2275093793869019, |
| "step": 1272 |
| }, |
| { |
| "epoch": 3.8960244648318043, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.0176234724340201e-06, |
| "loss": 1.2591514587402344, |
| "step": 1274 |
| }, |
| { |
| "epoch": 3.9021406727828745, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.0156680435626468e-06, |
| "loss": 1.1828017234802246, |
| "step": 1276 |
| }, |
| { |
| "epoch": 3.908256880733945, |
| "grad_norm": 0.6875, |
| "learning_rate": 1.0138273807250244e-06, |
| "loss": 1.1989636421203613, |
| "step": 1278 |
| }, |
| { |
| "epoch": 3.914373088685015, |
| "grad_norm": 0.5234375, |
| "learning_rate": 1.0121015310286318e-06, |
| "loss": 1.1318210363388062, |
| "step": 1280 |
| }, |
| { |
| "epoch": 3.9204892966360854, |
| "grad_norm": 0.51953125, |
| "learning_rate": 1.0104905386425735e-06, |
| "loss": 1.1387715339660645, |
| "step": 1282 |
| }, |
| { |
| "epoch": 3.926605504587156, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.0089944447964479e-06, |
| "loss": 1.0994793176651, |
| "step": 1284 |
| }, |
| { |
| "epoch": 3.9327217125382266, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.0076132877792933e-06, |
| "loss": 1.2001361846923828, |
| "step": 1286 |
| }, |
| { |
| "epoch": 3.9388379204892967, |
| "grad_norm": 2.203125, |
| "learning_rate": 1.0063471029386065e-06, |
| "loss": 1.1622974872589111, |
| "step": 1288 |
| }, |
| { |
| "epoch": 3.944954128440367, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.0051959226794407e-06, |
| "loss": 1.170785903930664, |
| "step": 1290 |
| }, |
| { |
| "epoch": 3.9510703363914375, |
| "grad_norm": 0.546875, |
| "learning_rate": 1.004159776463573e-06, |
| "loss": 1.1379996538162231, |
| "step": 1292 |
| }, |
| { |
| "epoch": 3.9571865443425076, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.003238690808754e-06, |
| "loss": 1.2565704584121704, |
| "step": 1294 |
| }, |
| { |
| "epoch": 3.963302752293578, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.0024326892880253e-06, |
| "loss": 1.1255217790603638, |
| "step": 1296 |
| }, |
| { |
| "epoch": 3.9694189602446484, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.0017417925291187e-06, |
| "loss": 1.149346113204956, |
| "step": 1298 |
| }, |
| { |
| "epoch": 3.9755351681957185, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.001166018213929e-06, |
| "loss": 1.1946812868118286, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.981651376146789, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.0007053810780578e-06, |
| "loss": 1.1319454908370972, |
| "step": 1302 |
| }, |
| { |
| "epoch": 3.9877675840978593, |
| "grad_norm": 0.8203125, |
| "learning_rate": 1.0003598929104407e-06, |
| "loss": 1.1686453819274902, |
| "step": 1304 |
| }, |
| { |
| "epoch": 3.99388379204893, |
| "grad_norm": 0.52734375, |
| "learning_rate": 1.0001295625530423e-06, |
| "loss": 1.130082607269287, |
| "step": 1306 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.78515625, |
| "learning_rate": 1.0000143959006323e-06, |
| "loss": 1.2041049003601074, |
| "step": 1308 |
| }, |
| { |
| "epoch": 4.0, |
| "step": 1308, |
| "total_flos": 3.2734142949973033e+18, |
| "train_loss": 1.3248716153500641, |
| "train_runtime": 16065.8623, |
| "train_samples_per_second": 2.605, |
| "train_steps_per_second": 0.081 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1308, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.2734142949973033e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |