| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 9.67373142203852, |
| "global_step": 110000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 5.885815185403178e-07, |
| "loss": 0.6708, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.1771630370806356e-06, |
| "loss": 0.6457, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.7657445556209538e-06, |
| "loss": 0.6149, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 2.3543260741612712e-06, |
| "loss": 0.5516, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2.942907592701589e-06, |
| "loss": 0.5014, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 3.5314891112419075e-06, |
| "loss": 0.484, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.120070629782225e-06, |
| "loss": 0.4428, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.7086521483225425e-06, |
| "loss": 0.4323, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 5.297233666862861e-06, |
| "loss": 0.4234, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 5.885815185403178e-06, |
| "loss": 0.402, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 6.474396703943496e-06, |
| "loss": 0.4142, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 7.062978222483815e-06, |
| "loss": 0.39, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 7.651559741024132e-06, |
| "loss": 0.3842, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8.24014125956445e-06, |
| "loss": 0.3747, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.828722778104768e-06, |
| "loss": 0.37, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 9.417304296645085e-06, |
| "loss": 0.3688, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.0005885815185404e-05, |
| "loss": 0.3656, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.0594467333725723e-05, |
| "loss": 0.3488, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.1183048852266041e-05, |
| "loss": 0.3532, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.1771630370806357e-05, |
| "loss": 0.3553, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.2360211889346675e-05, |
| "loss": 0.3597, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.2948793407886992e-05, |
| "loss": 0.3427, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3537374926427311e-05, |
| "loss": 0.35, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.412595644496763e-05, |
| "loss": 0.342, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.4714537963507947e-05, |
| "loss": 0.3304, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.5303119482048264e-05, |
| "loss": 0.3326, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.589170100058858e-05, |
| "loss": 0.3273, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.64802825191289e-05, |
| "loss": 0.3226, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.706886403766922e-05, |
| "loss": 0.329, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.7657445556209536e-05, |
| "loss": 0.3096, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 1.8246027074749856e-05, |
| "loss": 0.3196, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.883460859329017e-05, |
| "loss": 0.3228, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 1.942319011183049e-05, |
| "loss": 0.3164, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.999925173503938e-05, |
| "loss": 0.2999, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 1.996183848700825e-05, |
| "loss": 0.3205, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.9924425238977122e-05, |
| "loss": 0.3053, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.9887011990945994e-05, |
| "loss": 0.2995, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 1.984959874291487e-05, |
| "loss": 0.313, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.981218549488374e-05, |
| "loss": 0.2996, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.977477224685261e-05, |
| "loss": 0.3289, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.9737358998821485e-05, |
| "loss": 0.3015, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.9699945750790357e-05, |
| "loss": 0.3029, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.966253250275923e-05, |
| "loss": 0.3075, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.96251192547281e-05, |
| "loss": 0.2972, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.9587706006696973e-05, |
| "loss": 0.2984, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.9550292758665845e-05, |
| "loss": 0.2894, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.9512879510634717e-05, |
| "loss": 0.2857, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.9475466262603592e-05, |
| "loss": 0.2812, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.943805301457246e-05, |
| "loss": 0.2872, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.9400639766541333e-05, |
| "loss": 0.289, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.9363226518510205e-05, |
| "loss": 0.2791, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.932581327047908e-05, |
| "loss": 0.2912, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.9288400022447952e-05, |
| "loss": 0.2872, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.9250986774416824e-05, |
| "loss": 0.2766, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.9213573526385692e-05, |
| "loss": 0.2819, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.9176160278354568e-05, |
| "loss": 0.2795, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.913874703032344e-05, |
| "loss": 0.2677, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 1.910133378229231e-05, |
| "loss": 0.2488, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 1.9063920534261183e-05, |
| "loss": 0.256, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 1.9026507286230055e-05, |
| "loss": 0.2489, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 1.8989094038198927e-05, |
| "loss": 0.2525, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 1.89516807901678e-05, |
| "loss": 0.2483, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 1.8914267542136674e-05, |
| "loss": 0.2528, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 1.8876854294105543e-05, |
| "loss": 0.2481, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 1.8839441046074415e-05, |
| "loss": 0.2517, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 1.880202779804329e-05, |
| "loss": 0.2514, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.8764614550012162e-05, |
| "loss": 0.2464, |
| "step": 13400 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 1.8727201301981034e-05, |
| "loss": 0.2586, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.8689788053949906e-05, |
| "loss": 0.2507, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.8652374805918778e-05, |
| "loss": 0.2609, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 1.861496155788765e-05, |
| "loss": 0.2368, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.857754830985652e-05, |
| "loss": 0.2473, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1.8540135061825394e-05, |
| "loss": 0.2379, |
| "step": 14600 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.8502721813794265e-05, |
| "loss": 0.2431, |
| "step": 14800 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1.8465308565763137e-05, |
| "loss": 0.2521, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 1.842789531773201e-05, |
| "loss": 0.2473, |
| "step": 15200 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.8390482069700885e-05, |
| "loss": 0.2496, |
| "step": 15400 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 1.8353068821669757e-05, |
| "loss": 0.2529, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.8315655573638625e-05, |
| "loss": 0.2484, |
| "step": 15800 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 1.8278242325607497e-05, |
| "loss": 0.2393, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 1.8240829077576372e-05, |
| "loss": 0.2394, |
| "step": 16200 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 1.8203415829545244e-05, |
| "loss": 0.2265, |
| "step": 16400 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 1.8166002581514116e-05, |
| "loss": 0.2435, |
| "step": 16600 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 1.8128589333482988e-05, |
| "loss": 0.2513, |
| "step": 16800 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 1.809117608545186e-05, |
| "loss": 0.2478, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1.8053762837420732e-05, |
| "loss": 0.2601, |
| "step": 17200 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1.8016349589389604e-05, |
| "loss": 0.2371, |
| "step": 17400 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 1.7978936341358476e-05, |
| "loss": 0.2504, |
| "step": 17600 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 1.7941523093327348e-05, |
| "loss": 0.2414, |
| "step": 17800 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.790410984529622e-05, |
| "loss": 0.2296, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 1.7866696597265095e-05, |
| "loss": 0.2413, |
| "step": 18200 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 1.7829283349233967e-05, |
| "loss": 0.2447, |
| "step": 18400 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 1.779187010120284e-05, |
| "loss": 0.2392, |
| "step": 18600 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 1.7754456853171707e-05, |
| "loss": 0.2515, |
| "step": 18800 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 1.7717043605140583e-05, |
| "loss": 0.2383, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 1.7679630357109454e-05, |
| "loss": 0.2522, |
| "step": 19200 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 1.7642217109078326e-05, |
| "loss": 0.244, |
| "step": 19400 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 1.7604803861047198e-05, |
| "loss": 0.2368, |
| "step": 19600 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 1.756739061301607e-05, |
| "loss": 0.2416, |
| "step": 19800 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.7529977364984942e-05, |
| "loss": 0.2428, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.7492564116953814e-05, |
| "loss": 0.2387, |
| "step": 20200 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.745515086892269e-05, |
| "loss": 0.2363, |
| "step": 20400 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.7417737620891558e-05, |
| "loss": 0.2433, |
| "step": 20600 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.738032437286043e-05, |
| "loss": 0.2395, |
| "step": 20800 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.7342911124829302e-05, |
| "loss": 0.237, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 1.7305497876798177e-05, |
| "loss": 0.2382, |
| "step": 21200 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.726808462876705e-05, |
| "loss": 0.2306, |
| "step": 21400 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.723067138073592e-05, |
| "loss": 0.2356, |
| "step": 21600 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.7193258132704793e-05, |
| "loss": 0.2473, |
| "step": 21800 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.7155844884673665e-05, |
| "loss": 0.2428, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.7118431636642537e-05, |
| "loss": 0.2302, |
| "step": 22200 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.708101838861141e-05, |
| "loss": 0.2235, |
| "step": 22400 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.704360514058028e-05, |
| "loss": 0.2271, |
| "step": 22600 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.7006191892549152e-05, |
| "loss": 0.2229, |
| "step": 22800 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.6968778644518024e-05, |
| "loss": 0.2, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.69313653964869e-05, |
| "loss": 0.1905, |
| "step": 23200 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.689395214845577e-05, |
| "loss": 0.1908, |
| "step": 23400 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.685653890042464e-05, |
| "loss": 0.2062, |
| "step": 23600 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.6819125652393512e-05, |
| "loss": 0.1926, |
| "step": 23800 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.6781712404362387e-05, |
| "loss": 0.1867, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.674429915633126e-05, |
| "loss": 0.1892, |
| "step": 24200 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.670688590830013e-05, |
| "loss": 0.1853, |
| "step": 24400 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.6669472660269003e-05, |
| "loss": 0.1972, |
| "step": 24600 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 1.6632059412237875e-05, |
| "loss": 0.1916, |
| "step": 24800 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 1.6594646164206747e-05, |
| "loss": 0.1836, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 1.655723291617562e-05, |
| "loss": 0.1855, |
| "step": 25200 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 1.651981966814449e-05, |
| "loss": 0.2007, |
| "step": 25400 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.6482406420113363e-05, |
| "loss": 0.1857, |
| "step": 25600 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 1.6444993172082235e-05, |
| "loss": 0.2067, |
| "step": 25800 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.6407579924051106e-05, |
| "loss": 0.2096, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.6370166676019982e-05, |
| "loss": 0.2039, |
| "step": 26200 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.6332753427988854e-05, |
| "loss": 0.1985, |
| "step": 26400 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1.6295340179957722e-05, |
| "loss": 0.1927, |
| "step": 26600 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.6257926931926597e-05, |
| "loss": 0.2015, |
| "step": 26800 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.622051368389547e-05, |
| "loss": 0.1822, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.618310043586434e-05, |
| "loss": 0.1957, |
| "step": 27200 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 1.6145687187833213e-05, |
| "loss": 0.1936, |
| "step": 27400 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 1.6108273939802085e-05, |
| "loss": 0.1896, |
| "step": 27600 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.6070860691770957e-05, |
| "loss": 0.199, |
| "step": 27800 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 1.603344744373983e-05, |
| "loss": 0.1954, |
| "step": 28000 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 1.5996034195708704e-05, |
| "loss": 0.1918, |
| "step": 28200 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 1.5958620947677573e-05, |
| "loss": 0.1902, |
| "step": 28400 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 1.5921207699646445e-05, |
| "loss": 0.201, |
| "step": 28600 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 1.5883794451615317e-05, |
| "loss": 0.1888, |
| "step": 28800 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 1.5846381203584192e-05, |
| "loss": 0.1932, |
| "step": 29000 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 1.5808967955553064e-05, |
| "loss": 0.2016, |
| "step": 29200 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 1.5771554707521936e-05, |
| "loss": 0.199, |
| "step": 29400 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 1.5734141459490804e-05, |
| "loss": 0.1854, |
| "step": 29600 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 1.569672821145968e-05, |
| "loss": 0.1935, |
| "step": 29800 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 1.565931496342855e-05, |
| "loss": 0.1902, |
| "step": 30000 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 1.5621901715397423e-05, |
| "loss": 0.1831, |
| "step": 30200 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 1.5584488467366295e-05, |
| "loss": 0.1978, |
| "step": 30400 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 1.5547075219335167e-05, |
| "loss": 0.1938, |
| "step": 30600 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 1.550966197130404e-05, |
| "loss": 0.1911, |
| "step": 30800 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 1.547224872327291e-05, |
| "loss": 0.1847, |
| "step": 31000 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 1.5434835475241786e-05, |
| "loss": 0.1919, |
| "step": 31200 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 1.5397422227210655e-05, |
| "loss": 0.1944, |
| "step": 31400 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 1.5360008979179527e-05, |
| "loss": 0.1809, |
| "step": 31600 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 1.5322595731148402e-05, |
| "loss": 0.1986, |
| "step": 31800 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 1.5285182483117274e-05, |
| "loss": 0.1927, |
| "step": 32000 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 1.5247769235086144e-05, |
| "loss": 0.1955, |
| "step": 32200 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.5210355987055016e-05, |
| "loss": 0.1909, |
| "step": 32400 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 1.517294273902389e-05, |
| "loss": 0.2026, |
| "step": 32600 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 1.5135529490992762e-05, |
| "loss": 0.1922, |
| "step": 32800 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.5098116242961634e-05, |
| "loss": 0.1892, |
| "step": 33000 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.5060702994930506e-05, |
| "loss": 0.1962, |
| "step": 33200 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1.502328974689938e-05, |
| "loss": 0.1987, |
| "step": 33400 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 1.4985876498868251e-05, |
| "loss": 0.2, |
| "step": 33600 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 1.4948463250837121e-05, |
| "loss": 0.1784, |
| "step": 33800 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 1.4911050002805995e-05, |
| "loss": 0.1998, |
| "step": 34000 |
| }, |
| { |
| "epoch": 3.01, |
| "learning_rate": 1.4873636754774867e-05, |
| "loss": 0.1794, |
| "step": 34200 |
| }, |
| { |
| "epoch": 3.03, |
| "learning_rate": 1.4836223506743739e-05, |
| "loss": 0.1545, |
| "step": 34400 |
| }, |
| { |
| "epoch": 3.04, |
| "learning_rate": 1.479881025871261e-05, |
| "loss": 0.1596, |
| "step": 34600 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 1.4761397010681484e-05, |
| "loss": 0.1522, |
| "step": 34800 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 1.4723983762650356e-05, |
| "loss": 0.1633, |
| "step": 35000 |
| }, |
| { |
| "epoch": 3.1, |
| "learning_rate": 1.4686570514619227e-05, |
| "loss": 0.1421, |
| "step": 35200 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 1.4649157266588102e-05, |
| "loss": 0.1506, |
| "step": 35400 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 1.4611744018556972e-05, |
| "loss": 0.1563, |
| "step": 35600 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 1.4574330770525844e-05, |
| "loss": 0.1557, |
| "step": 35800 |
| }, |
| { |
| "epoch": 3.17, |
| "learning_rate": 1.4536917522494716e-05, |
| "loss": 0.1641, |
| "step": 36000 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 1.449950427446359e-05, |
| "loss": 0.1545, |
| "step": 36200 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 1.4462091026432461e-05, |
| "loss": 0.1625, |
| "step": 36400 |
| }, |
| { |
| "epoch": 3.22, |
| "learning_rate": 1.4424677778401333e-05, |
| "loss": 0.165, |
| "step": 36600 |
| }, |
| { |
| "epoch": 3.24, |
| "learning_rate": 1.4387264530370207e-05, |
| "loss": 0.157, |
| "step": 36800 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 1.4349851282339077e-05, |
| "loss": 0.1485, |
| "step": 37000 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 1.4312438034307949e-05, |
| "loss": 0.1571, |
| "step": 37200 |
| }, |
| { |
| "epoch": 3.29, |
| "learning_rate": 1.4275024786276821e-05, |
| "loss": 0.1608, |
| "step": 37400 |
| }, |
| { |
| "epoch": 3.31, |
| "learning_rate": 1.4237611538245695e-05, |
| "loss": 0.162, |
| "step": 37600 |
| }, |
| { |
| "epoch": 3.32, |
| "learning_rate": 1.4200198290214567e-05, |
| "loss": 0.154, |
| "step": 37800 |
| }, |
| { |
| "epoch": 3.34, |
| "learning_rate": 1.4162785042183438e-05, |
| "loss": 0.1536, |
| "step": 38000 |
| }, |
| { |
| "epoch": 3.36, |
| "learning_rate": 1.4125371794152309e-05, |
| "loss": 0.164, |
| "step": 38200 |
| }, |
| { |
| "epoch": 3.38, |
| "learning_rate": 1.4087958546121184e-05, |
| "loss": 0.1655, |
| "step": 38400 |
| }, |
| { |
| "epoch": 3.39, |
| "learning_rate": 1.4050545298090054e-05, |
| "loss": 0.1606, |
| "step": 38600 |
| }, |
| { |
| "epoch": 3.41, |
| "learning_rate": 1.4013132050058926e-05, |
| "loss": 0.1533, |
| "step": 38800 |
| }, |
| { |
| "epoch": 3.43, |
| "learning_rate": 1.39757188020278e-05, |
| "loss": 0.1515, |
| "step": 39000 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 1.3938305553996672e-05, |
| "loss": 0.1624, |
| "step": 39200 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 1.3900892305965544e-05, |
| "loss": 0.1467, |
| "step": 39400 |
| }, |
| { |
| "epoch": 3.48, |
| "learning_rate": 1.3863479057934415e-05, |
| "loss": 0.1582, |
| "step": 39600 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 1.3826065809903289e-05, |
| "loss": 0.163, |
| "step": 39800 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 1.378865256187216e-05, |
| "loss": 0.1561, |
| "step": 40000 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 1.3751239313841031e-05, |
| "loss": 0.1558, |
| "step": 40200 |
| }, |
| { |
| "epoch": 3.55, |
| "learning_rate": 1.3713826065809905e-05, |
| "loss": 0.1686, |
| "step": 40400 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 1.3676412817778777e-05, |
| "loss": 0.161, |
| "step": 40600 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 1.3638999569747649e-05, |
| "loss": 0.1574, |
| "step": 40800 |
| }, |
| { |
| "epoch": 3.61, |
| "learning_rate": 1.360158632171652e-05, |
| "loss": 0.1591, |
| "step": 41000 |
| }, |
| { |
| "epoch": 3.62, |
| "learning_rate": 1.3564173073685394e-05, |
| "loss": 0.1618, |
| "step": 41200 |
| }, |
| { |
| "epoch": 3.64, |
| "learning_rate": 1.3526759825654266e-05, |
| "loss": 0.1545, |
| "step": 41400 |
| }, |
| { |
| "epoch": 3.66, |
| "learning_rate": 1.3489346577623136e-05, |
| "loss": 0.1626, |
| "step": 41600 |
| }, |
| { |
| "epoch": 3.68, |
| "learning_rate": 1.345193332959201e-05, |
| "loss": 0.1588, |
| "step": 41800 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 1.3414520081560882e-05, |
| "loss": 0.1538, |
| "step": 42000 |
| }, |
| { |
| "epoch": 3.71, |
| "learning_rate": 1.3377106833529754e-05, |
| "loss": 0.1557, |
| "step": 42200 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 1.3339693585498626e-05, |
| "loss": 0.1554, |
| "step": 42400 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 1.33022803374675e-05, |
| "loss": 0.1639, |
| "step": 42600 |
| }, |
| { |
| "epoch": 3.76, |
| "learning_rate": 1.3264867089436371e-05, |
| "loss": 0.1519, |
| "step": 42800 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 1.3227453841405241e-05, |
| "loss": 0.1671, |
| "step": 43000 |
| }, |
| { |
| "epoch": 3.8, |
| "learning_rate": 1.3190040593374113e-05, |
| "loss": 0.1614, |
| "step": 43200 |
| }, |
| { |
| "epoch": 3.82, |
| "learning_rate": 1.3152627345342987e-05, |
| "loss": 0.1523, |
| "step": 43400 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 1.3115214097311859e-05, |
| "loss": 0.1564, |
| "step": 43600 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 1.307780084928073e-05, |
| "loss": 0.1662, |
| "step": 43800 |
| }, |
| { |
| "epoch": 3.87, |
| "learning_rate": 1.3040387601249604e-05, |
| "loss": 0.1667, |
| "step": 44000 |
| }, |
| { |
| "epoch": 3.89, |
| "learning_rate": 1.3002974353218476e-05, |
| "loss": 0.1631, |
| "step": 44200 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 1.2965561105187348e-05, |
| "loss": 0.1561, |
| "step": 44400 |
| }, |
| { |
| "epoch": 3.92, |
| "learning_rate": 1.2928147857156219e-05, |
| "loss": 0.159, |
| "step": 44600 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 1.2890734609125092e-05, |
| "loss": 0.1618, |
| "step": 44800 |
| }, |
| { |
| "epoch": 3.96, |
| "learning_rate": 1.2853321361093964e-05, |
| "loss": 0.1538, |
| "step": 45000 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 1.2815908113062836e-05, |
| "loss": 0.1586, |
| "step": 45200 |
| }, |
| { |
| "epoch": 3.99, |
| "learning_rate": 1.277849486503171e-05, |
| "loss": 0.16, |
| "step": 45400 |
| }, |
| { |
| "epoch": 4.01, |
| "learning_rate": 1.2741081617000581e-05, |
| "loss": 0.1483, |
| "step": 45600 |
| }, |
| { |
| "epoch": 4.03, |
| "learning_rate": 1.2703668368969453e-05, |
| "loss": 0.1201, |
| "step": 45800 |
| }, |
| { |
| "epoch": 4.05, |
| "learning_rate": 1.2666255120938324e-05, |
| "loss": 0.1278, |
| "step": 46000 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 1.2628841872907199e-05, |
| "loss": 0.1348, |
| "step": 46200 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 1.2591428624876069e-05, |
| "loss": 0.1238, |
| "step": 46400 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 1.2554015376844941e-05, |
| "loss": 0.1215, |
| "step": 46600 |
| }, |
| { |
| "epoch": 4.12, |
| "learning_rate": 1.2516602128813815e-05, |
| "loss": 0.132, |
| "step": 46800 |
| }, |
| { |
| "epoch": 4.13, |
| "learning_rate": 1.2479188880782687e-05, |
| "loss": 0.1244, |
| "step": 47000 |
| }, |
| { |
| "epoch": 4.15, |
| "learning_rate": 1.2441775632751559e-05, |
| "loss": 0.1235, |
| "step": 47200 |
| }, |
| { |
| "epoch": 4.17, |
| "learning_rate": 1.240436238472043e-05, |
| "loss": 0.1312, |
| "step": 47400 |
| }, |
| { |
| "epoch": 4.19, |
| "learning_rate": 1.2366949136689304e-05, |
| "loss": 0.1226, |
| "step": 47600 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 1.2329535888658174e-05, |
| "loss": 0.1292, |
| "step": 47800 |
| }, |
| { |
| "epoch": 4.22, |
| "learning_rate": 1.2292122640627046e-05, |
| "loss": 0.1255, |
| "step": 48000 |
| }, |
| { |
| "epoch": 4.24, |
| "learning_rate": 1.2254709392595918e-05, |
| "loss": 0.1258, |
| "step": 48200 |
| }, |
| { |
| "epoch": 4.26, |
| "learning_rate": 1.2217296144564792e-05, |
| "loss": 0.1292, |
| "step": 48400 |
| }, |
| { |
| "epoch": 4.27, |
| "learning_rate": 1.2179882896533664e-05, |
| "loss": 0.1298, |
| "step": 48600 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 1.2142469648502536e-05, |
| "loss": 0.1254, |
| "step": 48800 |
| }, |
| { |
| "epoch": 4.31, |
| "learning_rate": 1.210505640047141e-05, |
| "loss": 0.1241, |
| "step": 49000 |
| }, |
| { |
| "epoch": 4.33, |
| "learning_rate": 1.2067643152440281e-05, |
| "loss": 0.1302, |
| "step": 49200 |
| }, |
| { |
| "epoch": 4.34, |
| "learning_rate": 1.2030229904409151e-05, |
| "loss": 0.1309, |
| "step": 49400 |
| }, |
| { |
| "epoch": 4.36, |
| "learning_rate": 1.1992816656378023e-05, |
| "loss": 0.1182, |
| "step": 49600 |
| }, |
| { |
| "epoch": 4.38, |
| "learning_rate": 1.1955403408346897e-05, |
| "loss": 0.1331, |
| "step": 49800 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 1.1917990160315769e-05, |
| "loss": 0.1289, |
| "step": 50000 |
| }, |
| { |
| "epoch": 4.41, |
| "learning_rate": 1.188057691228464e-05, |
| "loss": 0.1149, |
| "step": 50200 |
| }, |
| { |
| "epoch": 4.43, |
| "learning_rate": 1.1843163664253514e-05, |
| "loss": 0.1201, |
| "step": 50400 |
| }, |
| { |
| "epoch": 4.45, |
| "learning_rate": 1.1805750416222386e-05, |
| "loss": 0.1218, |
| "step": 50600 |
| }, |
| { |
| "epoch": 4.47, |
| "learning_rate": 1.1768337168191256e-05, |
| "loss": 0.1278, |
| "step": 50800 |
| }, |
| { |
| "epoch": 4.49, |
| "learning_rate": 1.1730923920160128e-05, |
| "loss": 0.1275, |
| "step": 51000 |
| }, |
| { |
| "epoch": 4.5, |
| "learning_rate": 1.1693510672129002e-05, |
| "loss": 0.1379, |
| "step": 51200 |
| }, |
| { |
| "epoch": 4.52, |
| "learning_rate": 1.1656097424097874e-05, |
| "loss": 0.1298, |
| "step": 51400 |
| }, |
| { |
| "epoch": 4.54, |
| "learning_rate": 1.1618684176066746e-05, |
| "loss": 0.1375, |
| "step": 51600 |
| }, |
| { |
| "epoch": 4.56, |
| "learning_rate": 1.1581270928035618e-05, |
| "loss": 0.1271, |
| "step": 51800 |
| }, |
| { |
| "epoch": 4.57, |
| "learning_rate": 1.1543857680004491e-05, |
| "loss": 0.1487, |
| "step": 52000 |
| }, |
| { |
| "epoch": 4.59, |
| "learning_rate": 1.1506444431973363e-05, |
| "loss": 0.1341, |
| "step": 52200 |
| }, |
| { |
| "epoch": 4.61, |
| "learning_rate": 1.1469031183942233e-05, |
| "loss": 0.1314, |
| "step": 52400 |
| }, |
| { |
| "epoch": 4.63, |
| "learning_rate": 1.1431617935911107e-05, |
| "loss": 0.1187, |
| "step": 52600 |
| }, |
| { |
| "epoch": 4.64, |
| "learning_rate": 1.1394204687879979e-05, |
| "loss": 0.1308, |
| "step": 52800 |
| }, |
| { |
| "epoch": 4.66, |
| "learning_rate": 1.1356791439848851e-05, |
| "loss": 0.1357, |
| "step": 53000 |
| }, |
| { |
| "epoch": 4.68, |
| "learning_rate": 1.1319378191817723e-05, |
| "loss": 0.1346, |
| "step": 53200 |
| }, |
| { |
| "epoch": 4.7, |
| "learning_rate": 1.1281964943786596e-05, |
| "loss": 0.1302, |
| "step": 53400 |
| }, |
| { |
| "epoch": 4.71, |
| "learning_rate": 1.1244551695755468e-05, |
| "loss": 0.1318, |
| "step": 53600 |
| }, |
| { |
| "epoch": 4.73, |
| "learning_rate": 1.120713844772434e-05, |
| "loss": 0.1406, |
| "step": 53800 |
| }, |
| { |
| "epoch": 4.75, |
| "learning_rate": 1.1169725199693214e-05, |
| "loss": 0.1287, |
| "step": 54000 |
| }, |
| { |
| "epoch": 4.77, |
| "learning_rate": 1.1132311951662084e-05, |
| "loss": 0.1296, |
| "step": 54200 |
| }, |
| { |
| "epoch": 4.78, |
| "learning_rate": 1.1094898703630956e-05, |
| "loss": 0.1239, |
| "step": 54400 |
| }, |
| { |
| "epoch": 4.8, |
| "learning_rate": 1.1057485455599828e-05, |
| "loss": 0.1289, |
| "step": 54600 |
| }, |
| { |
| "epoch": 4.82, |
| "learning_rate": 1.1020072207568702e-05, |
| "loss": 0.1371, |
| "step": 54800 |
| }, |
| { |
| "epoch": 4.84, |
| "learning_rate": 1.0982658959537573e-05, |
| "loss": 0.1371, |
| "step": 55000 |
| }, |
| { |
| "epoch": 4.85, |
| "learning_rate": 1.0945245711506445e-05, |
| "loss": 0.1197, |
| "step": 55200 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 1.0907832463475319e-05, |
| "loss": 0.1316, |
| "step": 55400 |
| }, |
| { |
| "epoch": 4.89, |
| "learning_rate": 1.0870419215444191e-05, |
| "loss": 0.1275, |
| "step": 55600 |
| }, |
| { |
| "epoch": 4.91, |
| "learning_rate": 1.0833005967413061e-05, |
| "loss": 0.1287, |
| "step": 55800 |
| }, |
| { |
| "epoch": 4.92, |
| "learning_rate": 1.0795592719381933e-05, |
| "loss": 0.1266, |
| "step": 56000 |
| }, |
| { |
| "epoch": 4.94, |
| "learning_rate": 1.0758179471350807e-05, |
| "loss": 0.1275, |
| "step": 56200 |
| }, |
| { |
| "epoch": 4.96, |
| "learning_rate": 1.0720766223319679e-05, |
| "loss": 0.1244, |
| "step": 56400 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 1.068335297528855e-05, |
| "loss": 0.1453, |
| "step": 56600 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 1.0645939727257422e-05, |
| "loss": 0.1343, |
| "step": 56800 |
| }, |
| { |
| "epoch": 5.01, |
| "learning_rate": 1.0608526479226296e-05, |
| "loss": 0.114, |
| "step": 57000 |
| }, |
| { |
| "epoch": 5.03, |
| "learning_rate": 1.0571113231195166e-05, |
| "loss": 0.1038, |
| "step": 57200 |
| }, |
| { |
| "epoch": 5.05, |
| "learning_rate": 1.0533699983164038e-05, |
| "loss": 0.1064, |
| "step": 57400 |
| }, |
| { |
| "epoch": 5.07, |
| "learning_rate": 1.0496286735132912e-05, |
| "loss": 0.0928, |
| "step": 57600 |
| }, |
| { |
| "epoch": 5.08, |
| "learning_rate": 1.0458873487101784e-05, |
| "loss": 0.1079, |
| "step": 57800 |
| }, |
| { |
| "epoch": 5.1, |
| "learning_rate": 1.0421460239070656e-05, |
| "loss": 0.0976, |
| "step": 58000 |
| }, |
| { |
| "epoch": 5.12, |
| "learning_rate": 1.0384046991039528e-05, |
| "loss": 0.1086, |
| "step": 58200 |
| }, |
| { |
| "epoch": 5.14, |
| "learning_rate": 1.0346633743008401e-05, |
| "loss": 0.105, |
| "step": 58400 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 1.0309220494977273e-05, |
| "loss": 0.1086, |
| "step": 58600 |
| }, |
| { |
| "epoch": 5.17, |
| "learning_rate": 1.0271807246946143e-05, |
| "loss": 0.0972, |
| "step": 58800 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 1.0234393998915017e-05, |
| "loss": 0.1086, |
| "step": 59000 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 1.0196980750883889e-05, |
| "loss": 0.1151, |
| "step": 59200 |
| }, |
| { |
| "epoch": 5.22, |
| "learning_rate": 1.015956750285276e-05, |
| "loss": 0.1076, |
| "step": 59400 |
| }, |
| { |
| "epoch": 5.24, |
| "learning_rate": 1.0122154254821633e-05, |
| "loss": 0.1061, |
| "step": 59600 |
| }, |
| { |
| "epoch": 5.26, |
| "learning_rate": 1.0084741006790506e-05, |
| "loss": 0.1087, |
| "step": 59800 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 1.0047327758759378e-05, |
| "loss": 0.1081, |
| "step": 60000 |
| }, |
| { |
| "epoch": 5.29, |
| "learning_rate": 1.0009914510728248e-05, |
| "loss": 0.1051, |
| "step": 60200 |
| }, |
| { |
| "epoch": 5.31, |
| "learning_rate": 9.972501262697122e-06, |
| "loss": 0.1096, |
| "step": 60400 |
| }, |
| { |
| "epoch": 5.33, |
| "learning_rate": 9.935088014665994e-06, |
| "loss": 0.1067, |
| "step": 60600 |
| }, |
| { |
| "epoch": 5.35, |
| "learning_rate": 9.897674766634866e-06, |
| "loss": 0.1106, |
| "step": 60800 |
| }, |
| { |
| "epoch": 5.36, |
| "learning_rate": 9.86026151860374e-06, |
| "loss": 0.1019, |
| "step": 61000 |
| }, |
| { |
| "epoch": 5.38, |
| "learning_rate": 9.82284827057261e-06, |
| "loss": 0.1107, |
| "step": 61200 |
| }, |
| { |
| "epoch": 5.4, |
| "learning_rate": 9.785435022541483e-06, |
| "loss": 0.1045, |
| "step": 61400 |
| }, |
| { |
| "epoch": 5.42, |
| "learning_rate": 9.748021774510355e-06, |
| "loss": 0.1101, |
| "step": 61600 |
| }, |
| { |
| "epoch": 5.43, |
| "learning_rate": 9.710608526479227e-06, |
| "loss": 0.1001, |
| "step": 61800 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 9.673195278448099e-06, |
| "loss": 0.1093, |
| "step": 62000 |
| }, |
| { |
| "epoch": 5.47, |
| "learning_rate": 9.635782030416971e-06, |
| "loss": 0.1091, |
| "step": 62200 |
| }, |
| { |
| "epoch": 5.49, |
| "learning_rate": 9.598368782385845e-06, |
| "loss": 0.1073, |
| "step": 62400 |
| }, |
| { |
| "epoch": 5.51, |
| "learning_rate": 9.560955534354715e-06, |
| "loss": 0.1175, |
| "step": 62600 |
| }, |
| { |
| "epoch": 5.52, |
| "learning_rate": 9.523542286323588e-06, |
| "loss": 0.1018, |
| "step": 62800 |
| }, |
| { |
| "epoch": 5.54, |
| "learning_rate": 9.48612903829246e-06, |
| "loss": 0.1015, |
| "step": 63000 |
| }, |
| { |
| "epoch": 5.56, |
| "learning_rate": 9.448715790261332e-06, |
| "loss": 0.1159, |
| "step": 63200 |
| }, |
| { |
| "epoch": 5.58, |
| "learning_rate": 9.411302542230204e-06, |
| "loss": 0.1104, |
| "step": 63400 |
| }, |
| { |
| "epoch": 5.59, |
| "learning_rate": 9.373889294199076e-06, |
| "loss": 0.1105, |
| "step": 63600 |
| }, |
| { |
| "epoch": 5.61, |
| "learning_rate": 9.33647604616795e-06, |
| "loss": 0.1037, |
| "step": 63800 |
| }, |
| { |
| "epoch": 5.63, |
| "learning_rate": 9.299062798136822e-06, |
| "loss": 0.103, |
| "step": 64000 |
| }, |
| { |
| "epoch": 5.65, |
| "learning_rate": 9.261649550105694e-06, |
| "loss": 0.1129, |
| "step": 64200 |
| }, |
| { |
| "epoch": 5.66, |
| "learning_rate": 9.224236302074565e-06, |
| "loss": 0.1005, |
| "step": 64400 |
| }, |
| { |
| "epoch": 5.68, |
| "learning_rate": 9.186823054043437e-06, |
| "loss": 0.1082, |
| "step": 64600 |
| }, |
| { |
| "epoch": 5.7, |
| "learning_rate": 9.14940980601231e-06, |
| "loss": 0.1157, |
| "step": 64800 |
| }, |
| { |
| "epoch": 5.72, |
| "learning_rate": 9.111996557981181e-06, |
| "loss": 0.1139, |
| "step": 65000 |
| }, |
| { |
| "epoch": 5.73, |
| "learning_rate": 9.074583309950053e-06, |
| "loss": 0.1101, |
| "step": 65200 |
| }, |
| { |
| "epoch": 5.75, |
| "learning_rate": 9.037170061918927e-06, |
| "loss": 0.1139, |
| "step": 65400 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 8.999756813887799e-06, |
| "loss": 0.1107, |
| "step": 65600 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 8.96234356585667e-06, |
| "loss": 0.1095, |
| "step": 65800 |
| }, |
| { |
| "epoch": 5.8, |
| "learning_rate": 8.924930317825543e-06, |
| "loss": 0.1127, |
| "step": 66000 |
| }, |
| { |
| "epoch": 5.82, |
| "learning_rate": 8.887517069794414e-06, |
| "loss": 0.1118, |
| "step": 66200 |
| }, |
| { |
| "epoch": 5.84, |
| "learning_rate": 8.850103821763288e-06, |
| "loss": 0.1042, |
| "step": 66400 |
| }, |
| { |
| "epoch": 5.86, |
| "learning_rate": 8.812690573732158e-06, |
| "loss": 0.1112, |
| "step": 66600 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 8.775277325701032e-06, |
| "loss": 0.1116, |
| "step": 66800 |
| }, |
| { |
| "epoch": 5.89, |
| "learning_rate": 8.737864077669904e-06, |
| "loss": 0.1139, |
| "step": 67000 |
| }, |
| { |
| "epoch": 5.91, |
| "learning_rate": 8.700450829638776e-06, |
| "loss": 0.1082, |
| "step": 67200 |
| }, |
| { |
| "epoch": 5.93, |
| "learning_rate": 8.663037581607648e-06, |
| "loss": 0.1056, |
| "step": 67400 |
| }, |
| { |
| "epoch": 5.94, |
| "learning_rate": 8.62562433357652e-06, |
| "loss": 0.102, |
| "step": 67600 |
| }, |
| { |
| "epoch": 5.96, |
| "learning_rate": 8.588211085545393e-06, |
| "loss": 0.1026, |
| "step": 67800 |
| }, |
| { |
| "epoch": 5.98, |
| "learning_rate": 8.550797837514263e-06, |
| "loss": 0.1103, |
| "step": 68000 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 8.513384589483137e-06, |
| "loss": 0.1147, |
| "step": 68200 |
| }, |
| { |
| "epoch": 6.02, |
| "learning_rate": 8.475971341452009e-06, |
| "loss": 0.0773, |
| "step": 68400 |
| }, |
| { |
| "epoch": 6.03, |
| "learning_rate": 8.43855809342088e-06, |
| "loss": 0.0812, |
| "step": 68600 |
| }, |
| { |
| "epoch": 6.05, |
| "learning_rate": 8.401144845389754e-06, |
| "loss": 0.0801, |
| "step": 68800 |
| }, |
| { |
| "epoch": 6.07, |
| "learning_rate": 8.363731597358625e-06, |
| "loss": 0.0884, |
| "step": 69000 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 8.326318349327498e-06, |
| "loss": 0.0914, |
| "step": 69200 |
| }, |
| { |
| "epoch": 6.1, |
| "learning_rate": 8.28890510129637e-06, |
| "loss": 0.0868, |
| "step": 69400 |
| }, |
| { |
| "epoch": 6.12, |
| "learning_rate": 8.251491853265242e-06, |
| "loss": 0.0948, |
| "step": 69600 |
| }, |
| { |
| "epoch": 6.14, |
| "learning_rate": 8.214078605234114e-06, |
| "loss": 0.0808, |
| "step": 69800 |
| }, |
| { |
| "epoch": 6.16, |
| "learning_rate": 8.176665357202986e-06, |
| "loss": 0.092, |
| "step": 70000 |
| }, |
| { |
| "epoch": 6.17, |
| "learning_rate": 8.139252109171858e-06, |
| "loss": 0.0841, |
| "step": 70200 |
| }, |
| { |
| "epoch": 6.19, |
| "learning_rate": 8.10183886114073e-06, |
| "loss": 0.0951, |
| "step": 70400 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 8.064425613109603e-06, |
| "loss": 0.0928, |
| "step": 70600 |
| }, |
| { |
| "epoch": 6.23, |
| "learning_rate": 8.027012365078475e-06, |
| "loss": 0.0935, |
| "step": 70800 |
| }, |
| { |
| "epoch": 6.24, |
| "learning_rate": 7.989599117047347e-06, |
| "loss": 0.0927, |
| "step": 71000 |
| }, |
| { |
| "epoch": 6.26, |
| "learning_rate": 7.952185869016219e-06, |
| "loss": 0.0923, |
| "step": 71200 |
| }, |
| { |
| "epoch": 6.28, |
| "learning_rate": 7.914772620985091e-06, |
| "loss": 0.0801, |
| "step": 71400 |
| }, |
| { |
| "epoch": 6.3, |
| "learning_rate": 7.877359372953963e-06, |
| "loss": 0.0937, |
| "step": 71600 |
| }, |
| { |
| "epoch": 6.31, |
| "learning_rate": 7.839946124922837e-06, |
| "loss": 0.0865, |
| "step": 71800 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 7.802532876891707e-06, |
| "loss": 0.0871, |
| "step": 72000 |
| }, |
| { |
| "epoch": 6.35, |
| "learning_rate": 7.76511962886058e-06, |
| "loss": 0.0786, |
| "step": 72200 |
| }, |
| { |
| "epoch": 6.37, |
| "learning_rate": 7.727706380829452e-06, |
| "loss": 0.0934, |
| "step": 72400 |
| }, |
| { |
| "epoch": 6.38, |
| "learning_rate": 7.690293132798324e-06, |
| "loss": 0.0838, |
| "step": 72600 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 7.652879884767196e-06, |
| "loss": 0.097, |
| "step": 72800 |
| }, |
| { |
| "epoch": 6.42, |
| "learning_rate": 7.615466636736069e-06, |
| "loss": 0.0885, |
| "step": 73000 |
| }, |
| { |
| "epoch": 6.44, |
| "learning_rate": 7.578053388704941e-06, |
| "loss": 0.0919, |
| "step": 73200 |
| }, |
| { |
| "epoch": 6.46, |
| "learning_rate": 7.540640140673813e-06, |
| "loss": 0.0822, |
| "step": 73400 |
| }, |
| { |
| "epoch": 6.47, |
| "learning_rate": 7.5032268926426856e-06, |
| "loss": 0.0837, |
| "step": 73600 |
| }, |
| { |
| "epoch": 6.49, |
| "learning_rate": 7.465813644611558e-06, |
| "loss": 0.0879, |
| "step": 73800 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 7.428400396580429e-06, |
| "loss": 0.0927, |
| "step": 74000 |
| }, |
| { |
| "epoch": 6.53, |
| "learning_rate": 7.390987148549302e-06, |
| "loss": 0.0929, |
| "step": 74200 |
| }, |
| { |
| "epoch": 6.54, |
| "learning_rate": 7.353573900518174e-06, |
| "loss": 0.0871, |
| "step": 74400 |
| }, |
| { |
| "epoch": 6.56, |
| "learning_rate": 7.316160652487047e-06, |
| "loss": 0.0886, |
| "step": 74600 |
| }, |
| { |
| "epoch": 6.58, |
| "learning_rate": 7.278747404455918e-06, |
| "loss": 0.0887, |
| "step": 74800 |
| }, |
| { |
| "epoch": 6.6, |
| "learning_rate": 7.241334156424791e-06, |
| "loss": 0.0924, |
| "step": 75000 |
| }, |
| { |
| "epoch": 6.61, |
| "learning_rate": 7.203920908393663e-06, |
| "loss": 0.0971, |
| "step": 75200 |
| }, |
| { |
| "epoch": 6.63, |
| "learning_rate": 7.166507660362535e-06, |
| "loss": 0.0922, |
| "step": 75400 |
| }, |
| { |
| "epoch": 6.65, |
| "learning_rate": 7.129094412331407e-06, |
| "loss": 0.0866, |
| "step": 75600 |
| }, |
| { |
| "epoch": 6.67, |
| "learning_rate": 7.091681164300279e-06, |
| "loss": 0.0822, |
| "step": 75800 |
| }, |
| { |
| "epoch": 6.68, |
| "learning_rate": 7.054267916269152e-06, |
| "loss": 0.102, |
| "step": 76000 |
| }, |
| { |
| "epoch": 6.7, |
| "learning_rate": 7.016854668238023e-06, |
| "loss": 0.091, |
| "step": 76200 |
| }, |
| { |
| "epoch": 6.72, |
| "learning_rate": 6.979441420206896e-06, |
| "loss": 0.0937, |
| "step": 76400 |
| }, |
| { |
| "epoch": 6.74, |
| "learning_rate": 6.942028172175768e-06, |
| "loss": 0.0795, |
| "step": 76600 |
| }, |
| { |
| "epoch": 6.75, |
| "learning_rate": 6.9046149241446405e-06, |
| "loss": 0.0917, |
| "step": 76800 |
| }, |
| { |
| "epoch": 6.77, |
| "learning_rate": 6.8672016761135115e-06, |
| "loss": 0.0987, |
| "step": 77000 |
| }, |
| { |
| "epoch": 6.79, |
| "learning_rate": 6.829788428082384e-06, |
| "loss": 0.0946, |
| "step": 77200 |
| }, |
| { |
| "epoch": 6.81, |
| "learning_rate": 6.792375180051257e-06, |
| "loss": 0.0915, |
| "step": 77400 |
| }, |
| { |
| "epoch": 6.82, |
| "learning_rate": 6.754961932020129e-06, |
| "loss": 0.0889, |
| "step": 77600 |
| }, |
| { |
| "epoch": 6.84, |
| "learning_rate": 6.717548683989002e-06, |
| "loss": 0.0884, |
| "step": 77800 |
| }, |
| { |
| "epoch": 6.86, |
| "learning_rate": 6.680135435957873e-06, |
| "loss": 0.0854, |
| "step": 78000 |
| }, |
| { |
| "epoch": 6.88, |
| "learning_rate": 6.642722187926746e-06, |
| "loss": 0.0847, |
| "step": 78200 |
| }, |
| { |
| "epoch": 6.89, |
| "learning_rate": 6.6053089398956175e-06, |
| "loss": 0.0907, |
| "step": 78400 |
| }, |
| { |
| "epoch": 6.91, |
| "learning_rate": 6.5678956918644894e-06, |
| "loss": 0.0955, |
| "step": 78600 |
| }, |
| { |
| "epoch": 6.93, |
| "learning_rate": 6.530482443833361e-06, |
| "loss": 0.095, |
| "step": 78800 |
| }, |
| { |
| "epoch": 6.95, |
| "learning_rate": 6.493069195802234e-06, |
| "loss": 0.0948, |
| "step": 79000 |
| }, |
| { |
| "epoch": 6.97, |
| "learning_rate": 6.455655947771107e-06, |
| "loss": 0.0774, |
| "step": 79200 |
| }, |
| { |
| "epoch": 6.98, |
| "learning_rate": 6.418242699739978e-06, |
| "loss": 0.0973, |
| "step": 79400 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 6.380829451708851e-06, |
| "loss": 0.0976, |
| "step": 79600 |
| }, |
| { |
| "epoch": 7.02, |
| "learning_rate": 6.343416203677723e-06, |
| "loss": 0.0674, |
| "step": 79800 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 6.306002955646595e-06, |
| "loss": 0.0735, |
| "step": 80000 |
| }, |
| { |
| "epoch": 7.05, |
| "learning_rate": 6.2685897076154665e-06, |
| "loss": 0.0777, |
| "step": 80200 |
| }, |
| { |
| "epoch": 7.07, |
| "learning_rate": 6.231176459584339e-06, |
| "loss": 0.0688, |
| "step": 80400 |
| }, |
| { |
| "epoch": 7.09, |
| "learning_rate": 6.193763211553212e-06, |
| "loss": 0.0721, |
| "step": 80600 |
| }, |
| { |
| "epoch": 7.11, |
| "learning_rate": 6.156349963522084e-06, |
| "loss": 0.0787, |
| "step": 80800 |
| }, |
| { |
| "epoch": 7.12, |
| "learning_rate": 6.118936715490956e-06, |
| "loss": 0.0755, |
| "step": 81000 |
| }, |
| { |
| "epoch": 7.14, |
| "learning_rate": 6.081523467459828e-06, |
| "loss": 0.072, |
| "step": 81200 |
| }, |
| { |
| "epoch": 7.16, |
| "learning_rate": 6.0441102194287005e-06, |
| "loss": 0.0695, |
| "step": 81400 |
| }, |
| { |
| "epoch": 7.18, |
| "learning_rate": 6.0066969713975724e-06, |
| "loss": 0.0713, |
| "step": 81600 |
| }, |
| { |
| "epoch": 7.19, |
| "learning_rate": 5.969283723366444e-06, |
| "loss": 0.0771, |
| "step": 81800 |
| }, |
| { |
| "epoch": 7.21, |
| "learning_rate": 5.931870475335316e-06, |
| "loss": 0.0695, |
| "step": 82000 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 5.894457227304189e-06, |
| "loss": 0.0676, |
| "step": 82200 |
| }, |
| { |
| "epoch": 7.25, |
| "learning_rate": 5.857043979273062e-06, |
| "loss": 0.0766, |
| "step": 82400 |
| }, |
| { |
| "epoch": 7.26, |
| "learning_rate": 5.819630731241933e-06, |
| "loss": 0.0766, |
| "step": 82600 |
| }, |
| { |
| "epoch": 7.28, |
| "learning_rate": 5.782217483210806e-06, |
| "loss": 0.0807, |
| "step": 82800 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 5.7448042351796775e-06, |
| "loss": 0.0834, |
| "step": 83000 |
| }, |
| { |
| "epoch": 7.32, |
| "learning_rate": 5.70739098714855e-06, |
| "loss": 0.0789, |
| "step": 83200 |
| }, |
| { |
| "epoch": 7.33, |
| "learning_rate": 5.669977739117421e-06, |
| "loss": 0.0691, |
| "step": 83400 |
| }, |
| { |
| "epoch": 7.35, |
| "learning_rate": 5.632564491086294e-06, |
| "loss": 0.0777, |
| "step": 83600 |
| }, |
| { |
| "epoch": 7.37, |
| "learning_rate": 5.595151243055166e-06, |
| "loss": 0.0725, |
| "step": 83800 |
| }, |
| { |
| "epoch": 7.39, |
| "learning_rate": 5.557737995024039e-06, |
| "loss": 0.0788, |
| "step": 84000 |
| }, |
| { |
| "epoch": 7.4, |
| "learning_rate": 5.520324746992911e-06, |
| "loss": 0.075, |
| "step": 84200 |
| }, |
| { |
| "epoch": 7.42, |
| "learning_rate": 5.482911498961783e-06, |
| "loss": 0.0742, |
| "step": 84400 |
| }, |
| { |
| "epoch": 7.44, |
| "learning_rate": 5.4454982509306554e-06, |
| "loss": 0.0666, |
| "step": 84600 |
| }, |
| { |
| "epoch": 7.46, |
| "learning_rate": 5.4080850028995265e-06, |
| "loss": 0.0688, |
| "step": 84800 |
| }, |
| { |
| "epoch": 7.48, |
| "learning_rate": 5.370671754868399e-06, |
| "loss": 0.0747, |
| "step": 85000 |
| }, |
| { |
| "epoch": 7.49, |
| "learning_rate": 5.333258506837271e-06, |
| "loss": 0.0741, |
| "step": 85200 |
| }, |
| { |
| "epoch": 7.51, |
| "learning_rate": 5.295845258806144e-06, |
| "loss": 0.0657, |
| "step": 85400 |
| }, |
| { |
| "epoch": 7.53, |
| "learning_rate": 5.258432010775017e-06, |
| "loss": 0.0788, |
| "step": 85600 |
| }, |
| { |
| "epoch": 7.55, |
| "learning_rate": 5.221018762743888e-06, |
| "loss": 0.0791, |
| "step": 85800 |
| }, |
| { |
| "epoch": 7.56, |
| "learning_rate": 5.1836055147127605e-06, |
| "loss": 0.0752, |
| "step": 86000 |
| }, |
| { |
| "epoch": 7.58, |
| "learning_rate": 5.1461922666816325e-06, |
| "loss": 0.0762, |
| "step": 86200 |
| }, |
| { |
| "epoch": 7.6, |
| "learning_rate": 5.108779018650505e-06, |
| "loss": 0.0771, |
| "step": 86400 |
| }, |
| { |
| "epoch": 7.62, |
| "learning_rate": 5.071365770619376e-06, |
| "loss": 0.0727, |
| "step": 86600 |
| }, |
| { |
| "epoch": 7.63, |
| "learning_rate": 5.033952522588249e-06, |
| "loss": 0.0843, |
| "step": 86800 |
| }, |
| { |
| "epoch": 7.65, |
| "learning_rate": 4.996539274557121e-06, |
| "loss": 0.0741, |
| "step": 87000 |
| }, |
| { |
| "epoch": 7.67, |
| "learning_rate": 4.959126026525993e-06, |
| "loss": 0.0721, |
| "step": 87200 |
| }, |
| { |
| "epoch": 7.69, |
| "learning_rate": 4.921712778494866e-06, |
| "loss": 0.0814, |
| "step": 87400 |
| }, |
| { |
| "epoch": 7.7, |
| "learning_rate": 4.884299530463738e-06, |
| "loss": 0.0703, |
| "step": 87600 |
| }, |
| { |
| "epoch": 7.72, |
| "learning_rate": 4.8468862824326095e-06, |
| "loss": 0.0745, |
| "step": 87800 |
| }, |
| { |
| "epoch": 7.74, |
| "learning_rate": 4.809473034401481e-06, |
| "loss": 0.0788, |
| "step": 88000 |
| }, |
| { |
| "epoch": 7.76, |
| "learning_rate": 4.772059786370354e-06, |
| "loss": 0.0721, |
| "step": 88200 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 4.734646538339226e-06, |
| "loss": 0.0689, |
| "step": 88400 |
| }, |
| { |
| "epoch": 7.79, |
| "learning_rate": 4.697233290308099e-06, |
| "loss": 0.0651, |
| "step": 88600 |
| }, |
| { |
| "epoch": 7.81, |
| "learning_rate": 4.659820042276971e-06, |
| "loss": 0.0775, |
| "step": 88800 |
| }, |
| { |
| "epoch": 7.83, |
| "learning_rate": 4.622406794245843e-06, |
| "loss": 0.069, |
| "step": 89000 |
| }, |
| { |
| "epoch": 7.84, |
| "learning_rate": 4.584993546214715e-06, |
| "loss": 0.0807, |
| "step": 89200 |
| }, |
| { |
| "epoch": 7.86, |
| "learning_rate": 4.547580298183587e-06, |
| "loss": 0.0817, |
| "step": 89400 |
| }, |
| { |
| "epoch": 7.88, |
| "learning_rate": 4.510167050152459e-06, |
| "loss": 0.0771, |
| "step": 89600 |
| }, |
| { |
| "epoch": 7.9, |
| "learning_rate": 4.472753802121331e-06, |
| "loss": 0.0683, |
| "step": 89800 |
| }, |
| { |
| "epoch": 7.91, |
| "learning_rate": 4.435340554090204e-06, |
| "loss": 0.0704, |
| "step": 90000 |
| }, |
| { |
| "epoch": 7.93, |
| "learning_rate": 4.397927306059076e-06, |
| "loss": 0.0852, |
| "step": 90200 |
| }, |
| { |
| "epoch": 7.95, |
| "learning_rate": 4.360514058027948e-06, |
| "loss": 0.0773, |
| "step": 90400 |
| }, |
| { |
| "epoch": 7.97, |
| "learning_rate": 4.323100809996821e-06, |
| "loss": 0.0694, |
| "step": 90600 |
| }, |
| { |
| "epoch": 7.99, |
| "learning_rate": 4.2856875619656925e-06, |
| "loss": 0.0771, |
| "step": 90800 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 4.2482743139345644e-06, |
| "loss": 0.0702, |
| "step": 91000 |
| }, |
| { |
| "epoch": 8.02, |
| "learning_rate": 4.210861065903436e-06, |
| "loss": 0.0533, |
| "step": 91200 |
| }, |
| { |
| "epoch": 8.04, |
| "learning_rate": 4.173447817872308e-06, |
| "loss": 0.0666, |
| "step": 91400 |
| }, |
| { |
| "epoch": 8.06, |
| "learning_rate": 4.136034569841181e-06, |
| "loss": 0.0566, |
| "step": 91600 |
| }, |
| { |
| "epoch": 8.07, |
| "learning_rate": 4.098621321810054e-06, |
| "loss": 0.0611, |
| "step": 91800 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 4.061208073778926e-06, |
| "loss": 0.0589, |
| "step": 92000 |
| }, |
| { |
| "epoch": 8.11, |
| "learning_rate": 4.023794825747798e-06, |
| "loss": 0.0602, |
| "step": 92200 |
| }, |
| { |
| "epoch": 8.13, |
| "learning_rate": 3.9863815777166695e-06, |
| "loss": 0.0615, |
| "step": 92400 |
| }, |
| { |
| "epoch": 8.14, |
| "learning_rate": 3.948968329685542e-06, |
| "loss": 0.0682, |
| "step": 92600 |
| }, |
| { |
| "epoch": 8.16, |
| "learning_rate": 3.911555081654414e-06, |
| "loss": 0.0678, |
| "step": 92800 |
| }, |
| { |
| "epoch": 8.18, |
| "learning_rate": 3.874141833623286e-06, |
| "loss": 0.0586, |
| "step": 93000 |
| }, |
| { |
| "epoch": 8.2, |
| "learning_rate": 3.836728585592159e-06, |
| "loss": 0.0641, |
| "step": 93200 |
| }, |
| { |
| "epoch": 8.21, |
| "learning_rate": 3.799315337561031e-06, |
| "loss": 0.0586, |
| "step": 93400 |
| }, |
| { |
| "epoch": 8.23, |
| "learning_rate": 3.761902089529903e-06, |
| "loss": 0.0684, |
| "step": 93600 |
| }, |
| { |
| "epoch": 8.25, |
| "learning_rate": 3.724488841498775e-06, |
| "loss": 0.0594, |
| "step": 93800 |
| }, |
| { |
| "epoch": 8.27, |
| "learning_rate": 3.687075593467647e-06, |
| "loss": 0.061, |
| "step": 94000 |
| }, |
| { |
| "epoch": 8.28, |
| "learning_rate": 3.6496623454365193e-06, |
| "loss": 0.0587, |
| "step": 94200 |
| }, |
| { |
| "epoch": 8.3, |
| "learning_rate": 3.6122490974053913e-06, |
| "loss": 0.0675, |
| "step": 94400 |
| }, |
| { |
| "epoch": 8.32, |
| "learning_rate": 3.5748358493742636e-06, |
| "loss": 0.0621, |
| "step": 94600 |
| }, |
| { |
| "epoch": 8.34, |
| "learning_rate": 3.5374226013431355e-06, |
| "loss": 0.0556, |
| "step": 94800 |
| }, |
| { |
| "epoch": 8.35, |
| "learning_rate": 3.5000093533120083e-06, |
| "loss": 0.0728, |
| "step": 95000 |
| }, |
| { |
| "epoch": 8.37, |
| "learning_rate": 3.46259610528088e-06, |
| "loss": 0.0554, |
| "step": 95200 |
| }, |
| { |
| "epoch": 8.39, |
| "learning_rate": 3.4251828572497525e-06, |
| "loss": 0.0634, |
| "step": 95400 |
| }, |
| { |
| "epoch": 8.41, |
| "learning_rate": 3.3877696092186245e-06, |
| "loss": 0.063, |
| "step": 95600 |
| }, |
| { |
| "epoch": 8.42, |
| "learning_rate": 3.350356361187497e-06, |
| "loss": 0.0613, |
| "step": 95800 |
| }, |
| { |
| "epoch": 8.44, |
| "learning_rate": 3.3129431131563687e-06, |
| "loss": 0.0755, |
| "step": 96000 |
| }, |
| { |
| "epoch": 8.46, |
| "learning_rate": 3.275529865125241e-06, |
| "loss": 0.0616, |
| "step": 96200 |
| }, |
| { |
| "epoch": 8.48, |
| "learning_rate": 3.238116617094113e-06, |
| "loss": 0.0513, |
| "step": 96400 |
| }, |
| { |
| "epoch": 8.5, |
| "learning_rate": 3.2007033690629857e-06, |
| "loss": 0.0609, |
| "step": 96600 |
| }, |
| { |
| "epoch": 8.51, |
| "learning_rate": 3.1632901210318577e-06, |
| "loss": 0.0625, |
| "step": 96800 |
| }, |
| { |
| "epoch": 8.53, |
| "learning_rate": 3.12587687300073e-06, |
| "loss": 0.057, |
| "step": 97000 |
| }, |
| { |
| "epoch": 8.55, |
| "learning_rate": 3.088463624969602e-06, |
| "loss": 0.0655, |
| "step": 97200 |
| }, |
| { |
| "epoch": 8.57, |
| "learning_rate": 3.0510503769384743e-06, |
| "loss": 0.0543, |
| "step": 97400 |
| }, |
| { |
| "epoch": 8.58, |
| "learning_rate": 3.013637128907346e-06, |
| "loss": 0.077, |
| "step": 97600 |
| }, |
| { |
| "epoch": 8.6, |
| "learning_rate": 2.9762238808762185e-06, |
| "loss": 0.0487, |
| "step": 97800 |
| }, |
| { |
| "epoch": 8.62, |
| "learning_rate": 2.9388106328450904e-06, |
| "loss": 0.0655, |
| "step": 98000 |
| }, |
| { |
| "epoch": 8.64, |
| "learning_rate": 2.9013973848139628e-06, |
| "loss": 0.0655, |
| "step": 98200 |
| }, |
| { |
| "epoch": 8.65, |
| "learning_rate": 2.863984136782835e-06, |
| "loss": 0.0596, |
| "step": 98400 |
| }, |
| { |
| "epoch": 8.67, |
| "learning_rate": 2.8265708887517075e-06, |
| "loss": 0.0594, |
| "step": 98600 |
| }, |
| { |
| "epoch": 8.69, |
| "learning_rate": 2.7891576407205794e-06, |
| "loss": 0.0737, |
| "step": 98800 |
| }, |
| { |
| "epoch": 8.71, |
| "learning_rate": 2.7517443926894517e-06, |
| "loss": 0.0616, |
| "step": 99000 |
| }, |
| { |
| "epoch": 8.72, |
| "learning_rate": 2.7143311446583236e-06, |
| "loss": 0.0531, |
| "step": 99200 |
| }, |
| { |
| "epoch": 8.74, |
| "learning_rate": 2.676917896627196e-06, |
| "loss": 0.0717, |
| "step": 99400 |
| }, |
| { |
| "epoch": 8.76, |
| "learning_rate": 2.639504648596068e-06, |
| "loss": 0.0643, |
| "step": 99600 |
| }, |
| { |
| "epoch": 8.78, |
| "learning_rate": 2.60209140056494e-06, |
| "loss": 0.0515, |
| "step": 99800 |
| }, |
| { |
| "epoch": 8.79, |
| "learning_rate": 2.5646781525338126e-06, |
| "loss": 0.0612, |
| "step": 100000 |
| }, |
| { |
| "epoch": 8.81, |
| "learning_rate": 2.527264904502685e-06, |
| "loss": 0.0562, |
| "step": 100200 |
| }, |
| { |
| "epoch": 8.83, |
| "learning_rate": 2.489851656471557e-06, |
| "loss": 0.0605, |
| "step": 100400 |
| }, |
| { |
| "epoch": 8.85, |
| "learning_rate": 2.452438408440429e-06, |
| "loss": 0.0621, |
| "step": 100600 |
| }, |
| { |
| "epoch": 8.86, |
| "learning_rate": 2.415025160409301e-06, |
| "loss": 0.0639, |
| "step": 100800 |
| }, |
| { |
| "epoch": 8.88, |
| "learning_rate": 2.3776119123781734e-06, |
| "loss": 0.0565, |
| "step": 101000 |
| }, |
| { |
| "epoch": 8.9, |
| "learning_rate": 2.3401986643470454e-06, |
| "loss": 0.0609, |
| "step": 101200 |
| }, |
| { |
| "epoch": 8.92, |
| "learning_rate": 2.3027854163159177e-06, |
| "loss": 0.0613, |
| "step": 101400 |
| }, |
| { |
| "epoch": 8.94, |
| "learning_rate": 2.26537216828479e-06, |
| "loss": 0.0673, |
| "step": 101600 |
| }, |
| { |
| "epoch": 8.95, |
| "learning_rate": 2.227958920253662e-06, |
| "loss": 0.0598, |
| "step": 101800 |
| }, |
| { |
| "epoch": 8.97, |
| "learning_rate": 2.190545672222534e-06, |
| "loss": 0.0649, |
| "step": 102000 |
| }, |
| { |
| "epoch": 8.99, |
| "learning_rate": 2.1531324241914066e-06, |
| "loss": 0.0615, |
| "step": 102200 |
| }, |
| { |
| "epoch": 9.01, |
| "learning_rate": 2.1157191761602786e-06, |
| "loss": 0.0538, |
| "step": 102400 |
| }, |
| { |
| "epoch": 9.02, |
| "learning_rate": 2.0783059281291505e-06, |
| "loss": 0.0462, |
| "step": 102600 |
| }, |
| { |
| "epoch": 9.04, |
| "learning_rate": 2.040892680098023e-06, |
| "loss": 0.0542, |
| "step": 102800 |
| }, |
| { |
| "epoch": 9.06, |
| "learning_rate": 2.003479432066895e-06, |
| "loss": 0.0529, |
| "step": 103000 |
| }, |
| { |
| "epoch": 9.08, |
| "learning_rate": 1.966066184035767e-06, |
| "loss": 0.0585, |
| "step": 103200 |
| }, |
| { |
| "epoch": 9.09, |
| "learning_rate": 1.9286529360046394e-06, |
| "loss": 0.0462, |
| "step": 103400 |
| }, |
| { |
| "epoch": 9.11, |
| "learning_rate": 1.8912396879735116e-06, |
| "loss": 0.0496, |
| "step": 103600 |
| }, |
| { |
| "epoch": 9.13, |
| "learning_rate": 1.8538264399423839e-06, |
| "loss": 0.0477, |
| "step": 103800 |
| }, |
| { |
| "epoch": 9.15, |
| "learning_rate": 1.816413191911256e-06, |
| "loss": 0.048, |
| "step": 104000 |
| }, |
| { |
| "epoch": 9.16, |
| "learning_rate": 1.7789999438801282e-06, |
| "loss": 0.0591, |
| "step": 104200 |
| }, |
| { |
| "epoch": 9.18, |
| "learning_rate": 1.7415866958490003e-06, |
| "loss": 0.0445, |
| "step": 104400 |
| }, |
| { |
| "epoch": 9.2, |
| "learning_rate": 1.7041734478178726e-06, |
| "loss": 0.0508, |
| "step": 104600 |
| }, |
| { |
| "epoch": 9.22, |
| "learning_rate": 1.6667601997867448e-06, |
| "loss": 0.0534, |
| "step": 104800 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 1.6293469517556169e-06, |
| "loss": 0.046, |
| "step": 105000 |
| }, |
| { |
| "epoch": 9.25, |
| "learning_rate": 1.591933703724489e-06, |
| "loss": 0.0454, |
| "step": 105200 |
| }, |
| { |
| "epoch": 9.27, |
| "learning_rate": 1.554520455693361e-06, |
| "loss": 0.0601, |
| "step": 105400 |
| }, |
| { |
| "epoch": 9.29, |
| "learning_rate": 1.5171072076622335e-06, |
| "loss": 0.0543, |
| "step": 105600 |
| }, |
| { |
| "epoch": 9.3, |
| "learning_rate": 1.4796939596311056e-06, |
| "loss": 0.0587, |
| "step": 105800 |
| }, |
| { |
| "epoch": 9.32, |
| "learning_rate": 1.4422807115999775e-06, |
| "loss": 0.0526, |
| "step": 106000 |
| }, |
| { |
| "epoch": 9.34, |
| "learning_rate": 1.4048674635688497e-06, |
| "loss": 0.065, |
| "step": 106200 |
| }, |
| { |
| "epoch": 9.36, |
| "learning_rate": 1.3674542155377222e-06, |
| "loss": 0.0531, |
| "step": 106400 |
| }, |
| { |
| "epoch": 9.37, |
| "learning_rate": 1.3300409675065941e-06, |
| "loss": 0.0607, |
| "step": 106600 |
| }, |
| { |
| "epoch": 9.39, |
| "learning_rate": 1.2926277194754663e-06, |
| "loss": 0.0425, |
| "step": 106800 |
| }, |
| { |
| "epoch": 9.41, |
| "learning_rate": 1.2552144714443384e-06, |
| "loss": 0.0541, |
| "step": 107000 |
| }, |
| { |
| "epoch": 9.43, |
| "learning_rate": 1.2178012234132107e-06, |
| "loss": 0.0592, |
| "step": 107200 |
| }, |
| { |
| "epoch": 9.45, |
| "learning_rate": 1.1803879753820829e-06, |
| "loss": 0.0494, |
| "step": 107400 |
| }, |
| { |
| "epoch": 9.46, |
| "learning_rate": 1.142974727350955e-06, |
| "loss": 0.0548, |
| "step": 107600 |
| }, |
| { |
| "epoch": 9.48, |
| "learning_rate": 1.1055614793198273e-06, |
| "loss": 0.0439, |
| "step": 107800 |
| }, |
| { |
| "epoch": 9.5, |
| "learning_rate": 1.0681482312886995e-06, |
| "loss": 0.0543, |
| "step": 108000 |
| }, |
| { |
| "epoch": 9.52, |
| "learning_rate": 1.0307349832575716e-06, |
| "loss": 0.0604, |
| "step": 108200 |
| }, |
| { |
| "epoch": 9.53, |
| "learning_rate": 9.933217352264437e-07, |
| "loss": 0.0546, |
| "step": 108400 |
| }, |
| { |
| "epoch": 9.55, |
| "learning_rate": 9.559084871953159e-07, |
| "loss": 0.0576, |
| "step": 108600 |
| }, |
| { |
| "epoch": 9.57, |
| "learning_rate": 9.184952391641881e-07, |
| "loss": 0.0441, |
| "step": 108800 |
| }, |
| { |
| "epoch": 9.59, |
| "learning_rate": 8.810819911330602e-07, |
| "loss": 0.05, |
| "step": 109000 |
| }, |
| { |
| "epoch": 9.6, |
| "learning_rate": 8.436687431019325e-07, |
| "loss": 0.0523, |
| "step": 109200 |
| }, |
| { |
| "epoch": 9.62, |
| "learning_rate": 8.062554950708046e-07, |
| "loss": 0.053, |
| "step": 109400 |
| }, |
| { |
| "epoch": 9.64, |
| "learning_rate": 7.688422470396768e-07, |
| "loss": 0.0456, |
| "step": 109600 |
| }, |
| { |
| "epoch": 9.66, |
| "learning_rate": 7.314289990085489e-07, |
| "loss": 0.0508, |
| "step": 109800 |
| }, |
| { |
| "epoch": 9.67, |
| "learning_rate": 6.940157509774212e-07, |
| "loss": 0.0415, |
| "step": 110000 |
| } |
| ], |
| "max_steps": 113710, |
| "num_train_epochs": 10, |
| "total_flos": 2.6986684097812992e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|