| { |
| "best_metric": 86.3506916192026, |
| "best_model_checkpoint": "outputs/bitfit/t5-base/mnli/checkpoint-32800", |
| "epoch": 3.0, |
| "global_step": 36816, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "eval_accuracy": 84.0520748576078, |
| "eval_average_metrics": 84.0520748576078, |
| "eval_loss": 0.19396202266216278, |
| "eval_runtime": 73.5315, |
| "eval_samples_per_second": 133.711, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_accuracy": 85.08950366151342, |
| "eval_average_metrics": 85.08950366151342, |
| "eval_loss": 0.1736125648021698, |
| "eval_runtime": 77.6953, |
| "eval_samples_per_second": 126.546, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0002959256844850065, |
| "loss": 0.2786, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_accuracy": 85.25223759153783, |
| "eval_average_metrics": 85.25223759153783, |
| "eval_loss": 0.1704595386981964, |
| "eval_runtime": 71.3646, |
| "eval_samples_per_second": 137.771, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_accuracy": 85.120016273393, |
| "eval_average_metrics": 85.120016273393, |
| "eval_loss": 0.17760007083415985, |
| "eval_runtime": 70.572, |
| "eval_samples_per_second": 139.319, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.000291851368970013, |
| "loss": 0.1754, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_accuracy": 85.81163547599675, |
| "eval_average_metrics": 85.81163547599675, |
| "eval_loss": 0.17395834624767303, |
| "eval_runtime": 68.4178, |
| "eval_samples_per_second": 143.705, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_accuracy": 86.04556550040684, |
| "eval_average_metrics": 86.04556550040684, |
| "eval_loss": 0.16720984876155853, |
| "eval_runtime": 74.935, |
| "eval_samples_per_second": 131.207, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_accuracy": 85.85231895850285, |
| "eval_average_metrics": 85.85231895850285, |
| "eval_loss": 0.1686050444841385, |
| "eval_runtime": 65.5334, |
| "eval_samples_per_second": 150.03, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00028777705345501956, |
| "loss": 0.17, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_accuracy": 84.83523189585028, |
| "eval_average_metrics": 84.83523189585028, |
| "eval_loss": 0.17506256699562073, |
| "eval_runtime": 72.658, |
| "eval_samples_per_second": 135.319, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.15, |
| "eval_accuracy": 85.77095199349064, |
| "eval_average_metrics": 85.77095199349064, |
| "eval_loss": 0.16318167746067047, |
| "eval_runtime": 71.0929, |
| "eval_samples_per_second": 138.298, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.000283702737940026, |
| "loss": 0.1593, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_accuracy": 84.98779495524816, |
| "eval_average_metrics": 84.98779495524816, |
| "eval_loss": 0.17500561475753784, |
| "eval_runtime": 76.1036, |
| "eval_samples_per_second": 129.192, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_accuracy": 85.22172497965825, |
| "eval_average_metrics": 85.22172497965825, |
| "eval_loss": 0.16974958777427673, |
| "eval_runtime": 73.5318, |
| "eval_samples_per_second": 133.711, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_accuracy": 84.93694060211554, |
| "eval_average_metrics": 84.93694060211554, |
| "eval_loss": 0.16803883016109467, |
| "eval_runtime": 71.8068, |
| "eval_samples_per_second": 136.923, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00027962842242503253, |
| "loss": 0.1626, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_accuracy": 85.64890154597234, |
| "eval_average_metrics": 85.64890154597234, |
| "eval_loss": 0.16620652377605438, |
| "eval_runtime": 70.4457, |
| "eval_samples_per_second": 139.569, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.23, |
| "eval_accuracy": 85.17087062652563, |
| "eval_average_metrics": 85.17087062652563, |
| "eval_loss": 0.16685815155506134, |
| "eval_runtime": 70.323, |
| "eval_samples_per_second": 139.812, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0002755541069100391, |
| "loss": 0.1637, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_accuracy": 85.08950366151342, |
| "eval_average_metrics": 85.08950366151342, |
| "eval_loss": 0.1686829775571823, |
| "eval_runtime": 68.4934, |
| "eval_samples_per_second": 143.547, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_accuracy": 84.67249796582588, |
| "eval_average_metrics": 84.67249796582588, |
| "eval_loss": 0.1832115650177002, |
| "eval_runtime": 69.9104, |
| "eval_samples_per_second": 140.637, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.28, |
| "eval_accuracy": 85.74043938161107, |
| "eval_average_metrics": 85.74043938161107, |
| "eval_loss": 0.15809670090675354, |
| "eval_runtime": 71.8214, |
| "eval_samples_per_second": 136.895, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0002714797913950456, |
| "loss": 0.1611, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.29, |
| "eval_accuracy": 85.82180634662328, |
| "eval_average_metrics": 85.82180634662328, |
| "eval_loss": 0.16679300367832184, |
| "eval_runtime": 68.4877, |
| "eval_samples_per_second": 143.559, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.31, |
| "eval_accuracy": 85.69975589910497, |
| "eval_average_metrics": 85.69975589910497, |
| "eval_loss": 0.1635247766971588, |
| "eval_runtime": 70.3174, |
| "eval_samples_per_second": 139.823, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0002674054758800521, |
| "loss": 0.1546, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.33, |
| "eval_accuracy": 85.8319772172498, |
| "eval_average_metrics": 85.8319772172498, |
| "eval_loss": 0.16676998138427734, |
| "eval_runtime": 73.2289, |
| "eval_samples_per_second": 134.264, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.34, |
| "eval_accuracy": 85.50650935720098, |
| "eval_average_metrics": 85.50650935720098, |
| "eval_loss": 0.17212657630443573, |
| "eval_runtime": 74.584, |
| "eval_samples_per_second": 131.824, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.36, |
| "eval_accuracy": 85.54719283970708, |
| "eval_average_metrics": 85.54719283970708, |
| "eval_loss": 0.17162065207958221, |
| "eval_runtime": 69.8754, |
| "eval_samples_per_second": 140.708, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00026333116036505864, |
| "loss": 0.1531, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_accuracy": 85.96419853539463, |
| "eval_average_metrics": 85.96419853539463, |
| "eval_loss": 0.16573481261730194, |
| "eval_runtime": 67.7552, |
| "eval_samples_per_second": 145.111, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_accuracy": 85.78112286411717, |
| "eval_average_metrics": 85.78112286411717, |
| "eval_loss": 0.16396570205688477, |
| "eval_runtime": 63.3281, |
| "eval_samples_per_second": 155.255, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.0002592568448500652, |
| "loss": 0.1566, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.41, |
| "eval_accuracy": 85.73026851098454, |
| "eval_average_metrics": 85.73026851098454, |
| "eval_loss": 0.16679789125919342, |
| "eval_runtime": 68.8228, |
| "eval_samples_per_second": 142.86, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_accuracy": 85.68958502847845, |
| "eval_average_metrics": 85.68958502847845, |
| "eval_loss": 0.16058295965194702, |
| "eval_runtime": 66.5592, |
| "eval_samples_per_second": 147.718, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.44, |
| "eval_accuracy": 85.66924328722538, |
| "eval_average_metrics": 85.66924328722538, |
| "eval_loss": 0.1740991473197937, |
| "eval_runtime": 66.4465, |
| "eval_samples_per_second": 147.969, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00025518252933507166, |
| "loss": 0.1514, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.46, |
| "eval_accuracy": 84.66232709519936, |
| "eval_average_metrics": 84.66232709519936, |
| "eval_loss": 0.18576639890670776, |
| "eval_runtime": 66.3297, |
| "eval_samples_per_second": 148.229, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.47, |
| "eval_accuracy": 85.88283157038242, |
| "eval_average_metrics": 85.88283157038242, |
| "eval_loss": 0.16450409591197968, |
| "eval_runtime": 64.7793, |
| "eval_samples_per_second": 151.777, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0002511082138200782, |
| "loss": 0.1531, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.49, |
| "eval_accuracy": 85.13018714401953, |
| "eval_average_metrics": 85.13018714401953, |
| "eval_loss": 0.17607340216636658, |
| "eval_runtime": 63.5814, |
| "eval_samples_per_second": 154.637, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_accuracy": 85.35394629780309, |
| "eval_average_metrics": 85.35394629780309, |
| "eval_loss": 0.16797170042991638, |
| "eval_runtime": 63.9708, |
| "eval_samples_per_second": 153.695, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.52, |
| "eval_accuracy": 85.75061025223759, |
| "eval_average_metrics": 85.75061025223759, |
| "eval_loss": 0.16036862134933472, |
| "eval_runtime": 64.9464, |
| "eval_samples_per_second": 151.386, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00024703389830508474, |
| "loss": 0.1544, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.54, |
| "eval_accuracy": 85.64890154597234, |
| "eval_average_metrics": 85.64890154597234, |
| "eval_loss": 0.16572105884552002, |
| "eval_runtime": 67.1736, |
| "eval_samples_per_second": 146.367, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.55, |
| "eval_accuracy": 85.40480065093573, |
| "eval_average_metrics": 85.40480065093573, |
| "eval_loss": 0.16141638159751892, |
| "eval_runtime": 67.3069, |
| "eval_samples_per_second": 146.077, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00024295958279009125, |
| "loss": 0.1549, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.57, |
| "eval_accuracy": 85.84214808787632, |
| "eval_average_metrics": 85.84214808787632, |
| "eval_loss": 0.1581791192293167, |
| "eval_runtime": 70.2525, |
| "eval_samples_per_second": 139.952, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_accuracy": 85.78112286411717, |
| "eval_average_metrics": 85.78112286411717, |
| "eval_loss": 0.1584727168083191, |
| "eval_runtime": 70.5829, |
| "eval_samples_per_second": 139.297, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_accuracy": 85.84214808787632, |
| "eval_average_metrics": 85.84214808787632, |
| "eval_loss": 0.16103526949882507, |
| "eval_runtime": 66.5529, |
| "eval_samples_per_second": 147.732, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00023888526727509777, |
| "loss": 0.1572, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_accuracy": 86.00488201790073, |
| "eval_average_metrics": 86.00488201790073, |
| "eval_loss": 0.17075441777706146, |
| "eval_runtime": 61.313, |
| "eval_samples_per_second": 160.358, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_accuracy": 85.89300244100895, |
| "eval_average_metrics": 85.89300244100895, |
| "eval_loss": 0.1631649136543274, |
| "eval_runtime": 70.2006, |
| "eval_samples_per_second": 140.056, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0002348109517601043, |
| "loss": 0.1552, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.65, |
| "eval_accuracy": 86.00488201790073, |
| "eval_average_metrics": 86.00488201790073, |
| "eval_loss": 0.15884214639663696, |
| "eval_runtime": 70.0105, |
| "eval_samples_per_second": 140.436, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_accuracy": 85.42514239218877, |
| "eval_average_metrics": 85.42514239218877, |
| "eval_loss": 0.16717489063739777, |
| "eval_runtime": 67.9572, |
| "eval_samples_per_second": 144.679, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.68, |
| "eval_accuracy": 85.66924328722538, |
| "eval_average_metrics": 85.66924328722538, |
| "eval_loss": 0.16362008452415466, |
| "eval_runtime": 71.9947, |
| "eval_samples_per_second": 136.566, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00023073663624511082, |
| "loss": 0.1518, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.7, |
| "eval_accuracy": 85.49633848657446, |
| "eval_average_metrics": 85.49633848657446, |
| "eval_loss": 0.16621538996696472, |
| "eval_runtime": 70.1174, |
| "eval_samples_per_second": 140.222, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.72, |
| "eval_accuracy": 85.98454027664768, |
| "eval_average_metrics": 85.98454027664768, |
| "eval_loss": 0.16499604284763336, |
| "eval_runtime": 68.6629, |
| "eval_samples_per_second": 143.192, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.0002266623207301173, |
| "loss": 0.1514, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.73, |
| "eval_accuracy": 86.09641985353946, |
| "eval_average_metrics": 86.09641985353946, |
| "eval_loss": 0.1623799055814743, |
| "eval_runtime": 70.2156, |
| "eval_samples_per_second": 140.026, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.75, |
| "eval_accuracy": 85.54719283970708, |
| "eval_average_metrics": 85.54719283970708, |
| "eval_loss": 0.1693897545337677, |
| "eval_runtime": 64.4752, |
| "eval_samples_per_second": 152.493, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_accuracy": 85.15052888527258, |
| "eval_average_metrics": 85.15052888527258, |
| "eval_loss": 0.16912253201007843, |
| "eval_runtime": 71.6316, |
| "eval_samples_per_second": 137.258, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00022258800521512384, |
| "loss": 0.1492, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.78, |
| "eval_accuracy": 85.81163547599675, |
| "eval_average_metrics": 85.81163547599675, |
| "eval_loss": 0.16445724666118622, |
| "eval_runtime": 70.0896, |
| "eval_samples_per_second": 140.278, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_accuracy": 85.51668022782751, |
| "eval_average_metrics": 85.51668022782751, |
| "eval_loss": 0.171467587351799, |
| "eval_runtime": 67.0912, |
| "eval_samples_per_second": 146.547, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00021851368970013035, |
| "loss": 0.1465, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_accuracy": 85.76078112286412, |
| "eval_average_metrics": 85.76078112286412, |
| "eval_loss": 0.16485248506069183, |
| "eval_runtime": 71.6667, |
| "eval_samples_per_second": 137.191, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_accuracy": 85.54719283970708, |
| "eval_average_metrics": 85.54719283970708, |
| "eval_loss": 0.16628311574459076, |
| "eval_runtime": 69.3952, |
| "eval_samples_per_second": 141.681, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.85, |
| "eval_accuracy": 85.72009764035802, |
| "eval_average_metrics": 85.72009764035802, |
| "eval_loss": 0.1626047044992447, |
| "eval_runtime": 63.0097, |
| "eval_samples_per_second": 156.039, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0002144393741851369, |
| "loss": 0.1478, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.86, |
| "eval_accuracy": 85.64890154597234, |
| "eval_average_metrics": 85.64890154597234, |
| "eval_loss": 0.16279704868793488, |
| "eval_runtime": 72.6628, |
| "eval_samples_per_second": 135.31, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.88, |
| "eval_accuracy": 85.80146460537021, |
| "eval_average_metrics": 85.80146460537021, |
| "eval_loss": 0.1637255698442459, |
| "eval_runtime": 71.5577, |
| "eval_samples_per_second": 137.4, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0002103650586701434, |
| "loss": 0.1509, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_accuracy": 85.79129373474369, |
| "eval_average_metrics": 85.79129373474369, |
| "eval_loss": 0.16114714741706848, |
| "eval_runtime": 72.3627, |
| "eval_samples_per_second": 135.871, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.91, |
| "eval_accuracy": 85.8319772172498, |
| "eval_average_metrics": 85.8319772172498, |
| "eval_loss": 0.15985067188739777, |
| "eval_runtime": 68.9168, |
| "eval_samples_per_second": 142.665, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.93, |
| "eval_accuracy": 85.40480065093573, |
| "eval_average_metrics": 85.40480065093573, |
| "eval_loss": 0.17174053192138672, |
| "eval_runtime": 68.3249, |
| "eval_samples_per_second": 143.901, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00020629074315514992, |
| "loss": 0.15, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.95, |
| "eval_accuracy": 85.97436940602115, |
| "eval_average_metrics": 85.97436940602115, |
| "eval_loss": 0.15932144224643707, |
| "eval_runtime": 68.7187, |
| "eval_samples_per_second": 143.076, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.96, |
| "eval_accuracy": 85.59804719283972, |
| "eval_average_metrics": 85.59804719283972, |
| "eval_loss": 0.16465091705322266, |
| "eval_runtime": 68.0374, |
| "eval_samples_per_second": 144.509, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00020221642764015643, |
| "loss": 0.1514, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.98, |
| "eval_accuracy": 85.88283157038242, |
| "eval_average_metrics": 85.88283157038242, |
| "eval_loss": 0.15999911725521088, |
| "eval_runtime": 67.678, |
| "eval_samples_per_second": 145.276, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_accuracy": 86.0353946297803, |
| "eval_average_metrics": 86.0353946297803, |
| "eval_loss": 0.16025537252426147, |
| "eval_runtime": 63.6019, |
| "eval_samples_per_second": 154.587, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.01, |
| "eval_accuracy": 85.8726606997559, |
| "eval_average_metrics": 85.8726606997559, |
| "eval_loss": 0.1621241718530655, |
| "eval_runtime": 68.3301, |
| "eval_samples_per_second": 143.89, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00019814211212516294, |
| "loss": 0.147, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.03, |
| "eval_accuracy": 85.76078112286412, |
| "eval_average_metrics": 85.76078112286412, |
| "eval_loss": 0.17349866032600403, |
| "eval_runtime": 66.9259, |
| "eval_samples_per_second": 146.909, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.04, |
| "eval_accuracy": 85.78112286411717, |
| "eval_average_metrics": 85.78112286411717, |
| "eval_loss": 0.1615545153617859, |
| "eval_runtime": 69.1346, |
| "eval_samples_per_second": 142.215, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00019406779661016945, |
| "loss": 0.143, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.06, |
| "eval_accuracy": 85.84214808787632, |
| "eval_average_metrics": 85.84214808787632, |
| "eval_loss": 0.1661369502544403, |
| "eval_runtime": 62.8833, |
| "eval_samples_per_second": 156.353, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.08, |
| "eval_accuracy": 86.06590724165989, |
| "eval_average_metrics": 86.06590724165989, |
| "eval_loss": 0.16263821721076965, |
| "eval_runtime": 68.8917, |
| "eval_samples_per_second": 142.717, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.09, |
| "eval_accuracy": 85.69975589910497, |
| "eval_average_metrics": 85.69975589910497, |
| "eval_loss": 0.1634710133075714, |
| "eval_runtime": 62.7526, |
| "eval_samples_per_second": 156.679, |
| "step": 13400 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.000189993481095176, |
| "loss": 0.1444, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.11, |
| "eval_accuracy": 85.78112286411717, |
| "eval_average_metrics": 85.78112286411717, |
| "eval_loss": 0.16405758261680603, |
| "eval_runtime": 66.2833, |
| "eval_samples_per_second": 148.333, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.12, |
| "eval_accuracy": 85.9540276647681, |
| "eval_average_metrics": 85.9540276647681, |
| "eval_loss": 0.16064594686031342, |
| "eval_runtime": 63.0321, |
| "eval_samples_per_second": 155.984, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0001859191655801825, |
| "loss": 0.1466, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.14, |
| "eval_accuracy": 86.31000813669651, |
| "eval_average_metrics": 86.31000813669651, |
| "eval_loss": 0.15932226181030273, |
| "eval_runtime": 66.0369, |
| "eval_samples_per_second": 148.886, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.16, |
| "eval_accuracy": 85.72009764035802, |
| "eval_average_metrics": 85.72009764035802, |
| "eval_loss": 0.16476964950561523, |
| "eval_runtime": 67.6592, |
| "eval_samples_per_second": 145.316, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.17, |
| "eval_accuracy": 85.36411716842962, |
| "eval_average_metrics": 85.36411716842962, |
| "eval_loss": 0.17415712773799896, |
| "eval_runtime": 69.9268, |
| "eval_samples_per_second": 140.604, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00018184485006518905, |
| "loss": 0.1493, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_accuracy": 85.65907241659886, |
| "eval_average_metrics": 85.65907241659886, |
| "eval_loss": 0.1634403020143509, |
| "eval_runtime": 70.839, |
| "eval_samples_per_second": 138.794, |
| "step": 14600 |
| }, |
| { |
| "epoch": 1.21, |
| "eval_accuracy": 85.94385679414158, |
| "eval_average_metrics": 85.94385679414158, |
| "eval_loss": 0.16072088479995728, |
| "eval_runtime": 67.4524, |
| "eval_samples_per_second": 145.762, |
| "step": 14800 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00017777053455019556, |
| "loss": 0.1453, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.22, |
| "eval_accuracy": 85.97436940602115, |
| "eval_average_metrics": 85.97436940602115, |
| "eval_loss": 0.16354931890964508, |
| "eval_runtime": 71.4854, |
| "eval_samples_per_second": 137.539, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.24, |
| "eval_accuracy": 86.04556550040684, |
| "eval_average_metrics": 86.04556550040684, |
| "eval_loss": 0.16333648562431335, |
| "eval_runtime": 68.0001, |
| "eval_samples_per_second": 144.588, |
| "step": 15200 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_accuracy": 86.10659072416598, |
| "eval_average_metrics": 86.10659072416598, |
| "eval_loss": 0.1655624806880951, |
| "eval_runtime": 70.1652, |
| "eval_samples_per_second": 140.126, |
| "step": 15400 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00017369621903520204, |
| "loss": 0.1409, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.27, |
| "eval_accuracy": 86.08624898291293, |
| "eval_average_metrics": 86.08624898291293, |
| "eval_loss": 0.16712406277656555, |
| "eval_runtime": 70.4214, |
| "eval_samples_per_second": 139.617, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.29, |
| "eval_accuracy": 85.65907241659886, |
| "eval_average_metrics": 85.65907241659886, |
| "eval_loss": 0.16410161554813385, |
| "eval_runtime": 69.5518, |
| "eval_samples_per_second": 141.362, |
| "step": 15800 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00016962190352020858, |
| "loss": 0.144, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.3, |
| "eval_accuracy": 86.0353946297803, |
| "eval_average_metrics": 86.0353946297803, |
| "eval_loss": 0.1600012332201004, |
| "eval_runtime": 69.4158, |
| "eval_samples_per_second": 141.639, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.32, |
| "eval_accuracy": 85.9947111472742, |
| "eval_average_metrics": 85.9947111472742, |
| "eval_loss": 0.1664758175611496, |
| "eval_runtime": 70.4437, |
| "eval_samples_per_second": 139.572, |
| "step": 16200 |
| }, |
| { |
| "epoch": 1.34, |
| "eval_accuracy": 86.10659072416598, |
| "eval_average_metrics": 86.10659072416598, |
| "eval_loss": 0.16372230648994446, |
| "eval_runtime": 70.0549, |
| "eval_samples_per_second": 140.347, |
| "step": 16400 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.0001655475880052151, |
| "loss": 0.142, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.35, |
| "eval_accuracy": 86.01505288852725, |
| "eval_average_metrics": 86.01505288852725, |
| "eval_loss": 0.16394633054733276, |
| "eval_runtime": 73.1757, |
| "eval_samples_per_second": 134.362, |
| "step": 16600 |
| }, |
| { |
| "epoch": 1.37, |
| "eval_accuracy": 86.19812855980472, |
| "eval_average_metrics": 86.19812855980472, |
| "eval_loss": 0.16236965358257294, |
| "eval_runtime": 67.0918, |
| "eval_samples_per_second": 146.545, |
| "step": 16800 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00016147327249022163, |
| "loss": 0.1469, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.39, |
| "eval_accuracy": 86.06590724165989, |
| "eval_average_metrics": 86.06590724165989, |
| "eval_loss": 0.15920616686344147, |
| "eval_runtime": 73.7251, |
| "eval_samples_per_second": 133.36, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.4, |
| "eval_accuracy": 86.31000813669651, |
| "eval_average_metrics": 86.31000813669651, |
| "eval_loss": 0.16292713582515717, |
| "eval_runtime": 70.7554, |
| "eval_samples_per_second": 138.958, |
| "step": 17200 |
| }, |
| { |
| "epoch": 1.42, |
| "eval_accuracy": 85.86248982912937, |
| "eval_average_metrics": 85.86248982912937, |
| "eval_loss": 0.16360752284526825, |
| "eval_runtime": 72.9243, |
| "eval_samples_per_second": 134.825, |
| "step": 17400 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00015739895697522815, |
| "loss": 0.145, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.43, |
| "eval_accuracy": 85.88283157038242, |
| "eval_average_metrics": 85.88283157038242, |
| "eval_loss": 0.16182997822761536, |
| "eval_runtime": 71.4174, |
| "eval_samples_per_second": 137.67, |
| "step": 17600 |
| }, |
| { |
| "epoch": 1.45, |
| "eval_accuracy": 85.79129373474369, |
| "eval_average_metrics": 85.79129373474369, |
| "eval_loss": 0.16671514511108398, |
| "eval_runtime": 72.782, |
| "eval_samples_per_second": 135.088, |
| "step": 17800 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.00015332464146023469, |
| "loss": 0.1416, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.47, |
| "eval_accuracy": 85.79129373474369, |
| "eval_average_metrics": 85.79129373474369, |
| "eval_loss": 0.16370686888694763, |
| "eval_runtime": 69.6237, |
| "eval_samples_per_second": 141.216, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.48, |
| "eval_accuracy": 85.77095199349064, |
| "eval_average_metrics": 85.77095199349064, |
| "eval_loss": 0.15921832621097565, |
| "eval_runtime": 69.2043, |
| "eval_samples_per_second": 142.072, |
| "step": 18200 |
| }, |
| { |
| "epoch": 1.5, |
| "eval_accuracy": 85.82180634662328, |
| "eval_average_metrics": 85.82180634662328, |
| "eval_loss": 0.1640625149011612, |
| "eval_runtime": 68.8973, |
| "eval_samples_per_second": 142.705, |
| "step": 18400 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.00014925032594524117, |
| "loss": 0.1453, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.52, |
| "eval_accuracy": 85.36411716842962, |
| "eval_average_metrics": 85.36411716842962, |
| "eval_loss": 0.1784326434135437, |
| "eval_runtime": 73.5853, |
| "eval_samples_per_second": 133.614, |
| "step": 18600 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_accuracy": 85.89300244100895, |
| "eval_average_metrics": 85.89300244100895, |
| "eval_loss": 0.16068434715270996, |
| "eval_runtime": 69.0604, |
| "eval_samples_per_second": 142.368, |
| "step": 18800 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.0001451760104302477, |
| "loss": 0.1414, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.55, |
| "eval_accuracy": 85.75061025223759, |
| "eval_average_metrics": 85.75061025223759, |
| "eval_loss": 0.164332315325737, |
| "eval_runtime": 75.2094, |
| "eval_samples_per_second": 130.728, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.56, |
| "eval_accuracy": 85.66924328722538, |
| "eval_average_metrics": 85.66924328722538, |
| "eval_loss": 0.15945520997047424, |
| "eval_runtime": 70.4879, |
| "eval_samples_per_second": 139.485, |
| "step": 19200 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_accuracy": 85.74043938161107, |
| "eval_average_metrics": 85.74043938161107, |
| "eval_loss": 0.15915806591510773, |
| "eval_runtime": 72.0792, |
| "eval_samples_per_second": 136.405, |
| "step": 19400 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 0.00014110169491525422, |
| "loss": 0.1463, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_accuracy": 86.10659072416598, |
| "eval_average_metrics": 86.10659072416598, |
| "eval_loss": 0.16554424166679382, |
| "eval_runtime": 69.3189, |
| "eval_samples_per_second": 141.837, |
| "step": 19600 |
| }, |
| { |
| "epoch": 1.61, |
| "eval_accuracy": 85.8726606997559, |
| "eval_average_metrics": 85.8726606997559, |
| "eval_loss": 0.1639343500137329, |
| "eval_runtime": 70.0392, |
| "eval_samples_per_second": 140.378, |
| "step": 19800 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 0.00013702737940026073, |
| "loss": 0.1435, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.63, |
| "eval_accuracy": 85.79129373474369, |
| "eval_average_metrics": 85.79129373474369, |
| "eval_loss": 0.1651633232831955, |
| "eval_runtime": 72.4148, |
| "eval_samples_per_second": 135.773, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.65, |
| "eval_accuracy": 85.90317331163547, |
| "eval_average_metrics": 85.90317331163547, |
| "eval_loss": 0.163535937666893, |
| "eval_runtime": 73.7758, |
| "eval_samples_per_second": 133.269, |
| "step": 20200 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_accuracy": 85.81163547599675, |
| "eval_average_metrics": 85.81163547599675, |
| "eval_loss": 0.16132992506027222, |
| "eval_runtime": 74.1683, |
| "eval_samples_per_second": 132.563, |
| "step": 20400 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 0.00013295306388526727, |
| "loss": 0.1393, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.68, |
| "eval_accuracy": 85.86248982912937, |
| "eval_average_metrics": 85.86248982912937, |
| "eval_loss": 0.16424906253814697, |
| "eval_runtime": 75.4388, |
| "eval_samples_per_second": 130.331, |
| "step": 20600 |
| }, |
| { |
| "epoch": 1.69, |
| "eval_accuracy": 86.20829943043124, |
| "eval_average_metrics": 86.20829943043124, |
| "eval_loss": 0.16280879080295563, |
| "eval_runtime": 73.6216, |
| "eval_samples_per_second": 133.548, |
| "step": 20800 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 0.00012887874837027379, |
| "loss": 0.1476, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.71, |
| "eval_accuracy": 86.32017900732303, |
| "eval_average_metrics": 86.32017900732303, |
| "eval_loss": 0.1631232500076294, |
| "eval_runtime": 75.2489, |
| "eval_samples_per_second": 130.66, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.73, |
| "eval_accuracy": 85.97436940602115, |
| "eval_average_metrics": 85.97436940602115, |
| "eval_loss": 0.16299067437648773, |
| "eval_runtime": 66.4642, |
| "eval_samples_per_second": 147.929, |
| "step": 21200 |
| }, |
| { |
| "epoch": 1.74, |
| "eval_accuracy": 86.14727420667208, |
| "eval_average_metrics": 86.14727420667208, |
| "eval_loss": 0.16605544090270996, |
| "eval_runtime": 72.8565, |
| "eval_samples_per_second": 134.95, |
| "step": 21400 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 0.0001248044328552803, |
| "loss": 0.1434, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.76, |
| "eval_accuracy": 85.73026851098454, |
| "eval_average_metrics": 85.73026851098454, |
| "eval_loss": 0.16752640902996063, |
| "eval_runtime": 73.5899, |
| "eval_samples_per_second": 133.605, |
| "step": 21600 |
| }, |
| { |
| "epoch": 1.78, |
| "eval_accuracy": 86.05573637103336, |
| "eval_average_metrics": 86.05573637103336, |
| "eval_loss": 0.1640099287033081, |
| "eval_runtime": 70.9879, |
| "eval_samples_per_second": 138.503, |
| "step": 21800 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 0.00012073011734028682, |
| "loss": 0.1425, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.79, |
| "eval_accuracy": 86.02522375915377, |
| "eval_average_metrics": 86.02522375915377, |
| "eval_loss": 0.1621551811695099, |
| "eval_runtime": 67.2101, |
| "eval_samples_per_second": 146.288, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.81, |
| "eval_accuracy": 86.01505288852725, |
| "eval_average_metrics": 86.01505288852725, |
| "eval_loss": 0.1614847183227539, |
| "eval_runtime": 68.053, |
| "eval_samples_per_second": 144.476, |
| "step": 22200 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_accuracy": 85.84214808787632, |
| "eval_average_metrics": 85.84214808787632, |
| "eval_loss": 0.1623518317937851, |
| "eval_runtime": 67.5192, |
| "eval_samples_per_second": 145.618, |
| "step": 22400 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 0.00011665580182529335, |
| "loss": 0.1441, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.84, |
| "eval_accuracy": 86.06590724165989, |
| "eval_average_metrics": 86.06590724165989, |
| "eval_loss": 0.1657322645187378, |
| "eval_runtime": 65.1547, |
| "eval_samples_per_second": 150.902, |
| "step": 22600 |
| }, |
| { |
| "epoch": 1.86, |
| "eval_accuracy": 86.00488201790073, |
| "eval_average_metrics": 86.00488201790073, |
| "eval_loss": 0.16235147416591644, |
| "eval_runtime": 57.9601, |
| "eval_samples_per_second": 169.634, |
| "step": 22800 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 0.00011258148631029986, |
| "loss": 0.1391, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.87, |
| "eval_accuracy": 85.82180634662328, |
| "eval_average_metrics": 85.82180634662328, |
| "eval_loss": 0.15935710072517395, |
| "eval_runtime": 57.3417, |
| "eval_samples_per_second": 171.463, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.89, |
| "eval_accuracy": 85.81163547599675, |
| "eval_average_metrics": 85.81163547599675, |
| "eval_loss": 0.1635563224554062, |
| "eval_runtime": 56.1817, |
| "eval_samples_per_second": 175.003, |
| "step": 23200 |
| }, |
| { |
| "epoch": 1.91, |
| "eval_accuracy": 85.7099267697315, |
| "eval_average_metrics": 85.7099267697315, |
| "eval_loss": 0.16560596227645874, |
| "eval_runtime": 59.2499, |
| "eval_samples_per_second": 165.941, |
| "step": 23400 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 0.00010850717079530637, |
| "loss": 0.1382, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_accuracy": 86.02522375915377, |
| "eval_average_metrics": 86.02522375915377, |
| "eval_loss": 0.1604122817516327, |
| "eval_runtime": 59.8309, |
| "eval_samples_per_second": 164.33, |
| "step": 23600 |
| }, |
| { |
| "epoch": 1.94, |
| "eval_accuracy": 85.80146460537021, |
| "eval_average_metrics": 85.80146460537021, |
| "eval_loss": 0.16524049639701843, |
| "eval_runtime": 61.3749, |
| "eval_samples_per_second": 160.196, |
| "step": 23800 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 0.0001044328552803129, |
| "loss": 0.1379, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.96, |
| "eval_accuracy": 85.73026851098454, |
| "eval_average_metrics": 85.73026851098454, |
| "eval_loss": 0.16642265021800995, |
| "eval_runtime": 61.5426, |
| "eval_samples_per_second": 159.759, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_accuracy": 86.23881204231083, |
| "eval_average_metrics": 86.23881204231083, |
| "eval_loss": 0.1592371165752411, |
| "eval_runtime": 61.4113, |
| "eval_samples_per_second": 160.101, |
| "step": 24200 |
| }, |
| { |
| "epoch": 1.99, |
| "eval_accuracy": 86.23881204231083, |
| "eval_average_metrics": 86.23881204231083, |
| "eval_loss": 0.16144132614135742, |
| "eval_runtime": 60.7982, |
| "eval_samples_per_second": 161.715, |
| "step": 24400 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 0.00010035853976531943, |
| "loss": 0.1454, |
| "step": 24500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 86.09641985353946, |
| "eval_average_metrics": 86.09641985353946, |
| "eval_loss": 0.1667686253786087, |
| "eval_runtime": 60.7535, |
| "eval_samples_per_second": 161.834, |
| "step": 24600 |
| }, |
| { |
| "epoch": 2.02, |
| "eval_accuracy": 86.06590724165989, |
| "eval_average_metrics": 86.06590724165989, |
| "eval_loss": 0.1680220663547516, |
| "eval_runtime": 58.6093, |
| "eval_samples_per_second": 167.755, |
| "step": 24800 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 9.628422425032592e-05, |
| "loss": 0.138, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.04, |
| "eval_accuracy": 85.84214808787632, |
| "eval_average_metrics": 85.84214808787632, |
| "eval_loss": 0.16406849026679993, |
| "eval_runtime": 56.628, |
| "eval_samples_per_second": 173.624, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.05, |
| "eval_accuracy": 85.89300244100895, |
| "eval_average_metrics": 85.89300244100895, |
| "eval_loss": 0.16717499494552612, |
| "eval_runtime": 55.5415, |
| "eval_samples_per_second": 177.021, |
| "step": 25200 |
| }, |
| { |
| "epoch": 2.07, |
| "eval_accuracy": 86.18795768917819, |
| "eval_average_metrics": 86.18795768917819, |
| "eval_loss": 0.16641969978809357, |
| "eval_runtime": 52.5873, |
| "eval_samples_per_second": 186.965, |
| "step": 25400 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 9.220990873533245e-05, |
| "loss": 0.1356, |
| "step": 25500 |
| }, |
| { |
| "epoch": 2.09, |
| "eval_accuracy": 86.0353946297803, |
| "eval_average_metrics": 86.0353946297803, |
| "eval_loss": 0.16517092287540436, |
| "eval_runtime": 54.8202, |
| "eval_samples_per_second": 179.35, |
| "step": 25600 |
| }, |
| { |
| "epoch": 2.1, |
| "eval_accuracy": 86.20829943043124, |
| "eval_average_metrics": 86.20829943043124, |
| "eval_loss": 0.1606525331735611, |
| "eval_runtime": 58.0473, |
| "eval_samples_per_second": 169.379, |
| "step": 25800 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 8.813559322033898e-05, |
| "loss": 0.138, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.12, |
| "eval_accuracy": 85.75061025223759, |
| "eval_average_metrics": 85.75061025223759, |
| "eval_loss": 0.1638970524072647, |
| "eval_runtime": 60.2198, |
| "eval_samples_per_second": 163.268, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.13, |
| "eval_accuracy": 85.913344182262, |
| "eval_average_metrics": 85.913344182262, |
| "eval_loss": 0.16841016709804535, |
| "eval_runtime": 59.4106, |
| "eval_samples_per_second": 165.492, |
| "step": 26200 |
| }, |
| { |
| "epoch": 2.15, |
| "eval_accuracy": 86.11676159479251, |
| "eval_average_metrics": 86.11676159479251, |
| "eval_loss": 0.16477040946483612, |
| "eval_runtime": 59.3558, |
| "eval_samples_per_second": 165.645, |
| "step": 26400 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 8.406127770534549e-05, |
| "loss": 0.1315, |
| "step": 26500 |
| }, |
| { |
| "epoch": 2.17, |
| "eval_accuracy": 85.56753458096013, |
| "eval_average_metrics": 85.56753458096013, |
| "eval_loss": 0.17194555699825287, |
| "eval_runtime": 59.1291, |
| "eval_samples_per_second": 166.28, |
| "step": 26600 |
| }, |
| { |
| "epoch": 2.18, |
| "eval_accuracy": 85.94385679414158, |
| "eval_average_metrics": 85.94385679414158, |
| "eval_loss": 0.16446976363658905, |
| "eval_runtime": 46.5169, |
| "eval_samples_per_second": 211.364, |
| "step": 26800 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 7.998696219035201e-05, |
| "loss": 0.1332, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.2, |
| "eval_accuracy": 85.81163547599675, |
| "eval_average_metrics": 85.81163547599675, |
| "eval_loss": 0.1695818454027176, |
| "eval_runtime": 45.2415, |
| "eval_samples_per_second": 217.323, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.22, |
| "eval_accuracy": 85.93368592351506, |
| "eval_average_metrics": 85.93368592351506, |
| "eval_loss": 0.1703195720911026, |
| "eval_runtime": 45.7196, |
| "eval_samples_per_second": 215.05, |
| "step": 27200 |
| }, |
| { |
| "epoch": 2.23, |
| "eval_accuracy": 86.14727420667208, |
| "eval_average_metrics": 86.14727420667208, |
| "eval_loss": 0.1657610833644867, |
| "eval_runtime": 48.3877, |
| "eval_samples_per_second": 203.192, |
| "step": 27400 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 7.591264667535854e-05, |
| "loss": 0.1354, |
| "step": 27500 |
| }, |
| { |
| "epoch": 2.25, |
| "eval_accuracy": 86.09641985353946, |
| "eval_average_metrics": 86.09641985353946, |
| "eval_loss": 0.1658048778772354, |
| "eval_runtime": 49.7336, |
| "eval_samples_per_second": 197.693, |
| "step": 27600 |
| }, |
| { |
| "epoch": 2.27, |
| "eval_accuracy": 85.9947111472742, |
| "eval_average_metrics": 85.9947111472742, |
| "eval_loss": 0.16919544339179993, |
| "eval_runtime": 56.1273, |
| "eval_samples_per_second": 175.173, |
| "step": 27800 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 7.183833116036505e-05, |
| "loss": 0.1311, |
| "step": 28000 |
| }, |
| { |
| "epoch": 2.28, |
| "eval_accuracy": 86.14727420667208, |
| "eval_average_metrics": 86.14727420667208, |
| "eval_loss": 0.16675373911857605, |
| "eval_runtime": 48.3863, |
| "eval_samples_per_second": 203.198, |
| "step": 28000 |
| }, |
| { |
| "epoch": 2.3, |
| "eval_accuracy": 85.73026851098454, |
| "eval_average_metrics": 85.73026851098454, |
| "eval_loss": 0.16785795986652374, |
| "eval_runtime": 49.8131, |
| "eval_samples_per_second": 197.378, |
| "step": 28200 |
| }, |
| { |
| "epoch": 2.31, |
| "eval_accuracy": 85.62855980471929, |
| "eval_average_metrics": 85.62855980471929, |
| "eval_loss": 0.1680869162082672, |
| "eval_runtime": 49.9453, |
| "eval_samples_per_second": 196.855, |
| "step": 28400 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 6.776401564537158e-05, |
| "loss": 0.1351, |
| "step": 28500 |
| }, |
| { |
| "epoch": 2.33, |
| "eval_accuracy": 85.86248982912937, |
| "eval_average_metrics": 85.86248982912937, |
| "eval_loss": 0.16826093196868896, |
| "eval_runtime": 47.9335, |
| "eval_samples_per_second": 205.117, |
| "step": 28600 |
| }, |
| { |
| "epoch": 2.35, |
| "eval_accuracy": 85.84214808787632, |
| "eval_average_metrics": 85.84214808787632, |
| "eval_loss": 0.16651229560375214, |
| "eval_runtime": 49.892, |
| "eval_samples_per_second": 197.066, |
| "step": 28800 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 6.368970013037809e-05, |
| "loss": 0.1347, |
| "step": 29000 |
| }, |
| { |
| "epoch": 2.36, |
| "eval_accuracy": 86.01505288852725, |
| "eval_average_metrics": 86.01505288852725, |
| "eval_loss": 0.16772997379302979, |
| "eval_runtime": 49.8947, |
| "eval_samples_per_second": 197.055, |
| "step": 29000 |
| }, |
| { |
| "epoch": 2.38, |
| "eval_accuracy": 86.2693246541904, |
| "eval_average_metrics": 86.2693246541904, |
| "eval_loss": 0.16580338776111603, |
| "eval_runtime": 50.1072, |
| "eval_samples_per_second": 196.219, |
| "step": 29200 |
| }, |
| { |
| "epoch": 2.4, |
| "eval_accuracy": 86.20829943043124, |
| "eval_average_metrics": 86.20829943043124, |
| "eval_loss": 0.16437767446041107, |
| "eval_runtime": 52.1067, |
| "eval_samples_per_second": 188.69, |
| "step": 29400 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 5.961538461538461e-05, |
| "loss": 0.1308, |
| "step": 29500 |
| }, |
| { |
| "epoch": 2.41, |
| "eval_accuracy": 86.10659072416598, |
| "eval_average_metrics": 86.10659072416598, |
| "eval_loss": 0.16909147799015045, |
| "eval_runtime": 48.4579, |
| "eval_samples_per_second": 202.898, |
| "step": 29600 |
| }, |
| { |
| "epoch": 2.43, |
| "eval_accuracy": 86.07607811228641, |
| "eval_average_metrics": 86.07607811228641, |
| "eval_loss": 0.16544800996780396, |
| "eval_runtime": 53.3838, |
| "eval_samples_per_second": 184.176, |
| "step": 29800 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 5.554106910039113e-05, |
| "loss": 0.1301, |
| "step": 30000 |
| }, |
| { |
| "epoch": 2.44, |
| "eval_accuracy": 86.15744507729862, |
| "eval_average_metrics": 86.15744507729862, |
| "eval_loss": 0.16652615368366241, |
| "eval_runtime": 45.6168, |
| "eval_samples_per_second": 215.535, |
| "step": 30000 |
| }, |
| { |
| "epoch": 2.46, |
| "eval_accuracy": 86.00488201790073, |
| "eval_average_metrics": 86.00488201790073, |
| "eval_loss": 0.16784194111824036, |
| "eval_runtime": 46.0567, |
| "eval_samples_per_second": 213.476, |
| "step": 30200 |
| }, |
| { |
| "epoch": 2.48, |
| "eval_accuracy": 86.12693246541903, |
| "eval_average_metrics": 86.12693246541903, |
| "eval_loss": 0.16726775467395782, |
| "eval_runtime": 53.9156, |
| "eval_samples_per_second": 182.359, |
| "step": 30400 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 5.146675358539765e-05, |
| "loss": 0.1324, |
| "step": 30500 |
| }, |
| { |
| "epoch": 2.49, |
| "eval_accuracy": 86.2693246541904, |
| "eval_average_metrics": 86.2693246541904, |
| "eval_loss": 0.16430824995040894, |
| "eval_runtime": 48.9757, |
| "eval_samples_per_second": 200.753, |
| "step": 30600 |
| }, |
| { |
| "epoch": 2.51, |
| "eval_accuracy": 85.88283157038242, |
| "eval_average_metrics": 85.88283157038242, |
| "eval_loss": 0.16779069602489471, |
| "eval_runtime": 45.747, |
| "eval_samples_per_second": 214.921, |
| "step": 30800 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 4.7392438070404173e-05, |
| "loss": 0.1333, |
| "step": 31000 |
| }, |
| { |
| "epoch": 2.53, |
| "eval_accuracy": 86.12693246541903, |
| "eval_average_metrics": 86.12693246541903, |
| "eval_loss": 0.16358087956905365, |
| "eval_runtime": 45.8832, |
| "eval_samples_per_second": 214.283, |
| "step": 31000 |
| }, |
| { |
| "epoch": 2.54, |
| "eval_accuracy": 86.20829943043124, |
| "eval_average_metrics": 86.20829943043124, |
| "eval_loss": 0.16272908449172974, |
| "eval_runtime": 44.9382, |
| "eval_samples_per_second": 218.789, |
| "step": 31200 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_accuracy": 86.21847030105776, |
| "eval_average_metrics": 86.21847030105776, |
| "eval_loss": 0.16233167052268982, |
| "eval_runtime": 45.3879, |
| "eval_samples_per_second": 216.621, |
| "step": 31400 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 4.3318122555410686e-05, |
| "loss": 0.1366, |
| "step": 31500 |
| }, |
| { |
| "epoch": 2.57, |
| "eval_accuracy": 86.12693246541903, |
| "eval_average_metrics": 86.12693246541903, |
| "eval_loss": 0.16067072749137878, |
| "eval_runtime": 45.3275, |
| "eval_samples_per_second": 216.91, |
| "step": 31600 |
| }, |
| { |
| "epoch": 2.59, |
| "eval_accuracy": 86.00488201790073, |
| "eval_average_metrics": 86.00488201790073, |
| "eval_loss": 0.16438935697078705, |
| "eval_runtime": 45.069, |
| "eval_samples_per_second": 218.154, |
| "step": 31800 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 3.924380704041721e-05, |
| "loss": 0.1401, |
| "step": 32000 |
| }, |
| { |
| "epoch": 2.61, |
| "eval_accuracy": 86.13710333604556, |
| "eval_average_metrics": 86.13710333604556, |
| "eval_loss": 0.15911179780960083, |
| "eval_runtime": 45.1936, |
| "eval_samples_per_second": 217.553, |
| "step": 32000 |
| }, |
| { |
| "epoch": 2.62, |
| "eval_accuracy": 86.0353946297803, |
| "eval_average_metrics": 86.0353946297803, |
| "eval_loss": 0.16360121965408325, |
| "eval_runtime": 46.2972, |
| "eval_samples_per_second": 212.367, |
| "step": 32200 |
| }, |
| { |
| "epoch": 2.64, |
| "eval_accuracy": 86.19812855980472, |
| "eval_average_metrics": 86.19812855980472, |
| "eval_loss": 0.1620582789182663, |
| "eval_runtime": 45.431, |
| "eval_samples_per_second": 216.416, |
| "step": 32400 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 3.5169491525423724e-05, |
| "loss": 0.1343, |
| "step": 32500 |
| }, |
| { |
| "epoch": 2.66, |
| "eval_accuracy": 86.08624898291293, |
| "eval_average_metrics": 86.08624898291293, |
| "eval_loss": 0.16568879783153534, |
| "eval_runtime": 45.3557, |
| "eval_samples_per_second": 216.775, |
| "step": 32600 |
| }, |
| { |
| "epoch": 2.67, |
| "eval_accuracy": 86.3506916192026, |
| "eval_average_metrics": 86.3506916192026, |
| "eval_loss": 0.16203464567661285, |
| "eval_runtime": 44.9934, |
| "eval_samples_per_second": 218.521, |
| "step": 32800 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 3.109517601043025e-05, |
| "loss": 0.1345, |
| "step": 33000 |
| }, |
| { |
| "epoch": 2.69, |
| "eval_accuracy": 86.2286411716843, |
| "eval_average_metrics": 86.2286411716843, |
| "eval_loss": 0.1651608943939209, |
| "eval_runtime": 45.6781, |
| "eval_samples_per_second": 215.246, |
| "step": 33000 |
| }, |
| { |
| "epoch": 2.71, |
| "eval_accuracy": 86.28966639544345, |
| "eval_average_metrics": 86.28966639544345, |
| "eval_loss": 0.16327986121177673, |
| "eval_runtime": 45.4189, |
| "eval_samples_per_second": 216.474, |
| "step": 33200 |
| }, |
| { |
| "epoch": 2.72, |
| "eval_accuracy": 86.32017900732303, |
| "eval_average_metrics": 86.32017900732303, |
| "eval_loss": 0.16431905329227448, |
| "eval_runtime": 44.9451, |
| "eval_samples_per_second": 218.756, |
| "step": 33400 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 2.7020860495436762e-05, |
| "loss": 0.1321, |
| "step": 33500 |
| }, |
| { |
| "epoch": 2.74, |
| "eval_accuracy": 86.12693246541903, |
| "eval_average_metrics": 86.12693246541903, |
| "eval_loss": 0.16271112859249115, |
| "eval_runtime": 45.4697, |
| "eval_samples_per_second": 216.232, |
| "step": 33600 |
| }, |
| { |
| "epoch": 2.75, |
| "eval_accuracy": 86.27949552481692, |
| "eval_average_metrics": 86.27949552481692, |
| "eval_loss": 0.16375945508480072, |
| "eval_runtime": 45.4502, |
| "eval_samples_per_second": 216.325, |
| "step": 33800 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 2.294654498044328e-05, |
| "loss": 0.1348, |
| "step": 34000 |
| }, |
| { |
| "epoch": 2.77, |
| "eval_accuracy": 86.12693246541903, |
| "eval_average_metrics": 86.12693246541903, |
| "eval_loss": 0.16407504677772522, |
| "eval_runtime": 45.1689, |
| "eval_samples_per_second": 217.672, |
| "step": 34000 |
| }, |
| { |
| "epoch": 2.79, |
| "eval_accuracy": 86.2693246541904, |
| "eval_average_metrics": 86.2693246541904, |
| "eval_loss": 0.16450707614421844, |
| "eval_runtime": 45.2377, |
| "eval_samples_per_second": 217.341, |
| "step": 34200 |
| }, |
| { |
| "epoch": 2.8, |
| "eval_accuracy": 86.10659072416598, |
| "eval_average_metrics": 86.10659072416598, |
| "eval_loss": 0.16434065997600555, |
| "eval_runtime": 45.1061, |
| "eval_samples_per_second": 217.975, |
| "step": 34400 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 1.8872229465449803e-05, |
| "loss": 0.1361, |
| "step": 34500 |
| }, |
| { |
| "epoch": 2.82, |
| "eval_accuracy": 86.2286411716843, |
| "eval_average_metrics": 86.2286411716843, |
| "eval_loss": 0.16249413788318634, |
| "eval_runtime": 45.4837, |
| "eval_samples_per_second": 216.165, |
| "step": 34600 |
| }, |
| { |
| "epoch": 2.84, |
| "eval_accuracy": 86.14727420667208, |
| "eval_average_metrics": 86.14727420667208, |
| "eval_loss": 0.1645725518465042, |
| "eval_runtime": 45.3804, |
| "eval_samples_per_second": 216.657, |
| "step": 34800 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.4797913950456322e-05, |
| "loss": 0.1335, |
| "step": 35000 |
| }, |
| { |
| "epoch": 2.85, |
| "eval_accuracy": 86.16761594792514, |
| "eval_average_metrics": 86.16761594792514, |
| "eval_loss": 0.16331711411476135, |
| "eval_runtime": 45.6878, |
| "eval_samples_per_second": 215.2, |
| "step": 35000 |
| }, |
| { |
| "epoch": 2.87, |
| "eval_accuracy": 86.12693246541903, |
| "eval_average_metrics": 86.12693246541903, |
| "eval_loss": 0.16130615770816803, |
| "eval_runtime": 45.0413, |
| "eval_samples_per_second": 218.289, |
| "step": 35200 |
| }, |
| { |
| "epoch": 2.88, |
| "eval_accuracy": 86.25915378356387, |
| "eval_average_metrics": 86.25915378356387, |
| "eval_loss": 0.16118405759334564, |
| "eval_runtime": 45.4606, |
| "eval_samples_per_second": 216.275, |
| "step": 35400 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 1.0723598435462841e-05, |
| "loss": 0.1368, |
| "step": 35500 |
| }, |
| { |
| "epoch": 2.9, |
| "eval_accuracy": 86.20829943043124, |
| "eval_average_metrics": 86.20829943043124, |
| "eval_loss": 0.16242747008800507, |
| "eval_runtime": 45.4511, |
| "eval_samples_per_second": 216.32, |
| "step": 35600 |
| }, |
| { |
| "epoch": 2.92, |
| "eval_accuracy": 86.16761594792514, |
| "eval_average_metrics": 86.16761594792514, |
| "eval_loss": 0.16256776452064514, |
| "eval_runtime": 45.2281, |
| "eval_samples_per_second": 217.387, |
| "step": 35800 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 6.649282920469361e-06, |
| "loss": 0.1323, |
| "step": 36000 |
| }, |
| { |
| "epoch": 2.93, |
| "eval_accuracy": 86.33034987794956, |
| "eval_average_metrics": 86.33034987794956, |
| "eval_loss": 0.1619912087917328, |
| "eval_runtime": 45.4939, |
| "eval_samples_per_second": 216.117, |
| "step": 36000 |
| }, |
| { |
| "epoch": 2.95, |
| "eval_accuracy": 86.20829943043124, |
| "eval_average_metrics": 86.20829943043124, |
| "eval_loss": 0.1618933379650116, |
| "eval_runtime": 45.7568, |
| "eval_samples_per_second": 214.875, |
| "step": 36200 |
| }, |
| { |
| "epoch": 2.97, |
| "eval_accuracy": 86.17778681855167, |
| "eval_average_metrics": 86.17778681855167, |
| "eval_loss": 0.1621612161397934, |
| "eval_runtime": 44.9098, |
| "eval_samples_per_second": 218.928, |
| "step": 36400 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 2.5749674054758798e-06, |
| "loss": 0.1334, |
| "step": 36500 |
| }, |
| { |
| "epoch": 2.98, |
| "eval_accuracy": 86.21847030105776, |
| "eval_average_metrics": 86.21847030105776, |
| "eval_loss": 0.1621207445859909, |
| "eval_runtime": 45.2906, |
| "eval_samples_per_second": 217.087, |
| "step": 36600 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 86.19812855980472, |
| "eval_average_metrics": 86.19812855980472, |
| "eval_loss": 0.16204114258289337, |
| "eval_runtime": 45.2396, |
| "eval_samples_per_second": 217.332, |
| "step": 36800 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 36816, |
| "total_flos": 1.4734111386140467e+17, |
| "train_loss": 0.14615808979732375, |
| "train_runtime": 23668.4692, |
| "train_samples_per_second": 49.775, |
| "train_steps_per_second": 1.555 |
| } |
| ], |
| "max_steps": 36816, |
| "num_train_epochs": 3, |
| "total_flos": 1.4734111386140467e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|