{ "best_global_step": 2184, "best_metric": 0.862, "best_model_checkpoint": "dfine_squirrel/checkpoint-2184", "epoch": 30.0, "eval_steps": 500, "global_step": 2730, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 2.367178440093994, "eval_map": 0.1766, "eval_map_50": 0.23, "eval_map_75": 0.1862, "eval_map_class_0": 0.1766, "eval_map_large": 0.1989, "eval_map_medium": 0.0031, "eval_map_small": 0.0, "eval_mar_1": 0.3422, "eval_mar_10": 0.5248, "eval_mar_100": 0.6224, "eval_mar_100_class_0": 0.6224, "eval_mar_large": 0.651, "eval_mar_medium": 0.3462, "eval_mar_small": 0.0, "eval_runtime": 6.6845, "eval_samples_per_second": 19.149, "eval_steps_per_second": 2.394, "step": 91 }, { "epoch": 2.0, "eval_loss": 1.0650697946548462, "eval_map": 0.7244, "eval_map_50": 0.8127, "eval_map_75": 0.7856, "eval_map_class_0": 0.7244, "eval_map_large": 0.7538, "eval_map_medium": 0.4261, "eval_map_small": 0.0, "eval_mar_1": 0.6901, "eval_mar_10": 0.7888, "eval_mar_100": 0.8596, "eval_mar_100_class_0": 0.8596, "eval_mar_large": 0.885, "eval_mar_medium": 0.6385, "eval_mar_small": 0.0, "eval_runtime": 6.633, "eval_samples_per_second": 19.298, "eval_steps_per_second": 2.412, "step": 182 }, { "epoch": 3.0, "eval_loss": 0.8899274468421936, "eval_map": 0.7577, "eval_map_50": 0.8321, "eval_map_75": 0.7881, "eval_map_class_0": 0.7577, "eval_map_large": 0.7986, "eval_map_medium": 0.4732, "eval_map_small": 0.0, "eval_mar_1": 0.7137, "eval_mar_10": 0.8311, "eval_mar_100": 0.8863, "eval_mar_100_class_0": 0.8863, "eval_mar_large": 0.9109, "eval_mar_medium": 0.6769, "eval_mar_small": 0.0, "eval_runtime": 6.5407, "eval_samples_per_second": 19.57, "eval_steps_per_second": 2.446, "step": 273 }, { "epoch": 4.0, "eval_loss": 0.8310188055038452, "eval_map": 0.7773, "eval_map_50": 0.8432, "eval_map_75": 0.8045, "eval_map_class_0": 0.7773, "eval_map_large": 0.814, "eval_map_medium": 0.586, "eval_map_small": 0.0, "eval_mar_1": 0.7124, "eval_mar_10": 0.841, "eval_mar_100": 0.8894, "eval_mar_100_class_0": 0.8894, "eval_mar_large": 0.9122, "eval_mar_medium": 0.7, "eval_mar_small": 0.0, "eval_runtime": 6.599, "eval_samples_per_second": 19.397, "eval_steps_per_second": 2.425, "step": 364 }, { "epoch": 5.0, "eval_loss": 0.8332623243331909, "eval_map": 0.7792, "eval_map_50": 0.8513, "eval_map_75": 0.8058, "eval_map_class_0": 0.7792, "eval_map_large": 0.8197, "eval_map_medium": 0.6129, "eval_map_small": 0.0, "eval_mar_1": 0.718, "eval_mar_10": 0.8317, "eval_mar_100": 0.9031, "eval_mar_100_class_0": 0.9031, "eval_mar_large": 0.9265, "eval_mar_medium": 0.7077, "eval_mar_small": 0.0, "eval_runtime": 6.61, "eval_samples_per_second": 19.365, "eval_steps_per_second": 2.421, "step": 455 }, { "epoch": 5.4945054945054945, "grad_norm": 45.534889221191406, "learning_rate": 4.086080586080586e-05, "loss": 27.38396484375, "step": 500 }, { "epoch": 6.0, "eval_loss": 0.8361273407936096, "eval_map": 0.8008, "eval_map_50": 0.8684, "eval_map_75": 0.8296, "eval_map_class_0": 0.8008, "eval_map_large": 0.8348, "eval_map_medium": 0.5788, "eval_map_small": 0.0, "eval_mar_1": 0.723, "eval_mar_10": 0.8609, "eval_mar_100": 0.9124, "eval_mar_100_class_0": 0.9124, "eval_mar_large": 0.9367, "eval_mar_medium": 0.7077, "eval_mar_small": 0.0, "eval_runtime": 6.591, "eval_samples_per_second": 19.42, "eval_steps_per_second": 2.428, "step": 546 }, { "epoch": 7.0, "eval_loss": 0.8077548146247864, "eval_map": 0.8, "eval_map_50": 0.8587, "eval_map_75": 0.8191, "eval_map_class_0": 0.8, "eval_map_large": 0.8365, "eval_map_medium": 0.5968, "eval_map_small": 0.0, "eval_mar_1": 0.7224, "eval_mar_10": 0.8739, "eval_mar_100": 0.9211, "eval_mar_100_class_0": 0.9211, "eval_mar_large": 0.9442, "eval_mar_medium": 0.7308, "eval_mar_small": 0.0, "eval_runtime": 6.4826, "eval_samples_per_second": 19.745, "eval_steps_per_second": 2.468, "step": 637 }, { "epoch": 8.0, "eval_loss": 0.8231956958770752, "eval_map": 0.8173, "eval_map_50": 0.8877, "eval_map_75": 0.837, "eval_map_class_0": 0.8173, "eval_map_large": 0.8476, "eval_map_medium": 0.6199, "eval_map_small": 0.0, "eval_mar_1": 0.7205, "eval_mar_10": 0.8677, "eval_mar_100": 0.9161, "eval_mar_100_class_0": 0.9161, "eval_mar_large": 0.9415, "eval_mar_medium": 0.7, "eval_mar_small": 0.0, "eval_runtime": 6.4352, "eval_samples_per_second": 19.891, "eval_steps_per_second": 2.486, "step": 728 }, { "epoch": 9.0, "eval_loss": 0.7746098041534424, "eval_map": 0.8268, "eval_map_50": 0.8863, "eval_map_75": 0.8541, "eval_map_class_0": 0.8268, "eval_map_large": 0.8577, "eval_map_medium": 0.6029, "eval_map_small": 0.0, "eval_mar_1": 0.7211, "eval_mar_10": 0.8702, "eval_mar_100": 0.9224, "eval_mar_100_class_0": 0.9224, "eval_mar_large": 0.951, "eval_mar_medium": 0.6692, "eval_mar_small": 0.0, "eval_runtime": 6.6084, "eval_samples_per_second": 19.369, "eval_steps_per_second": 2.421, "step": 819 }, { "epoch": 10.0, "eval_loss": 0.8223000764846802, "eval_map": 0.8324, "eval_map_50": 0.8875, "eval_map_75": 0.8682, "eval_map_class_0": 0.8324, "eval_map_large": 0.8586, "eval_map_medium": 0.6198, "eval_map_small": 0.0, "eval_mar_1": 0.7311, "eval_mar_10": 0.8801, "eval_mar_100": 0.9193, "eval_mar_100_class_0": 0.9193, "eval_mar_large": 0.9456, "eval_mar_medium": 0.6923, "eval_mar_small": 0.0, "eval_runtime": 6.6591, "eval_samples_per_second": 19.222, "eval_steps_per_second": 2.403, "step": 910 }, { "epoch": 10.989010989010989, "grad_norm": 34.31671142578125, "learning_rate": 3.17032967032967e-05, "loss": 9.0921865234375, "step": 1000 }, { "epoch": 11.0, "eval_loss": 0.7678491473197937, "eval_map": 0.8327, "eval_map_50": 0.8851, "eval_map_75": 0.8689, "eval_map_class_0": 0.8327, "eval_map_large": 0.8634, "eval_map_medium": 0.5991, "eval_map_small": 0.0, "eval_mar_1": 0.7422, "eval_mar_10": 0.8671, "eval_mar_100": 0.9255, "eval_mar_100_class_0": 0.9255, "eval_mar_large": 0.9531, "eval_mar_medium": 0.6846, "eval_mar_small": 0.0, "eval_runtime": 6.6366, "eval_samples_per_second": 19.287, "eval_steps_per_second": 2.411, "step": 1001 }, { "epoch": 12.0, "eval_loss": 0.7850316166877747, "eval_map": 0.8328, "eval_map_50": 0.8888, "eval_map_75": 0.8545, "eval_map_class_0": 0.8328, "eval_map_large": 0.8635, "eval_map_medium": 0.6134, "eval_map_small": 0.0, "eval_mar_1": 0.7366, "eval_mar_10": 0.8752, "eval_mar_100": 0.9335, "eval_mar_100_class_0": 0.9335, "eval_mar_large": 0.9585, "eval_mar_medium": 0.7231, "eval_mar_small": 0.0, "eval_runtime": 6.866, "eval_samples_per_second": 18.643, "eval_steps_per_second": 2.33, "step": 1092 }, { "epoch": 13.0, "eval_loss": 0.8330104351043701, "eval_map": 0.8294, "eval_map_50": 0.8882, "eval_map_75": 0.8485, "eval_map_class_0": 0.8294, "eval_map_large": 0.8585, "eval_map_medium": 0.583, "eval_map_small": 0.0, "eval_mar_1": 0.7416, "eval_mar_10": 0.8689, "eval_mar_100": 0.9267, "eval_mar_100_class_0": 0.9267, "eval_mar_large": 0.9503, "eval_mar_medium": 0.7308, "eval_mar_small": 0.0, "eval_runtime": 6.6667, "eval_samples_per_second": 19.2, "eval_steps_per_second": 2.4, "step": 1183 }, { "epoch": 14.0, "eval_loss": 0.7800494432449341, "eval_map": 0.8399, "eval_map_50": 0.8977, "eval_map_75": 0.8761, "eval_map_class_0": 0.8399, "eval_map_large": 0.87, "eval_map_medium": 0.5989, "eval_map_small": 0.0056, "eval_mar_1": 0.741, "eval_mar_10": 0.8758, "eval_mar_100": 0.9348, "eval_mar_100_class_0": 0.9348, "eval_mar_large": 0.9517, "eval_mar_medium": 0.7692, "eval_mar_small": 0.6, "eval_runtime": 6.5377, "eval_samples_per_second": 19.579, "eval_steps_per_second": 2.447, "step": 1274 }, { "epoch": 15.0, "eval_loss": 0.7594577074050903, "eval_map": 0.8437, "eval_map_50": 0.8993, "eval_map_75": 0.8771, "eval_map_class_0": 0.8437, "eval_map_large": 0.8731, "eval_map_medium": 0.6092, "eval_map_small": 0.0061, "eval_mar_1": 0.7509, "eval_mar_10": 0.8925, "eval_mar_100": 0.9466, "eval_mar_100_class_0": 0.9466, "eval_mar_large": 0.9633, "eval_mar_medium": 0.7846, "eval_mar_small": 0.6, "eval_runtime": 6.6179, "eval_samples_per_second": 19.341, "eval_steps_per_second": 2.418, "step": 1365 }, { "epoch": 16.0, "eval_loss": 0.7757935523986816, "eval_map": 0.843, "eval_map_50": 0.9059, "eval_map_75": 0.8657, "eval_map_class_0": 0.843, "eval_map_large": 0.8713, "eval_map_medium": 0.6186, "eval_map_small": 0.0, "eval_mar_1": 0.7472, "eval_mar_10": 0.8783, "eval_mar_100": 0.9385, "eval_mar_100_class_0": 0.9385, "eval_mar_large": 0.9633, "eval_mar_medium": 0.7308, "eval_mar_small": 0.0, "eval_runtime": 6.6266, "eval_samples_per_second": 19.316, "eval_steps_per_second": 2.415, "step": 1456 }, { "epoch": 16.483516483516482, "grad_norm": 38.72512435913086, "learning_rate": 2.2545787545787548e-05, "loss": 8.323384765625, "step": 1500 }, { "epoch": 17.0, "eval_loss": 0.8078031539916992, "eval_map": 0.844, "eval_map_50": 0.9017, "eval_map_75": 0.8816, "eval_map_class_0": 0.844, "eval_map_large": 0.8754, "eval_map_medium": 0.6091, "eval_map_small": 0.009, "eval_mar_1": 0.7453, "eval_mar_10": 0.8957, "eval_mar_100": 0.9366, "eval_mar_100_class_0": 0.9366, "eval_mar_large": 0.9592, "eval_mar_medium": 0.7077, "eval_mar_small": 0.6, "eval_runtime": 6.5387, "eval_samples_per_second": 19.576, "eval_steps_per_second": 2.447, "step": 1547 }, { "epoch": 18.0, "eval_loss": 0.771867036819458, "eval_map": 0.8479, "eval_map_50": 0.9069, "eval_map_75": 0.8728, "eval_map_class_0": 0.8479, "eval_map_large": 0.8785, "eval_map_medium": 0.6086, "eval_map_small": 0.0, "eval_mar_1": 0.7522, "eval_mar_10": 0.8882, "eval_mar_100": 0.9273, "eval_mar_100_class_0": 0.9273, "eval_mar_large": 0.9531, "eval_mar_medium": 0.7077, "eval_mar_small": 0.0, "eval_runtime": 6.8246, "eval_samples_per_second": 18.756, "eval_steps_per_second": 2.344, "step": 1638 }, { "epoch": 19.0, "eval_loss": 0.756177544593811, "eval_map": 0.8552, "eval_map_50": 0.9105, "eval_map_75": 0.8779, "eval_map_class_0": 0.8552, "eval_map_large": 0.8853, "eval_map_medium": 0.6193, "eval_map_small": 0.0, "eval_mar_1": 0.7509, "eval_mar_10": 0.8882, "eval_mar_100": 0.9323, "eval_mar_100_class_0": 0.9323, "eval_mar_large": 0.9585, "eval_mar_medium": 0.7077, "eval_mar_small": 0.0, "eval_runtime": 6.5483, "eval_samples_per_second": 19.547, "eval_steps_per_second": 2.443, "step": 1729 }, { "epoch": 20.0, "eval_loss": 0.8165637850761414, "eval_map": 0.8478, "eval_map_50": 0.9076, "eval_map_75": 0.8774, "eval_map_class_0": 0.8478, "eval_map_large": 0.8765, "eval_map_medium": 0.6428, "eval_map_small": 0.0, "eval_mar_1": 0.7528, "eval_mar_10": 0.8988, "eval_mar_100": 0.9379, "eval_mar_100_class_0": 0.9379, "eval_mar_large": 0.9633, "eval_mar_medium": 0.7231, "eval_mar_small": 0.0, "eval_runtime": 6.5543, "eval_samples_per_second": 19.529, "eval_steps_per_second": 2.441, "step": 1820 }, { "epoch": 21.0, "eval_loss": 0.7615771293640137, "eval_map": 0.8519, "eval_map_50": 0.9053, "eval_map_75": 0.8843, "eval_map_class_0": 0.8519, "eval_map_large": 0.8818, "eval_map_medium": 0.6332, "eval_map_small": 0.0, "eval_mar_1": 0.7578, "eval_mar_10": 0.8919, "eval_mar_100": 0.9385, "eval_mar_100_class_0": 0.9385, "eval_mar_large": 0.9646, "eval_mar_medium": 0.7154, "eval_mar_small": 0.0, "eval_runtime": 6.6955, "eval_samples_per_second": 19.117, "eval_steps_per_second": 2.39, "step": 1911 }, { "epoch": 21.978021978021978, "grad_norm": 37.53008270263672, "learning_rate": 1.3388278388278389e-05, "loss": 7.99486328125, "step": 2000 }, { "epoch": 22.0, "eval_loss": 0.7444530129432678, "eval_map": 0.857, "eval_map_50": 0.9102, "eval_map_75": 0.8872, "eval_map_class_0": 0.857, "eval_map_large": 0.8856, "eval_map_medium": 0.6327, "eval_map_small": 0.0, "eval_mar_1": 0.7497, "eval_mar_10": 0.8839, "eval_mar_100": 0.9354, "eval_mar_100_class_0": 0.9354, "eval_mar_large": 0.9605, "eval_mar_medium": 0.7231, "eval_mar_small": 0.0, "eval_runtime": 6.5313, "eval_samples_per_second": 19.598, "eval_steps_per_second": 2.45, "step": 2002 }, { "epoch": 23.0, "eval_loss": 0.7569313645362854, "eval_map": 0.85, "eval_map_50": 0.9003, "eval_map_75": 0.8792, "eval_map_class_0": 0.85, "eval_map_large": 0.8802, "eval_map_medium": 0.6308, "eval_map_small": 0.0, "eval_mar_1": 0.7516, "eval_mar_10": 0.9019, "eval_mar_100": 0.9366, "eval_mar_100_class_0": 0.9366, "eval_mar_large": 0.9626, "eval_mar_medium": 0.7154, "eval_mar_small": 0.0, "eval_runtime": 6.5869, "eval_samples_per_second": 19.433, "eval_steps_per_second": 2.429, "step": 2093 }, { "epoch": 24.0, "eval_loss": 0.755048930644989, "eval_map": 0.862, "eval_map_50": 0.9093, "eval_map_75": 0.8837, "eval_map_class_0": 0.862, "eval_map_large": 0.8917, "eval_map_medium": 0.6277, "eval_map_small": 0.0, "eval_mar_1": 0.7516, "eval_mar_10": 0.8882, "eval_mar_100": 0.9441, "eval_mar_100_class_0": 0.9441, "eval_mar_large": 0.9653, "eval_mar_medium": 0.7769, "eval_mar_small": 0.0, "eval_runtime": 6.6153, "eval_samples_per_second": 19.349, "eval_steps_per_second": 2.419, "step": 2184 }, { "epoch": 25.0, "eval_loss": 0.7434877753257751, "eval_map": 0.8561, "eval_map_50": 0.9053, "eval_map_75": 0.8875, "eval_map_class_0": 0.8561, "eval_map_large": 0.885, "eval_map_medium": 0.6371, "eval_map_small": 0.0055, "eval_mar_1": 0.7559, "eval_mar_10": 0.8981, "eval_mar_100": 0.9478, "eval_mar_100_class_0": 0.9478, "eval_mar_large": 0.9633, "eval_mar_medium": 0.7923, "eval_mar_small": 0.7, "eval_runtime": 6.5533, "eval_samples_per_second": 19.532, "eval_steps_per_second": 2.442, "step": 2275 }, { "epoch": 26.0, "eval_loss": 0.7752219438552856, "eval_map": 0.8441, "eval_map_50": 0.8961, "eval_map_75": 0.8669, "eval_map_class_0": 0.8441, "eval_map_large": 0.8717, "eval_map_medium": 0.6392, "eval_map_small": 0.0, "eval_mar_1": 0.7553, "eval_mar_10": 0.8857, "eval_mar_100": 0.9398, "eval_mar_100_class_0": 0.9398, "eval_mar_large": 0.9592, "eval_mar_medium": 0.7923, "eval_mar_small": 0.0, "eval_runtime": 6.5969, "eval_samples_per_second": 19.403, "eval_steps_per_second": 2.425, "step": 2366 }, { "epoch": 27.0, "eval_loss": 0.7504961490631104, "eval_map": 0.8505, "eval_map_50": 0.8966, "eval_map_75": 0.8745, "eval_map_class_0": 0.8505, "eval_map_large": 0.88, "eval_map_medium": 0.6337, "eval_map_small": 0.0, "eval_mar_1": 0.7565, "eval_mar_10": 0.8857, "eval_mar_100": 0.9441, "eval_mar_100_class_0": 0.9441, "eval_mar_large": 0.9639, "eval_mar_medium": 0.7923, "eval_mar_small": 0.0, "eval_runtime": 6.5131, "eval_samples_per_second": 19.653, "eval_steps_per_second": 2.457, "step": 2457 }, { "epoch": 27.47252747252747, "grad_norm": 72.2485580444336, "learning_rate": 4.230769230769231e-06, "loss": 7.68200732421875, "step": 2500 }, { "epoch": 28.0, "eval_loss": 0.7449190616607666, "eval_map": 0.8533, "eval_map_50": 0.9012, "eval_map_75": 0.8801, "eval_map_class_0": 0.8533, "eval_map_large": 0.8826, "eval_map_medium": 0.627, "eval_map_small": 0.0, "eval_mar_1": 0.7553, "eval_mar_10": 0.8901, "eval_mar_100": 0.9416, "eval_mar_100_class_0": 0.9416, "eval_mar_large": 0.9626, "eval_mar_medium": 0.7769, "eval_mar_small": 0.0, "eval_runtime": 6.6251, "eval_samples_per_second": 19.32, "eval_steps_per_second": 2.415, "step": 2548 }, { "epoch": 29.0, "eval_loss": 0.7330126166343689, "eval_map": 0.8577, "eval_map_50": 0.9052, "eval_map_75": 0.8842, "eval_map_class_0": 0.8577, "eval_map_large": 0.8874, "eval_map_medium": 0.6278, "eval_map_small": 0.0076, "eval_mar_1": 0.7571, "eval_mar_10": 0.8975, "eval_mar_100": 0.9497, "eval_mar_100_class_0": 0.9497, "eval_mar_large": 0.966, "eval_mar_medium": 0.7769, "eval_mar_small": 0.8, "eval_runtime": 6.6188, "eval_samples_per_second": 19.339, "eval_steps_per_second": 2.417, "step": 2639 }, { "epoch": 30.0, "eval_loss": 0.741165041923523, "eval_map": 0.8559, "eval_map_50": 0.9016, "eval_map_75": 0.8852, "eval_map_class_0": 0.8559, "eval_map_large": 0.8848, "eval_map_medium": 0.6332, "eval_map_small": 0.0, "eval_mar_1": 0.7571, "eval_mar_10": 0.8957, "eval_mar_100": 0.9385, "eval_mar_100_class_0": 0.9385, "eval_mar_large": 0.9639, "eval_mar_medium": 0.7231, "eval_mar_small": 0.0, "eval_runtime": 6.5118, "eval_samples_per_second": 19.656, "eval_steps_per_second": 2.457, "step": 2730 }, { "epoch": 30.0, "step": 2730, "total_flos": 1.6368835442688e+18, "train_loss": 11.706639954283997, "train_runtime": 1986.2673, "train_samples_per_second": 10.95, "train_steps_per_second": 1.374 } ], "logging_steps": 500, "max_steps": 2730, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6368835442688e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }