{ "best_metric": 93.36666666666666, "best_model_checkpoint": "../results/phrase_retrieval/PR-pass/qa/albert/albert-base-v2/finetuned/checkpoint-5000", "epoch": 2.0, "eval_steps": 500, "global_step": 5078, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19692792437967704, "grad_norm": 70.44538879394531, "learning_rate": 2.7046081134304843e-05, "loss": 0.7453, "step": 500 }, { "epoch": 0.19692792437967704, "eval_exact_match": 87.36666666666666, "eval_f1": 90.2462371136489, "step": 500 }, { "epoch": 0.3938558487593541, "grad_norm": 74.7574234008789, "learning_rate": 2.409216226860969e-05, "loss": 0.4863, "step": 1000 }, { "epoch": 0.3938558487593541, "eval_exact_match": 88.76666666666667, "eval_f1": 90.95727956254274, "step": 1000 }, { "epoch": 0.5907837731390311, "grad_norm": 11.323880195617676, "learning_rate": 2.1138243402914533e-05, "loss": 0.413, "step": 1500 }, { "epoch": 0.5907837731390311, "eval_exact_match": 90.46666666666667, "eval_f1": 92.3909417989418, "step": 1500 }, { "epoch": 0.7877116975187082, "grad_norm": 57.2664680480957, "learning_rate": 1.8184324537219378e-05, "loss": 0.3762, "step": 2000 }, { "epoch": 0.7877116975187082, "eval_exact_match": 90.76666666666667, "eval_f1": 92.51397306397307, "step": 2000 }, { "epoch": 0.9846396218983852, "grad_norm": 39.49021911621094, "learning_rate": 1.5230405671524222e-05, "loss": 0.3563, "step": 2500 }, { "epoch": 0.9846396218983852, "eval_exact_match": 91.66666666666667, "eval_f1": 93.64239682539686, "step": 2500 }, { "epoch": 1.1815675462780622, "grad_norm": 174.0330047607422, "learning_rate": 1.2276486805829067e-05, "loss": 0.1942, "step": 3000 }, { "epoch": 1.1815675462780622, "eval_exact_match": 91.6, "eval_f1": 93.43199108883323, "step": 3000 }, { "epoch": 1.3784954706577393, "grad_norm": 0.0160669032484293, "learning_rate": 9.322567940133912e-06, "loss": 0.1709, "step": 3500 }, { "epoch": 1.3784954706577393, "eval_exact_match": 92.8, "eval_f1": 94.44685185185187, "step": 3500 }, { "epoch": 1.5754233950374164, "grad_norm": 212.05442810058594, "learning_rate": 6.368649074438756e-06, "loss": 0.1839, "step": 4000 }, { "epoch": 1.5754233950374164, "eval_exact_match": 93.06666666666666, "eval_f1": 94.5925444925445, "step": 4000 }, { "epoch": 1.7723513194170932, "grad_norm": 0.17131257057189941, "learning_rate": 3.4147302087436e-06, "loss": 0.1156, "step": 4500 }, { "epoch": 1.7723513194170932, "eval_exact_match": 93.06666666666666, "eval_f1": 94.59888888888891, "step": 4500 }, { "epoch": 1.9692792437967703, "grad_norm": 0.042494483292102814, "learning_rate": 4.608113430484443e-07, "loss": 0.1359, "step": 5000 }, { "epoch": 1.9692792437967703, "eval_exact_match": 93.36666666666666, "eval_f1": 94.88777777777779, "step": 5000 }, { "epoch": 2.0, "step": 5078, "total_flos": 896955748171776.0, "train_loss": 0.31565969917144115, "train_runtime": 5394.4597, "train_samples_per_second": 7.529, "train_steps_per_second": 0.941 } ], "logging_steps": 500, "max_steps": 5078, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 896955748171776.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }