| { |
| "best_metric": 0.8050541516245487, |
| "best_model_checkpoint": "./fp32_1e_5/models/rte-roberta-base/checkpoint-1000", |
| "epoch": 12.820512820512821, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.64, |
| "learning_rate": 5.319148936170213e-06, |
| "loss": 0.6932, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_accuracy": 0.4729241877256318, |
| "eval_loss": 0.6963141560554504, |
| "eval_runtime": 2.0901, |
| "eval_samples_per_second": 132.533, |
| "eval_steps_per_second": 4.306, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 9.959072305593452e-06, |
| "loss": 0.6957, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_accuracy": 0.4729241877256318, |
| "eval_loss": 0.69448322057724, |
| "eval_runtime": 0.7491, |
| "eval_samples_per_second": 369.778, |
| "eval_steps_per_second": 12.014, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 9.618008185538881e-06, |
| "loss": 0.6869, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_accuracy": 0.6245487364620939, |
| "eval_loss": 0.6610168218612671, |
| "eval_runtime": 0.7857, |
| "eval_samples_per_second": 352.54, |
| "eval_steps_per_second": 11.454, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 9.276944065484312e-06, |
| "loss": 0.6292, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_accuracy": 0.6570397111913358, |
| "eval_loss": 0.6132365465164185, |
| "eval_runtime": 0.7843, |
| "eval_samples_per_second": 353.202, |
| "eval_steps_per_second": 11.476, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.21, |
| "learning_rate": 8.935879945429742e-06, |
| "loss": 0.5473, |
| "step": 250 |
| }, |
| { |
| "epoch": 3.21, |
| "eval_accuracy": 0.7075812274368231, |
| "eval_loss": 0.6135809421539307, |
| "eval_runtime": 0.8034, |
| "eval_samples_per_second": 344.792, |
| "eval_steps_per_second": 11.203, |
| "step": 250 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 8.59481582537517e-06, |
| "loss": 0.4867, |
| "step": 300 |
| }, |
| { |
| "epoch": 3.85, |
| "eval_accuracy": 0.7256317689530686, |
| "eval_loss": 0.5372898578643799, |
| "eval_runtime": 0.8424, |
| "eval_samples_per_second": 328.815, |
| "eval_steps_per_second": 10.684, |
| "step": 300 |
| }, |
| { |
| "epoch": 4.49, |
| "learning_rate": 8.253751705320601e-06, |
| "loss": 0.3673, |
| "step": 350 |
| }, |
| { |
| "epoch": 4.49, |
| "eval_accuracy": 0.7292418772563177, |
| "eval_loss": 0.6152286529541016, |
| "eval_runtime": 0.7525, |
| "eval_samples_per_second": 368.117, |
| "eval_steps_per_second": 11.96, |
| "step": 350 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 7.912687585266032e-06, |
| "loss": 0.3246, |
| "step": 400 |
| }, |
| { |
| "epoch": 5.13, |
| "eval_accuracy": 0.7545126353790613, |
| "eval_loss": 0.6094390153884888, |
| "eval_runtime": 0.7684, |
| "eval_samples_per_second": 360.479, |
| "eval_steps_per_second": 11.712, |
| "step": 400 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 7.5716234652114605e-06, |
| "loss": 0.2585, |
| "step": 450 |
| }, |
| { |
| "epoch": 5.77, |
| "eval_accuracy": 0.7725631768953068, |
| "eval_loss": 0.6251689791679382, |
| "eval_runtime": 0.7559, |
| "eval_samples_per_second": 366.46, |
| "eval_steps_per_second": 11.907, |
| "step": 450 |
| }, |
| { |
| "epoch": 6.41, |
| "learning_rate": 7.23055934515689e-06, |
| "loss": 0.1975, |
| "step": 500 |
| }, |
| { |
| "epoch": 6.41, |
| "eval_accuracy": 0.7436823104693141, |
| "eval_loss": 0.7674959301948547, |
| "eval_runtime": 0.766, |
| "eval_samples_per_second": 361.606, |
| "eval_steps_per_second": 11.749, |
| "step": 500 |
| }, |
| { |
| "epoch": 7.05, |
| "learning_rate": 6.88949522510232e-06, |
| "loss": 0.19, |
| "step": 550 |
| }, |
| { |
| "epoch": 7.05, |
| "eval_accuracy": 0.7617328519855595, |
| "eval_loss": 0.7783448696136475, |
| "eval_runtime": 0.7892, |
| "eval_samples_per_second": 350.981, |
| "eval_steps_per_second": 11.404, |
| "step": 550 |
| }, |
| { |
| "epoch": 7.69, |
| "learning_rate": 6.54843110504775e-06, |
| "loss": 0.1266, |
| "step": 600 |
| }, |
| { |
| "epoch": 7.69, |
| "eval_accuracy": 0.7545126353790613, |
| "eval_loss": 1.0359293222427368, |
| "eval_runtime": 0.7751, |
| "eval_samples_per_second": 357.395, |
| "eval_steps_per_second": 11.612, |
| "step": 600 |
| }, |
| { |
| "epoch": 8.33, |
| "learning_rate": 6.20736698499318e-06, |
| "loss": 0.1222, |
| "step": 650 |
| }, |
| { |
| "epoch": 8.33, |
| "eval_accuracy": 0.7833935018050542, |
| "eval_loss": 0.8435311913490295, |
| "eval_runtime": 1.3166, |
| "eval_samples_per_second": 210.387, |
| "eval_steps_per_second": 6.836, |
| "step": 650 |
| }, |
| { |
| "epoch": 8.97, |
| "learning_rate": 5.8663028649386085e-06, |
| "loss": 0.1146, |
| "step": 700 |
| }, |
| { |
| "epoch": 8.97, |
| "eval_accuracy": 0.7833935018050542, |
| "eval_loss": 0.941527247428894, |
| "eval_runtime": 0.749, |
| "eval_samples_per_second": 369.818, |
| "eval_steps_per_second": 12.016, |
| "step": 700 |
| }, |
| { |
| "epoch": 9.62, |
| "learning_rate": 5.525238744884038e-06, |
| "loss": 0.1131, |
| "step": 750 |
| }, |
| { |
| "epoch": 9.62, |
| "eval_accuracy": 0.7833935018050542, |
| "eval_loss": 0.9660640358924866, |
| "eval_runtime": 0.7502, |
| "eval_samples_per_second": 369.243, |
| "eval_steps_per_second": 11.997, |
| "step": 750 |
| }, |
| { |
| "epoch": 10.26, |
| "learning_rate": 5.1841746248294686e-06, |
| "loss": 0.0719, |
| "step": 800 |
| }, |
| { |
| "epoch": 10.26, |
| "eval_accuracy": 0.7906137184115524, |
| "eval_loss": 1.0486385822296143, |
| "eval_runtime": 0.7951, |
| "eval_samples_per_second": 348.395, |
| "eval_steps_per_second": 11.32, |
| "step": 800 |
| }, |
| { |
| "epoch": 10.9, |
| "learning_rate": 4.843110504774898e-06, |
| "loss": 0.1002, |
| "step": 850 |
| }, |
| { |
| "epoch": 10.9, |
| "eval_accuracy": 0.776173285198556, |
| "eval_loss": 1.1470500230789185, |
| "eval_runtime": 0.7467, |
| "eval_samples_per_second": 370.982, |
| "eval_steps_per_second": 12.054, |
| "step": 850 |
| }, |
| { |
| "epoch": 11.54, |
| "learning_rate": 4.502046384720328e-06, |
| "loss": 0.0515, |
| "step": 900 |
| }, |
| { |
| "epoch": 11.54, |
| "eval_accuracy": 0.7906137184115524, |
| "eval_loss": 1.0903133153915405, |
| "eval_runtime": 0.7707, |
| "eval_samples_per_second": 359.412, |
| "eval_steps_per_second": 11.678, |
| "step": 900 |
| }, |
| { |
| "epoch": 12.18, |
| "learning_rate": 4.160982264665757e-06, |
| "loss": 0.0732, |
| "step": 950 |
| }, |
| { |
| "epoch": 12.18, |
| "eval_accuracy": 0.7978339350180506, |
| "eval_loss": 1.0761293172836304, |
| "eval_runtime": 0.8013, |
| "eval_samples_per_second": 345.69, |
| "eval_steps_per_second": 11.232, |
| "step": 950 |
| }, |
| { |
| "epoch": 12.82, |
| "learning_rate": 3.819918144611187e-06, |
| "loss": 0.0644, |
| "step": 1000 |
| }, |
| { |
| "epoch": 12.82, |
| "eval_accuracy": 0.8050541516245487, |
| "eval_loss": 1.118359088897705, |
| "eval_runtime": 0.7549, |
| "eval_samples_per_second": 366.937, |
| "eval_steps_per_second": 11.922, |
| "step": 1000 |
| } |
| ], |
| "max_steps": 1560, |
| "num_train_epochs": 20, |
| "total_flos": 2100152443883520.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|