| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "global_step": 14403, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.479275151010207e-05, |
| "loss": 0.8616, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 3.9585503020204125e-05, |
| "loss": 0.7086, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 3.437825453030619e-05, |
| "loss": 0.6876, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 2.917100604040825e-05, |
| "loss": 0.6658, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 2.396375755051031e-05, |
| "loss": 0.6513, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 1.8756509060612374e-05, |
| "loss": 0.6115, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.3549260570714433e-05, |
| "loss": 0.5986, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 8.342012080816496e-06, |
| "loss": 0.6065, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3.134763590918559e-06, |
| "loss": 0.5997, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_gen_len": 20.0, |
| "eval_loss": 0.7166666984558105, |
| "eval_rouge1": 15.1597, |
| "eval_rouge2": 11.4975, |
| "eval_rougeL": 14.0709, |
| "eval_rougeLsum": 14.9737, |
| "eval_runtime": 13.0755, |
| "eval_samples_per_second": 6.501, |
| "eval_steps_per_second": 3.289, |
| "step": 4801 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 2.396375755051031e-05, |
| "loss": 0.535, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 2.136013330556134e-05, |
| "loss": 0.5328, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 1.8756509060612374e-05, |
| "loss": 0.555, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.6152884815663406e-05, |
| "loss": 0.5381, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 1.3549260570714433e-05, |
| "loss": 0.5375, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 1.0945636325765466e-05, |
| "loss": 0.5333, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.342012080816496e-06, |
| "loss": 0.5291, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 5.738387835867528e-06, |
| "loss": 0.5196, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 3.134763590918559e-06, |
| "loss": 0.4995, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.311393459695897e-07, |
| "loss": 0.5057, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_gen_len": 20.0, |
| "eval_loss": 0.6984732747077942, |
| "eval_rouge1": 15.2746, |
| "eval_rouge2": 11.537, |
| "eval_rougeL": 14.1481, |
| "eval_rougeLsum": 15.1267, |
| "eval_runtime": 13.3919, |
| "eval_samples_per_second": 6.347, |
| "eval_steps_per_second": 3.211, |
| "step": 9602 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.528501006734708e-05, |
| "loss": 0.4867, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1.3549260570714433e-05, |
| "loss": 0.4811, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.181351107408179e-05, |
| "loss": 0.4842, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 1.0077761577449144e-05, |
| "loss": 0.4909, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 8.342012080816496e-06, |
| "loss": 0.4659, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 6.606262584183851e-06, |
| "loss": 0.4747, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.870513087551205e-06, |
| "loss": 0.473, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 3.134763590918559e-06, |
| "loss": 0.4484, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.3990140942859125e-06, |
| "loss": 0.4804, |
| "step": 14000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_gen_len": 20.0, |
| "eval_loss": 0.6930598020553589, |
| "eval_rouge1": 15.2658, |
| "eval_rouge2": 11.5104, |
| "eval_rougeL": 14.117, |
| "eval_rougeLsum": 15.1103, |
| "eval_runtime": 13.632, |
| "eval_samples_per_second": 6.235, |
| "eval_steps_per_second": 3.154, |
| "step": 14403 |
| } |
| ], |
| "max_steps": 14403, |
| "num_train_epochs": 3, |
| "total_flos": 1.113119515662336e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|