| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.0, |
| "global_step": 3608, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.166923313828149e-06, |
| "loss": 2.6072, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 2.3964672088623047, |
| "eval_runtime": 18.2149, |
| "eval_samples_per_second": 54.9, |
| "eval_steps_per_second": 0.604, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 2.777948875885433e-06, |
| "loss": 2.522, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 2.383113384246826, |
| "eval_runtime": 1.6537, |
| "eval_samples_per_second": 604.697, |
| "eval_steps_per_second": 6.652, |
| "step": 1804 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 1.3889744379427164e-06, |
| "loss": 2.485, |
| "step": 2706 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 2.3780102729797363, |
| "eval_runtime": 1.6082, |
| "eval_samples_per_second": 621.809, |
| "eval_steps_per_second": 6.84, |
| "step": 2706 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 0.0, |
| "loss": 2.4641, |
| "step": 3608 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 2.3768954277038574, |
| "eval_runtime": 1.6566, |
| "eval_samples_per_second": 603.647, |
| "eval_steps_per_second": 6.64, |
| "step": 3608 |
| } |
| ], |
| "max_steps": 3608, |
| "num_train_epochs": 4, |
| "total_flos": 1.1301011030016e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|