| { |
| "best_metric": 0.052040886133909225, |
| "best_model_checkpoint": "ckpt/checkpoint-200", |
| "epoch": 2.824858757062147, |
| "eval_steps": 200, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2824858757062147, |
| "grad_norm": 0.08298324048519135, |
| "learning_rate": 0.00029152542372881354, |
| "loss": 0.1901, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5649717514124294, |
| "grad_norm": 0.11693409830331802, |
| "learning_rate": 0.0002830508474576271, |
| "loss": 0.0624, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5649717514124294, |
| "eval_accuracy": 0.9860834990059643, |
| "eval_f1": 0.9891354958870091, |
| "eval_loss": 0.052040886133909225, |
| "eval_precision": 0.9869908626297041, |
| "eval_recall": 0.9912894695909161, |
| "eval_runtime": 12.1769, |
| "eval_samples_per_second": 826.154, |
| "eval_steps_per_second": 6.488, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.847457627118644, |
| "grad_norm": 0.10722041875123978, |
| "learning_rate": 0.00027457627118644066, |
| "loss": 0.0562, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.1299435028248588, |
| "grad_norm": 0.08397164940834045, |
| "learning_rate": 0.0002661016949152542, |
| "loss": 0.0524, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1299435028248588, |
| "eval_accuracy": 0.9874751491053678, |
| "eval_f1": 0.9902219462983083, |
| "eval_loss": 0.04475295916199684, |
| "eval_precision": 0.9880749574105622, |
| "eval_recall": 0.9923782858920517, |
| "eval_runtime": 11.6899, |
| "eval_samples_per_second": 860.571, |
| "eval_steps_per_second": 6.758, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.4124293785310735, |
| "grad_norm": 0.16527259349822998, |
| "learning_rate": 0.0002576271186440678, |
| "loss": 0.0492, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.694915254237288, |
| "grad_norm": 0.07748957723379135, |
| "learning_rate": 0.00024915254237288135, |
| "loss": 0.0518, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.694915254237288, |
| "eval_accuracy": 0.9872763419483102, |
| "eval_f1": 0.9900482040118178, |
| "eval_loss": 0.04457252100110054, |
| "eval_precision": 0.9897404010570496, |
| "eval_recall": 0.9903561984756571, |
| "eval_runtime": 12.8001, |
| "eval_samples_per_second": 785.934, |
| "eval_steps_per_second": 6.172, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.9774011299435028, |
| "grad_norm": 0.07877205312252045, |
| "learning_rate": 0.0002406779661016949, |
| "loss": 0.0489, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.2598870056497176, |
| "grad_norm": 0.09426571428775787, |
| "learning_rate": 0.00023220338983050845, |
| "loss": 0.0477, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.2598870056497176, |
| "eval_accuracy": 0.9879721669980119, |
| "eval_f1": 0.9906339499961297, |
| "eval_loss": 0.04242447018623352, |
| "eval_precision": 0.9859784283513097, |
| "eval_recall": 0.9953336444237051, |
| "eval_runtime": 13.2576, |
| "eval_samples_per_second": 758.811, |
| "eval_steps_per_second": 5.959, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.542372881355932, |
| "grad_norm": 0.09674050658941269, |
| "learning_rate": 0.000223728813559322, |
| "loss": 0.0463, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.824858757062147, |
| "grad_norm": 0.08737868070602417, |
| "learning_rate": 0.0002152542372881356, |
| "loss": 0.0465, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.824858757062147, |
| "eval_accuracy": 0.9887673956262425, |
| "eval_f1": 0.9912369135323769, |
| "eval_loss": 0.04115341976284981, |
| "eval_precision": 0.9884008660686668, |
| "eval_recall": 0.9940892829366931, |
| "eval_runtime": 11.8282, |
| "eval_samples_per_second": 850.507, |
| "eval_steps_per_second": 6.679, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 3540, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8484875417960448.0, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|