| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 6342, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.23651844843897823, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.881740775780511e-05, |
| "loss": 0.3019, |
| "mean_token_accuracy": 0.9215967762470245, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.47303689687795647, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.763481551561022e-05, |
| "loss": 0.246, |
| "mean_token_accuracy": 0.9323436969518661, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7095553453169348, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.6452223273415328e-05, |
| "loss": 0.2338, |
| "mean_token_accuracy": 0.9348658822774887, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9460737937559129, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.5269631031220437e-05, |
| "loss": 0.2312, |
| "mean_token_accuracy": 0.9352898219823838, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.1825922421948911, |
| "grad_norm": 1.5703125, |
| "learning_rate": 1.4087038789025544e-05, |
| "loss": 0.2174, |
| "mean_token_accuracy": 0.9378906596899033, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.4191106906338695, |
| "grad_norm": 1.5546875, |
| "learning_rate": 1.2904446546830653e-05, |
| "loss": 0.2107, |
| "mean_token_accuracy": 0.9396841472387314, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.6556291390728477, |
| "grad_norm": 0.875, |
| "learning_rate": 1.1721854304635763e-05, |
| "loss": 0.2129, |
| "mean_token_accuracy": 0.9386629806756973, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.8921475875118259, |
| "grad_norm": 1.625, |
| "learning_rate": 1.0539262062440872e-05, |
| "loss": 0.2041, |
| "mean_token_accuracy": 0.9411084994077682, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.128666035950804, |
| "grad_norm": 1.4375, |
| "learning_rate": 9.35666982024598e-06, |
| "loss": 0.2061, |
| "mean_token_accuracy": 0.9402695513963699, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.3651844843897822, |
| "grad_norm": 1.1953125, |
| "learning_rate": 8.174077578051088e-06, |
| "loss": 0.201, |
| "mean_token_accuracy": 0.9417461235523223, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.6017029328287604, |
| "grad_norm": 0.86328125, |
| "learning_rate": 6.991485335856198e-06, |
| "loss": 0.1962, |
| "mean_token_accuracy": 0.9427423716783524, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.838221381267739, |
| "grad_norm": 1.65625, |
| "learning_rate": 5.808893093661306e-06, |
| "loss": 0.1989, |
| "mean_token_accuracy": 0.9420522735118866, |
| "step": 6000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 8456, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.2919686076624896e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|