| { |
| "best_global_step": 2000, |
| "best_metric": 0.9999500948197181, |
| "best_model_checkpoint": "trained_models/intent_classifier/checkpoint-2000", |
| "epoch": 1.596169193934557, |
| "eval_steps": 1000, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0007980845969672786, |
| "grad_norm": 51713.75390625, |
| "learning_rate": 0.0, |
| "loss": 0.695, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.07980845969672785, |
| "grad_norm": 91372.3984375, |
| "learning_rate": 1.98e-06, |
| "loss": 0.6689, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1596169193934557, |
| "grad_norm": 47582.55078125, |
| "learning_rate": 3.980000000000001e-06, |
| "loss": 0.2889, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.23942537909018355, |
| "grad_norm": 2023.180908203125, |
| "learning_rate": 5.98e-06, |
| "loss": 0.0206, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3192338387869114, |
| "grad_norm": 105.5784912109375, |
| "learning_rate": 7.980000000000002e-06, |
| "loss": 0.0006, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.39904229848363926, |
| "grad_norm": 2.8826353549957275, |
| "learning_rate": 9.980000000000001e-06, |
| "loss": 0.0016, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4788507581803671, |
| "grad_norm": 0.18906153738498688, |
| "learning_rate": 1.198e-05, |
| "loss": 0.0, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5586592178770949, |
| "grad_norm": 0.06490982323884964, |
| "learning_rate": 1.398e-05, |
| "loss": 0.001, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6384676775738228, |
| "grad_norm": 0.07483379542827606, |
| "learning_rate": 1.5980000000000003e-05, |
| "loss": 0.0026, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7182761372705507, |
| "grad_norm": 0.029934018850326538, |
| "learning_rate": 1.798e-05, |
| "loss": 0.0027, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.7980845969672785, |
| "grad_norm": 0.023010307922959328, |
| "learning_rate": 1.9980000000000002e-05, |
| "loss": 0.0049, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7980845969672785, |
| "eval_accuracy": 0.9999001896396846, |
| "eval_f1": 0.9999001896386902, |
| "eval_f1_macro": 0.9999001896386903, |
| "eval_loss": 0.0016707783797755837, |
| "eval_runtime": 88.2175, |
| "eval_samples_per_second": 227.143, |
| "eval_steps_per_second": 1.78, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8778930566640064, |
| "grad_norm": 0.030565178021788597, |
| "learning_rate": 1.9282348677056906e-05, |
| "loss": 0.0091, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.9577015163607342, |
| "grad_norm": 0.025367770344018936, |
| "learning_rate": 1.8557448350851758e-05, |
| "loss": 0.0029, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.037509976057462, |
| "grad_norm": 0.01835496723651886, |
| "learning_rate": 1.7832548024646613e-05, |
| "loss": 0.0025, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.1173184357541899, |
| "grad_norm": 0.013528961688280106, |
| "learning_rate": 1.7107647698441465e-05, |
| "loss": 0.0004, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.1971268954509178, |
| "grad_norm": 0.013094124384224415, |
| "learning_rate": 1.638274737223632e-05, |
| "loss": 0.0043, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.2769353551476457, |
| "grad_norm": 0.010992957279086113, |
| "learning_rate": 1.565784704603117e-05, |
| "loss": 0.0001, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.3567438148443736, |
| "grad_norm": 0.009696166031062603, |
| "learning_rate": 1.4932946719826025e-05, |
| "loss": 0.0001, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.4365522745411012, |
| "grad_norm": 0.008244643919169903, |
| "learning_rate": 1.4208046393620877e-05, |
| "loss": 0.0, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.5163607342378294, |
| "grad_norm": 0.00833104643970728, |
| "learning_rate": 1.348314606741573e-05, |
| "loss": 0.0, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.596169193934557, |
| "grad_norm": 0.008447665721178055, |
| "learning_rate": 1.2758245741210584e-05, |
| "loss": 0.0031, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.596169193934557, |
| "eval_accuracy": 0.9999500948198423, |
| "eval_f1": 0.9999500948197181, |
| "eval_f1_macro": 0.999950094819718, |
| "eval_loss": 0.000961420766543597, |
| "eval_runtime": 88.1534, |
| "eval_samples_per_second": 227.308, |
| "eval_steps_per_second": 1.781, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 3759, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8475131698612224.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|