| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 30.0, |
| "eval_steps": 500, |
| "global_step": 4290, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.2524374723434448, |
| "learning_rate": 4.8333333333333334e-05, |
| "loss": 0.305, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9573631188256962, |
| "eval_f1": 0.0, |
| "eval_loss": 0.25014349818229675, |
| "eval_precision": 0.0, |
| "eval_recall": 0.0, |
| "eval_runtime": 24.3893, |
| "eval_samples_per_second": 93.361, |
| "eval_steps_per_second": 5.863, |
| "step": 143 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 2.41591477394104, |
| "learning_rate": 4.666666666666667e-05, |
| "loss": 0.1881, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9643132220795892, |
| "eval_f1": 0.1014260249554367, |
| "eval_loss": 0.18320757150650024, |
| "eval_precision": 0.2123134328358209, |
| "eval_recall": 0.06662763466042154, |
| "eval_runtime": 24.4105, |
| "eval_samples_per_second": 93.28, |
| "eval_steps_per_second": 5.858, |
| "step": 286 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.3990638256072998, |
| "learning_rate": 4.5e-05, |
| "loss": 0.1434, |
| "step": 429 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9702112518836858, |
| "eval_f1": 0.2715324897609878, |
| "eval_loss": 0.13982267677783966, |
| "eval_precision": 0.2840516690113825, |
| "eval_recall": 0.2600702576112412, |
| "eval_runtime": 24.5498, |
| "eval_samples_per_second": 92.75, |
| "eval_steps_per_second": 5.825, |
| "step": 429 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 1.508366346359253, |
| "learning_rate": 4.3333333333333334e-05, |
| "loss": 0.1107, |
| "step": 572 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.977955293854998, |
| "eval_f1": 0.3784165050255687, |
| "eval_loss": 0.09865138679742813, |
| "eval_precision": 0.3799126637554585, |
| "eval_recall": 0.3769320843091335, |
| "eval_runtime": 24.4817, |
| "eval_samples_per_second": 93.008, |
| "eval_steps_per_second": 5.841, |
| "step": 572 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 2.8642067909240723, |
| "learning_rate": 4.166666666666667e-05, |
| "loss": 0.0877, |
| "step": 715 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9799073505609198, |
| "eval_f1": 0.45260009203865625, |
| "eval_loss": 0.08595172315835953, |
| "eval_precision": 0.4448213478064224, |
| "eval_recall": 0.460655737704918, |
| "eval_runtime": 24.4775, |
| "eval_samples_per_second": 93.024, |
| "eval_steps_per_second": 5.842, |
| "step": 715 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 1.2604122161865234, |
| "learning_rate": 4e-05, |
| "loss": 0.0736, |
| "step": 858 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.9855486409555171, |
| "eval_f1": 0.5592687470889613, |
| "eval_loss": 0.06336811929941177, |
| "eval_precision": 0.5561602593793423, |
| "eval_recall": 0.5624121779859484, |
| "eval_runtime": 24.9031, |
| "eval_samples_per_second": 91.435, |
| "eval_steps_per_second": 5.742, |
| "step": 858 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.9219182729721069, |
| "learning_rate": 3.8333333333333334e-05, |
| "loss": 0.0592, |
| "step": 1001 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.9885932354746888, |
| "eval_f1": 0.6266651355075792, |
| "eval_loss": 0.05170835927128792, |
| "eval_precision": 0.614803965750338, |
| "eval_recall": 0.6389929742388759, |
| "eval_runtime": 24.5205, |
| "eval_samples_per_second": 92.861, |
| "eval_steps_per_second": 5.832, |
| "step": 1001 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.5538210868835449, |
| "learning_rate": 3.6666666666666666e-05, |
| "loss": 0.049, |
| "step": 1144 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9903639002065078, |
| "eval_f1": 0.6772266065388951, |
| "eval_loss": 0.040448613464832306, |
| "eval_precision": 0.6529347826086956, |
| "eval_recall": 0.7033957845433255, |
| "eval_runtime": 26.0505, |
| "eval_samples_per_second": 87.407, |
| "eval_steps_per_second": 5.489, |
| "step": 1144 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.5682235956192017, |
| "learning_rate": 3.5e-05, |
| "loss": 0.0404, |
| "step": 1287 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.9925099067924318, |
| "eval_f1": 0.7394493342360641, |
| "eval_loss": 0.032923389226198196, |
| "eval_precision": 0.7135235191637631, |
| "eval_recall": 0.7673302107728337, |
| "eval_runtime": 24.6843, |
| "eval_samples_per_second": 92.245, |
| "eval_steps_per_second": 5.793, |
| "step": 1287 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 2.0670313835144043, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.0328, |
| "step": 1430 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.9937712786738852, |
| "eval_f1": 0.7751125292006154, |
| "eval_loss": 0.026249362155795097, |
| "eval_precision": 0.7548551770058817, |
| "eval_recall": 0.7964871194379392, |
| "eval_runtime": 24.857, |
| "eval_samples_per_second": 91.604, |
| "eval_steps_per_second": 5.753, |
| "step": 1430 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 2.1008994579315186, |
| "learning_rate": 3.1666666666666666e-05, |
| "loss": 0.0286, |
| "step": 1573 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.9950842775018139, |
| "eval_f1": 0.8216382602747535, |
| "eval_loss": 0.020949603989720345, |
| "eval_precision": 0.800510940797512, |
| "eval_recall": 0.8439110070257612, |
| "eval_runtime": 24.6118, |
| "eval_samples_per_second": 92.517, |
| "eval_steps_per_second": 5.81, |
| "step": 1573 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.8632619976997375, |
| "learning_rate": 3e-05, |
| "loss": 0.0229, |
| "step": 1716 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.9960051906011052, |
| "eval_f1": 0.8509210825562884, |
| "eval_loss": 0.016843697056174278, |
| "eval_precision": 0.8270335985853228, |
| "eval_recall": 0.8762295081967213, |
| "eval_runtime": 24.6326, |
| "eval_samples_per_second": 92.439, |
| "eval_steps_per_second": 5.805, |
| "step": 1716 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 1.6470736265182495, |
| "learning_rate": 2.8333333333333335e-05, |
| "loss": 0.0192, |
| "step": 1859 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.9966414578333427, |
| "eval_f1": 0.8833197342347593, |
| "eval_loss": 0.014622141607105732, |
| "eval_precision": 0.8793223485727547, |
| "eval_recall": 0.8873536299765808, |
| "eval_runtime": 25.8427, |
| "eval_samples_per_second": 88.11, |
| "eval_steps_per_second": 5.533, |
| "step": 1859 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.4230591058731079, |
| "learning_rate": 2.6666666666666667e-05, |
| "loss": 0.0172, |
| "step": 2002 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.9968563375565106, |
| "eval_f1": 0.8954657778288604, |
| "eval_loss": 0.012411631643772125, |
| "eval_precision": 0.8792461347477711, |
| "eval_recall": 0.9122950819672131, |
| "eval_runtime": 24.5899, |
| "eval_samples_per_second": 92.599, |
| "eval_steps_per_second": 5.815, |
| "step": 2002 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.9317820072174072, |
| "learning_rate": 2.5e-05, |
| "loss": 0.0153, |
| "step": 2145 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.9975428364123459, |
| "eval_f1": 0.9154970589948168, |
| "eval_loss": 0.010398673824965954, |
| "eval_precision": 0.910670837678137, |
| "eval_recall": 0.9203747072599532, |
| "eval_runtime": 25.4287, |
| "eval_samples_per_second": 89.544, |
| "eval_steps_per_second": 5.624, |
| "step": 2145 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 0.3844711482524872, |
| "learning_rate": 2.3333333333333336e-05, |
| "loss": 0.0127, |
| "step": 2288 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.9975916727130658, |
| "eval_f1": 0.9196624667668478, |
| "eval_loss": 0.009496341459453106, |
| "eval_precision": 0.9080118694362018, |
| "eval_recall": 0.931615925058548, |
| "eval_runtime": 24.6017, |
| "eval_samples_per_second": 92.555, |
| "eval_steps_per_second": 5.813, |
| "step": 2288 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 0.7824459075927734, |
| "learning_rate": 2.1666666666666667e-05, |
| "loss": 0.0101, |
| "step": 2431 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.9979837584417034, |
| "eval_f1": 0.9320265206111271, |
| "eval_loss": 0.00775914778932929, |
| "eval_precision": 0.9180011357183419, |
| "eval_recall": 0.9464871194379391, |
| "eval_runtime": 24.6006, |
| "eval_samples_per_second": 92.559, |
| "eval_steps_per_second": 5.813, |
| "step": 2431 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 1.2668181657791138, |
| "learning_rate": 2e-05, |
| "loss": 0.0096, |
| "step": 2574 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.9981916615504828, |
| "eval_f1": 0.9469846046996181, |
| "eval_loss": 0.006909618154168129, |
| "eval_precision": 0.9362554360265507, |
| "eval_recall": 0.9579625292740047, |
| "eval_runtime": 24.6298, |
| "eval_samples_per_second": 92.449, |
| "eval_steps_per_second": 5.806, |
| "step": 2574 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.8047562837600708, |
| "learning_rate": 1.8333333333333333e-05, |
| "loss": 0.0082, |
| "step": 2717 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.9983981693363845, |
| "eval_f1": 0.9509088803982865, |
| "eval_loss": 0.0058281742967665195, |
| "eval_precision": 0.9403480650332036, |
| "eval_recall": 0.9617096018735363, |
| "eval_runtime": 24.6016, |
| "eval_samples_per_second": 92.555, |
| "eval_steps_per_second": 5.813, |
| "step": 2717 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.3952634036540985, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.0074, |
| "step": 2860 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.9985851426019982, |
| "eval_f1": 0.9587371597701816, |
| "eval_loss": 0.005235890857875347, |
| "eval_precision": 0.9504084685306639, |
| "eval_recall": 0.9672131147540983, |
| "eval_runtime": 24.5882, |
| "eval_samples_per_second": 92.605, |
| "eval_steps_per_second": 5.816, |
| "step": 2860 |
| }, |
| { |
| "epoch": 21.0, |
| "grad_norm": 0.20539724826812744, |
| "learning_rate": 1.5e-05, |
| "loss": 0.0065, |
| "step": 3003 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.998590723893509, |
| "eval_f1": 0.9577888284954319, |
| "eval_loss": 0.00510649848729372, |
| "eval_precision": 0.9460817911811743, |
| "eval_recall": 0.9697892271662764, |
| "eval_runtime": 24.8469, |
| "eval_samples_per_second": 91.641, |
| "eval_steps_per_second": 5.755, |
| "step": 3003 |
| }, |
| { |
| "epoch": 22.0, |
| "grad_norm": 0.10989252477884293, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.0058, |
| "step": 3146 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.998551654852933, |
| "eval_f1": 0.9594181146452693, |
| "eval_loss": 0.004978466313332319, |
| "eval_precision": 0.9461459637936923, |
| "eval_recall": 0.9730679156908665, |
| "eval_runtime": 24.4805, |
| "eval_samples_per_second": 93.013, |
| "eval_steps_per_second": 5.841, |
| "step": 3146 |
| }, |
| { |
| "epoch": 23.0, |
| "grad_norm": 0.10916499048471451, |
| "learning_rate": 1.1666666666666668e-05, |
| "loss": 0.0056, |
| "step": 3289 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.9987204889211363, |
| "eval_f1": 0.965821901883283, |
| "eval_loss": 0.00421318830922246, |
| "eval_precision": 0.958910433979686, |
| "eval_recall": 0.9728337236533958, |
| "eval_runtime": 24.4468, |
| "eval_samples_per_second": 93.141, |
| "eval_steps_per_second": 5.849, |
| "step": 3289 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 0.3903842270374298, |
| "learning_rate": 1e-05, |
| "loss": 0.0051, |
| "step": 3432 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.9987930457107774, |
| "eval_f1": 0.969594986654288, |
| "eval_loss": 0.004073758609592915, |
| "eval_precision": 0.961007591442374, |
| "eval_recall": 0.9783372365339579, |
| "eval_runtime": 24.4468, |
| "eval_samples_per_second": 93.141, |
| "eval_steps_per_second": 5.849, |
| "step": 3432 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 2.5139732360839844, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.0049, |
| "step": 3575 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.9989186247697718, |
| "eval_f1": 0.9717112922002328, |
| "eval_loss": 0.0037488937377929688, |
| "eval_precision": 0.966087962962963, |
| "eval_recall": 0.9774004683840749, |
| "eval_runtime": 24.4794, |
| "eval_samples_per_second": 93.017, |
| "eval_steps_per_second": 5.842, |
| "step": 3575 |
| }, |
| { |
| "epoch": 26.0, |
| "grad_norm": 0.6361393332481384, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.0047, |
| "step": 3718 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.9989353686443043, |
| "eval_f1": 0.9728469875305908, |
| "eval_loss": 0.0034835096448659897, |
| "eval_precision": 0.968220830433774, |
| "eval_recall": 0.9775175644028103, |
| "eval_runtime": 24.4891, |
| "eval_samples_per_second": 92.98, |
| "eval_steps_per_second": 5.839, |
| "step": 3718 |
| }, |
| { |
| "epoch": 27.0, |
| "grad_norm": 0.22475773096084595, |
| "learning_rate": 5e-06, |
| "loss": 0.0043, |
| "step": 3861 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.9989451359044483, |
| "eval_f1": 0.9760381528440154, |
| "eval_loss": 0.0034792672377079725, |
| "eval_precision": 0.9696094291657037, |
| "eval_recall": 0.9825526932084309, |
| "eval_runtime": 24.4691, |
| "eval_samples_per_second": 93.056, |
| "eval_steps_per_second": 5.844, |
| "step": 3861 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 0.24321018159389496, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.0038, |
| "step": 4004 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.9989744376848803, |
| "eval_f1": 0.9768117626547335, |
| "eval_loss": 0.003415257204324007, |
| "eval_precision": 0.9696550132687204, |
| "eval_recall": 0.9840749414519906, |
| "eval_runtime": 24.4398, |
| "eval_samples_per_second": 93.168, |
| "eval_steps_per_second": 5.851, |
| "step": 4004 |
| }, |
| { |
| "epoch": 29.0, |
| "grad_norm": 0.5957739949226379, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 0.0037, |
| "step": 4147 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.9990079254339455, |
| "eval_f1": 0.9779304722529552, |
| "eval_loss": 0.003256614087149501, |
| "eval_precision": 0.9726630371829028, |
| "eval_recall": 0.9832552693208431, |
| "eval_runtime": 24.5523, |
| "eval_samples_per_second": 92.741, |
| "eval_steps_per_second": 5.824, |
| "step": 4147 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.8235336542129517, |
| "learning_rate": 0.0, |
| "loss": 0.0038, |
| "step": 4290 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.9990121114025785, |
| "eval_f1": 0.977371880635216, |
| "eval_loss": 0.0032377191819250584, |
| "eval_precision": 0.9711016067506647, |
| "eval_recall": 0.9837236533957845, |
| "eval_runtime": 24.4476, |
| "eval_samples_per_second": 93.138, |
| "eval_steps_per_second": 5.849, |
| "step": 4290 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 4290, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.785257029315584e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|