| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9424657534246577, |
| "eval_steps": 500, |
| "global_step": 135, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.021917808219178082, |
| "grad_norm": 0.05345574766397476, |
| "learning_rate": 1.4285714285714285e-05, |
| "loss": 0.14798447489738464, |
| "memory(GiB)": 73.96, |
| "step": 1, |
| "token_acc": 0.9516081000595593, |
| "train_speed(iter/s)": 0.002598 |
| }, |
| { |
| "epoch": 0.1095890410958904, |
| "grad_norm": 0.08354049175977707, |
| "learning_rate": 7.142857142857143e-05, |
| "loss": 0.19844208657741547, |
| "memory(GiB)": 79.75, |
| "step": 5, |
| "token_acc": 0.9440162459039092, |
| "train_speed(iter/s)": 0.011652 |
| }, |
| { |
| "epoch": 0.2191780821917808, |
| "grad_norm": 0.09754039347171783, |
| "learning_rate": 9.986452283393452e-05, |
| "loss": 0.18643227815628052, |
| "memory(GiB)": 105.95, |
| "step": 10, |
| "token_acc": 0.9464405266681545, |
| "train_speed(iter/s)": 0.020266 |
| }, |
| { |
| "epoch": 0.3287671232876712, |
| "grad_norm": 0.10102561861276627, |
| "learning_rate": 9.903926402016153e-05, |
| "loss": 0.1505092740058899, |
| "memory(GiB)": 105.96, |
| "step": 15, |
| "token_acc": 0.9521562734785876, |
| "train_speed(iter/s)": 0.027262 |
| }, |
| { |
| "epoch": 0.4383561643835616, |
| "grad_norm": 0.14934773743152618, |
| "learning_rate": 9.747640902965184e-05, |
| "loss": 0.1314406156539917, |
| "memory(GiB)": 105.96, |
| "step": 20, |
| "token_acc": 0.9554180348572872, |
| "train_speed(iter/s)": 0.032573 |
| }, |
| { |
| "epoch": 0.547945205479452, |
| "grad_norm": 0.1648729294538498, |
| "learning_rate": 9.519946465617218e-05, |
| "loss": 0.1481643557548523, |
| "memory(GiB)": 105.96, |
| "step": 25, |
| "token_acc": 0.9479316731837827, |
| "train_speed(iter/s)": 0.036288 |
| }, |
| { |
| "epoch": 0.6575342465753424, |
| "grad_norm": 0.10335330665111542, |
| "learning_rate": 9.224267826248536e-05, |
| "loss": 0.12146763801574707, |
| "memory(GiB)": 105.96, |
| "step": 30, |
| "token_acc": 0.9561634805537245, |
| "train_speed(iter/s)": 0.040101 |
| }, |
| { |
| "epoch": 0.7671232876712328, |
| "grad_norm": 0.09456586837768555, |
| "learning_rate": 8.865052266813685e-05, |
| "loss": 0.1343398928642273, |
| "memory(GiB)": 105.96, |
| "step": 35, |
| "token_acc": 0.9518978243441241, |
| "train_speed(iter/s)": 0.043284 |
| }, |
| { |
| "epoch": 0.8767123287671232, |
| "grad_norm": 0.07303319126367569, |
| "learning_rate": 8.447702723685335e-05, |
| "loss": 0.10934320688247681, |
| "memory(GiB)": 115.12, |
| "step": 40, |
| "token_acc": 0.9607801568793725, |
| "train_speed(iter/s)": 0.0449 |
| }, |
| { |
| "epoch": 0.9863013698630136, |
| "grad_norm": 0.11877991259098053, |
| "learning_rate": 7.978496522462167e-05, |
| "loss": 0.126079523563385, |
| "memory(GiB)": 115.12, |
| "step": 45, |
| "token_acc": 0.9533266093972803, |
| "train_speed(iter/s)": 0.047779 |
| }, |
| { |
| "epoch": 1.0876712328767124, |
| "grad_norm": 0.09073251485824585, |
| "learning_rate": 7.464490961148921e-05, |
| "loss": 0.10618455410003662, |
| "memory(GiB)": 115.12, |
| "step": 50, |
| "token_acc": 0.9644618781415888, |
| "train_speed(iter/s)": 0.049565 |
| }, |
| { |
| "epoch": 1.1972602739726028, |
| "grad_norm": 0.08698726445436478, |
| "learning_rate": 6.91341716182545e-05, |
| "loss": 0.10942069292068482, |
| "memory(GiB)": 115.12, |
| "step": 55, |
| "token_acc": 0.9613163469630576, |
| "train_speed(iter/s)": 0.051581 |
| }, |
| { |
| "epoch": 1.3068493150684932, |
| "grad_norm": 0.11653894186019897, |
| "learning_rate": 6.333563787374493e-05, |
| "loss": 0.10550706386566162, |
| "memory(GiB)": 115.12, |
| "step": 60, |
| "token_acc": 0.964783186494069, |
| "train_speed(iter/s)": 0.053018 |
| }, |
| { |
| "epoch": 1.4164383561643836, |
| "grad_norm": 0.11223548650741577, |
| "learning_rate": 5.733652372276809e-05, |
| "loss": 0.10657280683517456, |
| "memory(GiB)": 115.12, |
| "step": 65, |
| "token_acc": 0.9625531401694455, |
| "train_speed(iter/s)": 0.054731 |
| }, |
| { |
| "epoch": 1.526027397260274, |
| "grad_norm": 0.14359991252422333, |
| "learning_rate": 5.122706142614562e-05, |
| "loss": 0.11491676568984985, |
| "memory(GiB)": 115.13, |
| "step": 70, |
| "token_acc": 0.9618089745393164, |
| "train_speed(iter/s)": 0.056347 |
| }, |
| { |
| "epoch": 1.6356164383561644, |
| "grad_norm": 0.10080211609601974, |
| "learning_rate": 4.509914298352197e-05, |
| "loss": 0.09663158655166626, |
| "memory(GiB)": 115.13, |
| "step": 75, |
| "token_acc": 0.965161357921207, |
| "train_speed(iter/s)": 0.057444 |
| }, |
| { |
| "epoch": 1.7452054794520548, |
| "grad_norm": 0.0964246392250061, |
| "learning_rate": 3.9044937992156516e-05, |
| "loss": 0.10549492835998535, |
| "memory(GiB)": 115.13, |
| "step": 80, |
| "token_acc": 0.9612994018998475, |
| "train_speed(iter/s)": 0.058281 |
| }, |
| { |
| "epoch": 1.8547945205479452, |
| "grad_norm": 0.09354526549577713, |
| "learning_rate": 3.3155507330389e-05, |
| "loss": 0.08785929679870605, |
| "memory(GiB)": 115.13, |
| "step": 85, |
| "token_acc": 0.9670452769238785, |
| "train_speed(iter/s)": 0.059315 |
| }, |
| { |
| "epoch": 1.9643835616438357, |
| "grad_norm": 0.12421014159917831, |
| "learning_rate": 2.7519433517269665e-05, |
| "loss": 0.10144306421279907, |
| "memory(GiB)": 115.13, |
| "step": 90, |
| "token_acc": 0.9624555254633317, |
| "train_speed(iter/s)": 0.060163 |
| }, |
| { |
| "epoch": 2.0657534246575344, |
| "grad_norm": 0.08086857199668884, |
| "learning_rate": 2.2221488349019903e-05, |
| "loss": 0.08750819563865661, |
| "memory(GiB)": 115.13, |
| "step": 95, |
| "token_acc": 0.9699472974629244, |
| "train_speed(iter/s)": 0.06089 |
| }, |
| { |
| "epoch": 2.175342465753425, |
| "grad_norm": 0.12894976139068604, |
| "learning_rate": 1.7341357852311174e-05, |
| "loss": 0.09126536846160889, |
| "memory(GiB)": 115.13, |
| "step": 100, |
| "token_acc": 0.9701614366043896, |
| "train_speed(iter/s)": 0.061361 |
| }, |
| { |
| "epoch": 2.2849315068493152, |
| "grad_norm": 0.14563050866127014, |
| "learning_rate": 1.2952443732252057e-05, |
| "loss": 0.08330788612365722, |
| "memory(GiB)": 115.13, |
| "step": 105, |
| "token_acc": 0.9722385948367323, |
| "train_speed(iter/s)": 0.062287 |
| }, |
| { |
| "epoch": 2.3945205479452056, |
| "grad_norm": 0.1167457178235054, |
| "learning_rate": 9.12075934242082e-06, |
| "loss": 0.08607174158096313, |
| "memory(GiB)": 115.13, |
| "step": 110, |
| "token_acc": 0.9686517477344183, |
| "train_speed(iter/s)": 0.063146 |
| }, |
| { |
| "epoch": 2.504109589041096, |
| "grad_norm": 0.10588780790567398, |
| "learning_rate": 5.903936782582253e-06, |
| "loss": 0.07531117200851441, |
| "memory(GiB)": 115.13, |
| "step": 115, |
| "token_acc": 0.9729850523911209, |
| "train_speed(iter/s)": 0.06371 |
| }, |
| { |
| "epoch": 2.6136986301369864, |
| "grad_norm": 0.1297299861907959, |
| "learning_rate": 3.350360058263058e-06, |
| "loss": 0.08095236420631409, |
| "memory(GiB)": 115.13, |
| "step": 120, |
| "token_acc": 0.9723439137796651, |
| "train_speed(iter/s)": 0.064274 |
| }, |
| { |
| "epoch": 2.723287671232877, |
| "grad_norm": 0.11432217061519623, |
| "learning_rate": 1.4984373402728014e-06, |
| "loss": 0.09324527382850648, |
| "memory(GiB)": 115.15, |
| "step": 125, |
| "token_acc": 0.9666533115542268, |
| "train_speed(iter/s)": 0.064962 |
| }, |
| { |
| "epoch": 2.8328767123287673, |
| "grad_norm": 0.10140057653188705, |
| "learning_rate": 3.7602327006450167e-07, |
| "loss": 0.07901231050491334, |
| "memory(GiB)": 115.15, |
| "step": 130, |
| "token_acc": 0.9727071738964702, |
| "train_speed(iter/s)": 0.065426 |
| }, |
| { |
| "epoch": 2.9424657534246577, |
| "grad_norm": 0.09595915675163269, |
| "learning_rate": 0.0, |
| "loss": 0.08781038522720337, |
| "memory(GiB)": 115.15, |
| "step": 135, |
| "token_acc": 0.9682683590208522, |
| "train_speed(iter/s)": 0.065788 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 135, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.1292311990945178e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|