| { |
| "best_metric": 0.00065021, |
| "best_model_checkpoint": "/mnt/si0001694oxp/default/vlm_sft/outputs/output/deepseek-vl-7b-chat/v32-20250613-154734/checkpoint-3000", |
| "epoch": 1.0993219717793659, |
| "eval_steps": 500, |
| "global_step": 3000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0003665017408832692, |
| "grad_norm": 16.33839225769043, |
| "learning_rate": 9.999999631609428e-06, |
| "loss": 3.0264194011688232, |
| "memory(GiB)": 149.2, |
| "step": 1, |
| "token_acc": 0.4675925925925926, |
| "train_speed(iter/s)": 0.033836 |
| }, |
| { |
| "epoch": 0.001832508704416346, |
| "grad_norm": 6.188778400421143, |
| "learning_rate": 9.999990790238409e-06, |
| "loss": 0.6836232542991638, |
| "memory(GiB)": 158.4, |
| "step": 5, |
| "token_acc": 0.8406651231319722, |
| "train_speed(iter/s)": 0.042635 |
| }, |
| { |
| "epoch": 0.003665017408832692, |
| "grad_norm": 0.4842391312122345, |
| "learning_rate": 9.999963160987561e-06, |
| "loss": 0.05034670829772949, |
| "memory(GiB)": 158.4, |
| "step": 10, |
| "token_acc": 0.9856121161127471, |
| "train_speed(iter/s)": 0.04407 |
| }, |
| { |
| "epoch": 0.005497526113249038, |
| "grad_norm": 0.1827951818704605, |
| "learning_rate": 9.99991711234924e-06, |
| "loss": 0.01651783734560013, |
| "memory(GiB)": 158.4, |
| "step": 15, |
| "token_acc": 0.9920074036681811, |
| "train_speed(iter/s)": 0.044607 |
| }, |
| { |
| "epoch": 0.007330034817665384, |
| "grad_norm": 0.09308009594678879, |
| "learning_rate": 9.999852644493086e-06, |
| "loss": 0.014441253244876861, |
| "memory(GiB)": 158.4, |
| "step": 20, |
| "token_acc": 0.9914947368421053, |
| "train_speed(iter/s)": 0.044685 |
| }, |
| { |
| "epoch": 0.00916254352208173, |
| "grad_norm": 0.13165982067584991, |
| "learning_rate": 9.999769757656593e-06, |
| "loss": 0.013714964687824249, |
| "memory(GiB)": 158.4, |
| "step": 25, |
| "token_acc": 0.9919225915018931, |
| "train_speed(iter/s)": 0.044897 |
| }, |
| { |
| "epoch": 0.010995052226498075, |
| "grad_norm": 0.136412113904953, |
| "learning_rate": 9.999668452145104e-06, |
| "loss": 0.010563116520643234, |
| "memory(GiB)": 158.4, |
| "step": 30, |
| "token_acc": 0.9947824623411596, |
| "train_speed(iter/s)": 0.04502 |
| }, |
| { |
| "epoch": 0.012827560930914422, |
| "grad_norm": 0.2637465298175812, |
| "learning_rate": 9.999548728331825e-06, |
| "loss": 0.008089790493249894, |
| "memory(GiB)": 158.4, |
| "step": 35, |
| "token_acc": 0.9959606160060591, |
| "train_speed(iter/s)": 0.045028 |
| }, |
| { |
| "epoch": 0.014660069635330768, |
| "grad_norm": 0.2768152952194214, |
| "learning_rate": 9.999410586657801e-06, |
| "loss": 0.005358598381280899, |
| "memory(GiB)": 158.4, |
| "step": 40, |
| "token_acc": 0.9978118161925602, |
| "train_speed(iter/s)": 0.045061 |
| }, |
| { |
| "epoch": 0.016492578339747113, |
| "grad_norm": 0.09677782654762268, |
| "learning_rate": 9.999254027631938e-06, |
| "loss": 0.003943501785397529, |
| "memory(GiB)": 158.4, |
| "step": 45, |
| "token_acc": 0.9986528584659425, |
| "train_speed(iter/s)": 0.044994 |
| }, |
| { |
| "epoch": 0.01832508704416346, |
| "grad_norm": 0.3623986840248108, |
| "learning_rate": 9.99907905183098e-06, |
| "loss": 0.0031241703778505324, |
| "memory(GiB)": 158.4, |
| "step": 50, |
| "token_acc": 0.9987373737373737, |
| "train_speed(iter/s)": 0.04505 |
| }, |
| { |
| "epoch": 0.020157595748579806, |
| "grad_norm": 0.496895432472229, |
| "learning_rate": 9.998885659899524e-06, |
| "loss": 0.002511710487306118, |
| "memory(GiB)": 158.4, |
| "step": 55, |
| "token_acc": 0.9988217471806093, |
| "train_speed(iter/s)": 0.045107 |
| }, |
| { |
| "epoch": 0.02199010445299615, |
| "grad_norm": 0.1918005645275116, |
| "learning_rate": 9.998673852550007e-06, |
| "loss": 0.002556230500340462, |
| "memory(GiB)": 158.4, |
| "step": 60, |
| "token_acc": 0.9994104270192875, |
| "train_speed(iter/s)": 0.045164 |
| }, |
| { |
| "epoch": 0.0238226131574125, |
| "grad_norm": 0.16670851409435272, |
| "learning_rate": 9.998443630562707e-06, |
| "loss": 0.0034642994403839113, |
| "memory(GiB)": 158.4, |
| "step": 65, |
| "token_acc": 0.9989904938167746, |
| "train_speed(iter/s)": 0.045187 |
| }, |
| { |
| "epoch": 0.025655121861828844, |
| "grad_norm": 0.04445331171154976, |
| "learning_rate": 9.99819499478574e-06, |
| "loss": 0.00226197075098753, |
| "memory(GiB)": 158.4, |
| "step": 70, |
| "token_acc": 0.9994109231675503, |
| "train_speed(iter/s)": 0.045194 |
| }, |
| { |
| "epoch": 0.02748763056624519, |
| "grad_norm": 0.13421526551246643, |
| "learning_rate": 9.997927946135055e-06, |
| "loss": 0.0026616916060447694, |
| "memory(GiB)": 158.4, |
| "step": 75, |
| "token_acc": 0.998989558773998, |
| "train_speed(iter/s)": 0.04522 |
| }, |
| { |
| "epoch": 0.029320139270661537, |
| "grad_norm": 0.09873384982347488, |
| "learning_rate": 9.997642485594436e-06, |
| "loss": 0.0017027700319886207, |
| "memory(GiB)": 158.4, |
| "step": 80, |
| "token_acc": 0.9993260887878022, |
| "train_speed(iter/s)": 0.04525 |
| }, |
| { |
| "epoch": 0.03115264797507788, |
| "grad_norm": 0.03224126249551773, |
| "learning_rate": 9.997338614215492e-06, |
| "loss": 0.0017118226736783982, |
| "memory(GiB)": 158.4, |
| "step": 85, |
| "token_acc": 0.9993263725159987, |
| "train_speed(iter/s)": 0.04528 |
| }, |
| { |
| "epoch": 0.032985156679494226, |
| "grad_norm": 0.3803243637084961, |
| "learning_rate": 9.997016333117655e-06, |
| "loss": 0.0019580798223614694, |
| "memory(GiB)": 158.4, |
| "step": 90, |
| "token_acc": 0.9993265993265993, |
| "train_speed(iter/s)": 0.045299 |
| }, |
| { |
| "epoch": 0.034817665383910575, |
| "grad_norm": 0.3237900733947754, |
| "learning_rate": 9.996675643488177e-06, |
| "loss": 0.002880098670721054, |
| "memory(GiB)": 158.4, |
| "step": 95, |
| "token_acc": 0.9990737622094982, |
| "train_speed(iter/s)": 0.045329 |
| }, |
| { |
| "epoch": 0.03665017408832692, |
| "grad_norm": 0.1465182900428772, |
| "learning_rate": 9.99631654658213e-06, |
| "loss": 0.0028293343260884286, |
| "memory(GiB)": 158.4, |
| "step": 100, |
| "token_acc": 0.9990743857287109, |
| "train_speed(iter/s)": 0.045355 |
| }, |
| { |
| "epoch": 0.038482682792743264, |
| "grad_norm": 0.24748782813549042, |
| "learning_rate": 9.995939043722388e-06, |
| "loss": 0.0018339043483138085, |
| "memory(GiB)": 158.4, |
| "step": 105, |
| "token_acc": 0.9994106255788499, |
| "train_speed(iter/s)": 0.045379 |
| }, |
| { |
| "epoch": 0.04031519149715961, |
| "grad_norm": 0.04621001332998276, |
| "learning_rate": 9.995543136299636e-06, |
| "loss": 0.0019403379410505295, |
| "memory(GiB)": 158.4, |
| "step": 110, |
| "token_acc": 0.9994108735903047, |
| "train_speed(iter/s)": 0.045398 |
| }, |
| { |
| "epoch": 0.04214770020157596, |
| "grad_norm": 0.06725554913282394, |
| "learning_rate": 9.995128825772365e-06, |
| "loss": 0.0010762955993413926, |
| "memory(GiB)": 158.4, |
| "step": 115, |
| "token_acc": 0.9995792308339645, |
| "train_speed(iter/s)": 0.045421 |
| }, |
| { |
| "epoch": 0.0439802089059923, |
| "grad_norm": 0.16836291551589966, |
| "learning_rate": 9.99469611366685e-06, |
| "loss": 0.0029191805049777033, |
| "memory(GiB)": 158.4, |
| "step": 120, |
| "token_acc": 0.9990743857287109, |
| "train_speed(iter/s)": 0.045438 |
| }, |
| { |
| "epoch": 0.04581271761040865, |
| "grad_norm": 0.19015128910541534, |
| "learning_rate": 9.994245001577163e-06, |
| "loss": 0.0029153132811188696, |
| "memory(GiB)": 158.4, |
| "step": 125, |
| "token_acc": 0.9988206553786538, |
| "train_speed(iter/s)": 0.045454 |
| }, |
| { |
| "epoch": 0.047645226314825, |
| "grad_norm": 0.2672649919986725, |
| "learning_rate": 9.993775491165157e-06, |
| "loss": 0.0028599994257092476, |
| "memory(GiB)": 158.4, |
| "step": 130, |
| "token_acc": 0.9989905787348586, |
| "train_speed(iter/s)": 0.045477 |
| }, |
| { |
| "epoch": 0.04947773501924134, |
| "grad_norm": 0.09613120555877686, |
| "learning_rate": 9.993287584160462e-06, |
| "loss": 0.001117743458598852, |
| "memory(GiB)": 158.4, |
| "step": 135, |
| "token_acc": 0.9996634129922585, |
| "train_speed(iter/s)": 0.045484 |
| }, |
| { |
| "epoch": 0.05131024372365769, |
| "grad_norm": 0.08400937169790268, |
| "learning_rate": 9.992781282360486e-06, |
| "loss": 0.0014099805615842343, |
| "memory(GiB)": 158.4, |
| "step": 140, |
| "token_acc": 0.9995794785534062, |
| "train_speed(iter/s)": 0.045497 |
| }, |
| { |
| "epoch": 0.053142752428074036, |
| "grad_norm": 0.2961122989654541, |
| "learning_rate": 9.992256587630392e-06, |
| "loss": 0.0026107219979166984, |
| "memory(GiB)": 158.4, |
| "step": 145, |
| "token_acc": 0.9993264292329713, |
| "train_speed(iter/s)": 0.045509 |
| }, |
| { |
| "epoch": 0.05497526113249038, |
| "grad_norm": 0.11588957160711288, |
| "learning_rate": 9.991713501903107e-06, |
| "loss": 0.0020393442362546923, |
| "memory(GiB)": 158.4, |
| "step": 150, |
| "token_acc": 0.9991583908432924, |
| "train_speed(iter/s)": 0.045518 |
| }, |
| { |
| "epoch": 0.056807769836906725, |
| "grad_norm": 0.04025767371058464, |
| "learning_rate": 9.991152027179307e-06, |
| "loss": 0.001108243688941002, |
| "memory(GiB)": 158.4, |
| "step": 155, |
| "token_acc": 0.9997475172529877, |
| "train_speed(iter/s)": 0.045528 |
| }, |
| { |
| "epoch": 0.058640278541323074, |
| "grad_norm": 0.26148226857185364, |
| "learning_rate": 9.990572165527413e-06, |
| "loss": 0.003043392114341259, |
| "memory(GiB)": 158.4, |
| "step": 160, |
| "token_acc": 0.9991581074254925, |
| "train_speed(iter/s)": 0.045537 |
| }, |
| { |
| "epoch": 0.060472787245739415, |
| "grad_norm": 0.02609323337674141, |
| "learning_rate": 9.989973919083576e-06, |
| "loss": 0.003145371749997139, |
| "memory(GiB)": 158.4, |
| "step": 165, |
| "token_acc": 0.9989058160087535, |
| "train_speed(iter/s)": 0.045548 |
| }, |
| { |
| "epoch": 0.06230529595015576, |
| "grad_norm": 0.08112650364637375, |
| "learning_rate": 9.989357290051681e-06, |
| "loss": 0.0019015805795788766, |
| "memory(GiB)": 158.4, |
| "step": 170, |
| "token_acc": 0.9991585324806462, |
| "train_speed(iter/s)": 0.045556 |
| }, |
| { |
| "epoch": 0.06413780465457211, |
| "grad_norm": 0.012307146564126015, |
| "learning_rate": 9.98872228070333e-06, |
| "loss": 0.0017634263262152673, |
| "memory(GiB)": 158.4, |
| "step": 175, |
| "token_acc": 0.9994951619688683, |
| "train_speed(iter/s)": 0.045559 |
| }, |
| { |
| "epoch": 0.06597031335898845, |
| "grad_norm": 0.22926685214042664, |
| "learning_rate": 9.988068893377841e-06, |
| "loss": 0.0008580862544476986, |
| "memory(GiB)": 158.4, |
| "step": 180, |
| "token_acc": 0.9996634413125789, |
| "train_speed(iter/s)": 0.045562 |
| }, |
| { |
| "epoch": 0.06780282206340481, |
| "grad_norm": 0.07493411749601364, |
| "learning_rate": 9.987397130482224e-06, |
| "loss": 0.001726461760699749, |
| "memory(GiB)": 158.4, |
| "step": 185, |
| "token_acc": 0.9994107744107744, |
| "train_speed(iter/s)": 0.045574 |
| }, |
| { |
| "epoch": 0.06963533076782115, |
| "grad_norm": 0.11616482585668564, |
| "learning_rate": 9.986706994491194e-06, |
| "loss": 0.0020760688930749893, |
| "memory(GiB)": 158.4, |
| "step": 190, |
| "token_acc": 0.999494779386999, |
| "train_speed(iter/s)": 0.04558 |
| }, |
| { |
| "epoch": 0.07146783947223749, |
| "grad_norm": 0.1130843311548233, |
| "learning_rate": 9.985998487947143e-06, |
| "loss": 0.003568219020962715, |
| "memory(GiB)": 158.4, |
| "step": 195, |
| "token_acc": 0.9988221436984688, |
| "train_speed(iter/s)": 0.045588 |
| }, |
| { |
| "epoch": 0.07330034817665385, |
| "grad_norm": 0.03086119331419468, |
| "learning_rate": 9.985271613460144e-06, |
| "loss": 0.0014082181267440319, |
| "memory(GiB)": 158.4, |
| "step": 200, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.045593 |
| }, |
| { |
| "epoch": 0.07513285688107019, |
| "grad_norm": 0.10936316847801208, |
| "learning_rate": 9.984526373707933e-06, |
| "loss": 0.0023099591955542563, |
| "memory(GiB)": 158.4, |
| "step": 205, |
| "token_acc": 0.999242615501136, |
| "train_speed(iter/s)": 0.045599 |
| }, |
| { |
| "epoch": 0.07696536558548653, |
| "grad_norm": 0.17849738895893097, |
| "learning_rate": 9.983762771435902e-06, |
| "loss": 0.0017316842451691628, |
| "memory(GiB)": 158.4, |
| "step": 210, |
| "token_acc": 0.9995793016407236, |
| "train_speed(iter/s)": 0.0456 |
| }, |
| { |
| "epoch": 0.07879787428990288, |
| "grad_norm": 0.07379074394702911, |
| "learning_rate": 9.982980809457088e-06, |
| "loss": 0.001504539605230093, |
| "memory(GiB)": 158.4, |
| "step": 215, |
| "token_acc": 0.99949499200404, |
| "train_speed(iter/s)": 0.045601 |
| }, |
| { |
| "epoch": 0.08063038299431922, |
| "grad_norm": 0.20956623554229736, |
| "learning_rate": 9.982180490652165e-06, |
| "loss": 0.001286138966679573, |
| "memory(GiB)": 158.4, |
| "step": 220, |
| "token_acc": 0.9997476022211005, |
| "train_speed(iter/s)": 0.045606 |
| }, |
| { |
| "epoch": 0.08246289169873557, |
| "grad_norm": 0.36039137840270996, |
| "learning_rate": 9.981361817969433e-06, |
| "loss": 0.0015822691842913628, |
| "memory(GiB)": 158.4, |
| "step": 225, |
| "token_acc": 0.999494779386999, |
| "train_speed(iter/s)": 0.045612 |
| }, |
| { |
| "epoch": 0.08429540040315192, |
| "grad_norm": 0.05167197808623314, |
| "learning_rate": 9.9805247944248e-06, |
| "loss": 0.0016318798065185548, |
| "memory(GiB)": 158.4, |
| "step": 230, |
| "token_acc": 0.9994951194883878, |
| "train_speed(iter/s)": 0.045618 |
| }, |
| { |
| "epoch": 0.08612790910756826, |
| "grad_norm": 0.0602310486137867, |
| "learning_rate": 9.979669423101784e-06, |
| "loss": 0.0017338620498776435, |
| "memory(GiB)": 158.4, |
| "step": 235, |
| "token_acc": 0.9992421690804985, |
| "train_speed(iter/s)": 0.045622 |
| }, |
| { |
| "epoch": 0.0879604178119846, |
| "grad_norm": 0.03006557747721672, |
| "learning_rate": 9.978795707151492e-06, |
| "loss": 0.0005913118831813336, |
| "memory(GiB)": 158.4, |
| "step": 240, |
| "token_acc": 0.9997476659096644, |
| "train_speed(iter/s)": 0.045626 |
| }, |
| { |
| "epoch": 0.08979292651640096, |
| "grad_norm": 0.1851363480091095, |
| "learning_rate": 9.977903649792606e-06, |
| "loss": 0.0013333003968000411, |
| "memory(GiB)": 158.4, |
| "step": 245, |
| "token_acc": 0.9995793016407236, |
| "train_speed(iter/s)": 0.04562 |
| }, |
| { |
| "epoch": 0.0916254352208173, |
| "grad_norm": 0.16427940130233765, |
| "learning_rate": 9.976993254311385e-06, |
| "loss": 0.0022492580115795135, |
| "memory(GiB)": 158.4, |
| "step": 250, |
| "token_acc": 0.999326259053394, |
| "train_speed(iter/s)": 0.045566 |
| }, |
| { |
| "epoch": 0.09345794392523364, |
| "grad_norm": 0.07113044708967209, |
| "learning_rate": 9.976064524061637e-06, |
| "loss": 0.0023244613781571387, |
| "memory(GiB)": 158.4, |
| "step": 255, |
| "token_acc": 0.9994107744107744, |
| "train_speed(iter/s)": 0.04552 |
| }, |
| { |
| "epoch": 0.09529045262965, |
| "grad_norm": 0.0672680214047432, |
| "learning_rate": 9.975117462464716e-06, |
| "loss": 0.0020451253280043603, |
| "memory(GiB)": 158.4, |
| "step": 260, |
| "token_acc": 0.9994105263157895, |
| "train_speed(iter/s)": 0.045509 |
| }, |
| { |
| "epoch": 0.09712296133406634, |
| "grad_norm": 0.09312908351421356, |
| "learning_rate": 9.974152073009506e-06, |
| "loss": 0.0018878720700740814, |
| "memory(GiB)": 158.4, |
| "step": 265, |
| "token_acc": 0.9994954167017072, |
| "train_speed(iter/s)": 0.045482 |
| }, |
| { |
| "epoch": 0.09895547003848268, |
| "grad_norm": 0.06397019326686859, |
| "learning_rate": 9.973168359252411e-06, |
| "loss": 0.0020165286958217623, |
| "memory(GiB)": 158.4, |
| "step": 270, |
| "token_acc": 0.9994108735903047, |
| "train_speed(iter/s)": 0.045476 |
| }, |
| { |
| "epoch": 0.10078797874289903, |
| "grad_norm": 0.15306073427200317, |
| "learning_rate": 9.972166324817338e-06, |
| "loss": 0.0017529357224702834, |
| "memory(GiB)": 158.4, |
| "step": 275, |
| "token_acc": 0.9997474109623642, |
| "train_speed(iter/s)": 0.045455 |
| }, |
| { |
| "epoch": 0.10262048744731538, |
| "grad_norm": 0.13208770751953125, |
| "learning_rate": 9.971145973395685e-06, |
| "loss": 0.001645715907216072, |
| "memory(GiB)": 158.4, |
| "step": 280, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.045452 |
| }, |
| { |
| "epoch": 0.10445299615173172, |
| "grad_norm": 0.0297766774892807, |
| "learning_rate": 9.97010730874633e-06, |
| "loss": 0.0012823720462620258, |
| "memory(GiB)": 158.4, |
| "step": 285, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.045422 |
| }, |
| { |
| "epoch": 0.10628550485614807, |
| "grad_norm": 0.16176588833332062, |
| "learning_rate": 9.969050334695619e-06, |
| "loss": 0.001742975413799286, |
| "memory(GiB)": 158.4, |
| "step": 290, |
| "token_acc": 0.9995788764423482, |
| "train_speed(iter/s)": 0.045417 |
| }, |
| { |
| "epoch": 0.10811801356056441, |
| "grad_norm": 0.10822831094264984, |
| "learning_rate": 9.967975055137335e-06, |
| "loss": 0.002227822504937649, |
| "memory(GiB)": 158.4, |
| "step": 295, |
| "token_acc": 0.9994103773584906, |
| "train_speed(iter/s)": 0.045373 |
| }, |
| { |
| "epoch": 0.10995052226498075, |
| "grad_norm": 0.1328648328781128, |
| "learning_rate": 9.966881474032711e-06, |
| "loss": 0.0017272233963012695, |
| "memory(GiB)": 158.4, |
| "step": 300, |
| "token_acc": 0.9994105759514988, |
| "train_speed(iter/s)": 0.045362 |
| }, |
| { |
| "epoch": 0.11178303096939711, |
| "grad_norm": 0.11945555359125137, |
| "learning_rate": 9.965769595410395e-06, |
| "loss": 0.0011399961076676846, |
| "memory(GiB)": 158.4, |
| "step": 305, |
| "token_acc": 0.9995791954216462, |
| "train_speed(iter/s)": 0.045363 |
| }, |
| { |
| "epoch": 0.11361553967381345, |
| "grad_norm": 0.2175164371728897, |
| "learning_rate": 9.964639423366442e-06, |
| "loss": 0.0025836611166596413, |
| "memory(GiB)": 158.4, |
| "step": 310, |
| "token_acc": 0.9990738401953355, |
| "train_speed(iter/s)": 0.045357 |
| }, |
| { |
| "epoch": 0.11544804837822979, |
| "grad_norm": 0.035975273698568344, |
| "learning_rate": 9.963490962064297e-06, |
| "loss": 0.0006968880537897348, |
| "memory(GiB)": 158.4, |
| "step": 315, |
| "token_acc": 0.9997475385003787, |
| "train_speed(iter/s)": 0.04536 |
| }, |
| { |
| "epoch": 0.11728055708264615, |
| "grad_norm": 0.14850489795207977, |
| "learning_rate": 9.962324215734782e-06, |
| "loss": 0.0017726331949234008, |
| "memory(GiB)": 158.4, |
| "step": 320, |
| "token_acc": 0.999242615501136, |
| "train_speed(iter/s)": 0.045365 |
| }, |
| { |
| "epoch": 0.11911306578706249, |
| "grad_norm": 0.03455163165926933, |
| "learning_rate": 9.96113918867608e-06, |
| "loss": 0.0013269748538732528, |
| "memory(GiB)": 158.4, |
| "step": 325, |
| "token_acc": 0.9997475172529877, |
| "train_speed(iter/s)": 0.045365 |
| }, |
| { |
| "epoch": 0.12094557449147883, |
| "grad_norm": 0.23186658322811127, |
| "learning_rate": 9.959935885253715e-06, |
| "loss": 0.0010508694685995579, |
| "memory(GiB)": 158.4, |
| "step": 330, |
| "token_acc": 0.9998317064961293, |
| "train_speed(iter/s)": 0.045369 |
| }, |
| { |
| "epoch": 0.12277808319589519, |
| "grad_norm": 0.06666416674852371, |
| "learning_rate": 9.958714309900546e-06, |
| "loss": 0.0009142296388745308, |
| "memory(GiB)": 158.4, |
| "step": 335, |
| "token_acc": 0.9995789119083712, |
| "train_speed(iter/s)": 0.045376 |
| }, |
| { |
| "epoch": 0.12461059190031153, |
| "grad_norm": 0.014640443958342075, |
| "learning_rate": 9.957474467116739e-06, |
| "loss": 0.0024377334862947463, |
| "memory(GiB)": 158.4, |
| "step": 340, |
| "token_acc": 0.9992424880060601, |
| "train_speed(iter/s)": 0.045382 |
| }, |
| { |
| "epoch": 0.12644310060472788, |
| "grad_norm": 0.15044739842414856, |
| "learning_rate": 9.956216361469755e-06, |
| "loss": 0.002022208273410797, |
| "memory(GiB)": 158.4, |
| "step": 345, |
| "token_acc": 0.9994952893674294, |
| "train_speed(iter/s)": 0.045388 |
| }, |
| { |
| "epoch": 0.12827560930914422, |
| "grad_norm": 0.012829025276005268, |
| "learning_rate": 9.954939997594335e-06, |
| "loss": 0.003057861886918545, |
| "memory(GiB)": 158.4, |
| "step": 350, |
| "token_acc": 0.9992422966829433, |
| "train_speed(iter/s)": 0.045394 |
| }, |
| { |
| "epoch": 0.13010811801356056, |
| "grad_norm": 0.02966240420937538, |
| "learning_rate": 9.953645380192485e-06, |
| "loss": 0.0017476610839366913, |
| "memory(GiB)": 158.4, |
| "step": 355, |
| "token_acc": 0.999663356337317, |
| "train_speed(iter/s)": 0.045399 |
| }, |
| { |
| "epoch": 0.1319406267179769, |
| "grad_norm": 0.0715402215719223, |
| "learning_rate": 9.952332514033449e-06, |
| "loss": 0.0023743031546473504, |
| "memory(GiB)": 158.4, |
| "step": 360, |
| "token_acc": 0.9991585324806462, |
| "train_speed(iter/s)": 0.045407 |
| }, |
| { |
| "epoch": 0.13377313542239325, |
| "grad_norm": 0.07701452821493149, |
| "learning_rate": 9.9510014039537e-06, |
| "loss": 0.0022863084450364113, |
| "memory(GiB)": 158.4, |
| "step": 365, |
| "token_acc": 0.9994110222970131, |
| "train_speed(iter/s)": 0.04541 |
| }, |
| { |
| "epoch": 0.13560564412680962, |
| "grad_norm": 0.09453430771827698, |
| "learning_rate": 9.949652054856924e-06, |
| "loss": 0.0019000820815563203, |
| "memory(GiB)": 158.4, |
| "step": 370, |
| "token_acc": 0.9993265426382693, |
| "train_speed(iter/s)": 0.045415 |
| }, |
| { |
| "epoch": 0.13743815283122596, |
| "grad_norm": 0.0394257977604866, |
| "learning_rate": 9.948284471713994e-06, |
| "loss": 0.0016634922474622726, |
| "memory(GiB)": 158.4, |
| "step": 375, |
| "token_acc": 0.9994104766717197, |
| "train_speed(iter/s)": 0.045419 |
| }, |
| { |
| "epoch": 0.1392706615356423, |
| "grad_norm": 0.04517311230301857, |
| "learning_rate": 9.94689865956295e-06, |
| "loss": 0.0017285166308283807, |
| "memory(GiB)": 158.4, |
| "step": 380, |
| "token_acc": 0.9994948644552955, |
| "train_speed(iter/s)": 0.045425 |
| }, |
| { |
| "epoch": 0.14110317024005864, |
| "grad_norm": 0.07294133305549622, |
| "learning_rate": 9.945494623509003e-06, |
| "loss": 0.000422241585329175, |
| "memory(GiB)": 158.4, |
| "step": 385, |
| "token_acc": 0.9999158390843292, |
| "train_speed(iter/s)": 0.045427 |
| }, |
| { |
| "epoch": 0.14293567894447498, |
| "grad_norm": 0.06523015350103378, |
| "learning_rate": 9.944072368724476e-06, |
| "loss": 0.0024235062301158905, |
| "memory(GiB)": 158.4, |
| "step": 390, |
| "token_acc": 0.9994953318193288, |
| "train_speed(iter/s)": 0.045433 |
| }, |
| { |
| "epoch": 0.14476818764889132, |
| "grad_norm": 0.0444883331656456, |
| "learning_rate": 9.942631900448827e-06, |
| "loss": 0.0009868125431239604, |
| "memory(GiB)": 158.4, |
| "step": 395, |
| "token_acc": 0.999663356337317, |
| "train_speed(iter/s)": 0.045437 |
| }, |
| { |
| "epoch": 0.1466006963533077, |
| "grad_norm": 0.01692277006804943, |
| "learning_rate": 9.941173223988603e-06, |
| "loss": 0.0023114632815122603, |
| "memory(GiB)": 158.4, |
| "step": 400, |
| "token_acc": 0.9993263725159987, |
| "train_speed(iter/s)": 0.045442 |
| }, |
| { |
| "epoch": 0.14843320505772403, |
| "grad_norm": 0.02756733074784279, |
| "learning_rate": 9.939696344717427e-06, |
| "loss": 0.0015292948111891747, |
| "memory(GiB)": 158.4, |
| "step": 405, |
| "token_acc": 0.9994107744107744, |
| "train_speed(iter/s)": 0.045444 |
| }, |
| { |
| "epoch": 0.15026571376214037, |
| "grad_norm": 0.09074392169713974, |
| "learning_rate": 9.938201268075982e-06, |
| "loss": 0.0020554307848215103, |
| "memory(GiB)": 158.4, |
| "step": 410, |
| "token_acc": 0.9992423604680528, |
| "train_speed(iter/s)": 0.045448 |
| }, |
| { |
| "epoch": 0.15209822246655672, |
| "grad_norm": 0.07123276591300964, |
| "learning_rate": 9.936687999571987e-06, |
| "loss": 0.0014599796384572982, |
| "memory(GiB)": 158.4, |
| "step": 415, |
| "token_acc": 0.9994952044422009, |
| "train_speed(iter/s)": 0.045449 |
| }, |
| { |
| "epoch": 0.15393073117097306, |
| "grad_norm": 0.07088897377252579, |
| "learning_rate": 9.935156544780183e-06, |
| "loss": 0.0010397397913038731, |
| "memory(GiB)": 158.4, |
| "step": 420, |
| "token_acc": 0.9996633846671716, |
| "train_speed(iter/s)": 0.045448 |
| }, |
| { |
| "epoch": 0.1557632398753894, |
| "grad_norm": 0.1305522322654724, |
| "learning_rate": 9.9336069093423e-06, |
| "loss": 0.0015219044871628284, |
| "memory(GiB)": 158.4, |
| "step": 425, |
| "token_acc": 0.9994950770007573, |
| "train_speed(iter/s)": 0.045451 |
| }, |
| { |
| "epoch": 0.15759574857980577, |
| "grad_norm": 0.03542817756533623, |
| "learning_rate": 9.932039098967046e-06, |
| "loss": 0.002127250283956528, |
| "memory(GiB)": 158.4, |
| "step": 430, |
| "token_acc": 0.9994949494949495, |
| "train_speed(iter/s)": 0.045456 |
| }, |
| { |
| "epoch": 0.1594282572842221, |
| "grad_norm": 0.14930537343025208, |
| "learning_rate": 9.930453119430086e-06, |
| "loss": 0.000645923474803567, |
| "memory(GiB)": 158.4, |
| "step": 435, |
| "token_acc": 0.9997474960020201, |
| "train_speed(iter/s)": 0.045458 |
| }, |
| { |
| "epoch": 0.16126076598863845, |
| "grad_norm": 0.10225468873977661, |
| "learning_rate": 9.92884897657402e-06, |
| "loss": 0.000911066122353077, |
| "memory(GiB)": 158.4, |
| "step": 440, |
| "token_acc": 0.9997473471450228, |
| "train_speed(iter/s)": 0.045415 |
| }, |
| { |
| "epoch": 0.1630932746930548, |
| "grad_norm": 0.05018873140215874, |
| "learning_rate": 9.927226676308354e-06, |
| "loss": 0.00166127011179924, |
| "memory(GiB)": 158.4, |
| "step": 445, |
| "token_acc": 0.9997476871320438, |
| "train_speed(iter/s)": 0.045381 |
| }, |
| { |
| "epoch": 0.16492578339747113, |
| "grad_norm": 0.17071396112442017, |
| "learning_rate": 9.925586224609489e-06, |
| "loss": 0.0025668978691101075, |
| "memory(GiB)": 158.4, |
| "step": 450, |
| "token_acc": 0.9994110718492344, |
| "train_speed(iter/s)": 0.045381 |
| }, |
| { |
| "epoch": 0.16675829210188747, |
| "grad_norm": 0.008416908793151379, |
| "learning_rate": 9.923927627520694e-06, |
| "loss": 0.000798144843429327, |
| "memory(GiB)": 158.4, |
| "step": 455, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.045382 |
| }, |
| { |
| "epoch": 0.16859080080630384, |
| "grad_norm": 0.1326538473367691, |
| "learning_rate": 9.922250891152078e-06, |
| "loss": 0.0013757062144577504, |
| "memory(GiB)": 158.4, |
| "step": 460, |
| "token_acc": 0.9994102780117945, |
| "train_speed(iter/s)": 0.045388 |
| }, |
| { |
| "epoch": 0.17042330951072018, |
| "grad_norm": 0.10151144862174988, |
| "learning_rate": 9.92055602168058e-06, |
| "loss": 0.0008957336656749248, |
| "memory(GiB)": 158.4, |
| "step": 465, |
| "token_acc": 0.9996634696281339, |
| "train_speed(iter/s)": 0.045392 |
| }, |
| { |
| "epoch": 0.17225581821513652, |
| "grad_norm": 0.09111111611127853, |
| "learning_rate": 9.918843025349941e-06, |
| "loss": 0.0013033418916165828, |
| "memory(GiB)": 158.4, |
| "step": 470, |
| "token_acc": 0.9995792308339645, |
| "train_speed(iter/s)": 0.045396 |
| }, |
| { |
| "epoch": 0.17408832691955287, |
| "grad_norm": 0.029473107308149338, |
| "learning_rate": 9.917111908470673e-06, |
| "loss": 0.0013312675058841706, |
| "memory(GiB)": 158.4, |
| "step": 475, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.0454 |
| }, |
| { |
| "epoch": 0.1759208356239692, |
| "grad_norm": 0.1001836434006691, |
| "learning_rate": 9.915362677420045e-06, |
| "loss": 0.0019773678854107858, |
| "memory(GiB)": 158.4, |
| "step": 480, |
| "token_acc": 0.9991580365412142, |
| "train_speed(iter/s)": 0.045406 |
| }, |
| { |
| "epoch": 0.17775334432838555, |
| "grad_norm": 0.047665633261203766, |
| "learning_rate": 9.913595338642059e-06, |
| "loss": 0.0014092091470956803, |
| "memory(GiB)": 158.4, |
| "step": 485, |
| "token_acc": 0.9997473896934995, |
| "train_speed(iter/s)": 0.045408 |
| }, |
| { |
| "epoch": 0.17958585303280192, |
| "grad_norm": 0.02579871006309986, |
| "learning_rate": 9.91180989864742e-06, |
| "loss": 0.0007158961612731219, |
| "memory(GiB)": 158.4, |
| "step": 490, |
| "token_acc": 0.9996631862579993, |
| "train_speed(iter/s)": 0.045412 |
| }, |
| { |
| "epoch": 0.18141836173721826, |
| "grad_norm": 0.028310472145676613, |
| "learning_rate": 9.910006364013522e-06, |
| "loss": 0.0007194250822067261, |
| "memory(GiB)": 158.4, |
| "step": 495, |
| "token_acc": 0.9997475172529877, |
| "train_speed(iter/s)": 0.045414 |
| }, |
| { |
| "epoch": 0.1832508704416346, |
| "grad_norm": 0.12511947751045227, |
| "learning_rate": 9.908184741384412e-06, |
| "loss": 0.0015858769416809081, |
| "memory(GiB)": 158.4, |
| "step": 500, |
| "token_acc": 0.999663242970197, |
| "train_speed(iter/s)": 0.045418 |
| }, |
| { |
| "epoch": 0.1832508704416346, |
| "eval_loss": 0.0010450058616697788, |
| "eval_runtime": 172.5985, |
| "eval_samples_per_second": 2.549, |
| "eval_steps_per_second": 2.549, |
| "eval_token_acc": 0.9996786189798448, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.18508337914605094, |
| "grad_norm": 0.007651148363947868, |
| "learning_rate": 9.906345037470776e-06, |
| "loss": 0.0017563182860612868, |
| "memory(GiB)": 160.86, |
| "step": 505, |
| "token_acc": 0.9996503360658923, |
| "train_speed(iter/s)": 0.0421 |
| }, |
| { |
| "epoch": 0.18691588785046728, |
| "grad_norm": 0.06049024686217308, |
| "learning_rate": 9.904487259049907e-06, |
| "loss": 0.0015754606574773788, |
| "memory(GiB)": 160.86, |
| "step": 510, |
| "token_acc": 0.9991582491582491, |
| "train_speed(iter/s)": 0.0421 |
| }, |
| { |
| "epoch": 0.18874839655488362, |
| "grad_norm": 0.06416209042072296, |
| "learning_rate": 9.902611412965681e-06, |
| "loss": 0.0016123156994581223, |
| "memory(GiB)": 160.86, |
| "step": 515, |
| "token_acc": 0.9994950770007573, |
| "train_speed(iter/s)": 0.042128 |
| }, |
| { |
| "epoch": 0.1905809052593, |
| "grad_norm": 0.028774991631507874, |
| "learning_rate": 9.90071750612854e-06, |
| "loss": 0.001327525917440653, |
| "memory(GiB)": 160.86, |
| "step": 520, |
| "token_acc": 0.999326825984517, |
| "train_speed(iter/s)": 0.042161 |
| }, |
| { |
| "epoch": 0.19241341396371633, |
| "grad_norm": 0.01806553080677986, |
| "learning_rate": 9.898805545515455e-06, |
| "loss": 0.0018014278262853622, |
| "memory(GiB)": 160.86, |
| "step": 525, |
| "token_acc": 0.999494779386999, |
| "train_speed(iter/s)": 0.042191 |
| }, |
| { |
| "epoch": 0.19424592266813268, |
| "grad_norm": 0.022810854017734528, |
| "learning_rate": 9.896875538169906e-06, |
| "loss": 0.0012151801958680153, |
| "memory(GiB)": 160.86, |
| "step": 530, |
| "token_acc": 0.9996629876147949, |
| "train_speed(iter/s)": 0.042224 |
| }, |
| { |
| "epoch": 0.19607843137254902, |
| "grad_norm": 0.11561686545610428, |
| "learning_rate": 9.894927491201856e-06, |
| "loss": 0.0021266091614961626, |
| "memory(GiB)": 160.86, |
| "step": 535, |
| "token_acc": 0.9994109727364524, |
| "train_speed(iter/s)": 0.042231 |
| }, |
| { |
| "epoch": 0.19791094007696536, |
| "grad_norm": 0.06175706535577774, |
| "learning_rate": 9.892961411787725e-06, |
| "loss": 0.0011159414425492287, |
| "memory(GiB)": 160.86, |
| "step": 540, |
| "token_acc": 0.9996632146164857, |
| "train_speed(iter/s)": 0.042227 |
| }, |
| { |
| "epoch": 0.1997434487813817, |
| "grad_norm": 0.05753181502223015, |
| "learning_rate": 9.890977307170362e-06, |
| "loss": 0.001347663253545761, |
| "memory(GiB)": 160.86, |
| "step": 545, |
| "token_acc": 0.9994108240047134, |
| "train_speed(iter/s)": 0.042253 |
| }, |
| { |
| "epoch": 0.20157595748579807, |
| "grad_norm": 0.02328096143901348, |
| "learning_rate": 9.888975184659018e-06, |
| "loss": 0.0003634607419371605, |
| "memory(GiB)": 160.86, |
| "step": 550, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.042275 |
| }, |
| { |
| "epoch": 0.2034084661902144, |
| "grad_norm": 0.06188211217522621, |
| "learning_rate": 9.886955051629322e-06, |
| "loss": 0.001550444681197405, |
| "memory(GiB)": 160.86, |
| "step": 555, |
| "token_acc": 0.9994948219247285, |
| "train_speed(iter/s)": 0.042291 |
| }, |
| { |
| "epoch": 0.20524097489463075, |
| "grad_norm": 0.1453787237405777, |
| "learning_rate": 9.88491691552325e-06, |
| "loss": 0.001519276574254036, |
| "memory(GiB)": 160.86, |
| "step": 560, |
| "token_acc": 0.9992421052631579, |
| "train_speed(iter/s)": 0.042293 |
| }, |
| { |
| "epoch": 0.2070734835990471, |
| "grad_norm": 0.023789288476109505, |
| "learning_rate": 9.882860783849106e-06, |
| "loss": 0.00029240711592137814, |
| "memory(GiB)": 160.86, |
| "step": 565, |
| "token_acc": 0.9999158107425492, |
| "train_speed(iter/s)": 0.042306 |
| }, |
| { |
| "epoch": 0.20890599230346343, |
| "grad_norm": 0.01045987755060196, |
| "learning_rate": 9.880786664181477e-06, |
| "loss": 0.0012256539426743983, |
| "memory(GiB)": 160.86, |
| "step": 570, |
| "token_acc": 0.999579018270607, |
| "train_speed(iter/s)": 0.042302 |
| }, |
| { |
| "epoch": 0.21073850100787977, |
| "grad_norm": 0.011777155101299286, |
| "learning_rate": 9.878694564161227e-06, |
| "loss": 0.00046466137282550333, |
| "memory(GiB)": 160.86, |
| "step": 575, |
| "token_acc": 0.9998316214850985, |
| "train_speed(iter/s)": 0.042318 |
| }, |
| { |
| "epoch": 0.21257100971229614, |
| "grad_norm": 0.23171444237232208, |
| "learning_rate": 9.876584491495448e-06, |
| "loss": 0.0011185991577804088, |
| "memory(GiB)": 160.86, |
| "step": 580, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.042309 |
| }, |
| { |
| "epoch": 0.21440351841671249, |
| "grad_norm": 0.049548666924238205, |
| "learning_rate": 9.87445645395745e-06, |
| "loss": 0.0009535157121717929, |
| "memory(GiB)": 160.86, |
| "step": 585, |
| "token_acc": 0.9995791954216462, |
| "train_speed(iter/s)": 0.042334 |
| }, |
| { |
| "epoch": 0.21623602712112883, |
| "grad_norm": 0.022135065868496895, |
| "learning_rate": 9.87231045938672e-06, |
| "loss": 0.0012145033106207848, |
| "memory(GiB)": 160.86, |
| "step": 590, |
| "token_acc": 0.999663129526697, |
| "train_speed(iter/s)": 0.042348 |
| }, |
| { |
| "epoch": 0.21806853582554517, |
| "grad_norm": 0.20922328531742096, |
| "learning_rate": 9.870146515688896e-06, |
| "loss": 0.0015425698831677437, |
| "memory(GiB)": 160.86, |
| "step": 595, |
| "token_acc": 0.999578947368421, |
| "train_speed(iter/s)": 0.04231 |
| }, |
| { |
| "epoch": 0.2199010445299615, |
| "grad_norm": 0.027032975107431412, |
| "learning_rate": 9.867964630835742e-06, |
| "loss": 0.00022940777707844973, |
| "memory(GiB)": 160.86, |
| "step": 600, |
| "token_acc": 0.9999158320006734, |
| "train_speed(iter/s)": 0.042103 |
| }, |
| { |
| "epoch": 0.22173355323437785, |
| "grad_norm": 0.016112059354782104, |
| "learning_rate": 9.865764812865113e-06, |
| "loss": 0.0013837903738021851, |
| "memory(GiB)": 160.86, |
| "step": 605, |
| "token_acc": 0.9996631862579993, |
| "train_speed(iter/s)": 0.042119 |
| }, |
| { |
| "epoch": 0.22356606193879422, |
| "grad_norm": 0.03569135442376137, |
| "learning_rate": 9.863547069880928e-06, |
| "loss": 0.002841825969517231, |
| "memory(GiB)": 160.86, |
| "step": 610, |
| "token_acc": 0.9993265993265993, |
| "train_speed(iter/s)": 0.042143 |
| }, |
| { |
| "epoch": 0.22539857064321056, |
| "grad_norm": 0.04555279016494751, |
| "learning_rate": 9.86131141005314e-06, |
| "loss": 0.012712681293487548, |
| "memory(GiB)": 160.86, |
| "step": 615, |
| "token_acc": 0.9986531986531987, |
| "train_speed(iter/s)": 0.042163 |
| }, |
| { |
| "epoch": 0.2272310793476269, |
| "grad_norm": 0.09330299496650696, |
| "learning_rate": 9.859057841617709e-06, |
| "loss": 0.007313913106918335, |
| "memory(GiB)": 160.86, |
| "step": 620, |
| "token_acc": 0.9966310115387855, |
| "train_speed(iter/s)": 0.042185 |
| }, |
| { |
| "epoch": 0.22906358805204324, |
| "grad_norm": 0.04176206886768341, |
| "learning_rate": 9.856786372876565e-06, |
| "loss": 0.0030346425250172616, |
| "memory(GiB)": 160.86, |
| "step": 625, |
| "token_acc": 0.9989054475035783, |
| "train_speed(iter/s)": 0.042207 |
| }, |
| { |
| "epoch": 0.23089609675645958, |
| "grad_norm": 0.0391584113240242, |
| "learning_rate": 9.854497012197581e-06, |
| "loss": 0.0021283647045493128, |
| "memory(GiB)": 160.86, |
| "step": 630, |
| "token_acc": 0.999494779386999, |
| "train_speed(iter/s)": 0.042231 |
| }, |
| { |
| "epoch": 0.23272860546087593, |
| "grad_norm": 0.06570518761873245, |
| "learning_rate": 9.852189768014547e-06, |
| "loss": 0.0012692485004663467, |
| "memory(GiB)": 160.86, |
| "step": 635, |
| "token_acc": 0.9994950770007573, |
| "train_speed(iter/s)": 0.042249 |
| }, |
| { |
| "epoch": 0.2345611141652923, |
| "grad_norm": 0.04750160127878189, |
| "learning_rate": 9.849864648827126e-06, |
| "loss": 0.001050265971571207, |
| "memory(GiB)": 160.86, |
| "step": 640, |
| "token_acc": 0.9996630727762803, |
| "train_speed(iter/s)": 0.042274 |
| }, |
| { |
| "epoch": 0.23639362286970864, |
| "grad_norm": 0.012142885476350784, |
| "learning_rate": 9.847521663200837e-06, |
| "loss": 0.00046721328981220723, |
| "memory(GiB)": 160.86, |
| "step": 645, |
| "token_acc": 0.9998315363881402, |
| "train_speed(iter/s)": 0.042296 |
| }, |
| { |
| "epoch": 0.23822613157412498, |
| "grad_norm": 0.0755368173122406, |
| "learning_rate": 9.845160819767017e-06, |
| "loss": 0.0013550316914916038, |
| "memory(GiB)": 160.86, |
| "step": 650, |
| "token_acc": 0.9995790891489182, |
| "train_speed(iter/s)": 0.042321 |
| }, |
| { |
| "epoch": 0.24005864027854132, |
| "grad_norm": 0.07237580418586731, |
| "learning_rate": 9.842782127222786e-06, |
| "loss": 0.002187203988432884, |
| "memory(GiB)": 160.86, |
| "step": 655, |
| "token_acc": 0.9994101786316144, |
| "train_speed(iter/s)": 0.042344 |
| }, |
| { |
| "epoch": 0.24189114898295766, |
| "grad_norm": 0.043931830674409866, |
| "learning_rate": 9.840385594331022e-06, |
| "loss": 0.0009523511864244938, |
| "memory(GiB)": 160.86, |
| "step": 660, |
| "token_acc": 0.9997474534893509, |
| "train_speed(iter/s)": 0.042366 |
| }, |
| { |
| "epoch": 0.243723657687374, |
| "grad_norm": 0.008748149499297142, |
| "learning_rate": 9.837971229920324e-06, |
| "loss": 0.0016139259561896325, |
| "memory(GiB)": 160.86, |
| "step": 665, |
| "token_acc": 0.9994108240047134, |
| "train_speed(iter/s)": 0.042378 |
| }, |
| { |
| "epoch": 0.24555616639179037, |
| "grad_norm": 0.12863993644714355, |
| "learning_rate": 9.83553904288498e-06, |
| "loss": 0.001357206143438816, |
| "memory(GiB)": 160.86, |
| "step": 670, |
| "token_acc": 0.9993265993265993, |
| "train_speed(iter/s)": 0.042358 |
| }, |
| { |
| "epoch": 0.2473886750962067, |
| "grad_norm": 0.08388248831033707, |
| "learning_rate": 9.833089042184933e-06, |
| "loss": 0.0016548488289117812, |
| "memory(GiB)": 160.86, |
| "step": 675, |
| "token_acc": 0.9994950345059754, |
| "train_speed(iter/s)": 0.042379 |
| }, |
| { |
| "epoch": 0.24922118380062305, |
| "grad_norm": 0.09960606694221497, |
| "learning_rate": 9.830621236845755e-06, |
| "loss": 0.0014729213900864125, |
| "memory(GiB)": 160.86, |
| "step": 680, |
| "token_acc": 0.9994103773584906, |
| "train_speed(iter/s)": 0.042403 |
| }, |
| { |
| "epoch": 0.2510536925050394, |
| "grad_norm": 0.07054334878921509, |
| "learning_rate": 9.828135635958602e-06, |
| "loss": 0.0012276002205908298, |
| "memory(GiB)": 160.86, |
| "step": 685, |
| "token_acc": 0.99949499200404, |
| "train_speed(iter/s)": 0.042425 |
| }, |
| { |
| "epoch": 0.25288620120945576, |
| "grad_norm": 0.011227499693632126, |
| "learning_rate": 9.825632248680195e-06, |
| "loss": 0.0014451307244598866, |
| "memory(GiB)": 160.86, |
| "step": 690, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.042448 |
| }, |
| { |
| "epoch": 0.2547187099138721, |
| "grad_norm": 0.09235574305057526, |
| "learning_rate": 9.82311108423277e-06, |
| "loss": 0.001263285707682371, |
| "memory(GiB)": 160.86, |
| "step": 695, |
| "token_acc": 0.9995789119083712, |
| "train_speed(iter/s)": 0.04247 |
| }, |
| { |
| "epoch": 0.25655121861828845, |
| "grad_norm": 0.045791253447532654, |
| "learning_rate": 9.82057215190406e-06, |
| "loss": 0.0009290166199207306, |
| "memory(GiB)": 160.86, |
| "step": 700, |
| "token_acc": 0.9998316498316498, |
| "train_speed(iter/s)": 0.042485 |
| }, |
| { |
| "epoch": 0.25838372732270476, |
| "grad_norm": 0.07074666768312454, |
| "learning_rate": 9.818015461047246e-06, |
| "loss": 0.0015341023914515971, |
| "memory(GiB)": 160.86, |
| "step": 705, |
| "token_acc": 0.99949499200404, |
| "train_speed(iter/s)": 0.042504 |
| }, |
| { |
| "epoch": 0.26021623602712113, |
| "grad_norm": 0.1540241241455078, |
| "learning_rate": 9.815441021080935e-06, |
| "loss": 0.0007845636457204819, |
| "memory(GiB)": 160.86, |
| "step": 710, |
| "token_acc": 0.9997473896934995, |
| "train_speed(iter/s)": 0.042523 |
| }, |
| { |
| "epoch": 0.2620487447315375, |
| "grad_norm": 0.033406198024749756, |
| "learning_rate": 9.812848841489118e-06, |
| "loss": 0.0012617891654372216, |
| "memory(GiB)": 160.86, |
| "step": 715, |
| "token_acc": 0.9994950345059754, |
| "train_speed(iter/s)": 0.042542 |
| }, |
| { |
| "epoch": 0.2638812534359538, |
| "grad_norm": 0.09797952324151993, |
| "learning_rate": 9.810238931821139e-06, |
| "loss": 0.0005904140882194043, |
| "memory(GiB)": 160.86, |
| "step": 720, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.042558 |
| }, |
| { |
| "epoch": 0.2657137621403702, |
| "grad_norm": 0.004131863825023174, |
| "learning_rate": 9.807611301691656e-06, |
| "loss": 0.0003168722614645958, |
| "memory(GiB)": 160.86, |
| "step": 725, |
| "token_acc": 0.9998316498316498, |
| "train_speed(iter/s)": 0.042576 |
| }, |
| { |
| "epoch": 0.2675462708447865, |
| "grad_norm": 0.0872625857591629, |
| "learning_rate": 9.804965960780603e-06, |
| "loss": 0.0018875803798437119, |
| "memory(GiB)": 160.86, |
| "step": 730, |
| "token_acc": 0.9993262023077571, |
| "train_speed(iter/s)": 0.042595 |
| }, |
| { |
| "epoch": 0.26937877954920286, |
| "grad_norm": 0.03825852647423744, |
| "learning_rate": 9.80230291883317e-06, |
| "loss": 0.0008161487989127636, |
| "memory(GiB)": 160.86, |
| "step": 735, |
| "token_acc": 0.9996634129922585, |
| "train_speed(iter/s)": 0.042614 |
| }, |
| { |
| "epoch": 0.27121128825361923, |
| "grad_norm": 0.09421674907207489, |
| "learning_rate": 9.799622185659748e-06, |
| "loss": 0.0013505241833627224, |
| "memory(GiB)": 160.86, |
| "step": 740, |
| "token_acc": 0.9995793016407236, |
| "train_speed(iter/s)": 0.0426 |
| }, |
| { |
| "epoch": 0.27304379695803555, |
| "grad_norm": 0.008914557285606861, |
| "learning_rate": 9.7969237711359e-06, |
| "loss": 0.0008496672846376896, |
| "memory(GiB)": 160.86, |
| "step": 745, |
| "token_acc": 0.9995792662403231, |
| "train_speed(iter/s)": 0.042614 |
| }, |
| { |
| "epoch": 0.2748763056624519, |
| "grad_norm": 0.05403187870979309, |
| "learning_rate": 9.79420768520233e-06, |
| "loss": 0.00033216315787285564, |
| "memory(GiB)": 160.86, |
| "step": 750, |
| "token_acc": 0.9998315221969506, |
| "train_speed(iter/s)": 0.042627 |
| }, |
| { |
| "epoch": 0.2767088143668682, |
| "grad_norm": 0.07824942469596863, |
| "learning_rate": 9.791473937864838e-06, |
| "loss": 0.0009146830998361111, |
| "memory(GiB)": 160.86, |
| "step": 755, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.042645 |
| }, |
| { |
| "epoch": 0.2785413230712846, |
| "grad_norm": 0.059788450598716736, |
| "learning_rate": 9.788722539194291e-06, |
| "loss": 0.0014368345960974692, |
| "memory(GiB)": 160.86, |
| "step": 760, |
| "token_acc": 0.9998316214850985, |
| "train_speed(iter/s)": 0.042663 |
| }, |
| { |
| "epoch": 0.2803738317757009, |
| "grad_norm": 0.03711073473095894, |
| "learning_rate": 9.785953499326575e-06, |
| "loss": 0.0013325980864465237, |
| "memory(GiB)": 160.86, |
| "step": 765, |
| "token_acc": 0.9994953742640875, |
| "train_speed(iter/s)": 0.042681 |
| }, |
| { |
| "epoch": 0.2822063404801173, |
| "grad_norm": 0.024719931185245514, |
| "learning_rate": 9.783166828462573e-06, |
| "loss": 0.002364422380924225, |
| "memory(GiB)": 160.86, |
| "step": 770, |
| "token_acc": 0.9992422328870927, |
| "train_speed(iter/s)": 0.0427 |
| }, |
| { |
| "epoch": 0.28403884918453365, |
| "grad_norm": 0.03786981478333473, |
| "learning_rate": 9.780362536868113e-06, |
| "loss": 0.0009791357442736626, |
| "memory(GiB)": 160.86, |
| "step": 775, |
| "token_acc": 0.9999158178297837, |
| "train_speed(iter/s)": 0.042719 |
| }, |
| { |
| "epoch": 0.28587135788894996, |
| "grad_norm": 0.1868947595357895, |
| "learning_rate": 9.777540634873939e-06, |
| "loss": 0.0009650942869484424, |
| "memory(GiB)": 160.86, |
| "step": 780, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.042737 |
| }, |
| { |
| "epoch": 0.28770386659336633, |
| "grad_norm": 0.015713131055235863, |
| "learning_rate": 9.774701132875665e-06, |
| "loss": 0.0007482931017875671, |
| "memory(GiB)": 160.86, |
| "step": 785, |
| "token_acc": 0.9997473045822103, |
| "train_speed(iter/s)": 0.042755 |
| }, |
| { |
| "epoch": 0.28953637529778264, |
| "grad_norm": 0.0045456611551344395, |
| "learning_rate": 9.771844041333751e-06, |
| "loss": 0.0009433764033019542, |
| "memory(GiB)": 160.86, |
| "step": 790, |
| "token_acc": 0.9998316214850985, |
| "train_speed(iter/s)": 0.042771 |
| }, |
| { |
| "epoch": 0.291368884002199, |
| "grad_norm": 0.01577194780111313, |
| "learning_rate": 9.768969370773446e-06, |
| "loss": 0.0004402685910463333, |
| "memory(GiB)": 160.86, |
| "step": 795, |
| "token_acc": 0.9999158390843292, |
| "train_speed(iter/s)": 0.042788 |
| }, |
| { |
| "epoch": 0.2932013927066154, |
| "grad_norm": 0.022222327068448067, |
| "learning_rate": 9.766077131784764e-06, |
| "loss": 0.0012076054699718952, |
| "memory(GiB)": 160.86, |
| "step": 800, |
| "token_acc": 0.999663129526697, |
| "train_speed(iter/s)": 0.042807 |
| }, |
| { |
| "epoch": 0.2950339014110317, |
| "grad_norm": 0.1063130721449852, |
| "learning_rate": 9.763167335022437e-06, |
| "loss": 0.0008463741280138493, |
| "memory(GiB)": 160.86, |
| "step": 805, |
| "token_acc": 0.9997475597441938, |
| "train_speed(iter/s)": 0.042824 |
| }, |
| { |
| "epoch": 0.29686641011544807, |
| "grad_norm": 0.018112968653440475, |
| "learning_rate": 9.760239991205878e-06, |
| "loss": 0.0014921230264008044, |
| "memory(GiB)": 160.86, |
| "step": 810, |
| "token_acc": 0.9998317206562894, |
| "train_speed(iter/s)": 0.042841 |
| }, |
| { |
| "epoch": 0.2986989188198644, |
| "grad_norm": 0.17134827375411987, |
| "learning_rate": 9.757295111119142e-06, |
| "loss": 0.0017302492633461952, |
| "memory(GiB)": 160.86, |
| "step": 815, |
| "token_acc": 0.9994105263157895, |
| "train_speed(iter/s)": 0.042859 |
| }, |
| { |
| "epoch": 0.30053142752428075, |
| "grad_norm": 0.1881178468465805, |
| "learning_rate": 9.75433270561089e-06, |
| "loss": 0.0018818458542227746, |
| "memory(GiB)": 160.86, |
| "step": 820, |
| "token_acc": 0.9994947368421052, |
| "train_speed(iter/s)": 0.042876 |
| }, |
| { |
| "epoch": 0.30236393622869706, |
| "grad_norm": 0.0701608955860138, |
| "learning_rate": 9.751352785594337e-06, |
| "loss": 0.0015649979934096337, |
| "memory(GiB)": 160.86, |
| "step": 825, |
| "token_acc": 0.9994106255788499, |
| "train_speed(iter/s)": 0.042892 |
| }, |
| { |
| "epoch": 0.30419644493311343, |
| "grad_norm": 0.11719143390655518, |
| "learning_rate": 9.748355362047228e-06, |
| "loss": 0.0022079024463891985, |
| "memory(GiB)": 160.86, |
| "step": 830, |
| "token_acc": 0.9993266560053867, |
| "train_speed(iter/s)": 0.042901 |
| }, |
| { |
| "epoch": 0.3060289536375298, |
| "grad_norm": 0.052010610699653625, |
| "learning_rate": 9.745340446011782e-06, |
| "loss": 0.0014782694168388843, |
| "memory(GiB)": 160.86, |
| "step": 835, |
| "token_acc": 0.9994952044422009, |
| "train_speed(iter/s)": 0.042913 |
| }, |
| { |
| "epoch": 0.3078614623419461, |
| "grad_norm": 0.04955873638391495, |
| "learning_rate": 9.742308048594665e-06, |
| "loss": 0.0016095375642180443, |
| "memory(GiB)": 160.86, |
| "step": 840, |
| "token_acc": 0.9994949069787019, |
| "train_speed(iter/s)": 0.04293 |
| }, |
| { |
| "epoch": 0.3096939710463625, |
| "grad_norm": 0.03515881672501564, |
| "learning_rate": 9.73925818096694e-06, |
| "loss": 0.0010076938197016716, |
| "memory(GiB)": 160.86, |
| "step": 845, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.042945 |
| }, |
| { |
| "epoch": 0.3115264797507788, |
| "grad_norm": 0.05620809271931648, |
| "learning_rate": 9.736190854364025e-06, |
| "loss": 0.0021063588559627534, |
| "memory(GiB)": 160.86, |
| "step": 850, |
| "token_acc": 0.999326485940394, |
| "train_speed(iter/s)": 0.042961 |
| }, |
| { |
| "epoch": 0.31335898845519516, |
| "grad_norm": 0.03683305159211159, |
| "learning_rate": 9.733106080085662e-06, |
| "loss": 0.0005148151423782111, |
| "memory(GiB)": 160.86, |
| "step": 855, |
| "token_acc": 0.9997475385003787, |
| "train_speed(iter/s)": 0.042977 |
| }, |
| { |
| "epoch": 0.31519149715961153, |
| "grad_norm": 0.029852213338017464, |
| "learning_rate": 9.730003869495863e-06, |
| "loss": 0.0004310948308557272, |
| "memory(GiB)": 160.86, |
| "step": 860, |
| "token_acc": 0.9998316073082428, |
| "train_speed(iter/s)": 0.042993 |
| }, |
| { |
| "epoch": 0.31702400586402785, |
| "grad_norm": 0.0037861524615436792, |
| "learning_rate": 9.726884234022877e-06, |
| "loss": 0.0005989938508719206, |
| "memory(GiB)": 160.86, |
| "step": 865, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.042978 |
| }, |
| { |
| "epoch": 0.3188565145684442, |
| "grad_norm": 0.04197857156395912, |
| "learning_rate": 9.723747185159146e-06, |
| "loss": 0.0018272759392857552, |
| "memory(GiB)": 160.86, |
| "step": 870, |
| "token_acc": 0.9996634413125789, |
| "train_speed(iter/s)": 0.042973 |
| }, |
| { |
| "epoch": 0.32068902327286053, |
| "grad_norm": 0.04336322471499443, |
| "learning_rate": 9.720592734461257e-06, |
| "loss": 0.0018274670466780663, |
| "memory(GiB)": 160.86, |
| "step": 875, |
| "token_acc": 0.999578947368421, |
| "train_speed(iter/s)": 0.042986 |
| }, |
| { |
| "epoch": 0.3225215319772769, |
| "grad_norm": 0.007882770150899887, |
| "learning_rate": 9.717420893549902e-06, |
| "loss": 0.0010360433720052243, |
| "memory(GiB)": 160.86, |
| "step": 880, |
| "token_acc": 0.9994951619688683, |
| "train_speed(iter/s)": 0.042996 |
| }, |
| { |
| "epoch": 0.3243540406816932, |
| "grad_norm": 0.03858296945691109, |
| "learning_rate": 9.714231674109845e-06, |
| "loss": 0.0016417885199189186, |
| "memory(GiB)": 160.86, |
| "step": 885, |
| "token_acc": 0.9991580365412142, |
| "train_speed(iter/s)": 0.042993 |
| }, |
| { |
| "epoch": 0.3261865493861096, |
| "grad_norm": 0.016526591032743454, |
| "learning_rate": 9.711025087889866e-06, |
| "loss": 0.0008385243825614452, |
| "memory(GiB)": 160.86, |
| "step": 890, |
| "token_acc": 0.9999158461667929, |
| "train_speed(iter/s)": 0.042974 |
| }, |
| { |
| "epoch": 0.32801905809052595, |
| "grad_norm": 0.011745758354663849, |
| "learning_rate": 9.70780114670272e-06, |
| "loss": 0.0007513574324548245, |
| "memory(GiB)": 160.86, |
| "step": 895, |
| "token_acc": 0.9997475172529877, |
| "train_speed(iter/s)": 0.042982 |
| }, |
| { |
| "epoch": 0.32985156679494226, |
| "grad_norm": 0.032515864819288254, |
| "learning_rate": 9.704559862425101e-06, |
| "loss": 0.000879857875406742, |
| "memory(GiB)": 160.86, |
| "step": 900, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.042993 |
| }, |
| { |
| "epoch": 0.33168407549935863, |
| "grad_norm": 0.11071360856294632, |
| "learning_rate": 9.701301246997592e-06, |
| "loss": 0.0013037783093750477, |
| "memory(GiB)": 160.86, |
| "step": 905, |
| "token_acc": 0.9994106751978448, |
| "train_speed(iter/s)": 0.043 |
| }, |
| { |
| "epoch": 0.33351658420377495, |
| "grad_norm": 0.03765702247619629, |
| "learning_rate": 9.698025312424619e-06, |
| "loss": 0.0015159587375819684, |
| "memory(GiB)": 160.86, |
| "step": 910, |
| "token_acc": 0.999579443182774, |
| "train_speed(iter/s)": 0.043008 |
| }, |
| { |
| "epoch": 0.3353490929081913, |
| "grad_norm": 0.008713570423424244, |
| "learning_rate": 9.694732070774415e-06, |
| "loss": 0.00026825035456568, |
| "memory(GiB)": 160.86, |
| "step": 915, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.04298 |
| }, |
| { |
| "epoch": 0.3371816016126077, |
| "grad_norm": 0.07823354005813599, |
| "learning_rate": 9.691421534178966e-06, |
| "loss": 0.001245938241481781, |
| "memory(GiB)": 160.86, |
| "step": 920, |
| "token_acc": 0.9994108240047134, |
| "train_speed(iter/s)": 0.042955 |
| }, |
| { |
| "epoch": 0.339014110317024, |
| "grad_norm": 0.04400285705924034, |
| "learning_rate": 9.688093714833975e-06, |
| "loss": 0.000505279190838337, |
| "memory(GiB)": 160.86, |
| "step": 925, |
| "token_acc": 0.9998317064961293, |
| "train_speed(iter/s)": 0.042963 |
| }, |
| { |
| "epoch": 0.34084661902144037, |
| "grad_norm": 0.05997716262936592, |
| "learning_rate": 9.68474862499881e-06, |
| "loss": 0.001019585132598877, |
| "memory(GiB)": 160.86, |
| "step": 930, |
| "token_acc": 0.9996631578947368, |
| "train_speed(iter/s)": 0.042968 |
| }, |
| { |
| "epoch": 0.3426791277258567, |
| "grad_norm": 0.17811425030231476, |
| "learning_rate": 9.681386276996462e-06, |
| "loss": 0.0005352488718926906, |
| "memory(GiB)": 160.86, |
| "step": 935, |
| "token_acc": 0.999831734814067, |
| "train_speed(iter/s)": 0.042975 |
| }, |
| { |
| "epoch": 0.34451163643027305, |
| "grad_norm": 0.2344316691160202, |
| "learning_rate": 9.678006683213503e-06, |
| "loss": 0.0009379078634083271, |
| "memory(GiB)": 160.86, |
| "step": 940, |
| "token_acc": 0.9997475385003787, |
| "train_speed(iter/s)": 0.04297 |
| }, |
| { |
| "epoch": 0.34634414513468936, |
| "grad_norm": 0.06496769934892654, |
| "learning_rate": 9.674609856100032e-06, |
| "loss": 0.0008637402206659317, |
| "memory(GiB)": 160.86, |
| "step": 945, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.04298 |
| }, |
| { |
| "epoch": 0.34817665383910573, |
| "grad_norm": 0.0862952470779419, |
| "learning_rate": 9.671195808169639e-06, |
| "loss": 0.0011458213441073895, |
| "memory(GiB)": 160.86, |
| "step": 950, |
| "token_acc": 0.9997475172529877, |
| "train_speed(iter/s)": 0.042994 |
| }, |
| { |
| "epoch": 0.3500091625435221, |
| "grad_norm": 0.016611328348517418, |
| "learning_rate": 9.667764551999346e-06, |
| "loss": 0.0010181719437241555, |
| "memory(GiB)": 160.86, |
| "step": 955, |
| "token_acc": 0.999663242970197, |
| "train_speed(iter/s)": 0.04297 |
| }, |
| { |
| "epoch": 0.3518416712479384, |
| "grad_norm": 0.08347468078136444, |
| "learning_rate": 9.664316100229578e-06, |
| "loss": 0.0007937697693705559, |
| "memory(GiB)": 160.86, |
| "step": 960, |
| "token_acc": 0.9995793724236561, |
| "train_speed(iter/s)": 0.042953 |
| }, |
| { |
| "epoch": 0.3536741799523548, |
| "grad_norm": 0.07462402433156967, |
| "learning_rate": 9.660850465564101e-06, |
| "loss": 0.0014566186815500259, |
| "memory(GiB)": 160.86, |
| "step": 965, |
| "token_acc": 0.9995790537127462, |
| "train_speed(iter/s)": 0.042967 |
| }, |
| { |
| "epoch": 0.3555066886567711, |
| "grad_norm": 0.031168634071946144, |
| "learning_rate": 9.657367660769984e-06, |
| "loss": 0.0008765817619860172, |
| "memory(GiB)": 160.86, |
| "step": 970, |
| "token_acc": 0.9996635828427249, |
| "train_speed(iter/s)": 0.04298 |
| }, |
| { |
| "epoch": 0.35733919736118747, |
| "grad_norm": 0.10647280514240265, |
| "learning_rate": 9.653867698677543e-06, |
| "loss": 0.0011190660297870636, |
| "memory(GiB)": 160.86, |
| "step": 975, |
| "token_acc": 0.9996634979389248, |
| "train_speed(iter/s)": 0.042993 |
| }, |
| { |
| "epoch": 0.35917170606560384, |
| "grad_norm": 0.041436877101659775, |
| "learning_rate": 9.650350592180312e-06, |
| "loss": 0.0012339851818978786, |
| "memory(GiB)": 160.86, |
| "step": 980, |
| "token_acc": 0.9994108735903047, |
| "train_speed(iter/s)": 0.043006 |
| }, |
| { |
| "epoch": 0.36100421477002015, |
| "grad_norm": 0.056029047816991806, |
| "learning_rate": 9.646816354234968e-06, |
| "loss": 0.0012508154846727847, |
| "memory(GiB)": 160.86, |
| "step": 985, |
| "token_acc": 0.9996634129922585, |
| "train_speed(iter/s)": 0.043019 |
| }, |
| { |
| "epoch": 0.3628367234744365, |
| "grad_norm": 0.016829386353492737, |
| "learning_rate": 9.643264997861312e-06, |
| "loss": 0.0006543456576764584, |
| "memory(GiB)": 160.86, |
| "step": 990, |
| "token_acc": 0.9995789119083712, |
| "train_speed(iter/s)": 0.043032 |
| }, |
| { |
| "epoch": 0.36466923217885283, |
| "grad_norm": 0.035343799740076065, |
| "learning_rate": 9.6396965361422e-06, |
| "loss": 0.0010605846531689168, |
| "memory(GiB)": 160.86, |
| "step": 995, |
| "token_acc": 0.9996632146164857, |
| "train_speed(iter/s)": 0.043044 |
| }, |
| { |
| "epoch": 0.3665017408832692, |
| "grad_norm": 0.1007576435804367, |
| "learning_rate": 9.636110982223505e-06, |
| "loss": 0.0017275510355830193, |
| "memory(GiB)": 160.86, |
| "step": 1000, |
| "token_acc": 0.9993263157894737, |
| "train_speed(iter/s)": 0.043052 |
| }, |
| { |
| "epoch": 0.3665017408832692, |
| "eval_loss": 0.0009223763481713831, |
| "eval_runtime": 173.3991, |
| "eval_samples_per_second": 2.537, |
| "eval_steps_per_second": 2.537, |
| "eval_token_acc": 0.9996633151217422, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3683342495876855, |
| "grad_norm": 0.005491136573255062, |
| "learning_rate": 9.632508349314066e-06, |
| "loss": 0.0003129460848867893, |
| "memory(GiB)": 160.86, |
| "step": 1005, |
| "token_acc": 0.9997021844125912, |
| "train_speed(iter/s)": 0.041292 |
| }, |
| { |
| "epoch": 0.3701667582921019, |
| "grad_norm": 0.052943065762519836, |
| "learning_rate": 9.628888650685642e-06, |
| "loss": 0.0011203167960047722, |
| "memory(GiB)": 160.86, |
| "step": 1010, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.041243 |
| }, |
| { |
| "epoch": 0.37199926699651825, |
| "grad_norm": 0.03638750687241554, |
| "learning_rate": 9.625251899672852e-06, |
| "loss": 0.0004535942804068327, |
| "memory(GiB)": 160.86, |
| "step": 1015, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.041263 |
| }, |
| { |
| "epoch": 0.37383177570093457, |
| "grad_norm": 0.010707657784223557, |
| "learning_rate": 9.621598109673142e-06, |
| "loss": 0.00024845553562045095, |
| "memory(GiB)": 160.86, |
| "step": 1020, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.041283 |
| }, |
| { |
| "epoch": 0.37566428440535093, |
| "grad_norm": 0.003029848216101527, |
| "learning_rate": 9.617927294146726e-06, |
| "loss": 0.000255924928933382, |
| "memory(GiB)": 160.86, |
| "step": 1025, |
| "token_acc": 0.9999158178297837, |
| "train_speed(iter/s)": 0.041303 |
| }, |
| { |
| "epoch": 0.37749679310976725, |
| "grad_norm": 0.002139889169484377, |
| "learning_rate": 9.614239466616541e-06, |
| "loss": 0.001936671696603298, |
| "memory(GiB)": 160.86, |
| "step": 1030, |
| "token_acc": 0.9996633846671716, |
| "train_speed(iter/s)": 0.041322 |
| }, |
| { |
| "epoch": 0.3793293018141836, |
| "grad_norm": 0.033104073256254196, |
| "learning_rate": 9.61053464066819e-06, |
| "loss": 0.0009706121869385243, |
| "memory(GiB)": 160.86, |
| "step": 1035, |
| "token_acc": 0.9996632146164857, |
| "train_speed(iter/s)": 0.041341 |
| }, |
| { |
| "epoch": 0.3811618105186, |
| "grad_norm": 0.02874094434082508, |
| "learning_rate": 9.606812829949896e-06, |
| "loss": 0.0007171142846345901, |
| "memory(GiB)": 160.86, |
| "step": 1040, |
| "token_acc": 0.9999158178297837, |
| "train_speed(iter/s)": 0.041359 |
| }, |
| { |
| "epoch": 0.3829943192230163, |
| "grad_norm": 0.13675667345523834, |
| "learning_rate": 9.603074048172458e-06, |
| "loss": 0.0008686968125402927, |
| "memory(GiB)": 160.86, |
| "step": 1045, |
| "token_acc": 0.9998317489694625, |
| "train_speed(iter/s)": 0.041378 |
| }, |
| { |
| "epoch": 0.38482682792743267, |
| "grad_norm": 0.325898677110672, |
| "learning_rate": 9.599318309109191e-06, |
| "loss": 0.001396147720515728, |
| "memory(GiB)": 160.86, |
| "step": 1050, |
| "token_acc": 0.9995791245791246, |
| "train_speed(iter/s)": 0.041398 |
| }, |
| { |
| "epoch": 0.386659336631849, |
| "grad_norm": 0.06272176653146744, |
| "learning_rate": 9.595545626595878e-06, |
| "loss": 0.002794544957578182, |
| "memory(GiB)": 160.86, |
| "step": 1055, |
| "token_acc": 0.9992422966829433, |
| "train_speed(iter/s)": 0.041416 |
| }, |
| { |
| "epoch": 0.38849184533626535, |
| "grad_norm": 0.019762301817536354, |
| "learning_rate": 9.591756014530723e-06, |
| "loss": 0.0009371510706841946, |
| "memory(GiB)": 160.86, |
| "step": 1060, |
| "token_acc": 0.9996630727762803, |
| "train_speed(iter/s)": 0.041434 |
| }, |
| { |
| "epoch": 0.3903243540406817, |
| "grad_norm": 0.09259835630655289, |
| "learning_rate": 9.587949486874295e-06, |
| "loss": 0.0013479561544954776, |
| "memory(GiB)": 160.86, |
| "step": 1065, |
| "token_acc": 0.9995791245791246, |
| "train_speed(iter/s)": 0.041453 |
| }, |
| { |
| "epoch": 0.39215686274509803, |
| "grad_norm": 0.05826210230588913, |
| "learning_rate": 9.58412605764948e-06, |
| "loss": 0.00075059924274683, |
| "memory(GiB)": 160.86, |
| "step": 1070, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.041452 |
| }, |
| { |
| "epoch": 0.3939893714495144, |
| "grad_norm": 0.02435746043920517, |
| "learning_rate": 9.580285740941425e-06, |
| "loss": 0.0010668656788766385, |
| "memory(GiB)": 160.86, |
| "step": 1075, |
| "token_acc": 0.9994948644552955, |
| "train_speed(iter/s)": 0.04147 |
| }, |
| { |
| "epoch": 0.3958218801539307, |
| "grad_norm": 0.06046979874372482, |
| "learning_rate": 9.57642855089749e-06, |
| "loss": 0.0006216964218765497, |
| "memory(GiB)": 160.86, |
| "step": 1080, |
| "token_acc": 0.9995790891489182, |
| "train_speed(iter/s)": 0.041489 |
| }, |
| { |
| "epoch": 0.3976543888583471, |
| "grad_norm": 0.02380959317088127, |
| "learning_rate": 9.572554501727198e-06, |
| "loss": 0.000693302508443594, |
| "memory(GiB)": 160.86, |
| "step": 1085, |
| "token_acc": 0.999663356337317, |
| "train_speed(iter/s)": 0.041506 |
| }, |
| { |
| "epoch": 0.3994868975627634, |
| "grad_norm": 0.015010896138846874, |
| "learning_rate": 9.568663607702174e-06, |
| "loss": 0.0005827041808515787, |
| "memory(GiB)": 160.86, |
| "step": 1090, |
| "token_acc": 0.9997476446837147, |
| "train_speed(iter/s)": 0.041523 |
| }, |
| { |
| "epoch": 0.40131940626717977, |
| "grad_norm": 0.17055855691432953, |
| "learning_rate": 9.564755883156103e-06, |
| "loss": 0.0010279595851898193, |
| "memory(GiB)": 160.86, |
| "step": 1095, |
| "token_acc": 0.9995791600033668, |
| "train_speed(iter/s)": 0.041535 |
| }, |
| { |
| "epoch": 0.40315191497159614, |
| "grad_norm": 0.0005144431488588452, |
| "learning_rate": 9.560831342484668e-06, |
| "loss": 0.00026263915933668616, |
| "memory(GiB)": 160.86, |
| "step": 1100, |
| "token_acc": 0.9999158249158249, |
| "train_speed(iter/s)": 0.041545 |
| }, |
| { |
| "epoch": 0.40498442367601245, |
| "grad_norm": 0.019269630312919617, |
| "learning_rate": 9.556890000145503e-06, |
| "loss": 0.0010970150120556354, |
| "memory(GiB)": 160.86, |
| "step": 1105, |
| "token_acc": 0.999663356337317, |
| "train_speed(iter/s)": 0.041546 |
| }, |
| { |
| "epoch": 0.4068169323804288, |
| "grad_norm": 0.037301257252693176, |
| "learning_rate": 9.552931870658136e-06, |
| "loss": 0.001028469391167164, |
| "memory(GiB)": 160.86, |
| "step": 1110, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.041562 |
| }, |
| { |
| "epoch": 0.40864944108484513, |
| "grad_norm": 0.006164327263832092, |
| "learning_rate": 9.54895696860394e-06, |
| "loss": 0.0005135733168572188, |
| "memory(GiB)": 160.86, |
| "step": 1115, |
| "token_acc": 0.9998317064961293, |
| "train_speed(iter/s)": 0.041578 |
| }, |
| { |
| "epoch": 0.4104819497892615, |
| "grad_norm": 0.1576082557439804, |
| "learning_rate": 9.544965308626075e-06, |
| "loss": 0.001076418813318014, |
| "memory(GiB)": 160.86, |
| "step": 1120, |
| "token_acc": 0.9996634413125789, |
| "train_speed(iter/s)": 0.041593 |
| }, |
| { |
| "epoch": 0.41231445849367787, |
| "grad_norm": 0.014838850125670433, |
| "learning_rate": 9.540956905429435e-06, |
| "loss": 0.000989390444010496, |
| "memory(GiB)": 160.86, |
| "step": 1125, |
| "token_acc": 0.9994946091644205, |
| "train_speed(iter/s)": 0.041608 |
| }, |
| { |
| "epoch": 0.4141469671980942, |
| "grad_norm": 0.014855766668915749, |
| "learning_rate": 9.536931773780598e-06, |
| "loss": 0.0015475031919777392, |
| "memory(GiB)": 160.86, |
| "step": 1130, |
| "token_acc": 0.9994103276893269, |
| "train_speed(iter/s)": 0.041623 |
| }, |
| { |
| "epoch": 0.41597947590251055, |
| "grad_norm": 0.019349105656147003, |
| "learning_rate": 9.53288992850776e-06, |
| "loss": 0.0005111652426421642, |
| "memory(GiB)": 160.86, |
| "step": 1135, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.041628 |
| }, |
| { |
| "epoch": 0.41781198460692687, |
| "grad_norm": 0.03461524471640587, |
| "learning_rate": 9.528831384500699e-06, |
| "loss": 0.0004519184119999409, |
| "memory(GiB)": 160.86, |
| "step": 1140, |
| "token_acc": 0.9999158036541214, |
| "train_speed(iter/s)": 0.041643 |
| }, |
| { |
| "epoch": 0.41964449331134324, |
| "grad_norm": 0.15801462531089783, |
| "learning_rate": 9.5247561567107e-06, |
| "loss": 0.00042958445847034453, |
| "memory(GiB)": 160.86, |
| "step": 1145, |
| "token_acc": 0.9997474322276477, |
| "train_speed(iter/s)": 0.041646 |
| }, |
| { |
| "epoch": 0.42147700201575955, |
| "grad_norm": 0.04607151448726654, |
| "learning_rate": 9.520664260150513e-06, |
| "loss": 0.0018787598237395287, |
| "memory(GiB)": 160.86, |
| "step": 1150, |
| "token_acc": 0.9995792662403231, |
| "train_speed(iter/s)": 0.04166 |
| }, |
| { |
| "epoch": 0.4233095107201759, |
| "grad_norm": 0.0973573699593544, |
| "learning_rate": 9.5165557098943e-06, |
| "loss": 0.0009789202362298966, |
| "memory(GiB)": 160.86, |
| "step": 1155, |
| "token_acc": 0.9997473684210526, |
| "train_speed(iter/s)": 0.041675 |
| }, |
| { |
| "epoch": 0.4251420194245923, |
| "grad_norm": 0.038962222635746, |
| "learning_rate": 9.512430521077565e-06, |
| "loss": 0.0009090069681406022, |
| "memory(GiB)": 160.86, |
| "step": 1160, |
| "token_acc": 0.9997473896934995, |
| "train_speed(iter/s)": 0.041686 |
| }, |
| { |
| "epoch": 0.4269745281290086, |
| "grad_norm": 0.010646538808941841, |
| "learning_rate": 9.508288708897109e-06, |
| "loss": 0.00033488136250525713, |
| "memory(GiB)": 160.86, |
| "step": 1165, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.041701 |
| }, |
| { |
| "epoch": 0.42880703683342497, |
| "grad_norm": 0.0063909804448485374, |
| "learning_rate": 9.504130288610972e-06, |
| "loss": 0.0002777322195470333, |
| "memory(GiB)": 160.86, |
| "step": 1170, |
| "token_acc": 0.9999158178297837, |
| "train_speed(iter/s)": 0.041717 |
| }, |
| { |
| "epoch": 0.4306395455378413, |
| "grad_norm": 0.0029652463272213936, |
| "learning_rate": 9.499955275538384e-06, |
| "loss": 0.0006769481580704451, |
| "memory(GiB)": 160.86, |
| "step": 1175, |
| "token_acc": 0.9998316214850985, |
| "train_speed(iter/s)": 0.041732 |
| }, |
| { |
| "epoch": 0.43247205424225765, |
| "grad_norm": 0.03148781880736351, |
| "learning_rate": 9.495763685059689e-06, |
| "loss": 0.0021369663998484613, |
| "memory(GiB)": 160.86, |
| "step": 1180, |
| "token_acc": 0.9996631862579993, |
| "train_speed(iter/s)": 0.041747 |
| }, |
| { |
| "epoch": 0.434304562946674, |
| "grad_norm": 0.0476820208132267, |
| "learning_rate": 9.49155553261631e-06, |
| "loss": 0.0006943107582628727, |
| "memory(GiB)": 160.86, |
| "step": 1185, |
| "token_acc": 0.9999157610984752, |
| "train_speed(iter/s)": 0.041763 |
| }, |
| { |
| "epoch": 0.43613707165109034, |
| "grad_norm": 0.006549006327986717, |
| "learning_rate": 9.487330833710678e-06, |
| "loss": 0.00024927293416112664, |
| "memory(GiB)": 160.86, |
| "step": 1190, |
| "token_acc": 0.9999158532480646, |
| "train_speed(iter/s)": 0.041649 |
| }, |
| { |
| "epoch": 0.4379695803555067, |
| "grad_norm": 0.030179157853126526, |
| "learning_rate": 9.48308960390618e-06, |
| "loss": 0.0010321117006242275, |
| "memory(GiB)": 160.86, |
| "step": 1195, |
| "token_acc": 0.9997475597441938, |
| "train_speed(iter/s)": 0.041664 |
| }, |
| { |
| "epoch": 0.439802089059923, |
| "grad_norm": 0.0033925846219062805, |
| "learning_rate": 9.478831858827105e-06, |
| "loss": 0.00027046091854572297, |
| "memory(GiB)": 160.86, |
| "step": 1200, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.041679 |
| }, |
| { |
| "epoch": 0.4416345977643394, |
| "grad_norm": 0.07267381250858307, |
| "learning_rate": 9.474557614158575e-06, |
| "loss": 0.0008655142039060593, |
| "memory(GiB)": 160.86, |
| "step": 1205, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.041695 |
| }, |
| { |
| "epoch": 0.4434671064687557, |
| "grad_norm": 0.006699859630316496, |
| "learning_rate": 9.470266885646504e-06, |
| "loss": 0.0006839127279818058, |
| "memory(GiB)": 160.86, |
| "step": 1210, |
| "token_acc": 0.9998316498316498, |
| "train_speed(iter/s)": 0.04171 |
| }, |
| { |
| "epoch": 0.44529961517317207, |
| "grad_norm": 0.01745425909757614, |
| "learning_rate": 9.465959689097525e-06, |
| "loss": 0.0009552924893796444, |
| "memory(GiB)": 160.86, |
| "step": 1215, |
| "token_acc": 0.9997473896934995, |
| "train_speed(iter/s)": 0.041723 |
| }, |
| { |
| "epoch": 0.44713212387758844, |
| "grad_norm": 0.018873147666454315, |
| "learning_rate": 9.461636040378941e-06, |
| "loss": 0.0004271782469004393, |
| "memory(GiB)": 160.86, |
| "step": 1220, |
| "token_acc": 0.9998315789473684, |
| "train_speed(iter/s)": 0.04171 |
| }, |
| { |
| "epoch": 0.44896463258200475, |
| "grad_norm": 0.030013209208846092, |
| "learning_rate": 9.45729595541866e-06, |
| "loss": 0.0011812681332230568, |
| "memory(GiB)": 160.86, |
| "step": 1225, |
| "token_acc": 0.9996633280026934, |
| "train_speed(iter/s)": 0.041724 |
| }, |
| { |
| "epoch": 0.4507971412864211, |
| "grad_norm": 0.0008936990634538233, |
| "learning_rate": 9.452939450205139e-06, |
| "loss": 0.0004920902196317911, |
| "memory(GiB)": 160.86, |
| "step": 1230, |
| "token_acc": 0.9996634129922585, |
| "train_speed(iter/s)": 0.041738 |
| }, |
| { |
| "epoch": 0.45262964999083743, |
| "grad_norm": 0.06023690477013588, |
| "learning_rate": 9.448566540787331e-06, |
| "loss": 0.0010696605779230595, |
| "memory(GiB)": 160.86, |
| "step": 1235, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.041753 |
| }, |
| { |
| "epoch": 0.4544621586952538, |
| "grad_norm": 0.05453835055232048, |
| "learning_rate": 9.444177243274619e-06, |
| "loss": 0.0011446685530245304, |
| "memory(GiB)": 160.86, |
| "step": 1240, |
| "token_acc": 0.9994107248084856, |
| "train_speed(iter/s)": 0.041767 |
| }, |
| { |
| "epoch": 0.4562946673996702, |
| "grad_norm": 0.06793410331010818, |
| "learning_rate": 9.43977157383675e-06, |
| "loss": 0.0017616702243685722, |
| "memory(GiB)": 160.86, |
| "step": 1245, |
| "token_acc": 0.9994109231675503, |
| "train_speed(iter/s)": 0.04178 |
| }, |
| { |
| "epoch": 0.4581271761040865, |
| "grad_norm": 0.03625203296542168, |
| "learning_rate": 9.435349548703796e-06, |
| "loss": 0.000555843859910965, |
| "memory(GiB)": 160.86, |
| "step": 1250, |
| "token_acc": 0.9998317489694625, |
| "train_speed(iter/s)": 0.041794 |
| }, |
| { |
| "epoch": 0.45995968480850286, |
| "grad_norm": 0.08264432102441788, |
| "learning_rate": 9.430911184166074e-06, |
| "loss": 0.0007446614094078541, |
| "memory(GiB)": 160.86, |
| "step": 1255, |
| "token_acc": 0.9996634696281339, |
| "train_speed(iter/s)": 0.041808 |
| }, |
| { |
| "epoch": 0.46179219351291917, |
| "grad_norm": 0.03210179880261421, |
| "learning_rate": 9.426456496574095e-06, |
| "loss": 0.0009373857639729977, |
| "memory(GiB)": 160.86, |
| "step": 1260, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.041821 |
| }, |
| { |
| "epoch": 0.46362470221733554, |
| "grad_norm": 0.047844789922237396, |
| "learning_rate": 9.421985502338505e-06, |
| "loss": 0.0005674117710441351, |
| "memory(GiB)": 160.86, |
| "step": 1265, |
| "token_acc": 0.9997473258654089, |
| "train_speed(iter/s)": 0.041818 |
| }, |
| { |
| "epoch": 0.46545721092175185, |
| "grad_norm": 0.10654474049806595, |
| "learning_rate": 9.417498217930017e-06, |
| "loss": 0.0010964240878820418, |
| "memory(GiB)": 160.86, |
| "step": 1270, |
| "token_acc": 0.9994948644552955, |
| "train_speed(iter/s)": 0.04183 |
| }, |
| { |
| "epoch": 0.4672897196261682, |
| "grad_norm": 0.09114305675029755, |
| "learning_rate": 9.412994659879362e-06, |
| "loss": 0.0010675345547497272, |
| "memory(GiB)": 160.86, |
| "step": 1275, |
| "token_acc": 0.9997476022211005, |
| "train_speed(iter/s)": 0.041843 |
| }, |
| { |
| "epoch": 0.4691222283305846, |
| "grad_norm": 0.01834912970662117, |
| "learning_rate": 9.408474844777218e-06, |
| "loss": 0.0008592868223786354, |
| "memory(GiB)": 160.86, |
| "step": 1280, |
| "token_acc": 0.9996632146164857, |
| "train_speed(iter/s)": 0.041856 |
| }, |
| { |
| "epoch": 0.4709547370350009, |
| "grad_norm": 0.057866550981998444, |
| "learning_rate": 9.403938789274152e-06, |
| "loss": 0.0005749462172389031, |
| "memory(GiB)": 160.86, |
| "step": 1285, |
| "token_acc": 0.9999158320006734, |
| "train_speed(iter/s)": 0.041858 |
| }, |
| { |
| "epoch": 0.4727872457394173, |
| "grad_norm": 0.06462471187114716, |
| "learning_rate": 9.39938651008056e-06, |
| "loss": 0.00032207604963332417, |
| "memory(GiB)": 160.86, |
| "step": 1290, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.041871 |
| }, |
| { |
| "epoch": 0.4746197544438336, |
| "grad_norm": 0.13423164188861847, |
| "learning_rate": 9.394818023966604e-06, |
| "loss": 0.0010271795094013215, |
| "memory(GiB)": 160.86, |
| "step": 1295, |
| "token_acc": 0.9997475172529877, |
| "train_speed(iter/s)": 0.041884 |
| }, |
| { |
| "epoch": 0.47645226314824995, |
| "grad_norm": 0.08763778209686279, |
| "learning_rate": 9.39023334776215e-06, |
| "loss": 0.0028607085347175597, |
| "memory(GiB)": 160.86, |
| "step": 1300, |
| "token_acc": 0.9993261455525606, |
| "train_speed(iter/s)": 0.041897 |
| }, |
| { |
| "epoch": 0.4782847718526663, |
| "grad_norm": 0.002933151787146926, |
| "learning_rate": 9.385632498356713e-06, |
| "loss": 0.00027030634228140114, |
| "memory(GiB)": 160.86, |
| "step": 1305, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.041909 |
| }, |
| { |
| "epoch": 0.48011728055708264, |
| "grad_norm": 0.04423481225967407, |
| "learning_rate": 9.381015492699379e-06, |
| "loss": 0.00081101693212986, |
| "memory(GiB)": 160.86, |
| "step": 1310, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.041916 |
| }, |
| { |
| "epoch": 0.481949789261499, |
| "grad_norm": 0.02344198152422905, |
| "learning_rate": 9.376382347798756e-06, |
| "loss": 0.0003832927206531167, |
| "memory(GiB)": 160.86, |
| "step": 1315, |
| "token_acc": 0.9998316073082428, |
| "train_speed(iter/s)": 0.041926 |
| }, |
| { |
| "epoch": 0.4837822979659153, |
| "grad_norm": 0.016795309260487556, |
| "learning_rate": 9.371733080722911e-06, |
| "loss": 0.00048357550986111164, |
| "memory(GiB)": 160.86, |
| "step": 1320, |
| "token_acc": 0.9998315789473684, |
| "train_speed(iter/s)": 0.041939 |
| }, |
| { |
| "epoch": 0.4856148066703317, |
| "grad_norm": 0.09421277046203613, |
| "learning_rate": 9.3670677085993e-06, |
| "loss": 0.0011711867526173591, |
| "memory(GiB)": 160.86, |
| "step": 1325, |
| "token_acc": 0.9997474322276477, |
| "train_speed(iter/s)": 0.04195 |
| }, |
| { |
| "epoch": 0.487447315374748, |
| "grad_norm": 0.18248307704925537, |
| "learning_rate": 9.362386248614706e-06, |
| "loss": 0.0005028956104069949, |
| "memory(GiB)": 160.86, |
| "step": 1330, |
| "token_acc": 0.9998316923335858, |
| "train_speed(iter/s)": 0.041963 |
| }, |
| { |
| "epoch": 0.48927982407916437, |
| "grad_norm": 0.04889710247516632, |
| "learning_rate": 9.357688718015185e-06, |
| "loss": 0.0029960500076413156, |
| "memory(GiB)": 160.86, |
| "step": 1335, |
| "token_acc": 0.9992425517589631, |
| "train_speed(iter/s)": 0.041975 |
| }, |
| { |
| "epoch": 0.49111233278358074, |
| "grad_norm": 0.01644892431795597, |
| "learning_rate": 9.35297513410599e-06, |
| "loss": 0.001054964866489172, |
| "memory(GiB)": 160.86, |
| "step": 1340, |
| "token_acc": 0.999663242970197, |
| "train_speed(iter/s)": 0.041987 |
| }, |
| { |
| "epoch": 0.49294484148799705, |
| "grad_norm": 0.06923960894346237, |
| "learning_rate": 9.348245514251515e-06, |
| "loss": 0.0015572577714920044, |
| "memory(GiB)": 160.86, |
| "step": 1345, |
| "token_acc": 0.99949499200404, |
| "train_speed(iter/s)": 0.041999 |
| }, |
| { |
| "epoch": 0.4947773501924134, |
| "grad_norm": 0.4345010817050934, |
| "learning_rate": 9.343499875875226e-06, |
| "loss": 0.0008648891933262348, |
| "memory(GiB)": 160.86, |
| "step": 1350, |
| "token_acc": 0.9998317914213625, |
| "train_speed(iter/s)": 0.042012 |
| }, |
| { |
| "epoch": 0.49660985889682974, |
| "grad_norm": 0.12544922530651093, |
| "learning_rate": 9.338738236459606e-06, |
| "loss": 0.0008970722556114197, |
| "memory(GiB)": 160.86, |
| "step": 1355, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.042024 |
| }, |
| { |
| "epoch": 0.4984423676012461, |
| "grad_norm": 0.04251859337091446, |
| "learning_rate": 9.333960613546079e-06, |
| "loss": 0.0008619870990514755, |
| "memory(GiB)": 160.86, |
| "step": 1360, |
| "token_acc": 0.9996632146164857, |
| "train_speed(iter/s)": 0.042036 |
| }, |
| { |
| "epoch": 0.5002748763056625, |
| "grad_norm": 0.05376381427049637, |
| "learning_rate": 9.329167024734951e-06, |
| "loss": 0.0009831368923187255, |
| "memory(GiB)": 160.86, |
| "step": 1365, |
| "token_acc": 0.9996631862579993, |
| "train_speed(iter/s)": 0.042046 |
| }, |
| { |
| "epoch": 0.5021073850100788, |
| "grad_norm": 0.03389672935009003, |
| "learning_rate": 9.32435748768535e-06, |
| "loss": 0.001122223772108555, |
| "memory(GiB)": 160.86, |
| "step": 1370, |
| "token_acc": 0.9995790891489182, |
| "train_speed(iter/s)": 0.042057 |
| }, |
| { |
| "epoch": 0.5039398937144951, |
| "grad_norm": 0.07879503071308136, |
| "learning_rate": 9.319532020115147e-06, |
| "loss": 0.0011348828673362731, |
| "memory(GiB)": 160.86, |
| "step": 1375, |
| "token_acc": 0.9997475172529877, |
| "train_speed(iter/s)": 0.042069 |
| }, |
| { |
| "epoch": 0.5057724024189115, |
| "grad_norm": 0.004050049465149641, |
| "learning_rate": 9.314690639800906e-06, |
| "loss": 0.0002213560277596116, |
| "memory(GiB)": 160.86, |
| "step": 1380, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.04208 |
| }, |
| { |
| "epoch": 0.5076049111233278, |
| "grad_norm": 0.028278427198529243, |
| "learning_rate": 9.30983336457781e-06, |
| "loss": 0.0009013951756060123, |
| "memory(GiB)": 160.86, |
| "step": 1385, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.042093 |
| }, |
| { |
| "epoch": 0.5094374198277442, |
| "grad_norm": 0.020806804299354553, |
| "learning_rate": 9.304960212339602e-06, |
| "loss": 0.001097150705754757, |
| "memory(GiB)": 160.86, |
| "step": 1390, |
| "token_acc": 0.9995791954216462, |
| "train_speed(iter/s)": 0.042093 |
| }, |
| { |
| "epoch": 0.5112699285321606, |
| "grad_norm": 0.05375039204955101, |
| "learning_rate": 9.300071201038503e-06, |
| "loss": 0.0004816567990928888, |
| "memory(GiB)": 160.86, |
| "step": 1395, |
| "token_acc": 0.9998316073082428, |
| "train_speed(iter/s)": 0.042105 |
| }, |
| { |
| "epoch": 0.5131024372365769, |
| "grad_norm": 0.005027708597481251, |
| "learning_rate": 9.295166348685169e-06, |
| "loss": 0.0004785487428307533, |
| "memory(GiB)": 160.86, |
| "step": 1400, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.042115 |
| }, |
| { |
| "epoch": 0.5149349459409932, |
| "grad_norm": 0.007288212422281504, |
| "learning_rate": 9.290245673348609e-06, |
| "loss": 0.00039666993543505666, |
| "memory(GiB)": 160.86, |
| "step": 1405, |
| "token_acc": 0.9998316781686585, |
| "train_speed(iter/s)": 0.042118 |
| }, |
| { |
| "epoch": 0.5167674546454095, |
| "grad_norm": 0.0003485670604277402, |
| "learning_rate": 9.285309193156118e-06, |
| "loss": 0.0002419668948277831, |
| "memory(GiB)": 160.86, |
| "step": 1410, |
| "token_acc": 0.9999158461667929, |
| "train_speed(iter/s)": 0.042128 |
| }, |
| { |
| "epoch": 0.5185999633498259, |
| "grad_norm": 0.05836885794997215, |
| "learning_rate": 9.280356926293222e-06, |
| "loss": 0.0011019782163202763, |
| "memory(GiB)": 160.86, |
| "step": 1415, |
| "token_acc": 0.999663356337317, |
| "train_speed(iter/s)": 0.04214 |
| }, |
| { |
| "epoch": 0.5204324720542423, |
| "grad_norm": 0.030392736196517944, |
| "learning_rate": 9.275388891003596e-06, |
| "loss": 0.0003588124178349972, |
| "memory(GiB)": 160.86, |
| "step": 1420, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.042152 |
| }, |
| { |
| "epoch": 0.5222649807586586, |
| "grad_norm": 0.10738146305084229, |
| "learning_rate": 9.270405105589012e-06, |
| "loss": 0.0022922657430171967, |
| "memory(GiB)": 160.86, |
| "step": 1425, |
| "token_acc": 0.9995792308339645, |
| "train_speed(iter/s)": 0.042164 |
| }, |
| { |
| "epoch": 0.524097489463075, |
| "grad_norm": 0.024856839329004288, |
| "learning_rate": 9.265405588409258e-06, |
| "loss": 0.000432960782200098, |
| "memory(GiB)": 160.86, |
| "step": 1430, |
| "token_acc": 0.9999158390843292, |
| "train_speed(iter/s)": 0.042176 |
| }, |
| { |
| "epoch": 0.5259299981674913, |
| "grad_norm": 0.023576080799102783, |
| "learning_rate": 9.26039035788208e-06, |
| "loss": 0.0014881092123687268, |
| "memory(GiB)": 160.86, |
| "step": 1435, |
| "token_acc": 0.9995794078061911, |
| "train_speed(iter/s)": 0.042185 |
| }, |
| { |
| "epoch": 0.5277625068719076, |
| "grad_norm": 0.025212427601218224, |
| "learning_rate": 9.255359432483106e-06, |
| "loss": 0.0006445163395255804, |
| "memory(GiB)": 160.86, |
| "step": 1440, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.042195 |
| }, |
| { |
| "epoch": 0.5295950155763239, |
| "grad_norm": 0.05869888886809349, |
| "learning_rate": 9.25031283074579e-06, |
| "loss": 0.0012847738340497016, |
| "memory(GiB)": 160.86, |
| "step": 1445, |
| "token_acc": 0.9995791245791246, |
| "train_speed(iter/s)": 0.042206 |
| }, |
| { |
| "epoch": 0.5314275242807404, |
| "grad_norm": 0.02733391709625721, |
| "learning_rate": 9.245250571261328e-06, |
| "loss": 0.0012956521473824977, |
| "memory(GiB)": 160.86, |
| "step": 1450, |
| "token_acc": 0.9998317064961293, |
| "train_speed(iter/s)": 0.042217 |
| }, |
| { |
| "epoch": 0.5332600329851567, |
| "grad_norm": 0.01605917513370514, |
| "learning_rate": 9.240172672678603e-06, |
| "loss": 0.0010051255114376545, |
| "memory(GiB)": 160.86, |
| "step": 1455, |
| "token_acc": 0.9997476234541937, |
| "train_speed(iter/s)": 0.042217 |
| }, |
| { |
| "epoch": 0.535092541689573, |
| "grad_norm": 0.07777733355760574, |
| "learning_rate": 9.235079153704108e-06, |
| "loss": 0.001209939643740654, |
| "memory(GiB)": 160.86, |
| "step": 1460, |
| "token_acc": 0.9994948644552955, |
| "train_speed(iter/s)": 0.042228 |
| }, |
| { |
| "epoch": 0.5369250503939894, |
| "grad_norm": 0.024418100714683533, |
| "learning_rate": 9.229970033101881e-06, |
| "loss": 0.0006480346899479627, |
| "memory(GiB)": 160.86, |
| "step": 1465, |
| "token_acc": 0.9998315789473684, |
| "train_speed(iter/s)": 0.042239 |
| }, |
| { |
| "epoch": 0.5387575590984057, |
| "grad_norm": 0.051130812615156174, |
| "learning_rate": 9.224845329693434e-06, |
| "loss": 0.0005965878255665303, |
| "memory(GiB)": 160.86, |
| "step": 1470, |
| "token_acc": 0.9998316214850985, |
| "train_speed(iter/s)": 0.04225 |
| }, |
| { |
| "epoch": 0.540590067802822, |
| "grad_norm": 0.03825452923774719, |
| "learning_rate": 9.21970506235769e-06, |
| "loss": 0.0003675919026136398, |
| "memory(GiB)": 160.86, |
| "step": 1475, |
| "token_acc": 0.9998315221969506, |
| "train_speed(iter/s)": 0.042259 |
| }, |
| { |
| "epoch": 0.5424225765072385, |
| "grad_norm": 0.05280032381415367, |
| "learning_rate": 9.214549250030899e-06, |
| "loss": 0.00044973762705922125, |
| "memory(GiB)": 160.86, |
| "step": 1480, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.042271 |
| }, |
| { |
| "epoch": 0.5442550852116548, |
| "grad_norm": 0.13924196362495422, |
| "learning_rate": 9.209377911706585e-06, |
| "loss": 0.0010926604270935058, |
| "memory(GiB)": 160.86, |
| "step": 1485, |
| "token_acc": 0.9996634979389248, |
| "train_speed(iter/s)": 0.042282 |
| }, |
| { |
| "epoch": 0.5460875939160711, |
| "grad_norm": 0.0010057148756459355, |
| "learning_rate": 9.204191066435463e-06, |
| "loss": 7.150891469791532e-05, |
| "memory(GiB)": 160.86, |
| "step": 1490, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.042286 |
| }, |
| { |
| "epoch": 0.5479201026204874, |
| "grad_norm": 0.0028190938755869865, |
| "learning_rate": 9.198988733325381e-06, |
| "loss": 0.00018844833830371498, |
| "memory(GiB)": 160.86, |
| "step": 1495, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.042297 |
| }, |
| { |
| "epoch": 0.5497526113249038, |
| "grad_norm": 0.2529807388782501, |
| "learning_rate": 9.19377093154123e-06, |
| "loss": 0.0006476116366684436, |
| "memory(GiB)": 160.86, |
| "step": 1500, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.034291 |
| }, |
| { |
| "epoch": 0.5497526113249038, |
| "eval_loss": 0.0008861870155669749, |
| "eval_runtime": 172.4847, |
| "eval_samples_per_second": 2.551, |
| "eval_steps_per_second": 2.551, |
| "eval_token_acc": 0.999755138270358, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5515851200293201, |
| "grad_norm": 0.07361137121915817, |
| "learning_rate": 9.188537680304901e-06, |
| "loss": 0.001575019396841526, |
| "memory(GiB)": 160.86, |
| "step": 1505, |
| "token_acc": 0.9997150923359839, |
| "train_speed(iter/s)": 0.033682 |
| }, |
| { |
| "epoch": 0.5534176287337365, |
| "grad_norm": 0.1123221218585968, |
| "learning_rate": 9.18328899889519e-06, |
| "loss": 0.0008759641088545323, |
| "memory(GiB)": 160.86, |
| "step": 1510, |
| "token_acc": 0.9997474960020201, |
| "train_speed(iter/s)": 0.033712 |
| }, |
| { |
| "epoch": 0.5552501374381529, |
| "grad_norm": 0.031373172998428345, |
| "learning_rate": 9.17802490664774e-06, |
| "loss": 0.0005370716098695993, |
| "memory(GiB)": 160.86, |
| "step": 1515, |
| "token_acc": 0.9997475385003787, |
| "train_speed(iter/s)": 0.033741 |
| }, |
| { |
| "epoch": 0.5570826461425692, |
| "grad_norm": 0.00548228295519948, |
| "learning_rate": 9.172745422954961e-06, |
| "loss": 0.0006150617729872466, |
| "memory(GiB)": 160.86, |
| "step": 1520, |
| "token_acc": 0.9997476234541937, |
| "train_speed(iter/s)": 0.033771 |
| }, |
| { |
| "epoch": 0.5589151548469855, |
| "grad_norm": 0.09783894568681717, |
| "learning_rate": 9.167450567265972e-06, |
| "loss": 0.0003677058033645153, |
| "memory(GiB)": 160.86, |
| "step": 1525, |
| "token_acc": 0.9999158036541214, |
| "train_speed(iter/s)": 0.033793 |
| }, |
| { |
| "epoch": 0.5607476635514018, |
| "grad_norm": 0.02310693822801113, |
| "learning_rate": 9.162140359086515e-06, |
| "loss": 0.0013180834241211415, |
| "memory(GiB)": 160.86, |
| "step": 1530, |
| "token_acc": 0.9994106751978448, |
| "train_speed(iter/s)": 0.033822 |
| }, |
| { |
| "epoch": 0.5625801722558182, |
| "grad_norm": 0.07956714183092117, |
| "learning_rate": 9.156814817978889e-06, |
| "loss": 0.0014457314275205136, |
| "memory(GiB)": 160.86, |
| "step": 1535, |
| "token_acc": 0.9994950345059754, |
| "train_speed(iter/s)": 0.033851 |
| }, |
| { |
| "epoch": 0.5644126809602346, |
| "grad_norm": 0.007547269109636545, |
| "learning_rate": 9.151473963561884e-06, |
| "loss": 0.0004539607558399439, |
| "memory(GiB)": 160.86, |
| "step": 1540, |
| "token_acc": 0.9998316356595673, |
| "train_speed(iter/s)": 0.033879 |
| }, |
| { |
| "epoch": 0.5662451896646509, |
| "grad_norm": 0.016255052760243416, |
| "learning_rate": 9.146117815510691e-06, |
| "loss": 0.0003765122266486287, |
| "memory(GiB)": 160.86, |
| "step": 1545, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.033907 |
| }, |
| { |
| "epoch": 0.5680776983690673, |
| "grad_norm": 0.06404280662536621, |
| "learning_rate": 9.140746393556853e-06, |
| "loss": 0.0009273691102862358, |
| "memory(GiB)": 160.86, |
| "step": 1550, |
| "token_acc": 0.9994106751978448, |
| "train_speed(iter/s)": 0.033936 |
| }, |
| { |
| "epoch": 0.5699102070734836, |
| "grad_norm": 0.030146759003400803, |
| "learning_rate": 9.135359717488179e-06, |
| "loss": 0.0006903111469000577, |
| "memory(GiB)": 160.86, |
| "step": 1555, |
| "token_acc": 0.9997473471450228, |
| "train_speed(iter/s)": 0.033965 |
| }, |
| { |
| "epoch": 0.5717427157778999, |
| "grad_norm": 0.017701471224427223, |
| "learning_rate": 9.129957807148666e-06, |
| "loss": 0.0014588728547096253, |
| "memory(GiB)": 160.86, |
| "step": 1560, |
| "token_acc": 0.999663356337317, |
| "train_speed(iter/s)": 0.033993 |
| }, |
| { |
| "epoch": 0.5735752244823162, |
| "grad_norm": 0.02424156479537487, |
| "learning_rate": 9.124540682438438e-06, |
| "loss": 0.00092041976749897, |
| "memory(GiB)": 160.86, |
| "step": 1565, |
| "token_acc": 0.9997475809844342, |
| "train_speed(iter/s)": 0.034021 |
| }, |
| { |
| "epoch": 0.5754077331867327, |
| "grad_norm": 0.06382456421852112, |
| "learning_rate": 9.119108363313665e-06, |
| "loss": 0.0009634297341108323, |
| "memory(GiB)": 160.86, |
| "step": 1570, |
| "token_acc": 0.9996634413125789, |
| "train_speed(iter/s)": 0.034047 |
| }, |
| { |
| "epoch": 0.577240241891149, |
| "grad_norm": 0.011778367683291435, |
| "learning_rate": 9.113660869786491e-06, |
| "loss": 0.0007347457576543093, |
| "memory(GiB)": 160.86, |
| "step": 1575, |
| "token_acc": 0.999663242970197, |
| "train_speed(iter/s)": 0.034075 |
| }, |
| { |
| "epoch": 0.5790727505955653, |
| "grad_norm": 0.01488505955785513, |
| "learning_rate": 9.108198221924966e-06, |
| "loss": 0.0007065658923238516, |
| "memory(GiB)": 160.86, |
| "step": 1580, |
| "token_acc": 0.9996636677036912, |
| "train_speed(iter/s)": 0.034103 |
| }, |
| { |
| "epoch": 0.5809052592999817, |
| "grad_norm": 0.016339842230081558, |
| "learning_rate": 9.102720439852964e-06, |
| "loss": 0.0004196997731924057, |
| "memory(GiB)": 160.86, |
| "step": 1585, |
| "token_acc": 0.9999158674070335, |
| "train_speed(iter/s)": 0.034131 |
| }, |
| { |
| "epoch": 0.582737768004398, |
| "grad_norm": 0.03133771941065788, |
| "learning_rate": 9.097227543750109e-06, |
| "loss": 0.0003929842729121447, |
| "memory(GiB)": 160.86, |
| "step": 1590, |
| "token_acc": 0.9999158320006734, |
| "train_speed(iter/s)": 0.034104 |
| }, |
| { |
| "epoch": 0.5845702767088143, |
| "grad_norm": 0.10911545157432556, |
| "learning_rate": 9.091719553851707e-06, |
| "loss": 0.00033823368139564993, |
| "memory(GiB)": 160.86, |
| "step": 1595, |
| "token_acc": 0.9998316781686585, |
| "train_speed(iter/s)": 0.034131 |
| }, |
| { |
| "epoch": 0.5864027854132308, |
| "grad_norm": 0.06253647804260254, |
| "learning_rate": 9.086196490448668e-06, |
| "loss": 0.0004926771856844425, |
| "memory(GiB)": 160.86, |
| "step": 1600, |
| "token_acc": 0.9998316923335858, |
| "train_speed(iter/s)": 0.034154 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 0.01017008163034916, |
| "learning_rate": 9.080658373887432e-06, |
| "loss": 0.0021519148722290993, |
| "memory(GiB)": 160.86, |
| "step": 1605, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.034177 |
| }, |
| { |
| "epoch": 0.5900678028220634, |
| "grad_norm": 0.027529926970601082, |
| "learning_rate": 9.07510522456989e-06, |
| "loss": 0.000728294812142849, |
| "memory(GiB)": 160.86, |
| "step": 1610, |
| "token_acc": 0.9996633280026934, |
| "train_speed(iter/s)": 0.034203 |
| }, |
| { |
| "epoch": 0.5919003115264797, |
| "grad_norm": 0.14524707198143005, |
| "learning_rate": 9.069537062953318e-06, |
| "loss": 0.0007321128156036139, |
| "memory(GiB)": 160.86, |
| "step": 1615, |
| "token_acc": 0.9996633846671716, |
| "train_speed(iter/s)": 0.03423 |
| }, |
| { |
| "epoch": 0.5937328202308961, |
| "grad_norm": 0.010788935236632824, |
| "learning_rate": 9.063953909550289e-06, |
| "loss": 0.0007929414510726929, |
| "memory(GiB)": 160.86, |
| "step": 1620, |
| "token_acc": 0.9997475172529877, |
| "train_speed(iter/s)": 0.034256 |
| }, |
| { |
| "epoch": 0.5955653289353124, |
| "grad_norm": 0.04031025990843773, |
| "learning_rate": 9.05835578492861e-06, |
| "loss": 0.00044157886877655984, |
| "memory(GiB)": 160.86, |
| "step": 1625, |
| "token_acc": 0.9998316214850985, |
| "train_speed(iter/s)": 0.034282 |
| }, |
| { |
| "epoch": 0.5973978376397288, |
| "grad_norm": 0.005226753186434507, |
| "learning_rate": 9.052742709711234e-06, |
| "loss": 0.0007471313234418631, |
| "memory(GiB)": 160.86, |
| "step": 1630, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.034307 |
| }, |
| { |
| "epoch": 0.5992303463441452, |
| "grad_norm": 0.006849437486380339, |
| "learning_rate": 9.0471147045762e-06, |
| "loss": 0.00016981502994894981, |
| "memory(GiB)": 160.86, |
| "step": 1635, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.034314 |
| }, |
| { |
| "epoch": 0.6010628550485615, |
| "grad_norm": 0.0021249176934361458, |
| "learning_rate": 9.041471790256543e-06, |
| "loss": 0.0004975998308509588, |
| "memory(GiB)": 160.86, |
| "step": 1640, |
| "token_acc": 0.9999157965644998, |
| "train_speed(iter/s)": 0.034341 |
| }, |
| { |
| "epoch": 0.6028953637529778, |
| "grad_norm": 0.03091166540980339, |
| "learning_rate": 9.035813987540216e-06, |
| "loss": 0.001137539092451334, |
| "memory(GiB)": 160.86, |
| "step": 1645, |
| "token_acc": 0.999579018270607, |
| "train_speed(iter/s)": 0.034367 |
| }, |
| { |
| "epoch": 0.6047278724573941, |
| "grad_norm": 0.020048417150974274, |
| "learning_rate": 9.030141317270026e-06, |
| "loss": 0.0009108279831707477, |
| "memory(GiB)": 160.86, |
| "step": 1650, |
| "token_acc": 0.9997473471450228, |
| "train_speed(iter/s)": 0.034393 |
| }, |
| { |
| "epoch": 0.6065603811618105, |
| "grad_norm": 0.0024872045032680035, |
| "learning_rate": 9.02445380034355e-06, |
| "loss": 0.00014628460630774497, |
| "memory(GiB)": 160.86, |
| "step": 1655, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.034418 |
| }, |
| { |
| "epoch": 0.6083928898662269, |
| "grad_norm": 0.1102481409907341, |
| "learning_rate": 9.018751457713062e-06, |
| "loss": 0.002010086178779602, |
| "memory(GiB)": 160.86, |
| "step": 1660, |
| "token_acc": 0.9996634413125789, |
| "train_speed(iter/s)": 0.034443 |
| }, |
| { |
| "epoch": 0.6102253985706432, |
| "grad_norm": 0.0067368666641414165, |
| "learning_rate": 9.013034310385442e-06, |
| "loss": 0.0004647184628993273, |
| "memory(GiB)": 160.86, |
| "step": 1665, |
| "token_acc": 0.9997474322276477, |
| "train_speed(iter/s)": 0.034469 |
| }, |
| { |
| "epoch": 0.6120579072750596, |
| "grad_norm": 0.0039915889501571655, |
| "learning_rate": 9.007302379422118e-06, |
| "loss": 0.0008955980651080608, |
| "memory(GiB)": 160.86, |
| "step": 1670, |
| "token_acc": 0.999663129526697, |
| "train_speed(iter/s)": 0.03449 |
| }, |
| { |
| "epoch": 0.6138904159794759, |
| "grad_norm": 0.04223395511507988, |
| "learning_rate": 9.00155568593898e-06, |
| "loss": 0.0006724436767399311, |
| "memory(GiB)": 160.86, |
| "step": 1675, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.034516 |
| }, |
| { |
| "epoch": 0.6157229246838922, |
| "grad_norm": 0.013977458700537682, |
| "learning_rate": 8.995794251106295e-06, |
| "loss": 0.0012857289984822273, |
| "memory(GiB)": 160.86, |
| "step": 1680, |
| "token_acc": 0.9995791245791246, |
| "train_speed(iter/s)": 0.034534 |
| }, |
| { |
| "epoch": 0.6175554333883086, |
| "grad_norm": 0.02960984595119953, |
| "learning_rate": 8.99001809614864e-06, |
| "loss": 0.0006384906824678183, |
| "memory(GiB)": 160.86, |
| "step": 1685, |
| "token_acc": 0.9997474109623642, |
| "train_speed(iter/s)": 0.034559 |
| }, |
| { |
| "epoch": 0.619387942092725, |
| "grad_norm": 0.14135026931762695, |
| "learning_rate": 8.98422724234482e-06, |
| "loss": 0.0018129302188754082, |
| "memory(GiB)": 160.86, |
| "step": 1690, |
| "token_acc": 0.9994108735903047, |
| "train_speed(iter/s)": 0.034584 |
| }, |
| { |
| "epoch": 0.6212204507971413, |
| "grad_norm": 0.011938896030187607, |
| "learning_rate": 8.978421711027789e-06, |
| "loss": 0.0010257656686007977, |
| "memory(GiB)": 160.86, |
| "step": 1695, |
| "token_acc": 0.999579018270607, |
| "train_speed(iter/s)": 0.034609 |
| }, |
| { |
| "epoch": 0.6230529595015576, |
| "grad_norm": 0.02054041065275669, |
| "learning_rate": 8.97260152358457e-06, |
| "loss": 0.0010426132939755917, |
| "memory(GiB)": 160.86, |
| "step": 1700, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.034627 |
| }, |
| { |
| "epoch": 0.624885468205974, |
| "grad_norm": 0.057805608958005905, |
| "learning_rate": 8.966766701456177e-06, |
| "loss": 0.0011805295012891292, |
| "memory(GiB)": 160.86, |
| "step": 1705, |
| "token_acc": 0.9994950345059754, |
| "train_speed(iter/s)": 0.03465 |
| }, |
| { |
| "epoch": 0.6267179769103903, |
| "grad_norm": 0.01560523733496666, |
| "learning_rate": 8.96091726613754e-06, |
| "loss": 0.0006526369601488113, |
| "memory(GiB)": 160.86, |
| "step": 1710, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.034675 |
| }, |
| { |
| "epoch": 0.6285504856148066, |
| "grad_norm": 0.02277560532093048, |
| "learning_rate": 8.95505323917742e-06, |
| "loss": 0.0003244250314310193, |
| "memory(GiB)": 160.86, |
| "step": 1715, |
| "token_acc": 0.9999158249158249, |
| "train_speed(iter/s)": 0.034697 |
| }, |
| { |
| "epoch": 0.6303829943192231, |
| "grad_norm": 0.03905067220330238, |
| "learning_rate": 8.949174642178333e-06, |
| "loss": 0.0006646113935858012, |
| "memory(GiB)": 160.86, |
| "step": 1720, |
| "token_acc": 0.9998317064961293, |
| "train_speed(iter/s)": 0.034715 |
| }, |
| { |
| "epoch": 0.6322155030236394, |
| "grad_norm": 0.004376774653792381, |
| "learning_rate": 8.94328149679647e-06, |
| "loss": 0.0006781556177884341, |
| "memory(GiB)": 160.86, |
| "step": 1725, |
| "token_acc": 0.9996631011538786, |
| "train_speed(iter/s)": 0.034739 |
| }, |
| { |
| "epoch": 0.6340480117280557, |
| "grad_norm": 0.08241453766822815, |
| "learning_rate": 8.937373824741618e-06, |
| "loss": 0.0007374928332865238, |
| "memory(GiB)": 160.86, |
| "step": 1730, |
| "token_acc": 0.9998317206562894, |
| "train_speed(iter/s)": 0.034764 |
| }, |
| { |
| "epoch": 0.635880520432472, |
| "grad_norm": 0.02215947024524212, |
| "learning_rate": 8.931451647777076e-06, |
| "loss": 0.001058538444340229, |
| "memory(GiB)": 160.86, |
| "step": 1735, |
| "token_acc": 0.9994950770007573, |
| "train_speed(iter/s)": 0.034781 |
| }, |
| { |
| "epoch": 0.6377130291368884, |
| "grad_norm": 0.05471364036202431, |
| "learning_rate": 8.92551498771958e-06, |
| "loss": 0.0005416409578174353, |
| "memory(GiB)": 160.86, |
| "step": 1740, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.034805 |
| }, |
| { |
| "epoch": 0.6395455378413047, |
| "grad_norm": 0.0009198402985930443, |
| "learning_rate": 8.919563866439218e-06, |
| "loss": 0.0011710536666214467, |
| "memory(GiB)": 160.86, |
| "step": 1745, |
| "token_acc": 0.9995790537127462, |
| "train_speed(iter/s)": 0.034822 |
| }, |
| { |
| "epoch": 0.6413780465457211, |
| "grad_norm": 0.02374288998544216, |
| "learning_rate": 8.913598305859354e-06, |
| "loss": 0.0002880813553929329, |
| "memory(GiB)": 160.86, |
| "step": 1750, |
| "token_acc": 0.9999158249158249, |
| "train_speed(iter/s)": 0.034839 |
| }, |
| { |
| "epoch": 0.6432105552501375, |
| "grad_norm": 0.03671794757246971, |
| "learning_rate": 8.907618327956546e-06, |
| "loss": 0.0009451866149902344, |
| "memory(GiB)": 160.86, |
| "step": 1755, |
| "token_acc": 0.9997473896934995, |
| "train_speed(iter/s)": 0.034863 |
| }, |
| { |
| "epoch": 0.6450430639545538, |
| "grad_norm": 0.02204386703670025, |
| "learning_rate": 8.90162395476046e-06, |
| "loss": 0.00012790242908522487, |
| "memory(GiB)": 160.86, |
| "step": 1760, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.034887 |
| }, |
| { |
| "epoch": 0.6468755726589701, |
| "grad_norm": 0.006437621079385281, |
| "learning_rate": 8.895615208353796e-06, |
| "loss": 0.0011966807767748832, |
| "memory(GiB)": 160.86, |
| "step": 1765, |
| "token_acc": 0.9996632146164857, |
| "train_speed(iter/s)": 0.034911 |
| }, |
| { |
| "epoch": 0.6487080813633864, |
| "grad_norm": 0.06638949364423752, |
| "learning_rate": 8.889592110872203e-06, |
| "loss": 0.0013600192032754421, |
| "memory(GiB)": 160.86, |
| "step": 1770, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.034934 |
| }, |
| { |
| "epoch": 0.6505405900678028, |
| "grad_norm": 0.029982449486851692, |
| "learning_rate": 8.883554684504198e-06, |
| "loss": 0.00047690006904304026, |
| "memory(GiB)": 160.86, |
| "step": 1775, |
| "token_acc": 0.9999158249158249, |
| "train_speed(iter/s)": 0.034958 |
| }, |
| { |
| "epoch": 0.6523730987722192, |
| "grad_norm": 0.0004446969833225012, |
| "learning_rate": 8.877502951491083e-06, |
| "loss": 0.0002472808351740241, |
| "memory(GiB)": 160.86, |
| "step": 1780, |
| "token_acc": 0.9999158674070335, |
| "train_speed(iter/s)": 0.034982 |
| }, |
| { |
| "epoch": 0.6542056074766355, |
| "grad_norm": 0.045220986008644104, |
| "learning_rate": 8.871436934126865e-06, |
| "loss": 0.00016599131049588323, |
| "memory(GiB)": 160.86, |
| "step": 1785, |
| "token_acc": 0.9999158107425492, |
| "train_speed(iter/s)": 0.035005 |
| }, |
| { |
| "epoch": 0.6560381161810519, |
| "grad_norm": 0.08464392274618149, |
| "learning_rate": 8.865356654758175e-06, |
| "loss": 0.0011138648726046086, |
| "memory(GiB)": 160.86, |
| "step": 1790, |
| "token_acc": 0.9997474534893509, |
| "train_speed(iter/s)": 0.035029 |
| }, |
| { |
| "epoch": 0.6578706248854682, |
| "grad_norm": 0.018666911870241165, |
| "learning_rate": 8.859262135784184e-06, |
| "loss": 0.0008051570504903794, |
| "memory(GiB)": 160.86, |
| "step": 1795, |
| "token_acc": 0.9998317206562894, |
| "train_speed(iter/s)": 0.035052 |
| }, |
| { |
| "epoch": 0.6597031335898845, |
| "grad_norm": 0.03633316978812218, |
| "learning_rate": 8.853153399656513e-06, |
| "loss": 0.0012314721010625363, |
| "memory(GiB)": 160.86, |
| "step": 1800, |
| "token_acc": 0.9997476022211005, |
| "train_speed(iter/s)": 0.035075 |
| }, |
| { |
| "epoch": 0.661535642294301, |
| "grad_norm": 0.07466746866703033, |
| "learning_rate": 8.84703046887917e-06, |
| "loss": 0.0005056848283857107, |
| "memory(GiB)": 160.86, |
| "step": 1805, |
| "token_acc": 0.9998315789473684, |
| "train_speed(iter/s)": 0.035098 |
| }, |
| { |
| "epoch": 0.6633681509987173, |
| "grad_norm": 0.058270856738090515, |
| "learning_rate": 8.840893366008443e-06, |
| "loss": 0.0027731884270906447, |
| "memory(GiB)": 160.86, |
| "step": 1810, |
| "token_acc": 0.9989051709617652, |
| "train_speed(iter/s)": 0.03512 |
| }, |
| { |
| "epoch": 0.6652006597031336, |
| "grad_norm": 0.053415171802043915, |
| "learning_rate": 8.834742113652835e-06, |
| "loss": 0.0012996003031730651, |
| "memory(GiB)": 160.86, |
| "step": 1815, |
| "token_acc": 0.9996633846671716, |
| "train_speed(iter/s)": 0.035143 |
| }, |
| { |
| "epoch": 0.6670331684075499, |
| "grad_norm": 0.17921970784664154, |
| "learning_rate": 8.828576734472975e-06, |
| "loss": 0.002054636925458908, |
| "memory(GiB)": 160.86, |
| "step": 1820, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.035166 |
| }, |
| { |
| "epoch": 0.6688656771119663, |
| "grad_norm": 0.2059200257062912, |
| "learning_rate": 8.82239725118153e-06, |
| "loss": 0.000544156739488244, |
| "memory(GiB)": 160.86, |
| "step": 1825, |
| "token_acc": 0.9998317914213625, |
| "train_speed(iter/s)": 0.035188 |
| }, |
| { |
| "epoch": 0.6706981858163826, |
| "grad_norm": 0.0659668818116188, |
| "learning_rate": 8.816203686543128e-06, |
| "loss": 0.0011439280584454536, |
| "memory(GiB)": 160.86, |
| "step": 1830, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.03521 |
| }, |
| { |
| "epoch": 0.6725306945207989, |
| "grad_norm": 0.027126120403409004, |
| "learning_rate": 8.80999606337427e-06, |
| "loss": 0.0006697001401335001, |
| "memory(GiB)": 160.86, |
| "step": 1835, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.035233 |
| }, |
| { |
| "epoch": 0.6743632032252154, |
| "grad_norm": 0.04717881977558136, |
| "learning_rate": 8.803774404543246e-06, |
| "loss": 0.0008460984565317631, |
| "memory(GiB)": 160.86, |
| "step": 1840, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.035255 |
| }, |
| { |
| "epoch": 0.6761957119296317, |
| "grad_norm": 0.03212764859199524, |
| "learning_rate": 8.79753873297006e-06, |
| "loss": 0.0013919253833591938, |
| "memory(GiB)": 160.86, |
| "step": 1845, |
| "token_acc": 0.9995793370351674, |
| "train_speed(iter/s)": 0.035277 |
| }, |
| { |
| "epoch": 0.678028220634048, |
| "grad_norm": 0.004734094720333815, |
| "learning_rate": 8.791289071626324e-06, |
| "loss": 0.0017154796048998832, |
| "memory(GiB)": 160.86, |
| "step": 1850, |
| "token_acc": 0.9994106751978448, |
| "train_speed(iter/s)": 0.035298 |
| }, |
| { |
| "epoch": 0.6798607293384643, |
| "grad_norm": 0.002792911371216178, |
| "learning_rate": 8.7850254435352e-06, |
| "loss": 0.00024983214680105446, |
| "memory(GiB)": 160.86, |
| "step": 1855, |
| "token_acc": 0.9999158532480646, |
| "train_speed(iter/s)": 0.035304 |
| }, |
| { |
| "epoch": 0.6816932380428807, |
| "grad_norm": 0.069346122443676, |
| "learning_rate": 8.778747871771293e-06, |
| "loss": 0.0004865613766014576, |
| "memory(GiB)": 160.86, |
| "step": 1860, |
| "token_acc": 0.9999158603281447, |
| "train_speed(iter/s)": 0.035326 |
| }, |
| { |
| "epoch": 0.683525746747297, |
| "grad_norm": 0.0010090708965435624, |
| "learning_rate": 8.772456379460578e-06, |
| "loss": 0.0005619535222649574, |
| "memory(GiB)": 160.86, |
| "step": 1865, |
| "token_acc": 0.9998316923335858, |
| "train_speed(iter/s)": 0.035348 |
| }, |
| { |
| "epoch": 0.6853582554517134, |
| "grad_norm": 0.00402231328189373, |
| "learning_rate": 8.766150989780317e-06, |
| "loss": 0.00032461092341691257, |
| "memory(GiB)": 160.86, |
| "step": 1870, |
| "token_acc": 0.9999158178297837, |
| "train_speed(iter/s)": 0.03537 |
| }, |
| { |
| "epoch": 0.6871907641561298, |
| "grad_norm": 0.016630422323942184, |
| "learning_rate": 8.759831725958963e-06, |
| "loss": 0.0007076055742800235, |
| "memory(GiB)": 160.86, |
| "step": 1875, |
| "token_acc": 0.9999158461667929, |
| "train_speed(iter/s)": 0.035386 |
| }, |
| { |
| "epoch": 0.6890232728605461, |
| "grad_norm": 0.13864953815937042, |
| "learning_rate": 8.75349861127608e-06, |
| "loss": 0.0009592998772859574, |
| "memory(GiB)": 160.86, |
| "step": 1880, |
| "token_acc": 0.999663356337317, |
| "train_speed(iter/s)": 0.035408 |
| }, |
| { |
| "epoch": 0.6908557815649624, |
| "grad_norm": 0.12857644259929657, |
| "learning_rate": 8.747151669062256e-06, |
| "loss": 0.0003430765587836504, |
| "memory(GiB)": 160.86, |
| "step": 1885, |
| "token_acc": 0.9998316781686585, |
| "train_speed(iter/s)": 0.035429 |
| }, |
| { |
| "epoch": 0.6926882902693787, |
| "grad_norm": 0.007042865734547377, |
| "learning_rate": 8.740790922699024e-06, |
| "loss": 0.0002988249296322465, |
| "memory(GiB)": 160.86, |
| "step": 1890, |
| "token_acc": 0.9999157823816742, |
| "train_speed(iter/s)": 0.035451 |
| }, |
| { |
| "epoch": 0.6945207989737952, |
| "grad_norm": 0.004211138002574444, |
| "learning_rate": 8.73441639561877e-06, |
| "loss": 0.000298920925706625, |
| "memory(GiB)": 160.86, |
| "step": 1895, |
| "token_acc": 0.9998316781686585, |
| "train_speed(iter/s)": 0.035464 |
| }, |
| { |
| "epoch": 0.6963533076782115, |
| "grad_norm": 0.10895411670207977, |
| "learning_rate": 8.728028111304639e-06, |
| "loss": 0.0018308842554688454, |
| "memory(GiB)": 160.86, |
| "step": 1900, |
| "token_acc": 0.9995788054923764, |
| "train_speed(iter/s)": 0.035485 |
| }, |
| { |
| "epoch": 0.6981858163826278, |
| "grad_norm": 0.05376400053501129, |
| "learning_rate": 8.721626093290461e-06, |
| "loss": 0.0004374215379357338, |
| "memory(GiB)": 160.86, |
| "step": 1905, |
| "token_acc": 0.9998316923335858, |
| "train_speed(iter/s)": 0.035506 |
| }, |
| { |
| "epoch": 0.7000183250870442, |
| "grad_norm": 0.007238362450152636, |
| "learning_rate": 8.715210365160662e-06, |
| "loss": 6.630108109675347e-05, |
| "memory(GiB)": 160.86, |
| "step": 1910, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.035527 |
| }, |
| { |
| "epoch": 0.7018508337914605, |
| "grad_norm": 0.00040705734863877296, |
| "learning_rate": 8.708780950550173e-06, |
| "loss": 0.0006973243784159422, |
| "memory(GiB)": 160.86, |
| "step": 1915, |
| "token_acc": 0.9997475172529877, |
| "train_speed(iter/s)": 0.035548 |
| }, |
| { |
| "epoch": 0.7036833424958768, |
| "grad_norm": 0.0899810642004013, |
| "learning_rate": 8.702337873144343e-06, |
| "loss": 0.0013748856261372566, |
| "memory(GiB)": 160.86, |
| "step": 1920, |
| "token_acc": 0.9994948219247285, |
| "train_speed(iter/s)": 0.035569 |
| }, |
| { |
| "epoch": 0.7055158512002933, |
| "grad_norm": 0.08953223377466202, |
| "learning_rate": 8.695881156678856e-06, |
| "loss": 0.0006622021552175284, |
| "memory(GiB)": 160.86, |
| "step": 1925, |
| "token_acc": 0.9997475597441938, |
| "train_speed(iter/s)": 0.035589 |
| }, |
| { |
| "epoch": 0.7073483599047096, |
| "grad_norm": 0.015041066333651543, |
| "learning_rate": 8.689410824939639e-06, |
| "loss": 0.0003675042651593685, |
| "memory(GiB)": 160.86, |
| "step": 1930, |
| "token_acc": 0.9999158249158249, |
| "train_speed(iter/s)": 0.03561 |
| }, |
| { |
| "epoch": 0.7091808686091259, |
| "grad_norm": 0.015323134139180183, |
| "learning_rate": 8.682926901762776e-06, |
| "loss": 0.0009645667858421802, |
| "memory(GiB)": 160.86, |
| "step": 1935, |
| "token_acc": 0.999663242970197, |
| "train_speed(iter/s)": 0.03563 |
| }, |
| { |
| "epoch": 0.7110133773135422, |
| "grad_norm": 0.05264544486999512, |
| "learning_rate": 8.676429411034423e-06, |
| "loss": 0.0006276907399296761, |
| "memory(GiB)": 160.86, |
| "step": 1940, |
| "token_acc": 0.9996633280026934, |
| "train_speed(iter/s)": 0.035648 |
| }, |
| { |
| "epoch": 0.7128458860179586, |
| "grad_norm": 0.0028159820940345526, |
| "learning_rate": 8.669918376690716e-06, |
| "loss": 0.00036051685456186535, |
| "memory(GiB)": 160.86, |
| "step": 1945, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.035668 |
| }, |
| { |
| "epoch": 0.7146783947223749, |
| "grad_norm": 0.0341511145234108, |
| "learning_rate": 8.663393822717686e-06, |
| "loss": 0.0003709573531523347, |
| "memory(GiB)": 160.86, |
| "step": 1950, |
| "token_acc": 0.9998317064961293, |
| "train_speed(iter/s)": 0.035688 |
| }, |
| { |
| "epoch": 0.7165109034267912, |
| "grad_norm": 0.0006480200099758804, |
| "learning_rate": 8.656855773151163e-06, |
| "loss": 0.0003987106028944254, |
| "memory(GiB)": 160.86, |
| "step": 1955, |
| "token_acc": 0.9998315789473684, |
| "train_speed(iter/s)": 0.035709 |
| }, |
| { |
| "epoch": 0.7183434121312077, |
| "grad_norm": 0.0002706103550735861, |
| "learning_rate": 8.650304252076704e-06, |
| "loss": 0.0003762753214687109, |
| "memory(GiB)": 160.86, |
| "step": 1960, |
| "token_acc": 0.9998316356595673, |
| "train_speed(iter/s)": 0.035729 |
| }, |
| { |
| "epoch": 0.720175920835624, |
| "grad_norm": 0.00926526915282011, |
| "learning_rate": 8.643739283629484e-06, |
| "loss": 0.00021247351542115213, |
| "memory(GiB)": 160.86, |
| "step": 1965, |
| "token_acc": 0.9999158603281447, |
| "train_speed(iter/s)": 0.035749 |
| }, |
| { |
| "epoch": 0.7220084295400403, |
| "grad_norm": 0.11203871667385101, |
| "learning_rate": 8.63716089199422e-06, |
| "loss": 0.0012671677395701408, |
| "memory(GiB)": 160.86, |
| "step": 1970, |
| "token_acc": 0.9995792662403231, |
| "train_speed(iter/s)": 0.03577 |
| }, |
| { |
| "epoch": 0.7238409382444566, |
| "grad_norm": 0.027508899569511414, |
| "learning_rate": 8.630569101405084e-06, |
| "loss": 0.0016218043863773346, |
| "memory(GiB)": 160.86, |
| "step": 1975, |
| "token_acc": 0.99949499200404, |
| "train_speed(iter/s)": 0.03579 |
| }, |
| { |
| "epoch": 0.725673446948873, |
| "grad_norm": 0.03338692709803581, |
| "learning_rate": 8.6239639361456e-06, |
| "loss": 0.0007595627568662167, |
| "memory(GiB)": 160.86, |
| "step": 1980, |
| "token_acc": 0.9997473045822103, |
| "train_speed(iter/s)": 0.03581 |
| }, |
| { |
| "epoch": 0.7275059556532893, |
| "grad_norm": 0.01979021355509758, |
| "learning_rate": 8.617345420548568e-06, |
| "loss": 0.00039132642559707164, |
| "memory(GiB)": 160.86, |
| "step": 1985, |
| "token_acc": 0.9998317631224765, |
| "train_speed(iter/s)": 0.035829 |
| }, |
| { |
| "epoch": 0.7293384643577057, |
| "grad_norm": 0.0021872930228710175, |
| "learning_rate": 8.610713578995969e-06, |
| "loss": 0.0002923472551628947, |
| "memory(GiB)": 160.86, |
| "step": 1990, |
| "token_acc": 0.9999158603281447, |
| "train_speed(iter/s)": 0.035848 |
| }, |
| { |
| "epoch": 0.7311709730621221, |
| "grad_norm": 0.007450213190168142, |
| "learning_rate": 8.604068435918876e-06, |
| "loss": 0.0004648041445761919, |
| "memory(GiB)": 160.86, |
| "step": 1995, |
| "token_acc": 0.9998316356595673, |
| "train_speed(iter/s)": 0.035868 |
| }, |
| { |
| "epoch": 0.7330034817665384, |
| "grad_norm": 0.018950950354337692, |
| "learning_rate": 8.597410015797358e-06, |
| "loss": 0.0011166405864059925, |
| "memory(GiB)": 160.86, |
| "step": 2000, |
| "token_acc": 0.9996636394214599, |
| "train_speed(iter/s)": 0.035879 |
| }, |
| { |
| "epoch": 0.7330034817665384, |
| "eval_loss": 0.0007337583811022341, |
| "eval_runtime": 199.2224, |
| "eval_samples_per_second": 2.209, |
| "eval_steps_per_second": 2.209, |
| "eval_token_acc": 0.9997704421284606, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7348359904709547, |
| "grad_norm": 0.0039305477403104305, |
| "learning_rate": 8.590738343160402e-06, |
| "loss": 0.00037078014574944975, |
| "memory(GiB)": 160.86, |
| "step": 2005, |
| "token_acc": 0.9997927917427509, |
| "train_speed(iter/s)": 0.035487 |
| }, |
| { |
| "epoch": 0.736668499175371, |
| "grad_norm": 0.013306787237524986, |
| "learning_rate": 8.584053442585816e-06, |
| "loss": 0.0020991813391447066, |
| "memory(GiB)": 160.86, |
| "step": 2010, |
| "token_acc": 0.9996633846671716, |
| "train_speed(iter/s)": 0.035507 |
| }, |
| { |
| "epoch": 0.7385010078797875, |
| "grad_norm": 0.006368038710206747, |
| "learning_rate": 8.577355338700133e-06, |
| "loss": 0.000787766557186842, |
| "memory(GiB)": 160.86, |
| "step": 2015, |
| "token_acc": 0.9997473896934995, |
| "train_speed(iter/s)": 0.035525 |
| }, |
| { |
| "epoch": 0.7403335165842038, |
| "grad_norm": 0.010385467670857906, |
| "learning_rate": 8.570644056178533e-06, |
| "loss": 0.0008328554220497608, |
| "memory(GiB)": 160.86, |
| "step": 2020, |
| "token_acc": 0.9997476871320438, |
| "train_speed(iter/s)": 0.035538 |
| }, |
| { |
| "epoch": 0.7421660252886201, |
| "grad_norm": 0.01632188819348812, |
| "learning_rate": 8.563919619744735e-06, |
| "loss": 0.0005637739785015583, |
| "memory(GiB)": 160.86, |
| "step": 2025, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.035559 |
| }, |
| { |
| "epoch": 0.7439985339930365, |
| "grad_norm": 0.011626377701759338, |
| "learning_rate": 8.557182054170926e-06, |
| "loss": 0.0005918642971664667, |
| "memory(GiB)": 160.86, |
| "step": 2030, |
| "token_acc": 0.9999158178297837, |
| "train_speed(iter/s)": 0.035578 |
| }, |
| { |
| "epoch": 0.7458310426974528, |
| "grad_norm": 0.0031517872121185064, |
| "learning_rate": 8.550431384277654e-06, |
| "loss": 0.00141130480915308, |
| "memory(GiB)": 160.86, |
| "step": 2035, |
| "token_acc": 0.9995790891489182, |
| "train_speed(iter/s)": 0.035597 |
| }, |
| { |
| "epoch": 0.7476635514018691, |
| "grad_norm": 0.05396876111626625, |
| "learning_rate": 8.543667634933743e-06, |
| "loss": 0.0004124412313103676, |
| "memory(GiB)": 160.86, |
| "step": 2040, |
| "token_acc": 0.9998316214850985, |
| "train_speed(iter/s)": 0.035616 |
| }, |
| { |
| "epoch": 0.7494960601062856, |
| "grad_norm": 0.0036719287745654583, |
| "learning_rate": 8.536890831056199e-06, |
| "loss": 0.0014296333305537702, |
| "memory(GiB)": 160.86, |
| "step": 2045, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.035636 |
| }, |
| { |
| "epoch": 0.7513285688107019, |
| "grad_norm": 0.01854000613093376, |
| "learning_rate": 8.530100997610125e-06, |
| "loss": 0.00037872311659157274, |
| "memory(GiB)": 160.86, |
| "step": 2050, |
| "token_acc": 0.9999158886365548, |
| "train_speed(iter/s)": 0.035656 |
| }, |
| { |
| "epoch": 0.7531610775151182, |
| "grad_norm": 0.022685358300805092, |
| "learning_rate": 8.523298159608615e-06, |
| "loss": 0.0005078110843896866, |
| "memory(GiB)": 160.86, |
| "step": 2055, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.035675 |
| }, |
| { |
| "epoch": 0.7549935862195345, |
| "grad_norm": 0.0069847991690039635, |
| "learning_rate": 8.51648234211268e-06, |
| "loss": 0.0006114406045526266, |
| "memory(GiB)": 160.86, |
| "step": 2060, |
| "token_acc": 0.9999158036541214, |
| "train_speed(iter/s)": 0.035694 |
| }, |
| { |
| "epoch": 0.7568260949239509, |
| "grad_norm": 0.005377015098929405, |
| "learning_rate": 8.509653570231139e-06, |
| "loss": 0.000488346815109253, |
| "memory(GiB)": 160.86, |
| "step": 2065, |
| "token_acc": 0.9998316356595673, |
| "train_speed(iter/s)": 0.035714 |
| }, |
| { |
| "epoch": 0.7586586036283672, |
| "grad_norm": 0.13766171038150787, |
| "learning_rate": 8.502811869120537e-06, |
| "loss": 0.0007873100228607654, |
| "memory(GiB)": 160.86, |
| "step": 2070, |
| "token_acc": 0.9997473471450228, |
| "train_speed(iter/s)": 0.035733 |
| }, |
| { |
| "epoch": 0.7604911123327835, |
| "grad_norm": 0.08824609220027924, |
| "learning_rate": 8.495957263985049e-06, |
| "loss": 0.0008373255841434002, |
| "memory(GiB)": 160.86, |
| "step": 2075, |
| "token_acc": 0.9995790537127462, |
| "train_speed(iter/s)": 0.035751 |
| }, |
| { |
| "epoch": 0.7623236210372, |
| "grad_norm": 0.006550587713718414, |
| "learning_rate": 8.489089780076387e-06, |
| "loss": 0.00012923479080200194, |
| "memory(GiB)": 160.86, |
| "step": 2080, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.03577 |
| }, |
| { |
| "epoch": 0.7641561297416163, |
| "grad_norm": 0.06086429953575134, |
| "learning_rate": 8.482209442693706e-06, |
| "loss": 0.002163195610046387, |
| "memory(GiB)": 160.86, |
| "step": 2085, |
| "token_acc": 0.9990743078347218, |
| "train_speed(iter/s)": 0.03579 |
| }, |
| { |
| "epoch": 0.7659886384460326, |
| "grad_norm": 0.045746754854917526, |
| "learning_rate": 8.47531627718351e-06, |
| "loss": 0.00045907222665846347, |
| "memory(GiB)": 160.86, |
| "step": 2090, |
| "token_acc": 0.9998315080033698, |
| "train_speed(iter/s)": 0.035808 |
| }, |
| { |
| "epoch": 0.7678211471504489, |
| "grad_norm": 0.01716403290629387, |
| "learning_rate": 8.46841030893957e-06, |
| "loss": 0.0005397152155637742, |
| "memory(GiB)": 160.86, |
| "step": 2095, |
| "token_acc": 0.9997475597441938, |
| "train_speed(iter/s)": 0.035827 |
| }, |
| { |
| "epoch": 0.7696536558548653, |
| "grad_norm": 0.0022040277253836393, |
| "learning_rate": 8.461491563402807e-06, |
| "loss": 0.0012433138675987721, |
| "memory(GiB)": 160.86, |
| "step": 2100, |
| "token_acc": 0.9997475385003787, |
| "train_speed(iter/s)": 0.035846 |
| }, |
| { |
| "epoch": 0.7714861645592817, |
| "grad_norm": 0.028352022171020508, |
| "learning_rate": 8.454560066061225e-06, |
| "loss": 0.0011054543778300286, |
| "memory(GiB)": 160.86, |
| "step": 2105, |
| "token_acc": 0.9995790891489182, |
| "train_speed(iter/s)": 0.035865 |
| }, |
| { |
| "epoch": 0.773318673263698, |
| "grad_norm": 0.017512010410428047, |
| "learning_rate": 8.447615842449799e-06, |
| "loss": 0.00045901937410235404, |
| "memory(GiB)": 160.86, |
| "step": 2110, |
| "token_acc": 0.9999158249158249, |
| "train_speed(iter/s)": 0.035883 |
| }, |
| { |
| "epoch": 0.7751511819681144, |
| "grad_norm": 0.014501676894724369, |
| "learning_rate": 8.440658918150383e-06, |
| "loss": 0.0004790318664163351, |
| "memory(GiB)": 160.86, |
| "step": 2115, |
| "token_acc": 0.9997476446837147, |
| "train_speed(iter/s)": 0.035901 |
| }, |
| { |
| "epoch": 0.7769836906725307, |
| "grad_norm": 0.06630018353462219, |
| "learning_rate": 8.433689318791628e-06, |
| "loss": 0.0008208448067307472, |
| "memory(GiB)": 160.86, |
| "step": 2120, |
| "token_acc": 0.999663356337317, |
| "train_speed(iter/s)": 0.03592 |
| }, |
| { |
| "epoch": 0.778816199376947, |
| "grad_norm": 0.029544832184910774, |
| "learning_rate": 8.426707070048867e-06, |
| "loss": 0.00034202171955257656, |
| "memory(GiB)": 160.86, |
| "step": 2125, |
| "token_acc": 0.9999158036541214, |
| "train_speed(iter/s)": 0.035938 |
| }, |
| { |
| "epoch": 0.7806487080813634, |
| "grad_norm": 0.020295366644859314, |
| "learning_rate": 8.419712197644042e-06, |
| "loss": 0.00047438177280128, |
| "memory(GiB)": 160.86, |
| "step": 2130, |
| "token_acc": 0.9998316356595673, |
| "train_speed(iter/s)": 0.035956 |
| }, |
| { |
| "epoch": 0.7824812167857798, |
| "grad_norm": 0.021269747987389565, |
| "learning_rate": 8.412704727345597e-06, |
| "loss": 0.0006256222724914551, |
| "memory(GiB)": 160.86, |
| "step": 2135, |
| "token_acc": 0.9999158390843292, |
| "train_speed(iter/s)": 0.035974 |
| }, |
| { |
| "epoch": 0.7843137254901961, |
| "grad_norm": 0.035125475376844406, |
| "learning_rate": 8.405684684968383e-06, |
| "loss": 0.0005730021744966507, |
| "memory(GiB)": 160.86, |
| "step": 2140, |
| "token_acc": 0.9998315647633484, |
| "train_speed(iter/s)": 0.035992 |
| }, |
| { |
| "epoch": 0.7861462341946124, |
| "grad_norm": 0.06994622200727463, |
| "learning_rate": 8.398652096373566e-06, |
| "loss": 0.0003744778921827674, |
| "memory(GiB)": 160.86, |
| "step": 2145, |
| "token_acc": 0.9999157894736842, |
| "train_speed(iter/s)": 0.03601 |
| }, |
| { |
| "epoch": 0.7879787428990288, |
| "grad_norm": 0.006813399959355593, |
| "learning_rate": 8.39160698746853e-06, |
| "loss": 0.0007882724516093731, |
| "memory(GiB)": 160.86, |
| "step": 2150, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.036027 |
| }, |
| { |
| "epoch": 0.7898112516034451, |
| "grad_norm": 0.20248223841190338, |
| "learning_rate": 8.38454938420679e-06, |
| "loss": 0.00029504401609301565, |
| "memory(GiB)": 160.86, |
| "step": 2155, |
| "token_acc": 0.9999157823816742, |
| "train_speed(iter/s)": 0.036045 |
| }, |
| { |
| "epoch": 0.7916437603078614, |
| "grad_norm": 0.10259495675563812, |
| "learning_rate": 8.37747931258788e-06, |
| "loss": 0.0013766267336905002, |
| "memory(GiB)": 160.86, |
| "step": 2160, |
| "token_acc": 0.9995792662403231, |
| "train_speed(iter/s)": 0.036063 |
| }, |
| { |
| "epoch": 0.7934762690122779, |
| "grad_norm": 0.022682547569274902, |
| "learning_rate": 8.370396798657269e-06, |
| "loss": 0.0003458364633843303, |
| "memory(GiB)": 160.86, |
| "step": 2165, |
| "token_acc": 0.9999158532480646, |
| "train_speed(iter/s)": 0.036081 |
| }, |
| { |
| "epoch": 0.7953087777166942, |
| "grad_norm": 0.05654159560799599, |
| "learning_rate": 8.363301868506264e-06, |
| "loss": 0.0008417519740760327, |
| "memory(GiB)": 160.86, |
| "step": 2170, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.036099 |
| }, |
| { |
| "epoch": 0.7971412864211105, |
| "grad_norm": 0.010379817336797714, |
| "learning_rate": 8.35619454827191e-06, |
| "loss": 0.00014047393342480062, |
| "memory(GiB)": 160.86, |
| "step": 2175, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036117 |
| }, |
| { |
| "epoch": 0.7989737951255268, |
| "grad_norm": 0.002908756723627448, |
| "learning_rate": 8.349074864136897e-06, |
| "loss": 0.0010122337378561496, |
| "memory(GiB)": 160.86, |
| "step": 2180, |
| "token_acc": 0.9995790891489182, |
| "train_speed(iter/s)": 0.036134 |
| }, |
| { |
| "epoch": 0.8008063038299432, |
| "grad_norm": 0.015968699008226395, |
| "learning_rate": 8.341942842329465e-06, |
| "loss": 0.0010151905938982964, |
| "memory(GiB)": 160.86, |
| "step": 2185, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.036152 |
| }, |
| { |
| "epoch": 0.8026388125343595, |
| "grad_norm": 0.02950908988714218, |
| "learning_rate": 8.3347985091233e-06, |
| "loss": 0.0006167484447360039, |
| "memory(GiB)": 160.86, |
| "step": 2190, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.036169 |
| }, |
| { |
| "epoch": 0.8044713212387758, |
| "grad_norm": 0.004527771379798651, |
| "learning_rate": 8.327641890837443e-06, |
| "loss": 0.0001240343088284135, |
| "memory(GiB)": 160.86, |
| "step": 2195, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036187 |
| }, |
| { |
| "epoch": 0.8063038299431923, |
| "grad_norm": 0.09493066370487213, |
| "learning_rate": 8.320473013836197e-06, |
| "loss": 0.0003447512863203883, |
| "memory(GiB)": 160.86, |
| "step": 2200, |
| "token_acc": 0.9999158603281447, |
| "train_speed(iter/s)": 0.036205 |
| }, |
| { |
| "epoch": 0.8081363386476086, |
| "grad_norm": 0.016084903851151466, |
| "learning_rate": 8.313291904529018e-06, |
| "loss": 0.0009649941697716713, |
| "memory(GiB)": 160.86, |
| "step": 2205, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.036222 |
| }, |
| { |
| "epoch": 0.8099688473520249, |
| "grad_norm": 0.05419844388961792, |
| "learning_rate": 8.306098589370427e-06, |
| "loss": 0.0005068023223429918, |
| "memory(GiB)": 160.86, |
| "step": 2210, |
| "token_acc": 0.9998317206562894, |
| "train_speed(iter/s)": 0.036239 |
| }, |
| { |
| "epoch": 0.8118013560564412, |
| "grad_norm": 0.12476948648691177, |
| "learning_rate": 8.298893094859916e-06, |
| "loss": 0.0009864597581326962, |
| "memory(GiB)": 160.86, |
| "step": 2215, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.036257 |
| }, |
| { |
| "epoch": 0.8136338647608576, |
| "grad_norm": 0.06563253700733185, |
| "learning_rate": 8.291675447541834e-06, |
| "loss": 0.000346578611060977, |
| "memory(GiB)": 160.86, |
| "step": 2220, |
| "token_acc": 0.999831734814067, |
| "train_speed(iter/s)": 0.036274 |
| }, |
| { |
| "epoch": 0.815466373465274, |
| "grad_norm": 0.0007064275559969246, |
| "learning_rate": 8.28444567400531e-06, |
| "loss": 0.0002860090462490916, |
| "memory(GiB)": 160.86, |
| "step": 2225, |
| "token_acc": 0.9998316073082428, |
| "train_speed(iter/s)": 0.03629 |
| }, |
| { |
| "epoch": 0.8172988821696903, |
| "grad_norm": 0.06441126018762589, |
| "learning_rate": 8.277203800884137e-06, |
| "loss": 0.0004928476177155971, |
| "memory(GiB)": 160.86, |
| "step": 2230, |
| "token_acc": 0.9999158036541214, |
| "train_speed(iter/s)": 0.036307 |
| }, |
| { |
| "epoch": 0.8191313908741067, |
| "grad_norm": 0.07549826800823212, |
| "learning_rate": 8.269949854856687e-06, |
| "loss": 0.0014977409504354, |
| "memory(GiB)": 160.86, |
| "step": 2235, |
| "token_acc": 0.9997476022211005, |
| "train_speed(iter/s)": 0.036324 |
| }, |
| { |
| "epoch": 0.820963899578523, |
| "grad_norm": 0.02339329943060875, |
| "learning_rate": 8.262683862645804e-06, |
| "loss": 0.00037619960494339466, |
| "memory(GiB)": 160.86, |
| "step": 2240, |
| "token_acc": 0.9998315221969506, |
| "train_speed(iter/s)": 0.036341 |
| }, |
| { |
| "epoch": 0.8227964082829393, |
| "grad_norm": 0.013340925797820091, |
| "learning_rate": 8.255405851018713e-06, |
| "loss": 0.0004039745777845383, |
| "memory(GiB)": 160.86, |
| "step": 2245, |
| "token_acc": 0.9999158249158249, |
| "train_speed(iter/s)": 0.036358 |
| }, |
| { |
| "epoch": 0.8246289169873557, |
| "grad_norm": 0.1738908737897873, |
| "learning_rate": 8.24811584678691e-06, |
| "loss": 0.0009243869222700596, |
| "memory(GiB)": 160.86, |
| "step": 2250, |
| "token_acc": 0.999663356337317, |
| "train_speed(iter/s)": 0.036375 |
| }, |
| { |
| "epoch": 0.8264614256917721, |
| "grad_norm": 0.1292845755815506, |
| "learning_rate": 8.24081387680608e-06, |
| "loss": 0.0004229114390909672, |
| "memory(GiB)": 160.86, |
| "step": 2255, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.036392 |
| }, |
| { |
| "epoch": 0.8282939343961884, |
| "grad_norm": 0.03298277407884598, |
| "learning_rate": 8.233499967975981e-06, |
| "loss": 0.0003614515298977494, |
| "memory(GiB)": 160.86, |
| "step": 2260, |
| "token_acc": 0.9999158674070335, |
| "train_speed(iter/s)": 0.036406 |
| }, |
| { |
| "epoch": 0.8301264431006047, |
| "grad_norm": 0.0037736741360276937, |
| "learning_rate": 8.226174147240359e-06, |
| "loss": 0.0006478279829025269, |
| "memory(GiB)": 160.86, |
| "step": 2265, |
| "token_acc": 0.9998315363881402, |
| "train_speed(iter/s)": 0.036422 |
| }, |
| { |
| "epoch": 0.8319589518050211, |
| "grad_norm": 0.010557832196354866, |
| "learning_rate": 8.218836441586834e-06, |
| "loss": 0.0005696366541087627, |
| "memory(GiB)": 160.86, |
| "step": 2270, |
| "token_acc": 0.9998317064961293, |
| "train_speed(iter/s)": 0.036439 |
| }, |
| { |
| "epoch": 0.8337914605094374, |
| "grad_norm": 0.003406501142308116, |
| "learning_rate": 8.211486878046819e-06, |
| "loss": 0.0006424786522984504, |
| "memory(GiB)": 160.86, |
| "step": 2275, |
| "token_acc": 0.9998316498316498, |
| "train_speed(iter/s)": 0.036454 |
| }, |
| { |
| "epoch": 0.8356239692138537, |
| "grad_norm": 0.0992351546883583, |
| "learning_rate": 8.204125483695403e-06, |
| "loss": 0.0005788296461105346, |
| "memory(GiB)": 160.86, |
| "step": 2280, |
| "token_acc": 0.9998317206562894, |
| "train_speed(iter/s)": 0.036471 |
| }, |
| { |
| "epoch": 0.8374564779182702, |
| "grad_norm": 0.010372207500040531, |
| "learning_rate": 8.196752285651261e-06, |
| "loss": 0.00029938730876892804, |
| "memory(GiB)": 160.86, |
| "step": 2285, |
| "token_acc": 0.9999157469036987, |
| "train_speed(iter/s)": 0.036487 |
| }, |
| { |
| "epoch": 0.8392889866226865, |
| "grad_norm": 0.0683954581618309, |
| "learning_rate": 8.189367311076551e-06, |
| "loss": 0.0007511110045015812, |
| "memory(GiB)": 160.86, |
| "step": 2290, |
| "token_acc": 0.9998317489694625, |
| "train_speed(iter/s)": 0.036504 |
| }, |
| { |
| "epoch": 0.8411214953271028, |
| "grad_norm": 0.006293443962931633, |
| "learning_rate": 8.181970587176814e-06, |
| "loss": 0.0003692630911245942, |
| "memory(GiB)": 160.86, |
| "step": 2295, |
| "token_acc": 0.9997475809844342, |
| "train_speed(iter/s)": 0.03652 |
| }, |
| { |
| "epoch": 0.8429540040315191, |
| "grad_norm": 0.006763943005353212, |
| "learning_rate": 8.174562141200878e-06, |
| "loss": 0.0002094252035021782, |
| "memory(GiB)": 160.86, |
| "step": 2300, |
| "token_acc": 0.9999158036541214, |
| "train_speed(iter/s)": 0.036535 |
| }, |
| { |
| "epoch": 0.8447865127359355, |
| "grad_norm": 0.04695817828178406, |
| "learning_rate": 8.167142000440749e-06, |
| "loss": 0.0005172740202397108, |
| "memory(GiB)": 160.86, |
| "step": 2305, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.036551 |
| }, |
| { |
| "epoch": 0.8466190214403518, |
| "grad_norm": 0.026909319683909416, |
| "learning_rate": 8.15971019223152e-06, |
| "loss": 0.00024677792098373177, |
| "memory(GiB)": 160.86, |
| "step": 2310, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036567 |
| }, |
| { |
| "epoch": 0.8484515301447682, |
| "grad_norm": 0.0009972673142328858, |
| "learning_rate": 8.152266743951264e-06, |
| "loss": 0.00048431595787405967, |
| "memory(GiB)": 160.86, |
| "step": 2315, |
| "token_acc": 0.9999157752884696, |
| "train_speed(iter/s)": 0.036583 |
| }, |
| { |
| "epoch": 0.8502840388491846, |
| "grad_norm": 0.1550913155078888, |
| "learning_rate": 8.144811683020932e-06, |
| "loss": 0.00014740382321178913, |
| "memory(GiB)": 160.86, |
| "step": 2320, |
| "token_acc": 0.9999158178297837, |
| "train_speed(iter/s)": 0.036599 |
| }, |
| { |
| "epoch": 0.8521165475536009, |
| "grad_norm": 0.04358501732349396, |
| "learning_rate": 8.13734503690426e-06, |
| "loss": 0.0010699840262532235, |
| "memory(GiB)": 160.86, |
| "step": 2325, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.036612 |
| }, |
| { |
| "epoch": 0.8539490562580172, |
| "grad_norm": 0.002750721760094166, |
| "learning_rate": 8.12986683310766e-06, |
| "loss": 0.0002569463336840272, |
| "memory(GiB)": 160.86, |
| "step": 2330, |
| "token_acc": 0.9999158390843292, |
| "train_speed(iter/s)": 0.036627 |
| }, |
| { |
| "epoch": 0.8557815649624335, |
| "grad_norm": 0.010151500813663006, |
| "learning_rate": 8.12237709918012e-06, |
| "loss": 0.00014050663448870183, |
| "memory(GiB)": 160.86, |
| "step": 2335, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036644 |
| }, |
| { |
| "epoch": 0.8576140736668499, |
| "grad_norm": 0.004389213863760233, |
| "learning_rate": 8.114875862713107e-06, |
| "loss": 5.258661694824695e-05, |
| "memory(GiB)": 160.86, |
| "step": 2340, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036659 |
| }, |
| { |
| "epoch": 0.8594465823712663, |
| "grad_norm": 0.004478363320231438, |
| "learning_rate": 8.10736315134046e-06, |
| "loss": 0.0017528504133224488, |
| "memory(GiB)": 160.86, |
| "step": 2345, |
| "token_acc": 0.9996633846671716, |
| "train_speed(iter/s)": 0.036675 |
| }, |
| { |
| "epoch": 0.8612790910756826, |
| "grad_norm": 0.004733589943498373, |
| "learning_rate": 8.099838992738292e-06, |
| "loss": 0.0013998121954500674, |
| "memory(GiB)": 160.86, |
| "step": 2350, |
| "token_acc": 0.9994953318193288, |
| "train_speed(iter/s)": 0.03669 |
| }, |
| { |
| "epoch": 0.863111599780099, |
| "grad_norm": 0.00977323018014431, |
| "learning_rate": 8.092303414624884e-06, |
| "loss": 0.00046326019801199434, |
| "memory(GiB)": 160.86, |
| "step": 2355, |
| "token_acc": 0.9998316781686585, |
| "train_speed(iter/s)": 0.036705 |
| }, |
| { |
| "epoch": 0.8649441084845153, |
| "grad_norm": 0.04947784170508385, |
| "learning_rate": 8.08475644476059e-06, |
| "loss": 0.0001862859120592475, |
| "memory(GiB)": 160.86, |
| "step": 2360, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036721 |
| }, |
| { |
| "epoch": 0.8667766171889316, |
| "grad_norm": 0.21693383157253265, |
| "learning_rate": 8.077198110947725e-06, |
| "loss": 0.0009612908586859703, |
| "memory(GiB)": 160.86, |
| "step": 2365, |
| "token_acc": 0.9998316923335858, |
| "train_speed(iter/s)": 0.036737 |
| }, |
| { |
| "epoch": 0.868609125893348, |
| "grad_norm": 0.023295719176530838, |
| "learning_rate": 8.069628441030472e-06, |
| "loss": 0.0004069589078426361, |
| "memory(GiB)": 160.86, |
| "step": 2370, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.036697 |
| }, |
| { |
| "epoch": 0.8704416345977644, |
| "grad_norm": 0.06745916604995728, |
| "learning_rate": 8.062047462894771e-06, |
| "loss": 0.0006006782408803701, |
| "memory(GiB)": 160.86, |
| "step": 2375, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.036712 |
| }, |
| { |
| "epoch": 0.8722741433021807, |
| "grad_norm": 0.05341252312064171, |
| "learning_rate": 8.054455204468225e-06, |
| "loss": 0.000835646316409111, |
| "memory(GiB)": 160.86, |
| "step": 2380, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.036728 |
| }, |
| { |
| "epoch": 0.874106652006597, |
| "grad_norm": 0.01815791241824627, |
| "learning_rate": 8.046851693719986e-06, |
| "loss": 0.00021557288710027933, |
| "memory(GiB)": 160.86, |
| "step": 2385, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036743 |
| }, |
| { |
| "epoch": 0.8759391607110134, |
| "grad_norm": 0.0018982563633471727, |
| "learning_rate": 8.039236958660666e-06, |
| "loss": 0.00010541609954088927, |
| "memory(GiB)": 160.86, |
| "step": 2390, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036759 |
| }, |
| { |
| "epoch": 0.8777716694154297, |
| "grad_norm": 0.0008025880670174956, |
| "learning_rate": 8.031611027342221e-06, |
| "loss": 0.00029539645183831455, |
| "memory(GiB)": 160.86, |
| "step": 2395, |
| "token_acc": 0.9998317631224765, |
| "train_speed(iter/s)": 0.036774 |
| }, |
| { |
| "epoch": 0.879604178119846, |
| "grad_norm": 0.02493736520409584, |
| "learning_rate": 8.023973927857857e-06, |
| "loss": 0.0010729983448982238, |
| "memory(GiB)": 160.86, |
| "step": 2400, |
| "token_acc": 0.9997475172529877, |
| "train_speed(iter/s)": 0.036789 |
| }, |
| { |
| "epoch": 0.8814366868242625, |
| "grad_norm": 0.23594622313976288, |
| "learning_rate": 8.016325688341919e-06, |
| "loss": 0.0005186852067708969, |
| "memory(GiB)": 160.86, |
| "step": 2405, |
| "token_acc": 0.9998316923335858, |
| "train_speed(iter/s)": 0.036805 |
| }, |
| { |
| "epoch": 0.8832691955286788, |
| "grad_norm": 0.014162681996822357, |
| "learning_rate": 8.00866633696979e-06, |
| "loss": 0.00019059464102610946, |
| "memory(GiB)": 160.86, |
| "step": 2410, |
| "token_acc": 0.9999158390843292, |
| "train_speed(iter/s)": 0.03682 |
| }, |
| { |
| "epoch": 0.8851017042330951, |
| "grad_norm": 0.04650455340743065, |
| "learning_rate": 8.000995901957792e-06, |
| "loss": 0.0004015204031020403, |
| "memory(GiB)": 160.86, |
| "step": 2415, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.036835 |
| }, |
| { |
| "epoch": 0.8869342129375114, |
| "grad_norm": 0.04503090679645538, |
| "learning_rate": 7.993314411563075e-06, |
| "loss": 0.0006881221663206816, |
| "memory(GiB)": 160.86, |
| "step": 2420, |
| "token_acc": 0.9997475172529877, |
| "train_speed(iter/s)": 0.03685 |
| }, |
| { |
| "epoch": 0.8887667216419278, |
| "grad_norm": 0.008592194877564907, |
| "learning_rate": 7.98562189408352e-06, |
| "loss": 0.0002544657327234745, |
| "memory(GiB)": 160.86, |
| "step": 2425, |
| "token_acc": 0.9999158107425492, |
| "train_speed(iter/s)": 0.036865 |
| }, |
| { |
| "epoch": 0.8905992303463441, |
| "grad_norm": 0.04029720276594162, |
| "learning_rate": 7.977918377857625e-06, |
| "loss": 0.0004797634668648243, |
| "memory(GiB)": 160.86, |
| "step": 2430, |
| "token_acc": 0.9998316498316498, |
| "train_speed(iter/s)": 0.03688 |
| }, |
| { |
| "epoch": 0.8924317390507605, |
| "grad_norm": 0.012428953312337399, |
| "learning_rate": 7.970203891264408e-06, |
| "loss": 0.00046463338658213614, |
| "memory(GiB)": 160.86, |
| "step": 2435, |
| "token_acc": 0.9998317631224765, |
| "train_speed(iter/s)": 0.036895 |
| }, |
| { |
| "epoch": 0.8942642477551769, |
| "grad_norm": 0.1128624677658081, |
| "learning_rate": 7.962478462723306e-06, |
| "loss": 0.000577373243868351, |
| "memory(GiB)": 160.86, |
| "step": 2440, |
| "token_acc": 0.9998316214850985, |
| "train_speed(iter/s)": 0.03691 |
| }, |
| { |
| "epoch": 0.8960967564595932, |
| "grad_norm": 0.005943561438471079, |
| "learning_rate": 7.954742120694059e-06, |
| "loss": 0.0005296251736581325, |
| "memory(GiB)": 160.86, |
| "step": 2445, |
| "token_acc": 0.9998317489694625, |
| "train_speed(iter/s)": 0.036925 |
| }, |
| { |
| "epoch": 0.8979292651640095, |
| "grad_norm": 0.014219972304999828, |
| "learning_rate": 7.946994893676611e-06, |
| "loss": 5.174783291295171e-05, |
| "memory(GiB)": 160.86, |
| "step": 2450, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.03694 |
| }, |
| { |
| "epoch": 0.8997617738684259, |
| "grad_norm": 0.01472583319991827, |
| "learning_rate": 7.93923681021101e-06, |
| "loss": 0.0009220579639077186, |
| "memory(GiB)": 160.86, |
| "step": 2455, |
| "token_acc": 0.9996634413125789, |
| "train_speed(iter/s)": 0.036954 |
| }, |
| { |
| "epoch": 0.9015942825728422, |
| "grad_norm": 0.0020888156723231077, |
| "learning_rate": 7.931467898877298e-06, |
| "loss": 0.0004309060052037239, |
| "memory(GiB)": 160.86, |
| "step": 2460, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.036969 |
| }, |
| { |
| "epoch": 0.9034267912772586, |
| "grad_norm": 0.054128147661685944, |
| "learning_rate": 7.9236881882954e-06, |
| "loss": 0.00036832981277257204, |
| "memory(GiB)": 160.86, |
| "step": 2465, |
| "token_acc": 0.9999157752884696, |
| "train_speed(iter/s)": 0.036983 |
| }, |
| { |
| "epoch": 0.9052592999816749, |
| "grad_norm": 0.009187346324324608, |
| "learning_rate": 7.915897707125027e-06, |
| "loss": 0.0009874864481389523, |
| "memory(GiB)": 160.86, |
| "step": 2470, |
| "token_acc": 0.9996633280026934, |
| "train_speed(iter/s)": 0.036998 |
| }, |
| { |
| "epoch": 0.9070918086860913, |
| "grad_norm": 0.015212767757475376, |
| "learning_rate": 7.908096484065569e-06, |
| "loss": 0.00035822123754769564, |
| "memory(GiB)": 160.86, |
| "step": 2475, |
| "token_acc": 0.9998317206562894, |
| "train_speed(iter/s)": 0.037012 |
| }, |
| { |
| "epoch": 0.9089243173905076, |
| "grad_norm": 0.028434082865715027, |
| "learning_rate": 7.900284547855992e-06, |
| "loss": 0.00033626847434788945, |
| "memory(GiB)": 160.86, |
| "step": 2480, |
| "token_acc": 0.9999158178297837, |
| "train_speed(iter/s)": 0.037027 |
| }, |
| { |
| "epoch": 0.9107568260949239, |
| "grad_norm": 0.003858706448227167, |
| "learning_rate": 7.892461927274719e-06, |
| "loss": 0.00038427968975156545, |
| "memory(GiB)": 160.86, |
| "step": 2485, |
| "token_acc": 0.9998316923335858, |
| "train_speed(iter/s)": 0.037041 |
| }, |
| { |
| "epoch": 0.9125893347993403, |
| "grad_norm": 0.028237823396921158, |
| "learning_rate": 7.884628651139543e-06, |
| "loss": 0.0008647294715046882, |
| "memory(GiB)": 160.86, |
| "step": 2490, |
| "token_acc": 0.9995789119083712, |
| "train_speed(iter/s)": 0.037056 |
| }, |
| { |
| "epoch": 0.9144218435037567, |
| "grad_norm": 0.014561748132109642, |
| "learning_rate": 7.876784748307502e-06, |
| "loss": 8.994525414891541e-05, |
| "memory(GiB)": 160.86, |
| "step": 2495, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.03707 |
| }, |
| { |
| "epoch": 0.916254352208173, |
| "grad_norm": 0.011074830777943134, |
| "learning_rate": 7.868930247674787e-06, |
| "loss": 0.0002087874570861459, |
| "memory(GiB)": 160.86, |
| "step": 2500, |
| "token_acc": 0.9999158107425492, |
| "train_speed(iter/s)": 0.037084 |
| }, |
| { |
| "epoch": 0.916254352208173, |
| "eval_loss": 0.0007594987982884049, |
| "eval_runtime": 172.1874, |
| "eval_samples_per_second": 2.555, |
| "eval_steps_per_second": 2.555, |
| "eval_token_acc": 0.9997704421284606, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9180868609125893, |
| "grad_norm": 0.04182349890470505, |
| "learning_rate": 7.86106517817663e-06, |
| "loss": 0.00022406417410820724, |
| "memory(GiB)": 160.86, |
| "step": 2505, |
| "token_acc": 0.9997928078422231, |
| "train_speed(iter/s)": 0.036773 |
| }, |
| { |
| "epoch": 0.9199193696170057, |
| "grad_norm": 0.010813858360052109, |
| "learning_rate": 7.8531895687872e-06, |
| "loss": 0.0001518705626949668, |
| "memory(GiB)": 160.86, |
| "step": 2510, |
| "token_acc": 0.9999158603281447, |
| "train_speed(iter/s)": 0.036788 |
| }, |
| { |
| "epoch": 0.921751878321422, |
| "grad_norm": 4.607898881658912e-05, |
| "learning_rate": 7.845303448519486e-06, |
| "loss": 0.0005594564136117697, |
| "memory(GiB)": 160.86, |
| "step": 2515, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.036802 |
| }, |
| { |
| "epoch": 0.9235843870258383, |
| "grad_norm": 0.059696584939956665, |
| "learning_rate": 7.837406846425205e-06, |
| "loss": 0.0005560083314776421, |
| "memory(GiB)": 160.86, |
| "step": 2520, |
| "token_acc": 0.9994947368421052, |
| "train_speed(iter/s)": 0.036817 |
| }, |
| { |
| "epoch": 0.9254168957302548, |
| "grad_norm": 0.1952117681503296, |
| "learning_rate": 7.829499791594684e-06, |
| "loss": 0.0007309889886528253, |
| "memory(GiB)": 160.86, |
| "step": 2525, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.036831 |
| }, |
| { |
| "epoch": 0.9272494044346711, |
| "grad_norm": 0.005678711924701929, |
| "learning_rate": 7.821582313156763e-06, |
| "loss": 0.00012894930550828577, |
| "memory(GiB)": 160.86, |
| "step": 2530, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036845 |
| }, |
| { |
| "epoch": 0.9290819131390874, |
| "grad_norm": 0.0016558946808800101, |
| "learning_rate": 7.813654440278677e-06, |
| "loss": 0.0004136775154620409, |
| "memory(GiB)": 160.86, |
| "step": 2535, |
| "token_acc": 0.9998317206562894, |
| "train_speed(iter/s)": 0.036859 |
| }, |
| { |
| "epoch": 0.9309144218435037, |
| "grad_norm": 0.0007809648523107171, |
| "learning_rate": 7.805716202165949e-06, |
| "loss": 4.669466288760304e-05, |
| "memory(GiB)": 160.86, |
| "step": 2540, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036873 |
| }, |
| { |
| "epoch": 0.9327469305479201, |
| "grad_norm": 0.0005511490162461996, |
| "learning_rate": 7.797767628062296e-06, |
| "loss": 2.539183187764138e-05, |
| "memory(GiB)": 160.86, |
| "step": 2545, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036887 |
| }, |
| { |
| "epoch": 0.9345794392523364, |
| "grad_norm": 0.008907792158424854, |
| "learning_rate": 7.789808747249505e-06, |
| "loss": 8.047035662457347e-05, |
| "memory(GiB)": 160.86, |
| "step": 2550, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036901 |
| }, |
| { |
| "epoch": 0.9364119479567528, |
| "grad_norm": 0.16766001284122467, |
| "learning_rate": 7.781839589047336e-06, |
| "loss": 0.001341984234750271, |
| "memory(GiB)": 160.86, |
| "step": 2555, |
| "token_acc": 0.9997474960020201, |
| "train_speed(iter/s)": 0.036915 |
| }, |
| { |
| "epoch": 0.9382444566611692, |
| "grad_norm": 0.0007593165501020849, |
| "learning_rate": 7.773860182813404e-06, |
| "loss": 6.514263805001974e-05, |
| "memory(GiB)": 160.86, |
| "step": 2560, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.036929 |
| }, |
| { |
| "epoch": 0.9400769653655855, |
| "grad_norm": 0.02255651168525219, |
| "learning_rate": 7.765870557943083e-06, |
| "loss": 0.0009576915763318539, |
| "memory(GiB)": 160.86, |
| "step": 2565, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.036943 |
| }, |
| { |
| "epoch": 0.9419094740700018, |
| "grad_norm": 0.04713983088731766, |
| "learning_rate": 7.75787074386939e-06, |
| "loss": 0.0006936299148947, |
| "memory(GiB)": 160.86, |
| "step": 2570, |
| "token_acc": 0.9997474322276477, |
| "train_speed(iter/s)": 0.036957 |
| }, |
| { |
| "epoch": 0.9437419827744182, |
| "grad_norm": 0.038788143545389175, |
| "learning_rate": 7.749860770062874e-06, |
| "loss": 0.0007801173254847526, |
| "memory(GiB)": 160.86, |
| "step": 2575, |
| "token_acc": 0.9998316073082428, |
| "train_speed(iter/s)": 0.036971 |
| }, |
| { |
| "epoch": 0.9455744914788345, |
| "grad_norm": 0.026828216388821602, |
| "learning_rate": 7.741840666031517e-06, |
| "loss": 0.0009264941327273846, |
| "memory(GiB)": 160.86, |
| "step": 2580, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.036984 |
| }, |
| { |
| "epoch": 0.9474070001832509, |
| "grad_norm": 0.03660447522997856, |
| "learning_rate": 7.733810461320619e-06, |
| "loss": 0.0004160061478614807, |
| "memory(GiB)": 160.86, |
| "step": 2585, |
| "token_acc": 0.9998317064961293, |
| "train_speed(iter/s)": 0.036998 |
| }, |
| { |
| "epoch": 0.9492395088876672, |
| "grad_norm": 0.004005759488791227, |
| "learning_rate": 7.725770185512685e-06, |
| "loss": 0.00036098186392337085, |
| "memory(GiB)": 160.86, |
| "step": 2590, |
| "token_acc": 0.9999157752884696, |
| "train_speed(iter/s)": 0.037012 |
| }, |
| { |
| "epoch": 0.9510720175920836, |
| "grad_norm": 0.0006123992498032749, |
| "learning_rate": 7.717719868227327e-06, |
| "loss": 0.0003307197941467166, |
| "memory(GiB)": 160.86, |
| "step": 2595, |
| "token_acc": 0.9999158320006734, |
| "train_speed(iter/s)": 0.037025 |
| }, |
| { |
| "epoch": 0.9529045262964999, |
| "grad_norm": 0.029207419604063034, |
| "learning_rate": 7.709659539121144e-06, |
| "loss": 7.62599753215909e-05, |
| "memory(GiB)": 160.86, |
| "step": 2600, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.037039 |
| }, |
| { |
| "epoch": 0.9547370350009162, |
| "grad_norm": 0.03443612530827522, |
| "learning_rate": 7.70158922788762e-06, |
| "loss": 0.00035016366746276617, |
| "memory(GiB)": 160.86, |
| "step": 2605, |
| "token_acc": 0.9999158461667929, |
| "train_speed(iter/s)": 0.037052 |
| }, |
| { |
| "epoch": 0.9565695437053326, |
| "grad_norm": 0.020582979544997215, |
| "learning_rate": 7.693508964257015e-06, |
| "loss": 0.0006867663934826851, |
| "memory(GiB)": 160.86, |
| "step": 2610, |
| "token_acc": 0.9999158320006734, |
| "train_speed(iter/s)": 0.037066 |
| }, |
| { |
| "epoch": 0.958402052409749, |
| "grad_norm": 0.010320069268345833, |
| "learning_rate": 7.685418777996245e-06, |
| "loss": 0.0002992436056956649, |
| "memory(GiB)": 160.86, |
| "step": 2615, |
| "token_acc": 0.9998317064961293, |
| "train_speed(iter/s)": 0.037079 |
| }, |
| { |
| "epoch": 0.9602345611141653, |
| "grad_norm": 0.06350167840719223, |
| "learning_rate": 7.677318698908788e-06, |
| "loss": 0.0014985553920269013, |
| "memory(GiB)": 160.86, |
| "step": 2620, |
| "token_acc": 0.9995792662403231, |
| "train_speed(iter/s)": 0.037092 |
| }, |
| { |
| "epoch": 0.9620670698185816, |
| "grad_norm": 0.0018099630251526833, |
| "learning_rate": 7.669208756834563e-06, |
| "loss": 0.0006455457769334316, |
| "memory(GiB)": 160.86, |
| "step": 2625, |
| "token_acc": 0.9997474109623642, |
| "train_speed(iter/s)": 0.037106 |
| }, |
| { |
| "epoch": 0.963899578522998, |
| "grad_norm": 0.02232094667851925, |
| "learning_rate": 7.66108898164982e-06, |
| "loss": 0.0005441450979560613, |
| "memory(GiB)": 160.86, |
| "step": 2630, |
| "token_acc": 0.9998316923335858, |
| "train_speed(iter/s)": 0.037119 |
| }, |
| { |
| "epoch": 0.9657320872274143, |
| "grad_norm": 0.08803337812423706, |
| "learning_rate": 7.65295940326704e-06, |
| "loss": 0.00035574983339756725, |
| "memory(GiB)": 160.86, |
| "step": 2635, |
| "token_acc": 0.9998316498316498, |
| "train_speed(iter/s)": 0.037132 |
| }, |
| { |
| "epoch": 0.9675645959318306, |
| "grad_norm": 0.003819872625172138, |
| "learning_rate": 7.644820051634813e-06, |
| "loss": 0.0005564328283071518, |
| "memory(GiB)": 160.86, |
| "step": 2640, |
| "token_acc": 0.9998315363881402, |
| "train_speed(iter/s)": 0.037146 |
| }, |
| { |
| "epoch": 0.9693971046362471, |
| "grad_norm": 0.012264705263078213, |
| "learning_rate": 7.636670956737735e-06, |
| "loss": 0.0008389626629650593, |
| "memory(GiB)": 160.86, |
| "step": 2645, |
| "token_acc": 0.9995793016407236, |
| "train_speed(iter/s)": 0.037159 |
| }, |
| { |
| "epoch": 0.9712296133406634, |
| "grad_norm": 0.012444542720913887, |
| "learning_rate": 7.628512148596292e-06, |
| "loss": 0.0002988637425005436, |
| "memory(GiB)": 160.86, |
| "step": 2650, |
| "token_acc": 0.9999158178297837, |
| "train_speed(iter/s)": 0.037172 |
| }, |
| { |
| "epoch": 0.9730621220450797, |
| "grad_norm": 0.04613952711224556, |
| "learning_rate": 7.620343657266758e-06, |
| "loss": 0.0006712310016155243, |
| "memory(GiB)": 160.86, |
| "step": 2655, |
| "token_acc": 0.9997473896934995, |
| "train_speed(iter/s)": 0.037185 |
| }, |
| { |
| "epoch": 0.974894630749496, |
| "grad_norm": 0.009678124450147152, |
| "learning_rate": 7.612165512841076e-06, |
| "loss": 0.0002654188079759479, |
| "memory(GiB)": 160.86, |
| "step": 2660, |
| "token_acc": 0.9999158320006734, |
| "train_speed(iter/s)": 0.037198 |
| }, |
| { |
| "epoch": 0.9767271394539124, |
| "grad_norm": 0.10645924508571625, |
| "learning_rate": 7.603977745446749e-06, |
| "loss": 0.0006820098031312227, |
| "memory(GiB)": 160.86, |
| "step": 2665, |
| "token_acc": 0.999578947368421, |
| "train_speed(iter/s)": 0.037212 |
| }, |
| { |
| "epoch": 0.9785596481583287, |
| "grad_norm": 0.052510544657707214, |
| "learning_rate": 7.595780385246729e-06, |
| "loss": 0.000298806675709784, |
| "memory(GiB)": 160.86, |
| "step": 2670, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.037225 |
| }, |
| { |
| "epoch": 0.9803921568627451, |
| "grad_norm": 0.010894379578530788, |
| "learning_rate": 7.587573462439315e-06, |
| "loss": 0.0006402578670531512, |
| "memory(GiB)": 160.86, |
| "step": 2675, |
| "token_acc": 0.9996632996632997, |
| "train_speed(iter/s)": 0.037237 |
| }, |
| { |
| "epoch": 0.9822246655671615, |
| "grad_norm": 0.04109283536672592, |
| "learning_rate": 7.579357007258022e-06, |
| "loss": 0.0008437959477305412, |
| "memory(GiB)": 160.86, |
| "step": 2680, |
| "token_acc": 0.9997474534893509, |
| "train_speed(iter/s)": 0.03725 |
| }, |
| { |
| "epoch": 0.9840571742715778, |
| "grad_norm": 0.005569992121309042, |
| "learning_rate": 7.571131049971492e-06, |
| "loss": 0.00014509292086586356, |
| "memory(GiB)": 160.86, |
| "step": 2685, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.037263 |
| }, |
| { |
| "epoch": 0.9858896829759941, |
| "grad_norm": 0.03271030634641647, |
| "learning_rate": 7.562895620883364e-06, |
| "loss": 0.0003884633770212531, |
| "memory(GiB)": 160.86, |
| "step": 2690, |
| "token_acc": 0.9999158320006734, |
| "train_speed(iter/s)": 0.037276 |
| }, |
| { |
| "epoch": 0.9877221916804105, |
| "grad_norm": 0.01711997203528881, |
| "learning_rate": 7.554650750332175e-06, |
| "loss": 0.0009255507960915565, |
| "memory(GiB)": 160.86, |
| "step": 2695, |
| "token_acc": 0.9998315647633484, |
| "train_speed(iter/s)": 0.037289 |
| }, |
| { |
| "epoch": 0.9895547003848268, |
| "grad_norm": 0.02630673162639141, |
| "learning_rate": 7.546396468691241e-06, |
| "loss": 0.0005463588051497937, |
| "memory(GiB)": 160.86, |
| "step": 2700, |
| "token_acc": 0.9998316214850985, |
| "train_speed(iter/s)": 0.037302 |
| }, |
| { |
| "epoch": 0.9913872090892432, |
| "grad_norm": 0.005354244727641344, |
| "learning_rate": 7.53813280636855e-06, |
| "loss": 0.000519955437630415, |
| "memory(GiB)": 160.86, |
| "step": 2705, |
| "token_acc": 0.9999157823816742, |
| "train_speed(iter/s)": 0.037314 |
| }, |
| { |
| "epoch": 0.9932197177936595, |
| "grad_norm": 0.028666380792856216, |
| "learning_rate": 7.5298597938066446e-06, |
| "loss": 0.0007598635274916887, |
| "memory(GiB)": 160.86, |
| "step": 2710, |
| "token_acc": 0.9997474747474747, |
| "train_speed(iter/s)": 0.037327 |
| }, |
| { |
| "epoch": 0.9950522264980759, |
| "grad_norm": 0.027820078656077385, |
| "learning_rate": 7.5215774614825144e-06, |
| "loss": 0.00038032070733606815, |
| "memory(GiB)": 160.86, |
| "step": 2715, |
| "token_acc": 0.9998315931289997, |
| "train_speed(iter/s)": 0.037339 |
| }, |
| { |
| "epoch": 0.9968847352024922, |
| "grad_norm": 0.03211966156959534, |
| "learning_rate": 7.51328583990748e-06, |
| "loss": 0.0006773354019969702, |
| "memory(GiB)": 160.86, |
| "step": 2720, |
| "token_acc": 0.9996630443939011, |
| "train_speed(iter/s)": 0.037349 |
| }, |
| { |
| "epoch": 0.9987172439069085, |
| "grad_norm": 0.008736282587051392, |
| "learning_rate": 7.504984959627089e-06, |
| "loss": 0.0001820398378185928, |
| "memory(GiB)": 160.86, |
| "step": 2725, |
| "token_acc": 0.9999157894736842, |
| "train_speed(iter/s)": 0.037362 |
| }, |
| { |
| "epoch": 1.0003665017408834, |
| "grad_norm": 0.04173569008708, |
| "learning_rate": 7.4966748512209884e-06, |
| "loss": 0.00037901154719293116, |
| "memory(GiB)": 160.86, |
| "step": 2730, |
| "token_acc": 0.9998129267608269, |
| "train_speed(iter/s)": 0.037379 |
| }, |
| { |
| "epoch": 1.0021990104452996, |
| "grad_norm": 0.002946143504232168, |
| "learning_rate": 7.488355545302829e-06, |
| "loss": 0.00021834177896380426, |
| "memory(GiB)": 160.86, |
| "step": 2735, |
| "token_acc": 0.9999157965644998, |
| "train_speed(iter/s)": 0.037391 |
| }, |
| { |
| "epoch": 1.004031519149716, |
| "grad_norm": 0.020436054095625877, |
| "learning_rate": 7.480027072520137e-06, |
| "loss": 0.0004638895858079195, |
| "memory(GiB)": 160.86, |
| "step": 2740, |
| "token_acc": 0.9998317206562894, |
| "train_speed(iter/s)": 0.037403 |
| }, |
| { |
| "epoch": 1.0058640278541322, |
| "grad_norm": 0.00012372307537589222, |
| "learning_rate": 7.471689463554212e-06, |
| "loss": 0.00014013800537213684, |
| "memory(GiB)": 160.86, |
| "step": 2745, |
| "token_acc": 0.9999158320006734, |
| "train_speed(iter/s)": 0.037415 |
| }, |
| { |
| "epoch": 1.0076965365585486, |
| "grad_norm": 0.10363256931304932, |
| "learning_rate": 7.463342749120014e-06, |
| "loss": 0.0012814832851290702, |
| "memory(GiB)": 160.86, |
| "step": 2750, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.037427 |
| }, |
| { |
| "epoch": 1.009529045262965, |
| "grad_norm": 0.0360257662832737, |
| "learning_rate": 7.454986959966038e-06, |
| "loss": 0.0002859779866412282, |
| "memory(GiB)": 160.86, |
| "step": 2755, |
| "token_acc": 0.9998315363881402, |
| "train_speed(iter/s)": 0.037439 |
| }, |
| { |
| "epoch": 1.0113615539673813, |
| "grad_norm": 0.0018664754461497068, |
| "learning_rate": 7.446622126874219e-06, |
| "loss": 0.0011785308830440044, |
| "memory(GiB)": 160.86, |
| "step": 2760, |
| "token_acc": 0.999663129526697, |
| "train_speed(iter/s)": 0.037451 |
| }, |
| { |
| "epoch": 1.0131940626717977, |
| "grad_norm": 0.03385569900274277, |
| "learning_rate": 7.438248280659801e-06, |
| "loss": 0.00015975049464032054, |
| "memory(GiB)": 160.86, |
| "step": 2765, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.037463 |
| }, |
| { |
| "epoch": 1.015026571376214, |
| "grad_norm": 0.017654770985245705, |
| "learning_rate": 7.4298654521712364e-06, |
| "loss": 0.0003454319899901748, |
| "memory(GiB)": 160.86, |
| "step": 2770, |
| "token_acc": 0.9999158178297837, |
| "train_speed(iter/s)": 0.037475 |
| }, |
| { |
| "epoch": 1.0168590800806303, |
| "grad_norm": 0.05392535775899887, |
| "learning_rate": 7.4214736722900675e-06, |
| "loss": 0.0005449390038847924, |
| "memory(GiB)": 160.86, |
| "step": 2775, |
| "token_acc": 0.9997476022211005, |
| "train_speed(iter/s)": 0.037487 |
| }, |
| { |
| "epoch": 1.0186915887850467, |
| "grad_norm": 0.004342063330113888, |
| "learning_rate": 7.413072971930807e-06, |
| "loss": 0.0007950126193463803, |
| "memory(GiB)": 160.86, |
| "step": 2780, |
| "token_acc": 0.9998315647633484, |
| "train_speed(iter/s)": 0.037499 |
| }, |
| { |
| "epoch": 1.0205240974894632, |
| "grad_norm": 0.00310046155937016, |
| "learning_rate": 7.404663382040838e-06, |
| "loss": 0.0002729130210354924, |
| "memory(GiB)": 160.86, |
| "step": 2785, |
| "token_acc": 0.9999158532480646, |
| "train_speed(iter/s)": 0.03751 |
| }, |
| { |
| "epoch": 1.0223566061938794, |
| "grad_norm": 0.0021550292149186134, |
| "learning_rate": 7.396244933600285e-06, |
| "loss": 0.00016694137593731284, |
| "memory(GiB)": 160.86, |
| "step": 2790, |
| "token_acc": 0.9999158603281447, |
| "train_speed(iter/s)": 0.037522 |
| }, |
| { |
| "epoch": 1.0241891148982958, |
| "grad_norm": 0.000986380036920309, |
| "learning_rate": 7.387817657621911e-06, |
| "loss": 0.00015597309684380888, |
| "memory(GiB)": 160.86, |
| "step": 2795, |
| "token_acc": 0.9999158744847312, |
| "train_speed(iter/s)": 0.037533 |
| }, |
| { |
| "epoch": 1.0260216236027122, |
| "grad_norm": 0.001334765343926847, |
| "learning_rate": 7.379381585150997e-06, |
| "loss": 2.5839175214059652e-05, |
| "memory(GiB)": 160.86, |
| "step": 2800, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.037545 |
| }, |
| { |
| "epoch": 1.0278541323071284, |
| "grad_norm": 0.0036596362479031086, |
| "learning_rate": 7.370936747265226e-06, |
| "loss": 0.00017838862258940936, |
| "memory(GiB)": 160.86, |
| "step": 2805, |
| "token_acc": 0.9999157752884696, |
| "train_speed(iter/s)": 0.037557 |
| }, |
| { |
| "epoch": 1.0296866410115448, |
| "grad_norm": 0.04679948464035988, |
| "learning_rate": 7.36248317507458e-06, |
| "loss": 9.25394706428051e-05, |
| "memory(GiB)": 160.86, |
| "step": 2810, |
| "token_acc": 0.9999157256025619, |
| "train_speed(iter/s)": 0.037568 |
| }, |
| { |
| "epoch": 1.0315191497159613, |
| "grad_norm": 0.014712713658809662, |
| "learning_rate": 7.35402089972121e-06, |
| "loss": 0.00011562753934413195, |
| "memory(GiB)": 160.86, |
| "step": 2815, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.03758 |
| }, |
| { |
| "epoch": 1.0333516584203775, |
| "grad_norm": 2.521344504202716e-05, |
| "learning_rate": 7.345549952379334e-06, |
| "loss": 3.463000466581434e-05, |
| "memory(GiB)": 160.86, |
| "step": 2820, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.037592 |
| }, |
| { |
| "epoch": 1.0351841671247939, |
| "grad_norm": 0.24957123398780823, |
| "learning_rate": 7.337070364255112e-06, |
| "loss": 0.0008360546082258225, |
| "memory(GiB)": 160.86, |
| "step": 2825, |
| "token_acc": 0.9996632713191346, |
| "train_speed(iter/s)": 0.037604 |
| }, |
| { |
| "epoch": 1.03701667582921, |
| "grad_norm": 0.21494735777378082, |
| "learning_rate": 7.32858216658654e-06, |
| "loss": 0.0008594411425292492, |
| "memory(GiB)": 160.86, |
| "step": 2830, |
| "token_acc": 0.9999158532480646, |
| "train_speed(iter/s)": 0.037615 |
| }, |
| { |
| "epoch": 1.0388491845336265, |
| "grad_norm": 0.008956658653914928, |
| "learning_rate": 7.320085390643326e-06, |
| "loss": 0.00030957753770053385, |
| "memory(GiB)": 160.86, |
| "step": 2835, |
| "token_acc": 0.9999158532480646, |
| "train_speed(iter/s)": 0.037627 |
| }, |
| { |
| "epoch": 1.040681693238043, |
| "grad_norm": 0.002504108939319849, |
| "learning_rate": 7.311580067726783e-06, |
| "loss": 0.000167914351914078, |
| "memory(GiB)": 160.86, |
| "step": 2840, |
| "token_acc": 0.9999158886365548, |
| "train_speed(iter/s)": 0.037638 |
| }, |
| { |
| "epoch": 1.0425142019424591, |
| "grad_norm": 0.0135150495916605, |
| "learning_rate": 7.3030662291697105e-06, |
| "loss": 4.5498591498471795e-05, |
| "memory(GiB)": 160.86, |
| "step": 2845, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.03765 |
| }, |
| { |
| "epoch": 1.0443467106468756, |
| "grad_norm": 0.002792476676404476, |
| "learning_rate": 7.294543906336279e-06, |
| "loss": 0.000167688459623605, |
| "memory(GiB)": 160.86, |
| "step": 2850, |
| "token_acc": 0.9999157823816742, |
| "train_speed(iter/s)": 0.037661 |
| }, |
| { |
| "epoch": 1.046179219351292, |
| "grad_norm": 0.04909972473978996, |
| "learning_rate": 7.28601313062191e-06, |
| "loss": 0.000728160934522748, |
| "memory(GiB)": 160.86, |
| "step": 2855, |
| "token_acc": 0.9996635545462192, |
| "train_speed(iter/s)": 0.037672 |
| }, |
| { |
| "epoch": 1.0480117280557082, |
| "grad_norm": 0.002446983242407441, |
| "learning_rate": 7.27747393345317e-06, |
| "loss": 0.0003103788709267974, |
| "memory(GiB)": 160.86, |
| "step": 2860, |
| "token_acc": 0.9998317206562894, |
| "train_speed(iter/s)": 0.037684 |
| }, |
| { |
| "epoch": 1.0498442367601246, |
| "grad_norm": 0.005002601537853479, |
| "learning_rate": 7.268926346287647e-06, |
| "loss": 0.000590520678088069, |
| "memory(GiB)": 160.86, |
| "step": 2865, |
| "token_acc": 0.9998316781686585, |
| "train_speed(iter/s)": 0.037695 |
| }, |
| { |
| "epoch": 1.051676745464541, |
| "grad_norm": 0.0063280281610786915, |
| "learning_rate": 7.2603704006138365e-06, |
| "loss": 0.0006456949282437563, |
| "memory(GiB)": 160.86, |
| "step": 2870, |
| "token_acc": 0.9997474109623642, |
| "train_speed(iter/s)": 0.037707 |
| }, |
| { |
| "epoch": 1.0535092541689572, |
| "grad_norm": 0.005347462370991707, |
| "learning_rate": 7.251806127951025e-06, |
| "loss": 0.00015139146707952023, |
| "memory(GiB)": 160.86, |
| "step": 2875, |
| "token_acc": 0.9999158320006734, |
| "train_speed(iter/s)": 0.037718 |
| }, |
| { |
| "epoch": 1.0553417628733737, |
| "grad_norm": 0.005681968294084072, |
| "learning_rate": 7.243233559849179e-06, |
| "loss": 0.00019556223414838315, |
| "memory(GiB)": 160.86, |
| "step": 2880, |
| "token_acc": 0.9999158320006734, |
| "train_speed(iter/s)": 0.037729 |
| }, |
| { |
| "epoch": 1.05717427157779, |
| "grad_norm": 0.0017381316283717752, |
| "learning_rate": 7.234652727888819e-06, |
| "loss": 0.0006761848460882902, |
| "memory(GiB)": 160.86, |
| "step": 2885, |
| "token_acc": 0.9998316498316498, |
| "train_speed(iter/s)": 0.03774 |
| }, |
| { |
| "epoch": 1.0590067802822063, |
| "grad_norm": 0.012453123927116394, |
| "learning_rate": 7.226063663680915e-06, |
| "loss": 0.0005378074944019318, |
| "memory(GiB)": 160.86, |
| "step": 2890, |
| "token_acc": 0.999663356337317, |
| "train_speed(iter/s)": 0.037751 |
| }, |
| { |
| "epoch": 1.0608392889866227, |
| "grad_norm": 0.026770737022161484, |
| "learning_rate": 7.217466398866757e-06, |
| "loss": 0.0007396583911031485, |
| "memory(GiB)": 160.86, |
| "step": 2895, |
| "token_acc": 0.9997474534893509, |
| "train_speed(iter/s)": 0.037762 |
| }, |
| { |
| "epoch": 1.062671797691039, |
| "grad_norm": 0.13343772292137146, |
| "learning_rate": 7.2088609651178505e-06, |
| "loss": 0.0006303425878286361, |
| "memory(GiB)": 160.86, |
| "step": 2900, |
| "token_acc": 0.9997473045822103, |
| "train_speed(iter/s)": 0.037773 |
| }, |
| { |
| "epoch": 1.0645043063954553, |
| "grad_norm": 0.04957849159836769, |
| "learning_rate": 7.200247394135793e-06, |
| "loss": 0.0002914240350946784, |
| "memory(GiB)": 160.86, |
| "step": 2905, |
| "token_acc": 0.9999157894736842, |
| "train_speed(iter/s)": 0.037784 |
| }, |
| { |
| "epoch": 1.0663368150998718, |
| "grad_norm": 0.0030663548968732357, |
| "learning_rate": 7.191625717652158e-06, |
| "loss": 0.0006854488048702479, |
| "memory(GiB)": 160.86, |
| "step": 2910, |
| "token_acc": 0.9997475597441938, |
| "train_speed(iter/s)": 0.037795 |
| }, |
| { |
| "epoch": 1.068169323804288, |
| "grad_norm": 0.044960979372262955, |
| "learning_rate": 7.18299596742838e-06, |
| "loss": 0.0005464905872941018, |
| "memory(GiB)": 160.86, |
| "step": 2915, |
| "token_acc": 0.9998315789473684, |
| "train_speed(iter/s)": 0.037806 |
| }, |
| { |
| "epoch": 1.0700018325087044, |
| "grad_norm": 0.05764192342758179, |
| "learning_rate": 7.174358175255636e-06, |
| "loss": 0.0005072502885013819, |
| "memory(GiB)": 160.86, |
| "step": 2920, |
| "token_acc": 0.9998316640013467, |
| "train_speed(iter/s)": 0.037816 |
| }, |
| { |
| "epoch": 1.0718343412131208, |
| "grad_norm": 0.010302331298589706, |
| "learning_rate": 7.1657123729547275e-06, |
| "loss": 0.0011625357903540135, |
| "memory(GiB)": 160.86, |
| "step": 2925, |
| "token_acc": 0.9999158178297837, |
| "train_speed(iter/s)": 0.037827 |
| }, |
| { |
| "epoch": 1.073666849917537, |
| "grad_norm": 0.04408176988363266, |
| "learning_rate": 7.157058592375966e-06, |
| "loss": 0.0004973907489329576, |
| "memory(GiB)": 160.86, |
| "step": 2930, |
| "token_acc": 0.9998316498316498, |
| "train_speed(iter/s)": 0.037838 |
| }, |
| { |
| "epoch": 1.0754993586219534, |
| "grad_norm": 0.0012950595701113343, |
| "learning_rate": 7.148396865399054e-06, |
| "loss": 0.00015295968623831868, |
| "memory(GiB)": 160.86, |
| "step": 2935, |
| "token_acc": 0.9999158886365548, |
| "train_speed(iter/s)": 0.037849 |
| }, |
| { |
| "epoch": 1.0773318673263699, |
| "grad_norm": 0.032750971615314484, |
| "learning_rate": 7.1397272239329684e-06, |
| "loss": 0.0010722282342612744, |
| "memory(GiB)": 160.86, |
| "step": 2940, |
| "token_acc": 0.999663242970197, |
| "train_speed(iter/s)": 0.03786 |
| }, |
| { |
| "epoch": 1.079164376030786, |
| "grad_norm": 0.0168730691075325, |
| "learning_rate": 7.131049699915842e-06, |
| "loss": 7.366950740106404e-05, |
| "memory(GiB)": 160.86, |
| "step": 2945, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.037871 |
| }, |
| { |
| "epoch": 1.0809968847352025, |
| "grad_norm": 0.007587254513055086, |
| "learning_rate": 7.122364325314844e-06, |
| "loss": 0.0006255113985389471, |
| "memory(GiB)": 160.86, |
| "step": 2950, |
| "token_acc": 0.9999158107425492, |
| "train_speed(iter/s)": 0.037881 |
| }, |
| { |
| "epoch": 1.082829393439619, |
| "grad_norm": 0.0203808955848217, |
| "learning_rate": 7.113671132126067e-06, |
| "loss": 0.00010994931217283011, |
| "memory(GiB)": 160.86, |
| "step": 2955, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.037892 |
| }, |
| { |
| "epoch": 1.0846619021440351, |
| "grad_norm": 0.00795274693518877, |
| "learning_rate": 7.104970152374405e-06, |
| "loss": 0.00014865098055452108, |
| "memory(GiB)": 160.86, |
| "step": 2960, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.037902 |
| }, |
| { |
| "epoch": 1.0864944108484516, |
| "grad_norm": 0.005757440812885761, |
| "learning_rate": 7.09626141811344e-06, |
| "loss": 0.000553938839584589, |
| "memory(GiB)": 160.86, |
| "step": 2965, |
| "token_acc": 0.9998316923335858, |
| "train_speed(iter/s)": 0.037913 |
| }, |
| { |
| "epoch": 1.088326919552868, |
| "grad_norm": 0.010678775608539581, |
| "learning_rate": 7.087544961425317e-06, |
| "loss": 0.0004192313179373741, |
| "memory(GiB)": 160.86, |
| "step": 2970, |
| "token_acc": 0.9999157752884696, |
| "train_speed(iter/s)": 0.037924 |
| }, |
| { |
| "epoch": 1.0901594282572842, |
| "grad_norm": 0.0032097063958644867, |
| "learning_rate": 7.078820814420629e-06, |
| "loss": 0.0006281842943280935, |
| "memory(GiB)": 160.86, |
| "step": 2975, |
| "token_acc": 0.9997473045822103, |
| "train_speed(iter/s)": 0.037935 |
| }, |
| { |
| "epoch": 1.0919919369617006, |
| "grad_norm": 0.012336465530097485, |
| "learning_rate": 7.070089009238306e-06, |
| "loss": 0.000180811935570091, |
| "memory(GiB)": 160.86, |
| "step": 2980, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.037945 |
| }, |
| { |
| "epoch": 1.093824445666117, |
| "grad_norm": 0.0761614739894867, |
| "learning_rate": 7.061349578045481e-06, |
| "loss": 0.0011349070817232132, |
| "memory(GiB)": 160.86, |
| "step": 2985, |
| "token_acc": 0.999578947368421, |
| "train_speed(iter/s)": 0.037956 |
| }, |
| { |
| "epoch": 1.0956569543705332, |
| "grad_norm": 0.0008425001287832856, |
| "learning_rate": 7.05260255303739e-06, |
| "loss": 0.000435651745647192, |
| "memory(GiB)": 160.86, |
| "step": 2990, |
| "token_acc": 0.9999158461667929, |
| "train_speed(iter/s)": 0.037967 |
| }, |
| { |
| "epoch": 1.0974894630749497, |
| "grad_norm": 0.0662672221660614, |
| "learning_rate": 7.043847966437235e-06, |
| "loss": 0.0007866304367780685, |
| "memory(GiB)": 160.86, |
| "step": 2995, |
| "token_acc": 0.9996635262449529, |
| "train_speed(iter/s)": 0.037978 |
| }, |
| { |
| "epoch": 1.0993219717793659, |
| "grad_norm": 0.02012745290994644, |
| "learning_rate": 7.035085850496079e-06, |
| "loss": 6.958455196581781e-05, |
| "memory(GiB)": 160.86, |
| "step": 3000, |
| "token_acc": 1.0, |
| "train_speed(iter/s)": 0.037988 |
| }, |
| { |
| "epoch": 1.0993219717793659, |
| "eval_loss": 0.0006502080941572785, |
| "eval_runtime": 172.5767, |
| "eval_samples_per_second": 2.55, |
| "eval_steps_per_second": 2.55, |
| "eval_token_acc": 0.9997857459865632, |
| "step": 3000 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 8184, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.74067294651731e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|