| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 2060, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009708737864077669, |
| "grad_norm": 8.540091514587402, |
| "learning_rate": 3.883495145631068e-07, |
| "loss": 2.734, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.019417475728155338, |
| "grad_norm": 8.14619255065918, |
| "learning_rate": 1.359223300970874e-06, |
| "loss": 2.6711, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02912621359223301, |
| "grad_norm": 6.858204364776611, |
| "learning_rate": 2.330097087378641e-06, |
| "loss": 2.6055, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.038834951456310676, |
| "grad_norm": 6.228450298309326, |
| "learning_rate": 3.300970873786408e-06, |
| "loss": 2.5322, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04854368932038835, |
| "grad_norm": 5.512765407562256, |
| "learning_rate": 4.271844660194175e-06, |
| "loss": 2.466, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05825242718446602, |
| "grad_norm": 5.114351272583008, |
| "learning_rate": 5.242718446601942e-06, |
| "loss": 2.4828, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.06796116504854369, |
| "grad_norm": 4.749820232391357, |
| "learning_rate": 6.213592233009709e-06, |
| "loss": 2.4529, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.07766990291262135, |
| "grad_norm": 4.962618827819824, |
| "learning_rate": 7.184466019417476e-06, |
| "loss": 2.3377, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.08737864077669903, |
| "grad_norm": 4.963841915130615, |
| "learning_rate": 8.155339805825243e-06, |
| "loss": 2.3914, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0970873786407767, |
| "grad_norm": 5.020650386810303, |
| "learning_rate": 9.12621359223301e-06, |
| "loss": 2.2748, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.10679611650485436, |
| "grad_norm": 4.788837909698486, |
| "learning_rate": 1.0097087378640778e-05, |
| "loss": 2.2975, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.11650485436893204, |
| "grad_norm": 5.005253791809082, |
| "learning_rate": 1.1067961165048544e-05, |
| "loss": 2.3025, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1262135922330097, |
| "grad_norm": 5.113918304443359, |
| "learning_rate": 1.2038834951456311e-05, |
| "loss": 2.2121, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.13592233009708737, |
| "grad_norm": 5.089141368865967, |
| "learning_rate": 1.300970873786408e-05, |
| "loss": 2.233, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.14563106796116504, |
| "grad_norm": 5.0798749923706055, |
| "learning_rate": 1.3980582524271846e-05, |
| "loss": 2.1688, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1553398058252427, |
| "grad_norm": 5.230806827545166, |
| "learning_rate": 1.4951456310679614e-05, |
| "loss": 2.2232, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1650485436893204, |
| "grad_norm": 5.30760383605957, |
| "learning_rate": 1.592233009708738e-05, |
| "loss": 2.1506, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.17475728155339806, |
| "grad_norm": 5.008656978607178, |
| "learning_rate": 1.6893203883495145e-05, |
| "loss": 2.1866, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.18446601941747573, |
| "grad_norm": 4.95796537399292, |
| "learning_rate": 1.7864077669902916e-05, |
| "loss": 2.2156, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1941747572815534, |
| "grad_norm": 4.717769145965576, |
| "learning_rate": 1.883495145631068e-05, |
| "loss": 2.164, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.20388349514563106, |
| "grad_norm": 4.858338832855225, |
| "learning_rate": 1.9805825242718447e-05, |
| "loss": 2.1291, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.21359223300970873, |
| "grad_norm": 5.218167781829834, |
| "learning_rate": 1.9913700107874866e-05, |
| "loss": 2.1617, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.22330097087378642, |
| "grad_norm": 5.097916126251221, |
| "learning_rate": 1.9805825242718447e-05, |
| "loss": 2.1721, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.23300970873786409, |
| "grad_norm": 5.860560417175293, |
| "learning_rate": 1.969795037756203e-05, |
| "loss": 2.1412, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.24271844660194175, |
| "grad_norm": 5.395883560180664, |
| "learning_rate": 1.959007551240561e-05, |
| "loss": 2.1397, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2524271844660194, |
| "grad_norm": 5.043527126312256, |
| "learning_rate": 1.9482200647249193e-05, |
| "loss": 2.1314, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2621359223300971, |
| "grad_norm": 4.853712558746338, |
| "learning_rate": 1.9374325782092775e-05, |
| "loss": 2.118, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.27184466019417475, |
| "grad_norm": 5.681634902954102, |
| "learning_rate": 1.9266450916936353e-05, |
| "loss": 2.1189, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2815533980582524, |
| "grad_norm": 5.401227951049805, |
| "learning_rate": 1.9158576051779935e-05, |
| "loss": 2.1148, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.2912621359223301, |
| "grad_norm": 5.208418369293213, |
| "learning_rate": 1.905070118662352e-05, |
| "loss": 2.128, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.30097087378640774, |
| "grad_norm": 5.307718276977539, |
| "learning_rate": 1.89428263214671e-05, |
| "loss": 2.0858, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3106796116504854, |
| "grad_norm": 5.279410362243652, |
| "learning_rate": 1.883495145631068e-05, |
| "loss": 2.1371, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.32038834951456313, |
| "grad_norm": 5.151274681091309, |
| "learning_rate": 1.8727076591154262e-05, |
| "loss": 2.0932, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3300970873786408, |
| "grad_norm": 5.083354473114014, |
| "learning_rate": 1.8619201725997844e-05, |
| "loss": 2.0623, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.33980582524271846, |
| "grad_norm": 5.1322550773620605, |
| "learning_rate": 1.8511326860841425e-05, |
| "loss": 2.05, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.34951456310679613, |
| "grad_norm": 4.970919609069824, |
| "learning_rate": 1.8403451995685007e-05, |
| "loss": 2.0901, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3592233009708738, |
| "grad_norm": 5.15512752532959, |
| "learning_rate": 1.829557713052859e-05, |
| "loss": 2.0883, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.36893203883495146, |
| "grad_norm": 5.088575839996338, |
| "learning_rate": 1.818770226537217e-05, |
| "loss": 2.1119, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.3786407766990291, |
| "grad_norm": 6.092918872833252, |
| "learning_rate": 1.807982740021575e-05, |
| "loss": 2.0979, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3883495145631068, |
| "grad_norm": 4.909801483154297, |
| "learning_rate": 1.797195253505933e-05, |
| "loss": 2.0283, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.39805825242718446, |
| "grad_norm": 5.128530025482178, |
| "learning_rate": 1.7864077669902916e-05, |
| "loss": 2.023, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4077669902912621, |
| "grad_norm": 4.998912811279297, |
| "learning_rate": 1.7756202804746498e-05, |
| "loss": 2.0207, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4174757281553398, |
| "grad_norm": 5.182358264923096, |
| "learning_rate": 1.7648327939590076e-05, |
| "loss": 1.9837, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.42718446601941745, |
| "grad_norm": 5.3191022872924805, |
| "learning_rate": 1.7540453074433658e-05, |
| "loss": 2.058, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.4368932038834951, |
| "grad_norm": 5.306585788726807, |
| "learning_rate": 1.743257820927724e-05, |
| "loss": 2.0832, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.44660194174757284, |
| "grad_norm": 5.278446197509766, |
| "learning_rate": 1.732470334412082e-05, |
| "loss": 2.0594, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4563106796116505, |
| "grad_norm": 5.484086990356445, |
| "learning_rate": 1.7216828478964403e-05, |
| "loss": 2.0763, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.46601941747572817, |
| "grad_norm": 5.767387866973877, |
| "learning_rate": 1.7108953613807985e-05, |
| "loss": 2.0634, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.47572815533980584, |
| "grad_norm": 4.96846342086792, |
| "learning_rate": 1.7001078748651563e-05, |
| "loss": 2.0769, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4854368932038835, |
| "grad_norm": 5.264239311218262, |
| "learning_rate": 1.6893203883495145e-05, |
| "loss": 2.0604, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.49514563106796117, |
| "grad_norm": 5.036663055419922, |
| "learning_rate": 1.6785329018338727e-05, |
| "loss": 2.1031, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5048543689320388, |
| "grad_norm": 4.875285625457764, |
| "learning_rate": 1.6677454153182312e-05, |
| "loss": 2.0457, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5145631067961165, |
| "grad_norm": 4.933873653411865, |
| "learning_rate": 1.6569579288025894e-05, |
| "loss": 2.0312, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5242718446601942, |
| "grad_norm": 5.284345626831055, |
| "learning_rate": 1.6461704422869472e-05, |
| "loss": 2.0656, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5339805825242718, |
| "grad_norm": 5.3404998779296875, |
| "learning_rate": 1.6353829557713054e-05, |
| "loss": 2.1049, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5436893203883495, |
| "grad_norm": 5.243639945983887, |
| "learning_rate": 1.6245954692556636e-05, |
| "loss": 2.0382, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5533980582524272, |
| "grad_norm": 5.110634803771973, |
| "learning_rate": 1.6138079827400217e-05, |
| "loss": 2.004, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5631067961165048, |
| "grad_norm": 5.063004493713379, |
| "learning_rate": 1.60302049622438e-05, |
| "loss": 2.0207, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.5728155339805825, |
| "grad_norm": 4.7647271156311035, |
| "learning_rate": 1.592233009708738e-05, |
| "loss": 2.0534, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5825242718446602, |
| "grad_norm": 5.176267147064209, |
| "learning_rate": 1.581445523193096e-05, |
| "loss": 2.0627, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5922330097087378, |
| "grad_norm": 5.2062225341796875, |
| "learning_rate": 1.570658036677454e-05, |
| "loss": 2.0063, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6019417475728155, |
| "grad_norm": 5.044838905334473, |
| "learning_rate": 1.5598705501618123e-05, |
| "loss": 1.9475, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6116504854368932, |
| "grad_norm": 5.029117584228516, |
| "learning_rate": 1.5490830636461708e-05, |
| "loss": 2.0673, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6213592233009708, |
| "grad_norm": 5.190179347991943, |
| "learning_rate": 1.5382955771305286e-05, |
| "loss": 2.0176, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6310679611650486, |
| "grad_norm": 4.956365585327148, |
| "learning_rate": 1.5275080906148868e-05, |
| "loss": 1.9984, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6407766990291263, |
| "grad_norm": 4.972413539886475, |
| "learning_rate": 1.516720604099245e-05, |
| "loss": 1.9919, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.6504854368932039, |
| "grad_norm": 5.319215297698975, |
| "learning_rate": 1.5059331175836032e-05, |
| "loss": 1.984, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.6601941747572816, |
| "grad_norm": 5.120510578155518, |
| "learning_rate": 1.4951456310679614e-05, |
| "loss": 1.9838, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.6699029126213593, |
| "grad_norm": 4.868938446044922, |
| "learning_rate": 1.4843581445523194e-05, |
| "loss": 2.006, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.6796116504854369, |
| "grad_norm": 5.22821044921875, |
| "learning_rate": 1.4735706580366775e-05, |
| "loss": 2.0099, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6893203883495146, |
| "grad_norm": 4.970730781555176, |
| "learning_rate": 1.4627831715210357e-05, |
| "loss": 2.0078, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.6990291262135923, |
| "grad_norm": 4.913213729858398, |
| "learning_rate": 1.4519956850053937e-05, |
| "loss": 2.0198, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7087378640776699, |
| "grad_norm": 5.104898452758789, |
| "learning_rate": 1.4412081984897519e-05, |
| "loss": 2.0224, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7184466019417476, |
| "grad_norm": 4.992263317108154, |
| "learning_rate": 1.4304207119741102e-05, |
| "loss": 2.0013, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7281553398058253, |
| "grad_norm": 5.0994038581848145, |
| "learning_rate": 1.4196332254584684e-05, |
| "loss": 1.9542, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7378640776699029, |
| "grad_norm": 5.849913120269775, |
| "learning_rate": 1.4088457389428264e-05, |
| "loss": 1.9941, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.7475728155339806, |
| "grad_norm": 5.09085750579834, |
| "learning_rate": 1.3980582524271846e-05, |
| "loss": 2.0384, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.7572815533980582, |
| "grad_norm": 5.28529167175293, |
| "learning_rate": 1.3872707659115428e-05, |
| "loss": 2.0251, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.7669902912621359, |
| "grad_norm": 5.162165641784668, |
| "learning_rate": 1.3764832793959008e-05, |
| "loss": 1.9806, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.7766990291262136, |
| "grad_norm": 5.865965843200684, |
| "learning_rate": 1.365695792880259e-05, |
| "loss": 2.0359, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7864077669902912, |
| "grad_norm": 4.936879634857178, |
| "learning_rate": 1.3549083063646171e-05, |
| "loss": 1.993, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.7961165048543689, |
| "grad_norm": 5.331514358520508, |
| "learning_rate": 1.3441208198489753e-05, |
| "loss": 1.9685, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.8058252427184466, |
| "grad_norm": 5.171398639678955, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 1.9885, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8155339805825242, |
| "grad_norm": 4.853579998016357, |
| "learning_rate": 1.3225458468176915e-05, |
| "loss": 1.9818, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.8252427184466019, |
| "grad_norm": 5.196751117706299, |
| "learning_rate": 1.3117583603020499e-05, |
| "loss": 1.946, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8349514563106796, |
| "grad_norm": 4.931100845336914, |
| "learning_rate": 1.300970873786408e-05, |
| "loss": 1.9898, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.8446601941747572, |
| "grad_norm": 5.232204437255859, |
| "learning_rate": 1.290183387270766e-05, |
| "loss": 2.0314, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.8543689320388349, |
| "grad_norm": 5.175143718719482, |
| "learning_rate": 1.2793959007551242e-05, |
| "loss": 1.9958, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.8640776699029126, |
| "grad_norm": 5.501524925231934, |
| "learning_rate": 1.2686084142394824e-05, |
| "loss": 1.969, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.8737864077669902, |
| "grad_norm": 5.200106620788574, |
| "learning_rate": 1.2578209277238404e-05, |
| "loss": 1.9624, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.883495145631068, |
| "grad_norm": 5.435555934906006, |
| "learning_rate": 1.2470334412081986e-05, |
| "loss": 2.0057, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.8932038834951457, |
| "grad_norm": 5.041926860809326, |
| "learning_rate": 1.2362459546925568e-05, |
| "loss": 1.9604, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.9029126213592233, |
| "grad_norm": 5.86530065536499, |
| "learning_rate": 1.2254584681769148e-05, |
| "loss": 1.9904, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.912621359223301, |
| "grad_norm": 5.039781093597412, |
| "learning_rate": 1.214670981661273e-05, |
| "loss": 1.9681, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.9223300970873787, |
| "grad_norm": 5.195461273193359, |
| "learning_rate": 1.2038834951456311e-05, |
| "loss": 2.0042, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.9320388349514563, |
| "grad_norm": 5.229151725769043, |
| "learning_rate": 1.1930960086299891e-05, |
| "loss": 2.0039, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.941747572815534, |
| "grad_norm": 4.973319053649902, |
| "learning_rate": 1.1823085221143475e-05, |
| "loss": 2.0065, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.9514563106796117, |
| "grad_norm": 5.273900032043457, |
| "learning_rate": 1.1715210355987056e-05, |
| "loss": 1.9896, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.9611650485436893, |
| "grad_norm": 5.269063472747803, |
| "learning_rate": 1.1607335490830638e-05, |
| "loss": 1.9592, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.970873786407767, |
| "grad_norm": 5.100589752197266, |
| "learning_rate": 1.149946062567422e-05, |
| "loss": 1.9354, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9805825242718447, |
| "grad_norm": 5.232608318328857, |
| "learning_rate": 1.1413160733549084e-05, |
| "loss": 2.0395, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.9902912621359223, |
| "grad_norm": 5.08132791519165, |
| "learning_rate": 1.1305285868392666e-05, |
| "loss": 1.9437, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 5.696261405944824, |
| "learning_rate": 1.1197411003236248e-05, |
| "loss": 1.9665, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.0097087378640777, |
| "grad_norm": 5.359734058380127, |
| "learning_rate": 1.1089536138079828e-05, |
| "loss": 1.841, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.0194174757281553, |
| "grad_norm": 5.353382587432861, |
| "learning_rate": 1.098166127292341e-05, |
| "loss": 1.8354, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.029126213592233, |
| "grad_norm": 5.595281600952148, |
| "learning_rate": 1.0873786407766991e-05, |
| "loss": 1.8011, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.0388349514563107, |
| "grad_norm": 5.603349685668945, |
| "learning_rate": 1.0765911542610571e-05, |
| "loss": 1.8443, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.0485436893203883, |
| "grad_norm": 5.549112319946289, |
| "learning_rate": 1.0658036677454153e-05, |
| "loss": 1.8022, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.058252427184466, |
| "grad_norm": 5.524362087249756, |
| "learning_rate": 1.0550161812297735e-05, |
| "loss": 1.8361, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.0679611650485437, |
| "grad_norm": 5.287837028503418, |
| "learning_rate": 1.0442286947141318e-05, |
| "loss": 1.7831, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.0776699029126213, |
| "grad_norm": 5.503448486328125, |
| "learning_rate": 1.03344120819849e-05, |
| "loss": 1.816, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.087378640776699, |
| "grad_norm": 5.447495460510254, |
| "learning_rate": 1.022653721682848e-05, |
| "loss": 1.7967, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.0970873786407767, |
| "grad_norm": 5.651370525360107, |
| "learning_rate": 1.0118662351672062e-05, |
| "loss": 1.767, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.1067961165048543, |
| "grad_norm": 5.455685138702393, |
| "learning_rate": 1.0010787486515644e-05, |
| "loss": 1.7986, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.116504854368932, |
| "grad_norm": 5.773884296417236, |
| "learning_rate": 9.902912621359224e-06, |
| "loss": 1.8061, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.1262135922330097, |
| "grad_norm": 5.654228687286377, |
| "learning_rate": 9.795037756202806e-06, |
| "loss": 1.8051, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.1359223300970873, |
| "grad_norm": 5.353296756744385, |
| "learning_rate": 9.687162891046387e-06, |
| "loss": 1.8157, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.145631067961165, |
| "grad_norm": 5.371259689331055, |
| "learning_rate": 9.579288025889967e-06, |
| "loss": 1.7957, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.1553398058252426, |
| "grad_norm": 5.551179885864258, |
| "learning_rate": 9.47141316073355e-06, |
| "loss": 1.8099, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.1650485436893203, |
| "grad_norm": 5.763803958892822, |
| "learning_rate": 9.363538295577131e-06, |
| "loss": 1.8243, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.174757281553398, |
| "grad_norm": 5.4469313621521, |
| "learning_rate": 9.255663430420713e-06, |
| "loss": 1.7813, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.1844660194174756, |
| "grad_norm": 5.914862155914307, |
| "learning_rate": 9.147788565264294e-06, |
| "loss": 1.8308, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.1941747572815533, |
| "grad_norm": 5.619472980499268, |
| "learning_rate": 9.039913700107874e-06, |
| "loss": 1.8566, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.203883495145631, |
| "grad_norm": 5.747879981994629, |
| "learning_rate": 8.932038834951458e-06, |
| "loss": 1.831, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.2135922330097086, |
| "grad_norm": 5.515039443969727, |
| "learning_rate": 8.824163969795038e-06, |
| "loss": 1.8279, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.2233009708737863, |
| "grad_norm": 5.6780171394348145, |
| "learning_rate": 8.71628910463862e-06, |
| "loss": 1.7496, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.233009708737864, |
| "grad_norm": 5.678586006164551, |
| "learning_rate": 8.608414239482202e-06, |
| "loss": 1.7862, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.2427184466019416, |
| "grad_norm": 5.727756977081299, |
| "learning_rate": 8.500539374325782e-06, |
| "loss": 1.8364, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.2524271844660193, |
| "grad_norm": 5.593883037567139, |
| "learning_rate": 8.392664509169363e-06, |
| "loss": 1.7775, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.262135922330097, |
| "grad_norm": 5.856795310974121, |
| "learning_rate": 8.284789644012947e-06, |
| "loss": 1.8652, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.2718446601941746, |
| "grad_norm": 5.66147518157959, |
| "learning_rate": 8.176914778856527e-06, |
| "loss": 1.7747, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.2815533980582523, |
| "grad_norm": 5.7044291496276855, |
| "learning_rate": 8.069039913700109e-06, |
| "loss": 1.7781, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.29126213592233, |
| "grad_norm": 5.8010149002075195, |
| "learning_rate": 7.96116504854369e-06, |
| "loss": 1.812, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.3009708737864076, |
| "grad_norm": 5.601301670074463, |
| "learning_rate": 7.85329018338727e-06, |
| "loss": 1.8109, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.3106796116504853, |
| "grad_norm": 5.59577751159668, |
| "learning_rate": 7.745415318230854e-06, |
| "loss": 1.794, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.3203883495145632, |
| "grad_norm": 6.064187526702881, |
| "learning_rate": 7.637540453074434e-06, |
| "loss": 1.7978, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.3300970873786409, |
| "grad_norm": 5.477755069732666, |
| "learning_rate": 7.529665587918016e-06, |
| "loss": 1.7732, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.3398058252427185, |
| "grad_norm": 5.672438144683838, |
| "learning_rate": 7.421790722761597e-06, |
| "loss": 1.8356, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.3495145631067962, |
| "grad_norm": 5.968810558319092, |
| "learning_rate": 7.3139158576051786e-06, |
| "loss": 1.8513, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.3592233009708738, |
| "grad_norm": 5.984207630157471, |
| "learning_rate": 7.2060409924487595e-06, |
| "loss": 1.7821, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.3689320388349515, |
| "grad_norm": 6.006514072418213, |
| "learning_rate": 7.098166127292342e-06, |
| "loss": 1.756, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.3786407766990292, |
| "grad_norm": 5.548986911773682, |
| "learning_rate": 6.990291262135923e-06, |
| "loss": 1.8, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.3883495145631068, |
| "grad_norm": 5.688983917236328, |
| "learning_rate": 6.882416396979504e-06, |
| "loss": 1.7601, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.3980582524271845, |
| "grad_norm": 5.5633225440979, |
| "learning_rate": 6.774541531823086e-06, |
| "loss": 1.7876, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.4077669902912622, |
| "grad_norm": 5.3327226638793945, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.7388, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.4174757281553398, |
| "grad_norm": 5.563536643981934, |
| "learning_rate": 6.558791801510249e-06, |
| "loss": 1.7894, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.4271844660194175, |
| "grad_norm": 5.413880825042725, |
| "learning_rate": 6.45091693635383e-06, |
| "loss": 1.8045, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.4368932038834952, |
| "grad_norm": 5.559710502624512, |
| "learning_rate": 6.343042071197412e-06, |
| "loss": 1.7388, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.4466019417475728, |
| "grad_norm": 5.674643039703369, |
| "learning_rate": 6.235167206040993e-06, |
| "loss": 1.7521, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.4563106796116505, |
| "grad_norm": 6.015341758728027, |
| "learning_rate": 6.127292340884574e-06, |
| "loss": 1.7567, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.4660194174757282, |
| "grad_norm": 5.763010025024414, |
| "learning_rate": 6.0194174757281556e-06, |
| "loss": 1.7494, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.4757281553398058, |
| "grad_norm": 5.727349758148193, |
| "learning_rate": 5.911542610571737e-06, |
| "loss": 1.8048, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.4854368932038835, |
| "grad_norm": 5.473784923553467, |
| "learning_rate": 5.803667745415319e-06, |
| "loss": 1.7469, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.4951456310679612, |
| "grad_norm": 5.847958087921143, |
| "learning_rate": 5.6957928802589e-06, |
| "loss": 1.7803, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.5048543689320388, |
| "grad_norm": 6.08969259262085, |
| "learning_rate": 5.587918015102482e-06, |
| "loss": 1.796, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.5145631067961165, |
| "grad_norm": 5.455092430114746, |
| "learning_rate": 5.480043149946063e-06, |
| "loss": 1.7495, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.5242718446601942, |
| "grad_norm": 5.9275031089782715, |
| "learning_rate": 5.372168284789644e-06, |
| "loss": 1.814, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.5339805825242718, |
| "grad_norm": 5.613204002380371, |
| "learning_rate": 5.264293419633226e-06, |
| "loss": 1.8095, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.5436893203883495, |
| "grad_norm": 5.575292110443115, |
| "learning_rate": 5.156418554476807e-06, |
| "loss": 1.794, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.5533980582524272, |
| "grad_norm": 6.1768107414245605, |
| "learning_rate": 5.048543689320389e-06, |
| "loss": 1.7858, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.5631067961165048, |
| "grad_norm": 5.83579158782959, |
| "learning_rate": 4.94066882416397e-06, |
| "loss": 1.7229, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.5728155339805825, |
| "grad_norm": 5.574371814727783, |
| "learning_rate": 4.832793959007552e-06, |
| "loss": 1.784, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.5825242718446602, |
| "grad_norm": 5.452093124389648, |
| "learning_rate": 4.724919093851133e-06, |
| "loss": 1.7639, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.5922330097087378, |
| "grad_norm": 5.442083358764648, |
| "learning_rate": 4.617044228694714e-06, |
| "loss": 1.8157, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.6019417475728155, |
| "grad_norm": 5.7211079597473145, |
| "learning_rate": 4.509169363538296e-06, |
| "loss": 1.7338, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.6116504854368932, |
| "grad_norm": 6.113297462463379, |
| "learning_rate": 4.401294498381877e-06, |
| "loss": 1.7044, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.6213592233009708, |
| "grad_norm": 5.693146705627441, |
| "learning_rate": 4.293419633225459e-06, |
| "loss": 1.7722, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.6310679611650487, |
| "grad_norm": 5.956842422485352, |
| "learning_rate": 4.1855447680690406e-06, |
| "loss": 1.7618, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.6407766990291264, |
| "grad_norm": 5.646125793457031, |
| "learning_rate": 4.0776699029126215e-06, |
| "loss": 1.7872, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.650485436893204, |
| "grad_norm": 5.913788795471191, |
| "learning_rate": 3.969795037756203e-06, |
| "loss": 1.7913, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.6601941747572817, |
| "grad_norm": 6.05329704284668, |
| "learning_rate": 3.861920172599784e-06, |
| "loss": 1.7771, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.6699029126213594, |
| "grad_norm": 6.139546871185303, |
| "learning_rate": 3.754045307443366e-06, |
| "loss": 1.7966, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.679611650485437, |
| "grad_norm": 6.158768653869629, |
| "learning_rate": 3.6461704422869477e-06, |
| "loss": 1.8097, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.6893203883495147, |
| "grad_norm": 5.726659774780273, |
| "learning_rate": 3.5382955771305286e-06, |
| "loss": 1.7568, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.6990291262135924, |
| "grad_norm": 5.508258819580078, |
| "learning_rate": 3.43042071197411e-06, |
| "loss": 1.8406, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.70873786407767, |
| "grad_norm": 6.076147556304932, |
| "learning_rate": 3.3225458468176918e-06, |
| "loss": 1.7246, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.7184466019417477, |
| "grad_norm": 5.596787452697754, |
| "learning_rate": 3.214670981661273e-06, |
| "loss": 1.8189, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.7281553398058254, |
| "grad_norm": 6.001366138458252, |
| "learning_rate": 3.1067961165048544e-06, |
| "loss": 1.7681, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.737864077669903, |
| "grad_norm": 5.889746189117432, |
| "learning_rate": 2.9989212513484362e-06, |
| "loss": 1.7634, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.7475728155339807, |
| "grad_norm": 5.484528541564941, |
| "learning_rate": 2.8910463861920176e-06, |
| "loss": 1.7928, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.7572815533980584, |
| "grad_norm": 5.366055011749268, |
| "learning_rate": 2.7831715210355993e-06, |
| "loss": 1.7284, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.766990291262136, |
| "grad_norm": 6.179636478424072, |
| "learning_rate": 2.6752966558791803e-06, |
| "loss": 1.7973, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.7766990291262137, |
| "grad_norm": 5.786418437957764, |
| "learning_rate": 2.5674217907227616e-06, |
| "loss": 1.7271, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.7864077669902914, |
| "grad_norm": 5.728253364562988, |
| "learning_rate": 2.4595469255663434e-06, |
| "loss": 1.7947, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.796116504854369, |
| "grad_norm": 5.840207576751709, |
| "learning_rate": 2.3516720604099247e-06, |
| "loss": 1.8411, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.8058252427184467, |
| "grad_norm": 6.026117324829102, |
| "learning_rate": 2.243797195253506e-06, |
| "loss": 1.7801, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.8155339805825244, |
| "grad_norm": 5.574731826782227, |
| "learning_rate": 2.1359223300970874e-06, |
| "loss": 1.8296, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.825242718446602, |
| "grad_norm": 5.741345405578613, |
| "learning_rate": 2.0280474649406688e-06, |
| "loss": 1.8048, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.8349514563106797, |
| "grad_norm": 5.989925384521484, |
| "learning_rate": 1.9201725997842505e-06, |
| "loss": 1.7559, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.8446601941747574, |
| "grad_norm": 5.998227119445801, |
| "learning_rate": 1.812297734627832e-06, |
| "loss": 1.7678, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.854368932038835, |
| "grad_norm": 5.9274420738220215, |
| "learning_rate": 1.7044228694714132e-06, |
| "loss": 1.7886, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.8640776699029127, |
| "grad_norm": 5.719155788421631, |
| "learning_rate": 1.5965480043149948e-06, |
| "loss": 1.7921, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.8737864077669903, |
| "grad_norm": 5.4220147132873535, |
| "learning_rate": 1.4886731391585763e-06, |
| "loss": 1.7813, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.883495145631068, |
| "grad_norm": 5.772354602813721, |
| "learning_rate": 1.3807982740021575e-06, |
| "loss": 1.7606, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.8932038834951457, |
| "grad_norm": 5.864536762237549, |
| "learning_rate": 1.272923408845739e-06, |
| "loss": 1.7354, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.9029126213592233, |
| "grad_norm": 5.453779697418213, |
| "learning_rate": 1.1650485436893206e-06, |
| "loss": 1.7403, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.912621359223301, |
| "grad_norm": 5.6637492179870605, |
| "learning_rate": 1.057173678532902e-06, |
| "loss": 1.7649, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.9223300970873787, |
| "grad_norm": 5.886834621429443, |
| "learning_rate": 9.492988133764834e-07, |
| "loss": 1.8095, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.9320388349514563, |
| "grad_norm": 6.255226135253906, |
| "learning_rate": 8.414239482200648e-07, |
| "loss": 1.7987, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.941747572815534, |
| "grad_norm": 5.694814205169678, |
| "learning_rate": 7.335490830636462e-07, |
| "loss": 1.7715, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.9514563106796117, |
| "grad_norm": 5.4670257568359375, |
| "learning_rate": 6.256742179072277e-07, |
| "loss": 1.737, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.9611650485436893, |
| "grad_norm": 5.776082992553711, |
| "learning_rate": 5.393743257820928e-07, |
| "loss": 1.7518, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.970873786407767, |
| "grad_norm": 5.826039791107178, |
| "learning_rate": 4.314994606256743e-07, |
| "loss": 1.8036, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.9805825242718447, |
| "grad_norm": 5.991348743438721, |
| "learning_rate": 3.2362459546925565e-07, |
| "loss": 1.756, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.9902912621359223, |
| "grad_norm": 5.450629234313965, |
| "learning_rate": 2.1574973031283715e-07, |
| "loss": 1.7953, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 5.462096214294434, |
| "learning_rate": 1.0787486515641857e-07, |
| "loss": 1.7721, |
| "step": 2060 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2060, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|