| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.997824139255088, |
| "eval_steps": 500, |
| "global_step": 9765, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02559836170485089, |
| "grad_norm": 0.7305641763872902, |
| "learning_rate": 1.9897593445980545e-05, |
| "loss": 0.24, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05119672340970178, |
| "grad_norm": 0.05103642085416945, |
| "learning_rate": 1.9795186891961087e-05, |
| "loss": 0.0049, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07679508511455267, |
| "grad_norm": 0.030274497499991383, |
| "learning_rate": 1.969278033794163e-05, |
| "loss": 0.0017, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.10239344681940356, |
| "grad_norm": 0.029756392410786685, |
| "learning_rate": 1.9590373783922173e-05, |
| "loss": 0.0011, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12799180852425446, |
| "grad_norm": 0.02549118945457913, |
| "learning_rate": 1.9487967229902716e-05, |
| "loss": 0.0008, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.15359017022910534, |
| "grad_norm": 0.02438470537104193, |
| "learning_rate": 1.9385560675883256e-05, |
| "loss": 0.0008, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.17918853193395623, |
| "grad_norm": 0.016258758537083494, |
| "learning_rate": 1.9283154121863802e-05, |
| "loss": 0.0007, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.20478689363880712, |
| "grad_norm": 0.014976187170917211, |
| "learning_rate": 1.9180747567844345e-05, |
| "loss": 0.0005, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.230385255343658, |
| "grad_norm": 0.014196620668655414, |
| "learning_rate": 1.9078341013824884e-05, |
| "loss": 0.0005, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2559836170485089, |
| "grad_norm": 0.01865887251990293, |
| "learning_rate": 1.897593445980543e-05, |
| "loss": 0.0004, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2815819787533598, |
| "grad_norm": 0.016349380331586796, |
| "learning_rate": 1.887352790578597e-05, |
| "loss": 0.0004, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3071803404582107, |
| "grad_norm": 0.01413169547355441, |
| "learning_rate": 1.8771121351766516e-05, |
| "loss": 0.0003, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.33277870216306155, |
| "grad_norm": 0.010152573483527069, |
| "learning_rate": 1.866871479774706e-05, |
| "loss": 0.0003, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.35837706386791246, |
| "grad_norm": 0.01017225834937972, |
| "learning_rate": 1.85663082437276e-05, |
| "loss": 0.0003, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3839754255727633, |
| "grad_norm": 0.013000431764113899, |
| "learning_rate": 1.8463901689708145e-05, |
| "loss": 0.0003, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.40957378727761423, |
| "grad_norm": 0.013254090310897974, |
| "learning_rate": 1.8361495135688684e-05, |
| "loss": 0.0003, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.43517214898246515, |
| "grad_norm": 0.00966168025347855, |
| "learning_rate": 1.8259088581669227e-05, |
| "loss": 0.0002, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.460770510687316, |
| "grad_norm": 0.00712828374097157, |
| "learning_rate": 1.815668202764977e-05, |
| "loss": 0.0002, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4863688723921669, |
| "grad_norm": 0.010211960398621855, |
| "learning_rate": 1.8054275473630313e-05, |
| "loss": 0.0002, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5119672340970178, |
| "grad_norm": 0.01261082529443916, |
| "learning_rate": 1.7951868919610856e-05, |
| "loss": 0.0002, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5375655958018687, |
| "grad_norm": 0.011854166152885242, |
| "learning_rate": 1.78494623655914e-05, |
| "loss": 0.0002, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5631639575067195, |
| "grad_norm": 0.009916438252277934, |
| "learning_rate": 1.7747055811571942e-05, |
| "loss": 0.0003, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5887623192115704, |
| "grad_norm": 0.005025129187771019, |
| "learning_rate": 1.7644649257552485e-05, |
| "loss": 0.0002, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6143606809164214, |
| "grad_norm": 0.006788101186805052, |
| "learning_rate": 1.7542242703533028e-05, |
| "loss": 0.0002, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6399590426212722, |
| "grad_norm": 0.00978494920049853, |
| "learning_rate": 1.743983614951357e-05, |
| "loss": 0.0002, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.6655574043261231, |
| "grad_norm": 0.009494329535464946, |
| "learning_rate": 1.7337429595494113e-05, |
| "loss": 0.0002, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.6911557660309741, |
| "grad_norm": 0.010405998997878094, |
| "learning_rate": 1.7235023041474656e-05, |
| "loss": 0.0002, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.7167541277358249, |
| "grad_norm": 0.01406008137282546, |
| "learning_rate": 1.71326164874552e-05, |
| "loss": 0.0001, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7423524894406758, |
| "grad_norm": 0.012511809648905668, |
| "learning_rate": 1.7030209933435742e-05, |
| "loss": 0.0002, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.7679508511455266, |
| "grad_norm": 0.014402924650339832, |
| "learning_rate": 1.6927803379416285e-05, |
| "loss": 0.0002, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.7935492128503776, |
| "grad_norm": 0.007985015692090758, |
| "learning_rate": 1.6825396825396828e-05, |
| "loss": 0.0002, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.8191475745552285, |
| "grad_norm": 0.016922696684847503, |
| "learning_rate": 1.6722990271377367e-05, |
| "loss": 0.0002, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.8447459362600793, |
| "grad_norm": 0.0058610303905484145, |
| "learning_rate": 1.6620583717357914e-05, |
| "loss": 0.0002, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.8703442979649303, |
| "grad_norm": 0.005758710688055935, |
| "learning_rate": 1.6518177163338457e-05, |
| "loss": 0.0001, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.8959426596697811, |
| "grad_norm": 0.010005518642531934, |
| "learning_rate": 1.6415770609318996e-05, |
| "loss": 0.0001, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.921541021374632, |
| "grad_norm": 0.006740034442277339, |
| "learning_rate": 1.6313364055299542e-05, |
| "loss": 0.0001, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.9471393830794829, |
| "grad_norm": 0.0062403985576606705, |
| "learning_rate": 1.6210957501280082e-05, |
| "loss": 0.0001, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.9727377447843338, |
| "grad_norm": 0.006225396199669411, |
| "learning_rate": 1.6108550947260625e-05, |
| "loss": 0.0001, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.9983361064891847, |
| "grad_norm": 0.006345423633281202, |
| "learning_rate": 1.600614439324117e-05, |
| "loss": 0.0001, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.0235504927684629, |
| "grad_norm": 0.00782413794335222, |
| "learning_rate": 1.590373783922171e-05, |
| "loss": 0.0001, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.0491488544733136, |
| "grad_norm": 0.004640264761208562, |
| "learning_rate": 1.5801331285202253e-05, |
| "loss": 0.0001, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.0747472161781646, |
| "grad_norm": 0.004837968806765287, |
| "learning_rate": 1.5698924731182796e-05, |
| "loss": 0.0001, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.1003455778830156, |
| "grad_norm": 0.003966873491178343, |
| "learning_rate": 1.559651817716334e-05, |
| "loss": 0.0001, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.1259439395878663, |
| "grad_norm": 0.007280756408676898, |
| "learning_rate": 1.5494111623143882e-05, |
| "loss": 0.0001, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.1515423012927173, |
| "grad_norm": 0.007676063330830094, |
| "learning_rate": 1.5391705069124425e-05, |
| "loss": 0.0001, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.177140662997568, |
| "grad_norm": 0.00759666513908814, |
| "learning_rate": 1.5289298515104968e-05, |
| "loss": 0.0001, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.202739024702419, |
| "grad_norm": 0.006094073180808279, |
| "learning_rate": 1.518689196108551e-05, |
| "loss": 0.0001, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.22833738640727, |
| "grad_norm": 0.009992171934592443, |
| "learning_rate": 1.5084485407066054e-05, |
| "loss": 0.0001, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.253935748112121, |
| "grad_norm": 0.004425939861516632, |
| "learning_rate": 1.4982078853046595e-05, |
| "loss": 0.0001, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.2795341098169717, |
| "grad_norm": 0.00618221779159838, |
| "learning_rate": 1.487967229902714e-05, |
| "loss": 0.0001, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.3051324715218227, |
| "grad_norm": 0.0037721408282199286, |
| "learning_rate": 1.477726574500768e-05, |
| "loss": 0.0001, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.3307308332266734, |
| "grad_norm": 0.006148728469232912, |
| "learning_rate": 1.4674859190988225e-05, |
| "loss": 0.0001, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.3563291949315244, |
| "grad_norm": 0.00661518038661282, |
| "learning_rate": 1.4572452636968768e-05, |
| "loss": 0.0001, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.3819275566363753, |
| "grad_norm": 0.0036588312853667437, |
| "learning_rate": 1.447004608294931e-05, |
| "loss": 0.0001, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.407525918341226, |
| "grad_norm": 0.005484459005497015, |
| "learning_rate": 1.4367639528929854e-05, |
| "loss": 0.0001, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.433124280046077, |
| "grad_norm": 0.010402616539983395, |
| "learning_rate": 1.4265232974910395e-05, |
| "loss": 0.0001, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.4587226417509278, |
| "grad_norm": 0.007335666061283071, |
| "learning_rate": 1.4162826420890938e-05, |
| "loss": 0.0001, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.4843210034557788, |
| "grad_norm": 0.006550005824502188, |
| "learning_rate": 1.4060419866871483e-05, |
| "loss": 0.0001, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.5099193651606297, |
| "grad_norm": 0.0027811575400146435, |
| "learning_rate": 1.3958013312852024e-05, |
| "loss": 0.0001, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.5355177268654807, |
| "grad_norm": 0.006308963330505965, |
| "learning_rate": 1.3855606758832567e-05, |
| "loss": 0.0001, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.5611160885703315, |
| "grad_norm": 0.006401332782035864, |
| "learning_rate": 1.3753200204813108e-05, |
| "loss": 0.0001, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.5867144502751824, |
| "grad_norm": 0.004718742517696451, |
| "learning_rate": 1.3650793650793652e-05, |
| "loss": 0.0001, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.6123128119800332, |
| "grad_norm": 0.003877950477268835, |
| "learning_rate": 1.3548387096774194e-05, |
| "loss": 0.0001, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.6379111736848841, |
| "grad_norm": 0.0063083392896106745, |
| "learning_rate": 1.3445980542754738e-05, |
| "loss": 0.0001, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.6635095353897351, |
| "grad_norm": 0.006412039922690925, |
| "learning_rate": 1.3343573988735281e-05, |
| "loss": 0.0001, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.689107897094586, |
| "grad_norm": 0.0029627793040849877, |
| "learning_rate": 1.3241167434715822e-05, |
| "loss": 0.0001, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.7147062587994368, |
| "grad_norm": 0.002164481803452725, |
| "learning_rate": 1.3138760880696367e-05, |
| "loss": 0.0001, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.7403046205042876, |
| "grad_norm": 0.004111311446657877, |
| "learning_rate": 1.3036354326676908e-05, |
| "loss": 0.0001, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.7659029822091385, |
| "grad_norm": 0.0024071410600000186, |
| "learning_rate": 1.2933947772657451e-05, |
| "loss": 0.0001, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.7915013439139895, |
| "grad_norm": 0.00428027777175206, |
| "learning_rate": 1.2831541218637992e-05, |
| "loss": 0.0001, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.8170997056188405, |
| "grad_norm": 0.0035937450146907115, |
| "learning_rate": 1.2729134664618537e-05, |
| "loss": 0.0001, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.8426980673236912, |
| "grad_norm": 0.007360372295628917, |
| "learning_rate": 1.262672811059908e-05, |
| "loss": 0.0001, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.8682964290285422, |
| "grad_norm": 0.004225210869508024, |
| "learning_rate": 1.2524321556579622e-05, |
| "loss": 0.0001, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.893894790733393, |
| "grad_norm": 0.00344941681643163, |
| "learning_rate": 1.2421915002560165e-05, |
| "loss": 0.0001, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.919493152438244, |
| "grad_norm": 0.0036839082828609084, |
| "learning_rate": 1.2319508448540707e-05, |
| "loss": 0.0001, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.945091514143095, |
| "grad_norm": 0.009934710271474315, |
| "learning_rate": 1.2217101894521251e-05, |
| "loss": 0.0001, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.9706898758479459, |
| "grad_norm": 0.0024663729558732648, |
| "learning_rate": 1.2114695340501794e-05, |
| "loss": 0.0001, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.9962882375527966, |
| "grad_norm": 0.003817898440024657, |
| "learning_rate": 1.2012288786482335e-05, |
| "loss": 0.0001, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.021502623832075, |
| "grad_norm": 0.0031639489328900696, |
| "learning_rate": 1.190988223246288e-05, |
| "loss": 0.0001, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.0471009855369258, |
| "grad_norm": 0.002020596329737904, |
| "learning_rate": 1.1807475678443421e-05, |
| "loss": 0.0001, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.0726993472417767, |
| "grad_norm": 0.0041297671592259375, |
| "learning_rate": 1.1705069124423964e-05, |
| "loss": 0.0001, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.0982977089466273, |
| "grad_norm": 0.0030187753698489852, |
| "learning_rate": 1.1602662570404507e-05, |
| "loss": 0.0001, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.1238960706514782, |
| "grad_norm": 0.006719688660763743, |
| "learning_rate": 1.150025601638505e-05, |
| "loss": 0.0001, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.149494432356329, |
| "grad_norm": 0.007455082822481147, |
| "learning_rate": 1.1397849462365593e-05, |
| "loss": 0.0001, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.17509279406118, |
| "grad_norm": 0.0020929393058777236, |
| "learning_rate": 1.1295442908346135e-05, |
| "loss": 0.0001, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.200691155766031, |
| "grad_norm": 0.004647943941373522, |
| "learning_rate": 1.1193036354326678e-05, |
| "loss": 0.0001, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.2262895174708817, |
| "grad_norm": 0.002919096778092067, |
| "learning_rate": 1.109062980030722e-05, |
| "loss": 0.0001, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.2518878791757326, |
| "grad_norm": 0.0022980302252630044, |
| "learning_rate": 1.0988223246287764e-05, |
| "loss": 0.0001, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.2774862408805836, |
| "grad_norm": 0.0007777129612344223, |
| "learning_rate": 1.0885816692268305e-05, |
| "loss": 0.0001, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.3030846025854346, |
| "grad_norm": 0.001856334209823885, |
| "learning_rate": 1.0783410138248848e-05, |
| "loss": 0.0001, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.3286829642902855, |
| "grad_norm": 0.009562277401713636, |
| "learning_rate": 1.0681003584229393e-05, |
| "loss": 0.0115, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.354281325995136, |
| "grad_norm": 0.008162550932912013, |
| "learning_rate": 1.0578597030209934e-05, |
| "loss": 0.0001, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.379879687699987, |
| "grad_norm": 0.006086517667163692, |
| "learning_rate": 1.0476190476190477e-05, |
| "loss": 0.0001, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.405478049404838, |
| "grad_norm": 0.0027507057924501116, |
| "learning_rate": 1.037378392217102e-05, |
| "loss": 0.0001, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.431076411109689, |
| "grad_norm": 0.005652923712553444, |
| "learning_rate": 1.0271377368151563e-05, |
| "loss": 0.0001, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.45667477281454, |
| "grad_norm": 0.0022548738506609723, |
| "learning_rate": 1.0168970814132104e-05, |
| "loss": 0.0001, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.482273134519391, |
| "grad_norm": 0.0034968078517645545, |
| "learning_rate": 1.0066564260112648e-05, |
| "loss": 0.0001, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.507871496224242, |
| "grad_norm": 0.0029725026316189704, |
| "learning_rate": 9.96415770609319e-06, |
| "loss": 0.0001, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.5334698579290924, |
| "grad_norm": 0.002417471051371214, |
| "learning_rate": 9.861751152073733e-06, |
| "loss": 0.0, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.5590682196339434, |
| "grad_norm": 0.0037793013717612994, |
| "learning_rate": 9.759344598054277e-06, |
| "loss": 0.0, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.5846665813387943, |
| "grad_norm": 0.0020282875993942345, |
| "learning_rate": 9.65693804403482e-06, |
| "loss": 0.0, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.6102649430436453, |
| "grad_norm": 0.006653751475623009, |
| "learning_rate": 9.554531490015361e-06, |
| "loss": 0.0, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.6358633047484963, |
| "grad_norm": 0.0006177303859045243, |
| "learning_rate": 9.452124935995904e-06, |
| "loss": 0.0, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.661461666453347, |
| "grad_norm": 0.001574016672401847, |
| "learning_rate": 9.349718381976447e-06, |
| "loss": 0.0, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.6870600281581978, |
| "grad_norm": 0.002722823477162341, |
| "learning_rate": 9.24731182795699e-06, |
| "loss": 0.0, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.7126583898630487, |
| "grad_norm": 0.0031352467257030996, |
| "learning_rate": 9.144905273937533e-06, |
| "loss": 0.0, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.7382567515678997, |
| "grad_norm": 0.002855241030593728, |
| "learning_rate": 9.042498719918076e-06, |
| "loss": 0.0, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.7638551132727507, |
| "grad_norm": 0.001616961495391244, |
| "learning_rate": 8.940092165898619e-06, |
| "loss": 0.0, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.789453474977601, |
| "grad_norm": 0.005071888691549564, |
| "learning_rate": 8.837685611879161e-06, |
| "loss": 0.0001, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.815051836682452, |
| "grad_norm": 0.0029692529220848914, |
| "learning_rate": 8.735279057859704e-06, |
| "loss": 0.0, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.840650198387303, |
| "grad_norm": 0.0004172545224757254, |
| "learning_rate": 8.632872503840246e-06, |
| "loss": 0.0, |
| "step": 5550 |
| }, |
| { |
| "epoch": 2.866248560092154, |
| "grad_norm": 0.008642812223624062, |
| "learning_rate": 8.530465949820788e-06, |
| "loss": 0.0, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.891846921797005, |
| "grad_norm": 0.0027205512248527812, |
| "learning_rate": 8.428059395801333e-06, |
| "loss": 0.0001, |
| "step": 5650 |
| }, |
| { |
| "epoch": 2.9174452835018556, |
| "grad_norm": 0.0019007511180201269, |
| "learning_rate": 8.325652841781874e-06, |
| "loss": 0.0, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.943043645206707, |
| "grad_norm": 0.0011932680335579203, |
| "learning_rate": 8.223246287762417e-06, |
| "loss": 0.0, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.9686420069115576, |
| "grad_norm": 0.0017683528985724605, |
| "learning_rate": 8.12083973374296e-06, |
| "loss": 0.0, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.9942403686164085, |
| "grad_norm": 0.0010172759283751088, |
| "learning_rate": 8.018433179723503e-06, |
| "loss": 0.0, |
| "step": 5850 |
| }, |
| { |
| "epoch": 3.0194547548956865, |
| "grad_norm": 0.0035687260604544088, |
| "learning_rate": 7.916026625704046e-06, |
| "loss": 0.0, |
| "step": 5900 |
| }, |
| { |
| "epoch": 3.0450531166005375, |
| "grad_norm": 0.0017674951945728509, |
| "learning_rate": 7.813620071684589e-06, |
| "loss": 0.0, |
| "step": 5950 |
| }, |
| { |
| "epoch": 3.0706514783053884, |
| "grad_norm": 0.001417796923981359, |
| "learning_rate": 7.711213517665132e-06, |
| "loss": 0.0, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.0962498400102394, |
| "grad_norm": 0.001392466393900218, |
| "learning_rate": 7.6088069636456744e-06, |
| "loss": 0.0, |
| "step": 6050 |
| }, |
| { |
| "epoch": 3.1218482017150904, |
| "grad_norm": 0.0010317131953453697, |
| "learning_rate": 7.5064004096262165e-06, |
| "loss": 0.0, |
| "step": 6100 |
| }, |
| { |
| "epoch": 3.1474465634199413, |
| "grad_norm": 0.002671567960001357, |
| "learning_rate": 7.403993855606759e-06, |
| "loss": 0.0, |
| "step": 6150 |
| }, |
| { |
| "epoch": 3.173044925124792, |
| "grad_norm": 0.0010993308683270022, |
| "learning_rate": 7.301587301587301e-06, |
| "loss": 0.0, |
| "step": 6200 |
| }, |
| { |
| "epoch": 3.198643286829643, |
| "grad_norm": 0.0011792521113742672, |
| "learning_rate": 7.199180747567845e-06, |
| "loss": 0.0, |
| "step": 6250 |
| }, |
| { |
| "epoch": 3.224241648534494, |
| "grad_norm": 0.002377211855286283, |
| "learning_rate": 7.096774193548388e-06, |
| "loss": 0.0, |
| "step": 6300 |
| }, |
| { |
| "epoch": 3.2498400102393448, |
| "grad_norm": 0.0013709631857059255, |
| "learning_rate": 6.994367639528931e-06, |
| "loss": 0.0, |
| "step": 6350 |
| }, |
| { |
| "epoch": 3.2754383719441957, |
| "grad_norm": 0.0010686686441566805, |
| "learning_rate": 6.891961085509473e-06, |
| "loss": 0.0, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.3010367336490463, |
| "grad_norm": 0.0013656971047177361, |
| "learning_rate": 6.789554531490016e-06, |
| "loss": 0.0, |
| "step": 6450 |
| }, |
| { |
| "epoch": 3.3266350953538972, |
| "grad_norm": 0.0030691234388346175, |
| "learning_rate": 6.687147977470559e-06, |
| "loss": 0.0, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.352233457058748, |
| "grad_norm": 0.0016958004183478155, |
| "learning_rate": 6.584741423451101e-06, |
| "loss": 0.0, |
| "step": 6550 |
| }, |
| { |
| "epoch": 3.377831818763599, |
| "grad_norm": 0.002679660529985062, |
| "learning_rate": 6.4823348694316445e-06, |
| "loss": 0.0, |
| "step": 6600 |
| }, |
| { |
| "epoch": 3.40343018046845, |
| "grad_norm": 0.0009123671464204819, |
| "learning_rate": 6.379928315412187e-06, |
| "loss": 0.0, |
| "step": 6650 |
| }, |
| { |
| "epoch": 3.4290285421733007, |
| "grad_norm": 0.0011492363622442438, |
| "learning_rate": 6.2775217613927295e-06, |
| "loss": 0.0, |
| "step": 6700 |
| }, |
| { |
| "epoch": 3.4546269038781516, |
| "grad_norm": 0.0008232117328172145, |
| "learning_rate": 6.175115207373272e-06, |
| "loss": 0.0, |
| "step": 6750 |
| }, |
| { |
| "epoch": 3.4802252655830026, |
| "grad_norm": 0.0022449544565699437, |
| "learning_rate": 6.072708653353815e-06, |
| "loss": 0.0, |
| "step": 6800 |
| }, |
| { |
| "epoch": 3.5058236272878536, |
| "grad_norm": 0.0015276485422571994, |
| "learning_rate": 5.970302099334357e-06, |
| "loss": 0.0, |
| "step": 6850 |
| }, |
| { |
| "epoch": 3.5314219889927045, |
| "grad_norm": 0.0011827584209431721, |
| "learning_rate": 5.867895545314901e-06, |
| "loss": 0.0, |
| "step": 6900 |
| }, |
| { |
| "epoch": 3.5570203506975555, |
| "grad_norm": 0.003917245208198937, |
| "learning_rate": 5.765488991295444e-06, |
| "loss": 0.0, |
| "step": 6950 |
| }, |
| { |
| "epoch": 3.5826187124024065, |
| "grad_norm": 0.0016006551963278512, |
| "learning_rate": 5.663082437275986e-06, |
| "loss": 0.0, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.608217074107257, |
| "grad_norm": 0.0006324168438582504, |
| "learning_rate": 5.560675883256529e-06, |
| "loss": 0.0, |
| "step": 7050 |
| }, |
| { |
| "epoch": 3.633815435812108, |
| "grad_norm": 0.0019190937453439484, |
| "learning_rate": 5.458269329237072e-06, |
| "loss": 0.0, |
| "step": 7100 |
| }, |
| { |
| "epoch": 3.659413797516959, |
| "grad_norm": 0.0014235404292782222, |
| "learning_rate": 5.355862775217614e-06, |
| "loss": 0.0, |
| "step": 7150 |
| }, |
| { |
| "epoch": 3.68501215922181, |
| "grad_norm": 0.002036273934913596, |
| "learning_rate": 5.253456221198157e-06, |
| "loss": 0.0, |
| "step": 7200 |
| }, |
| { |
| "epoch": 3.710610520926661, |
| "grad_norm": 0.001317406088761277, |
| "learning_rate": 5.1510496671787e-06, |
| "loss": 0.0, |
| "step": 7250 |
| }, |
| { |
| "epoch": 3.7362088826315114, |
| "grad_norm": 0.0036103172590374257, |
| "learning_rate": 5.0486431131592425e-06, |
| "loss": 0.0, |
| "step": 7300 |
| }, |
| { |
| "epoch": 3.7618072443363624, |
| "grad_norm": 0.004126819254015297, |
| "learning_rate": 4.946236559139785e-06, |
| "loss": 0.0, |
| "step": 7350 |
| }, |
| { |
| "epoch": 3.7874056060412133, |
| "grad_norm": 0.0016332834938483983, |
| "learning_rate": 4.843830005120328e-06, |
| "loss": 0.0, |
| "step": 7400 |
| }, |
| { |
| "epoch": 3.8130039677460643, |
| "grad_norm": 0.0004651327688046216, |
| "learning_rate": 4.741423451100871e-06, |
| "loss": 0.0, |
| "step": 7450 |
| }, |
| { |
| "epoch": 3.8386023294509153, |
| "grad_norm": 0.0014442256648981874, |
| "learning_rate": 4.639016897081414e-06, |
| "loss": 0.0, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.864200691155766, |
| "grad_norm": 0.0044845026985442255, |
| "learning_rate": 4.536610343061956e-06, |
| "loss": 0.0, |
| "step": 7550 |
| }, |
| { |
| "epoch": 3.889799052860617, |
| "grad_norm": 0.0018809502401269417, |
| "learning_rate": 4.434203789042499e-06, |
| "loss": 0.0, |
| "step": 7600 |
| }, |
| { |
| "epoch": 3.9153974145654677, |
| "grad_norm": 0.0038926669396500383, |
| "learning_rate": 4.331797235023042e-06, |
| "loss": 0.0, |
| "step": 7650 |
| }, |
| { |
| "epoch": 3.9409957762703187, |
| "grad_norm": 0.004267638189231195, |
| "learning_rate": 4.229390681003585e-06, |
| "loss": 0.0, |
| "step": 7700 |
| }, |
| { |
| "epoch": 3.9665941379751697, |
| "grad_norm": 0.00238145784225052, |
| "learning_rate": 4.126984126984127e-06, |
| "loss": 0.0, |
| "step": 7750 |
| }, |
| { |
| "epoch": 3.99219249968002, |
| "grad_norm": 0.0014756463100693462, |
| "learning_rate": 4.0245775729646705e-06, |
| "loss": 0.0, |
| "step": 7800 |
| }, |
| { |
| "epoch": 4.017406885959298, |
| "grad_norm": 0.0014856407081637794, |
| "learning_rate": 3.9221710189452126e-06, |
| "loss": 0.0, |
| "step": 7850 |
| }, |
| { |
| "epoch": 4.04300524766415, |
| "grad_norm": 0.0023551139030403893, |
| "learning_rate": 3.8197644649257554e-06, |
| "loss": 0.0, |
| "step": 7900 |
| }, |
| { |
| "epoch": 4.068603609369, |
| "grad_norm": 0.0011981597110419454, |
| "learning_rate": 3.7173579109062983e-06, |
| "loss": 0.0, |
| "step": 7950 |
| }, |
| { |
| "epoch": 4.0942019710738515, |
| "grad_norm": 0.0028843508232214783, |
| "learning_rate": 3.6149513568868412e-06, |
| "loss": 0.0, |
| "step": 8000 |
| }, |
| { |
| "epoch": 4.119800332778702, |
| "grad_norm": 0.0018843735220208588, |
| "learning_rate": 3.5125448028673837e-06, |
| "loss": 0.0, |
| "step": 8050 |
| }, |
| { |
| "epoch": 4.1453986944835535, |
| "grad_norm": 0.0009664312361068302, |
| "learning_rate": 3.4101382488479266e-06, |
| "loss": 0.0, |
| "step": 8100 |
| }, |
| { |
| "epoch": 4.170997056188404, |
| "grad_norm": 0.0007573250297130035, |
| "learning_rate": 3.3077316948284695e-06, |
| "loss": 0.0, |
| "step": 8150 |
| }, |
| { |
| "epoch": 4.1965954178932545, |
| "grad_norm": 0.0011501400163836143, |
| "learning_rate": 3.205325140809012e-06, |
| "loss": 0.0, |
| "step": 8200 |
| }, |
| { |
| "epoch": 4.222193779598106, |
| "grad_norm": 0.001612883833598221, |
| "learning_rate": 3.1029185867895553e-06, |
| "loss": 0.0, |
| "step": 8250 |
| }, |
| { |
| "epoch": 4.2477921413029565, |
| "grad_norm": 0.0018641211713703483, |
| "learning_rate": 3.0005120327700977e-06, |
| "loss": 0.0, |
| "step": 8300 |
| }, |
| { |
| "epoch": 4.273390503007808, |
| "grad_norm": 0.0009646184071954378, |
| "learning_rate": 2.89810547875064e-06, |
| "loss": 0.0, |
| "step": 8350 |
| }, |
| { |
| "epoch": 4.298988864712658, |
| "grad_norm": 0.0009154804456547847, |
| "learning_rate": 2.7956989247311827e-06, |
| "loss": 0.0, |
| "step": 8400 |
| }, |
| { |
| "epoch": 4.324587226417509, |
| "grad_norm": 0.0020826965736280532, |
| "learning_rate": 2.693292370711726e-06, |
| "loss": 0.0, |
| "step": 8450 |
| }, |
| { |
| "epoch": 4.35018558812236, |
| "grad_norm": 0.0011199777755488004, |
| "learning_rate": 2.5908858166922684e-06, |
| "loss": 0.0, |
| "step": 8500 |
| }, |
| { |
| "epoch": 4.375783949827211, |
| "grad_norm": 0.0008422274985046506, |
| "learning_rate": 2.4884792626728113e-06, |
| "loss": 0.0, |
| "step": 8550 |
| }, |
| { |
| "epoch": 4.401382311532062, |
| "grad_norm": 0.0006334420803463363, |
| "learning_rate": 2.386072708653354e-06, |
| "loss": 0.0, |
| "step": 8600 |
| }, |
| { |
| "epoch": 4.426980673236913, |
| "grad_norm": 0.001586741258322779, |
| "learning_rate": 2.2836661546338967e-06, |
| "loss": 0.0, |
| "step": 8650 |
| }, |
| { |
| "epoch": 4.452579034941763, |
| "grad_norm": 0.0009727630299961603, |
| "learning_rate": 2.1812596006144396e-06, |
| "loss": 0.0, |
| "step": 8700 |
| }, |
| { |
| "epoch": 4.478177396646615, |
| "grad_norm": 0.0018829318719699433, |
| "learning_rate": 2.078853046594982e-06, |
| "loss": 0.0, |
| "step": 8750 |
| }, |
| { |
| "epoch": 4.503775758351465, |
| "grad_norm": 0.0010542211381998285, |
| "learning_rate": 1.976446492575525e-06, |
| "loss": 0.0, |
| "step": 8800 |
| }, |
| { |
| "epoch": 4.529374120056317, |
| "grad_norm": 0.0008019247102530823, |
| "learning_rate": 1.8740399385560678e-06, |
| "loss": 0.0, |
| "step": 8850 |
| }, |
| { |
| "epoch": 4.554972481761167, |
| "grad_norm": 0.0025552327229986455, |
| "learning_rate": 1.7716333845366105e-06, |
| "loss": 0.0, |
| "step": 8900 |
| }, |
| { |
| "epoch": 4.580570843466019, |
| "grad_norm": 0.003395542465106546, |
| "learning_rate": 1.6692268305171534e-06, |
| "loss": 0.0, |
| "step": 8950 |
| }, |
| { |
| "epoch": 4.606169205170869, |
| "grad_norm": 0.0013482230913774654, |
| "learning_rate": 1.5668202764976959e-06, |
| "loss": 0.0, |
| "step": 9000 |
| }, |
| { |
| "epoch": 4.63176756687572, |
| "grad_norm": 0.0015382731152315148, |
| "learning_rate": 1.4644137224782387e-06, |
| "loss": 0.0, |
| "step": 9050 |
| }, |
| { |
| "epoch": 4.657365928580571, |
| "grad_norm": 0.0019761534195212524, |
| "learning_rate": 1.3620071684587816e-06, |
| "loss": 0.0, |
| "step": 9100 |
| }, |
| { |
| "epoch": 4.682964290285422, |
| "grad_norm": 0.00038991149426242233, |
| "learning_rate": 1.259600614439324e-06, |
| "loss": 0.0, |
| "step": 9150 |
| }, |
| { |
| "epoch": 4.708562651990272, |
| "grad_norm": 0.002601115466918085, |
| "learning_rate": 1.157194060419867e-06, |
| "loss": 0.0, |
| "step": 9200 |
| }, |
| { |
| "epoch": 4.7341610136951235, |
| "grad_norm": 0.002305779477709061, |
| "learning_rate": 1.0547875064004097e-06, |
| "loss": 0.0, |
| "step": 9250 |
| }, |
| { |
| "epoch": 4.759759375399974, |
| "grad_norm": 0.0014673554584682805, |
| "learning_rate": 9.523809523809525e-07, |
| "loss": 0.0, |
| "step": 9300 |
| }, |
| { |
| "epoch": 4.7853577371048255, |
| "grad_norm": 0.0032349538713087054, |
| "learning_rate": 8.499743983614952e-07, |
| "loss": 0.0, |
| "step": 9350 |
| }, |
| { |
| "epoch": 4.810956098809676, |
| "grad_norm": 0.000820434912366508, |
| "learning_rate": 7.475678443420379e-07, |
| "loss": 0.0, |
| "step": 9400 |
| }, |
| { |
| "epoch": 4.836554460514527, |
| "grad_norm": 0.0018281915833400296, |
| "learning_rate": 6.451612903225807e-07, |
| "loss": 0.0, |
| "step": 9450 |
| }, |
| { |
| "epoch": 4.862152822219378, |
| "grad_norm": 0.001079014832848559, |
| "learning_rate": 5.427547363031235e-07, |
| "loss": 0.0, |
| "step": 9500 |
| }, |
| { |
| "epoch": 4.8877511839242285, |
| "grad_norm": 0.0009643703560768136, |
| "learning_rate": 4.4034818228366616e-07, |
| "loss": 0.0, |
| "step": 9550 |
| }, |
| { |
| "epoch": 4.91334954562908, |
| "grad_norm": 0.0012576968463295232, |
| "learning_rate": 3.3794162826420895e-07, |
| "loss": 0.0, |
| "step": 9600 |
| }, |
| { |
| "epoch": 4.93894790733393, |
| "grad_norm": 0.0017081309137603447, |
| "learning_rate": 2.355350742447517e-07, |
| "loss": 0.0, |
| "step": 9650 |
| }, |
| { |
| "epoch": 4.964546269038782, |
| "grad_norm": 0.0014382920472094229, |
| "learning_rate": 1.3312852022529444e-07, |
| "loss": 0.0, |
| "step": 9700 |
| }, |
| { |
| "epoch": 4.990144630743632, |
| "grad_norm": 0.0014054516528993024, |
| "learning_rate": 3.0721966205837177e-08, |
| "loss": 0.0, |
| "step": 9750 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 9765, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.471170805648589e+16, |
| "train_batch_size": 6, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|