| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 331, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.015105740181268883, |
| "grad_norm": 65.32524871826172, |
| "learning_rate": 2.3529411764705885e-06, |
| "loss": 1.7699, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.030211480362537766, |
| "grad_norm": 1.1226208209991455, |
| "learning_rate": 5.294117647058824e-06, |
| "loss": 0.3128, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.045317220543806644, |
| "grad_norm": 1.0650861263275146, |
| "learning_rate": 8.23529411764706e-06, |
| "loss": 0.0691, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.06042296072507553, |
| "grad_norm": 0.5207144021987915, |
| "learning_rate": 1.1176470588235295e-05, |
| "loss": 0.0675, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0755287009063444, |
| "grad_norm": 0.518444299697876, |
| "learning_rate": 1.4117647058823532e-05, |
| "loss": 0.0669, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.09063444108761329, |
| "grad_norm": 0.43817996978759766, |
| "learning_rate": 1.7058823529411767e-05, |
| "loss": 0.0658, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10574018126888217, |
| "grad_norm": 1.0586720705032349, |
| "learning_rate": 2e-05, |
| "loss": 0.0648, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.12084592145015106, |
| "grad_norm": 1.3109872341156006, |
| "learning_rate": 1.9986017152454497e-05, |
| "loss": 0.0657, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.13595166163141995, |
| "grad_norm": 0.48269444704055786, |
| "learning_rate": 1.9944107713823068e-05, |
| "loss": 0.0609, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1510574018126888, |
| "grad_norm": 0.9956803917884827, |
| "learning_rate": 1.9874388886763944e-05, |
| "loss": 0.0712, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1661631419939577, |
| "grad_norm": 0.8316070437431335, |
| "learning_rate": 1.9777055644823087e-05, |
| "loss": 0.0669, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.18126888217522658, |
| "grad_norm": 0.09602731466293335, |
| "learning_rate": 1.9652380187177128e-05, |
| "loss": 0.0623, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.19637462235649547, |
| "grad_norm": 0.23348145186901093, |
| "learning_rate": 1.9500711177409456e-05, |
| "loss": 0.0561, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.21148036253776434, |
| "grad_norm": 0.1733577698469162, |
| "learning_rate": 1.932247276844826e-05, |
| "loss": 0.0614, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.22658610271903323, |
| "grad_norm": 0.21881505846977234, |
| "learning_rate": 1.9118163416393392e-05, |
| "loss": 0.0567, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.24169184290030213, |
| "grad_norm": 0.3463956117630005, |
| "learning_rate": 1.8888354486549238e-05, |
| "loss": 0.0598, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.256797583081571, |
| "grad_norm": 0.11425940692424774, |
| "learning_rate": 1.863368865556191e-05, |
| "loss": 0.0564, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.2719033232628399, |
| "grad_norm": 0.15162110328674316, |
| "learning_rate": 1.8354878114129368e-05, |
| "loss": 0.056, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.28700906344410876, |
| "grad_norm": 0.1482110172510147, |
| "learning_rate": 1.8052702575310588e-05, |
| "loss": 0.0493, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3021148036253776, |
| "grad_norm": 0.36696767807006836, |
| "learning_rate": 1.772800709400383e-05, |
| "loss": 0.048, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.31722054380664655, |
| "grad_norm": 0.25465500354766846, |
| "learning_rate": 1.7381699703691866e-05, |
| "loss": 0.0537, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3323262839879154, |
| "grad_norm": 0.38290655612945557, |
| "learning_rate": 1.7014748877063212e-05, |
| "loss": 0.0537, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3474320241691843, |
| "grad_norm": 0.3702133297920227, |
| "learning_rate": 1.6628180817610963e-05, |
| "loss": 0.051, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.36253776435045315, |
| "grad_norm": 0.1437833160161972, |
| "learning_rate": 1.6223076589783368e-05, |
| "loss": 0.0473, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.3776435045317221, |
| "grad_norm": 0.15008145570755005, |
| "learning_rate": 1.5800569095711983e-05, |
| "loss": 0.0509, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.39274924471299094, |
| "grad_norm": 0.15623906254768372, |
| "learning_rate": 1.5361839906972095e-05, |
| "loss": 0.0577, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4078549848942598, |
| "grad_norm": 0.10994814336299896, |
| "learning_rate": 1.4908115960235683e-05, |
| "loss": 0.0513, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4229607250755287, |
| "grad_norm": 0.15456193685531616, |
| "learning_rate": 1.4440666126057743e-05, |
| "loss": 0.0572, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4380664652567976, |
| "grad_norm": 0.16252553462982178, |
| "learning_rate": 1.396079766039157e-05, |
| "loss": 0.0538, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.45317220543806647, |
| "grad_norm": 0.4133623540401459, |
| "learning_rate": 1.3469852548756626e-05, |
| "loss": 0.0512, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.46827794561933533, |
| "grad_norm": 0.1537550389766693, |
| "learning_rate": 1.296920375328275e-05, |
| "loss": 0.0494, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.48338368580060426, |
| "grad_norm": 0.5091661214828491, |
| "learning_rate": 1.2460251373126136e-05, |
| "loss": 0.0513, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4984894259818731, |
| "grad_norm": 0.18158994615077972, |
| "learning_rate": 1.194441872899471e-05, |
| "loss": 0.046, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.513595166163142, |
| "grad_norm": 0.39592429995536804, |
| "learning_rate": 1.1423148382732854e-05, |
| "loss": 0.05, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5287009063444109, |
| "grad_norm": 0.22556884586811066, |
| "learning_rate": 1.0897898103096917e-05, |
| "loss": 0.0485, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5438066465256798, |
| "grad_norm": 0.35634785890579224, |
| "learning_rate": 1.0370136789003582e-05, |
| "loss": 0.0458, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5589123867069486, |
| "grad_norm": 0.5002294778823853, |
| "learning_rate": 9.841340361651921e-06, |
| "loss": 0.0471, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5740181268882175, |
| "grad_norm": 0.1892608106136322, |
| "learning_rate": 9.312987637007191e-06, |
| "loss": 0.0397, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5891238670694864, |
| "grad_norm": 0.2997090816497803, |
| "learning_rate": 8.786556190189183e-06, |
| "loss": 0.0432, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6042296072507553, |
| "grad_norm": 0.10828253626823425, |
| "learning_rate": 8.263518223330698e-06, |
| "loss": 0.0456, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6193353474320241, |
| "grad_norm": 0.11140269786119461, |
| "learning_rate": 7.745336448461958e-06, |
| "loss": 0.0464, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6344410876132931, |
| "grad_norm": 0.21382257342338562, |
| "learning_rate": 7.233459996934731e-06, |
| "loss": 0.0436, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.649546827794562, |
| "grad_norm": 0.22236904501914978, |
| "learning_rate": 6.729320366825785e-06, |
| "loss": 0.0395, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6646525679758308, |
| "grad_norm": 0.18689888715744019, |
| "learning_rate": 6.234327419653013e-06, |
| "loss": 0.0434, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6797583081570997, |
| "grad_norm": 0.11527472734451294, |
| "learning_rate": 5.749865437599703e-06, |
| "loss": 0.0448, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6948640483383686, |
| "grad_norm": 0.12977807223796844, |
| "learning_rate": 5.277289252273175e-06, |
| "loss": 0.0443, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7099697885196374, |
| "grad_norm": 0.28918567299842834, |
| "learning_rate": 4.817920455824045e-06, |
| "loss": 0.042, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7250755287009063, |
| "grad_norm": 0.1708402782678604, |
| "learning_rate": 4.373043705021899e-06, |
| "loss": 0.036, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7401812688821753, |
| "grad_norm": 0.2525939345359802, |
| "learning_rate": 3.943903128623336e-06, |
| "loss": 0.0392, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7552870090634441, |
| "grad_norm": 0.3220805525779724, |
| "learning_rate": 3.5316988480794255e-06, |
| "loss": 0.036, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.770392749244713, |
| "grad_norm": 0.3768947422504425, |
| "learning_rate": 3.1375836213126653e-06, |
| "loss": 0.0373, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.7854984894259819, |
| "grad_norm": 0.15737439692020416, |
| "learning_rate": 2.7626596189492983e-06, |
| "loss": 0.0363, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8006042296072508, |
| "grad_norm": 0.24124516546726227, |
| "learning_rate": 2.4079753420225694e-06, |
| "loss": 0.0345, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.8157099697885196, |
| "grad_norm": 0.2828380763530731, |
| "learning_rate": 2.0745226897666858e-06, |
| "loss": 0.0427, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8308157099697885, |
| "grad_norm": 0.4477688670158386, |
| "learning_rate": 1.7632341857016733e-06, |
| "loss": 0.0382, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.8459214501510574, |
| "grad_norm": 0.35590699315071106, |
| "learning_rate": 1.4749803697665366e-06, |
| "loss": 0.0423, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8610271903323263, |
| "grad_norm": 0.5138667821884155, |
| "learning_rate": 1.2105673637938054e-06, |
| "loss": 0.0411, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.8761329305135952, |
| "grad_norm": 0.2003919929265976, |
| "learning_rate": 9.707346171337895e-07, |
| "loss": 0.0366, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8912386706948641, |
| "grad_norm": 0.4709911346435547, |
| "learning_rate": 7.561528387330797e-07, |
| "loss": 0.036, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.9063444108761329, |
| "grad_norm": 0.14394928514957428, |
| "learning_rate": 5.674221214503639e-07, |
| "loss": 0.0324, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9214501510574018, |
| "grad_norm": 0.2846868932247162, |
| "learning_rate": 4.0507026385502747e-07, |
| "loss": 0.036, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.9365558912386707, |
| "grad_norm": 0.2303285300731659, |
| "learning_rate": 2.6955129420176193e-07, |
| "loss": 0.0389, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9516616314199395, |
| "grad_norm": 0.27734264731407166, |
| "learning_rate": 1.612442007090076e-07, |
| "loss": 0.038, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.9667673716012085, |
| "grad_norm": 0.17142164707183838, |
| "learning_rate": 8.04518716920466e-08, |
| "loss": 0.0294, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9818731117824774, |
| "grad_norm": 0.14783106744289398, |
| "learning_rate": 2.7400248514776184e-08, |
| "loss": 0.0394, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.9969788519637462, |
| "grad_norm": 0.2399517148733139, |
| "learning_rate": 2.237693728981416e-09, |
| "loss": 0.0394, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 331, |
| "total_flos": 3.732036479342346e+17, |
| "train_loss": 0.07862781884086817, |
| "train_runtime": 1853.8902, |
| "train_samples_per_second": 11.415, |
| "train_steps_per_second": 0.179 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 331, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.732036479342346e+17, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|