| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.999679630934837, |
| "eval_steps": 500, |
| "global_step": 39015, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9800000000000004e-05, |
| "loss": 0.049, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.997937713868709e-05, |
| "loss": 0.0574, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.991721127523219e-05, |
| "loss": 0.0621, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.981360546534587e-05, |
| "loss": 0.0639, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.966873201640091e-05, |
| "loss": 0.0679, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.948283186819981e-05, |
| "loss": 0.0746, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.9256214192266534e-05, |
| "loss": 0.0778, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.898982977024972e-05, |
| "loss": 0.0752, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.868305410275411e-05, |
| "loss": 0.0763, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.833689102217645e-05, |
| "loss": 0.0769, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.795191623417485e-05, |
| "loss": 0.0755, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.752965392266396e-05, |
| "loss": 0.0751, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 4.706911415528901e-05, |
| "loss": 0.0783, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.6571871126931825e-05, |
| "loss": 0.078, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 4.6038751805114225e-05, |
| "loss": 0.0785, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 4.547064282341224e-05, |
| "loss": 0.0719, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 4.486848900689111e-05, |
| "loss": 0.0593, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 4.423329180076173e-05, |
| "loss": 0.0636, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 4.356747316744135e-05, |
| "loss": 0.0639, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 4.286947219232885e-05, |
| "loss": 0.0614, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 4.214175240309113e-05, |
| "loss": 0.0657, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 4.138552407437527e-05, |
| "loss": 0.0625, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 4.0602044893531846e-05, |
| "loss": 0.065, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 3.979426173115841e-05, |
| "loss": 0.0625, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 3.89619724591825e-05, |
| "loss": 0.0635, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 3.81048191928653e-05, |
| "loss": 0.0627, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 3.7225871232136e-05, |
| "loss": 0.0669, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 3.6326590360017014e-05, |
| "loss": 0.0638, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 3.540847217530374e-05, |
| "loss": 0.0632, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 3.447304360522618e-05, |
| "loss": 0.0666, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 3.352186036600821e-05, |
| "loss": 0.0626, |
| "step": 15500 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 3.2556504375547626e-05, |
| "loss": 0.0514, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 3.158054843335624e-05, |
| "loss": 0.0463, |
| "step": 16500 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 3.0591704553180736e-05, |
| "loss": 0.0467, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 2.959356107945986e-05, |
| "loss": 0.0476, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 2.858979611033543e-05, |
| "loss": 0.0505, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 2.75780564653496e-05, |
| "loss": 0.0502, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 2.656202924096166e-05, |
| "loss": 0.047, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 2.554340419744563e-05, |
| "loss": 0.0461, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 2.4523875415522277e-05, |
| "loss": 0.0465, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 2.350513847892453e-05, |
| "loss": 0.05, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 2.2488887654463146e-05, |
| "loss": 0.0471, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 2.1478831935354516e-05, |
| "loss": 0.0478, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 2.047260339339565e-05, |
| "loss": 0.0467, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 1.9473904388782642e-05, |
| "loss": 0.0458, |
| "step": 22500 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 1.8484395863135113e-05, |
| "loss": 0.0451, |
| "step": 23000 |
| }, |
| { |
| "epoch": 3.01, |
| "learning_rate": 1.750572347333834e-05, |
| "loss": 0.0443, |
| "step": 23500 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 1.6541433761257506e-05, |
| "loss": 0.0307, |
| "step": 24000 |
| }, |
| { |
| "epoch": 3.14, |
| "learning_rate": 1.558926609041298e-05, |
| "loss": 0.0313, |
| "step": 24500 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 1.4652749461123058e-05, |
| "loss": 0.0362, |
| "step": 25000 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 1.3733441399168673e-05, |
| "loss": 0.035, |
| "step": 25500 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 1.2832870810670993e-05, |
| "loss": 0.0324, |
| "step": 26000 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 1.1952535439359563e-05, |
| "loss": 0.0335, |
| "step": 26500 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 1.1095594036489903e-05, |
| "loss": 0.0322, |
| "step": 27000 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 1.0260037628024472e-05, |
| "loss": 0.0332, |
| "step": 27500 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 9.448995329278178e-06, |
| "loss": 0.033, |
| "step": 28000 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 8.66381598900628e-06, |
| "loss": 0.035, |
| "step": 28500 |
| }, |
| { |
| "epoch": 3.72, |
| "learning_rate": 7.905805443140457e-06, |
| "loss": 0.035, |
| "step": 29000 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 7.177654318094604e-06, |
| "loss": 0.0349, |
| "step": 29500 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 6.4776555698389505e-06, |
| "loss": 0.0333, |
| "step": 30000 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 5.808461430995301e-06, |
| "loss": 0.0313, |
| "step": 30500 |
| }, |
| { |
| "epoch": 3.97, |
| "learning_rate": 5.171184841890728e-06, |
| "loss": 0.0358, |
| "step": 31000 |
| }, |
| { |
| "epoch": 4.04, |
| "learning_rate": 4.568060670213628e-06, |
| "loss": 0.0301, |
| "step": 31500 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 3.99767497937713e-06, |
| "loss": 0.028, |
| "step": 32000 |
| }, |
| { |
| "epoch": 4.16, |
| "learning_rate": 3.4622183668462316e-06, |
| "loss": 0.028, |
| "step": 32500 |
| }, |
| { |
| "epoch": 4.23, |
| "learning_rate": 2.962581353357577e-06, |
| "loss": 0.0269, |
| "step": 33000 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 2.5004837614951537e-06, |
| "loss": 0.0273, |
| "step": 33500 |
| }, |
| { |
| "epoch": 4.36, |
| "learning_rate": 2.0748422709038623e-06, |
| "loss": 0.0277, |
| "step": 34000 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 1.6873277319096742e-06, |
| "loss": 0.027, |
| "step": 34500 |
| }, |
| { |
| "epoch": 4.49, |
| "learning_rate": 1.3385846220008548e-06, |
| "loss": 0.0271, |
| "step": 35000 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 1.0297720953505841e-06, |
| "loss": 0.0297, |
| "step": 35500 |
| }, |
| { |
| "epoch": 4.61, |
| "learning_rate": 7.601661860652421e-07, |
| "loss": 0.0276, |
| "step": 36000 |
| }, |
| { |
| "epoch": 4.68, |
| "learning_rate": 5.308736731572172e-07, |
| "loss": 0.0283, |
| "step": 36500 |
| }, |
| { |
| "epoch": 4.74, |
| "learning_rate": 3.4227589422272943e-07, |
| "loss": 0.0305, |
| "step": 37000 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 1.9468650722975211e-07, |
| "loss": 0.0302, |
| "step": 37500 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 8.852233960627565e-08, |
| "loss": 0.028, |
| "step": 38000 |
| }, |
| { |
| "epoch": 4.93, |
| "learning_rate": 2.3534504957314906e-08, |
| "loss": 0.0344, |
| "step": 38500 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 9.056858269462787e-11, |
| "loss": 0.0306, |
| "step": 39000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 39015, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "total_flos": 3.307647360088965e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|