| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.384, |
| "eval_steps": 500, |
| "global_step": 60, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0064, |
| "grad_norm": 6.651687052805929, |
| "learning_rate": 3.125e-07, |
| "loss": 0.4684, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 6.265915305107489, |
| "learning_rate": 6.25e-07, |
| "loss": 0.4563, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 6.401273380714525, |
| "learning_rate": 9.375000000000001e-07, |
| "loss": 0.4547, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 6.56315483941447, |
| "learning_rate": 1.25e-06, |
| "loss": 0.4494, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 6.59719209633345, |
| "learning_rate": 1.5625e-06, |
| "loss": 0.4575, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0384, |
| "grad_norm": 7.53537793152979, |
| "learning_rate": 1.8750000000000003e-06, |
| "loss": 0.4427, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0448, |
| "grad_norm": 9.571011007558802, |
| "learning_rate": 2.1875000000000002e-06, |
| "loss": 0.4378, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0512, |
| "grad_norm": 10.148947574276539, |
| "learning_rate": 2.5e-06, |
| "loss": 0.4369, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0576, |
| "grad_norm": 5.2264536848502035, |
| "learning_rate": 2.8125e-06, |
| "loss": 0.4002, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 4.788413945220771, |
| "learning_rate": 3.125e-06, |
| "loss": 0.3976, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0704, |
| "grad_norm": 4.514781509921602, |
| "learning_rate": 3.4375e-06, |
| "loss": 0.3942, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0768, |
| "grad_norm": 4.538614240888282, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.3761, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0832, |
| "grad_norm": 3.0503754653246355, |
| "learning_rate": 4.0625000000000005e-06, |
| "loss": 0.3664, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0896, |
| "grad_norm": 3.0554055571037093, |
| "learning_rate": 4.3750000000000005e-06, |
| "loss": 0.3585, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 1.9763698617399021, |
| "learning_rate": 4.6875000000000004e-06, |
| "loss": 0.3561, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.1024, |
| "grad_norm": 1.5108193822789298, |
| "learning_rate": 5e-06, |
| "loss": 0.3205, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.1088, |
| "grad_norm": 1.3652592836546582, |
| "learning_rate": 4.999370587356267e-06, |
| "loss": 0.2957, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1152, |
| "grad_norm": 1.8791692065602286, |
| "learning_rate": 4.997482666353287e-06, |
| "loss": 0.2974, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.1216, |
| "grad_norm": 1.6499778792766742, |
| "learning_rate": 4.99433718761614e-06, |
| "loss": 0.2882, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.7803281647114307, |
| "learning_rate": 4.989935734988098e-06, |
| "loss": 0.2822, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1344, |
| "grad_norm": 0.6228676679830198, |
| "learning_rate": 4.984280524733107e-06, |
| "loss": 0.2708, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.1408, |
| "grad_norm": 0.6503139687960523, |
| "learning_rate": 4.977374404419838e-06, |
| "loss": 0.2624, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.1472, |
| "grad_norm": 0.6050613097065756, |
| "learning_rate": 4.9692208514878445e-06, |
| "loss": 0.2595, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.1536, |
| "grad_norm": 0.513863899627231, |
| "learning_rate": 4.959823971496575e-06, |
| "loss": 0.24, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5401602458085882, |
| "learning_rate": 4.949188496058089e-06, |
| "loss": 0.2564, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1664, |
| "grad_norm": 0.4588626341682895, |
| "learning_rate": 4.937319780454559e-06, |
| "loss": 0.2559, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.1728, |
| "grad_norm": 0.5618821751922192, |
| "learning_rate": 4.924223800941718e-06, |
| "loss": 0.2396, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.1792, |
| "grad_norm": 0.47667362996506163, |
| "learning_rate": 4.909907151739634e-06, |
| "loss": 0.2417, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.1856, |
| "grad_norm": 0.4186548264796007, |
| "learning_rate": 4.894377041712327e-06, |
| "loss": 0.2439, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.4531599370801774, |
| "learning_rate": 4.8776412907378845e-06, |
| "loss": 0.2415, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1984, |
| "grad_norm": 0.3909364624586131, |
| "learning_rate": 4.859708325770919e-06, |
| "loss": 0.2302, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.2048, |
| "grad_norm": 0.31305049271337076, |
| "learning_rate": 4.8405871765993435e-06, |
| "loss": 0.2333, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.2112, |
| "grad_norm": 0.3219167869220544, |
| "learning_rate": 4.820287471297598e-06, |
| "loss": 0.2341, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.2176, |
| "grad_norm": 0.3118803828567028, |
| "learning_rate": 4.7988194313786275e-06, |
| "loss": 0.2189, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.29310345514929165, |
| "learning_rate": 4.7761938666470405e-06, |
| "loss": 0.2114, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2304, |
| "grad_norm": 0.3347074091997152, |
| "learning_rate": 4.752422169756048e-06, |
| "loss": 0.2249, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.2368, |
| "grad_norm": 0.29649389449972713, |
| "learning_rate": 4.72751631047092e-06, |
| "loss": 0.2206, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.2432, |
| "grad_norm": 0.2837151778804135, |
| "learning_rate": 4.701488829641845e-06, |
| "loss": 0.2304, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.2496, |
| "grad_norm": 0.2806309744043982, |
| "learning_rate": 4.674352832889239e-06, |
| "loss": 0.2207, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.27783183257142374, |
| "learning_rate": 4.646121984004666e-06, |
| "loss": 0.2155, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2624, |
| "grad_norm": 0.27666228945567495, |
| "learning_rate": 4.6168104980707105e-06, |
| "loss": 0.2127, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.2688, |
| "grad_norm": 0.275485776092337, |
| "learning_rate": 4.586433134303257e-06, |
| "loss": 0.2238, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.2752, |
| "grad_norm": 0.27153628095668647, |
| "learning_rate": 4.555005188619776e-06, |
| "loss": 0.21, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.2816, |
| "grad_norm": 0.274290824493659, |
| "learning_rate": 4.522542485937369e-06, |
| "loss": 0.2131, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.2694663744414641, |
| "learning_rate": 4.4890613722044526e-06, |
| "loss": 0.2108, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2944, |
| "grad_norm": 0.2642992894492684, |
| "learning_rate": 4.454578706170075e-06, |
| "loss": 0.2069, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.3008, |
| "grad_norm": 0.26815076119929016, |
| "learning_rate": 4.4191118508950286e-06, |
| "loss": 0.2063, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.3072, |
| "grad_norm": 0.25684981864747414, |
| "learning_rate": 4.382678665009028e-06, |
| "loss": 0.2121, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.3136, |
| "grad_norm": 0.2536186069276322, |
| "learning_rate": 4.345297493718352e-06, |
| "loss": 0.2074, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.256667169168501, |
| "learning_rate": 4.3069871595684795e-06, |
| "loss": 0.2106, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3264, |
| "grad_norm": 0.2631748867077966, |
| "learning_rate": 4.267766952966369e-06, |
| "loss": 0.2031, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.3328, |
| "grad_norm": 0.2567671139005443, |
| "learning_rate": 4.227656622467162e-06, |
| "loss": 0.2063, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.3392, |
| "grad_norm": 0.26280340427063503, |
| "learning_rate": 4.186676364830187e-06, |
| "loss": 0.212, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.3456, |
| "grad_norm": 0.2599237100252332, |
| "learning_rate": 4.144846814849282e-06, |
| "loss": 0.2161, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.25711041735736345, |
| "learning_rate": 4.102189034962561e-06, |
| "loss": 0.1959, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.3584, |
| "grad_norm": 0.26568396343464284, |
| "learning_rate": 4.058724504646834e-06, |
| "loss": 0.2053, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.3648, |
| "grad_norm": 0.26965052490303654, |
| "learning_rate": 4.01447510960205e-06, |
| "loss": 0.197, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.3712, |
| "grad_norm": 0.2474576437319212, |
| "learning_rate": 3.969463130731183e-06, |
| "loss": 0.1963, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.3776, |
| "grad_norm": 0.26050605159298984, |
| "learning_rate": 3.92371123292113e-06, |
| "loss": 0.1985, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.25523756004269815, |
| "learning_rate": 3.8772424536302565e-06, |
| "loss": 0.1957, |
| "step": 60 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 156, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 121473953169408.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|