| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9956122856003191, |
| "eval_steps": 500, |
| "global_step": 117, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.008509506714532641, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.8719, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.017019013429065283, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.8402, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.025528520143597924, |
| "learning_rate": 1e-05, |
| "loss": 0.8438, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.034038026858130566, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.8171, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.04254753357266321, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.8029, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.05105704028719585, |
| "learning_rate": 2e-05, |
| "loss": 0.7858, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0595665470017285, |
| "learning_rate": 2.3333333333333336e-05, |
| "loss": 0.7671, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.06807605371626113, |
| "learning_rate": 2.6666666666666667e-05, |
| "loss": 0.7989, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.07658556043079377, |
| "learning_rate": 3e-05, |
| "loss": 0.7546, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.08509506714532641, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.745, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09360457385985906, |
| "learning_rate": 3.6666666666666666e-05, |
| "loss": 0.7169, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.1021140805743917, |
| "learning_rate": 4e-05, |
| "loss": 0.7561, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.11062358728892434, |
| "learning_rate": 4.3333333333333334e-05, |
| "loss": 0.8475, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.119133094003457, |
| "learning_rate": 4.666666666666667e-05, |
| "loss": 0.9401, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.12764260071798963, |
| "learning_rate": 5e-05, |
| "loss": 0.8515, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.13615210743252226, |
| "learning_rate": 4.9509803921568634e-05, |
| "loss": 0.8282, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.14466161414705492, |
| "learning_rate": 4.901960784313725e-05, |
| "loss": 0.7526, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.15317112086158755, |
| "learning_rate": 4.8529411764705885e-05, |
| "loss": 0.709, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.1616806275761202, |
| "learning_rate": 4.803921568627452e-05, |
| "loss": 0.7005, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.17019013429065283, |
| "learning_rate": 4.7549019607843135e-05, |
| "loss": 0.6978, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.17869964100518548, |
| "learning_rate": 4.705882352941177e-05, |
| "loss": 0.6689, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.1872091477197181, |
| "learning_rate": 4.656862745098039e-05, |
| "loss": 0.7231, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.19571865443425077, |
| "learning_rate": 4.607843137254902e-05, |
| "loss": 0.6685, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.2042281611487834, |
| "learning_rate": 4.558823529411765e-05, |
| "loss": 0.6776, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.21273766786331605, |
| "learning_rate": 4.5098039215686275e-05, |
| "loss": 0.6853, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.22124717457784868, |
| "learning_rate": 4.460784313725491e-05, |
| "loss": 0.7705, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.22975668129238133, |
| "learning_rate": 4.411764705882353e-05, |
| "loss": 0.7922, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.238266188006914, |
| "learning_rate": 4.362745098039216e-05, |
| "loss": 0.6948, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.24677569472144661, |
| "learning_rate": 4.313725490196079e-05, |
| "loss": 0.6748, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.25528520143597927, |
| "learning_rate": 4.2647058823529415e-05, |
| "loss": 0.7134, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2637947081505119, |
| "learning_rate": 4.215686274509804e-05, |
| "loss": 0.6921, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.2723042148650445, |
| "learning_rate": 4.166666666666667e-05, |
| "loss": 0.6839, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.2808137215795772, |
| "learning_rate": 4.11764705882353e-05, |
| "loss": 0.6808, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.28932322829410984, |
| "learning_rate": 4.068627450980392e-05, |
| "loss": 0.6939, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.2978327350086425, |
| "learning_rate": 4.0196078431372555e-05, |
| "loss": 0.6846, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.3063422417231751, |
| "learning_rate": 3.970588235294117e-05, |
| "loss": 0.6456, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.31485174843770775, |
| "learning_rate": 3.9215686274509805e-05, |
| "loss": 0.6725, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.3233612551522404, |
| "learning_rate": 3.872549019607844e-05, |
| "loss": 0.682, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.33187076186677306, |
| "learning_rate": 3.8235294117647055e-05, |
| "loss": 0.7029, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.34038026858130566, |
| "learning_rate": 3.774509803921569e-05, |
| "loss": 0.7072, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3488897752958383, |
| "learning_rate": 3.725490196078432e-05, |
| "loss": 0.6925, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.35739928201037097, |
| "learning_rate": 3.6764705882352945e-05, |
| "loss": 0.6642, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.3659087887249036, |
| "learning_rate": 3.627450980392157e-05, |
| "loss": 0.6483, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.3744182954394362, |
| "learning_rate": 3.5784313725490195e-05, |
| "loss": 0.6779, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.3829278021539689, |
| "learning_rate": 3.529411764705883e-05, |
| "loss": 0.6664, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.39143730886850153, |
| "learning_rate": 3.480392156862745e-05, |
| "loss": 0.6493, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.3999468155830342, |
| "learning_rate": 3.431372549019608e-05, |
| "loss": 0.6853, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.4084563222975668, |
| "learning_rate": 3.382352941176471e-05, |
| "loss": 0.6549, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.41696582901209944, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.6659, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.4254753357266321, |
| "learning_rate": 3.284313725490196e-05, |
| "loss": 0.6639, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.43398484244116475, |
| "learning_rate": 3.235294117647059e-05, |
| "loss": 0.6583, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.44249434915569735, |
| "learning_rate": 3.186274509803922e-05, |
| "loss": 0.6585, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.45100385587023, |
| "learning_rate": 3.137254901960784e-05, |
| "loss": 0.6753, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.45951336258476266, |
| "learning_rate": 3.0882352941176475e-05, |
| "loss": 0.6712, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.4680228692992953, |
| "learning_rate": 3.0392156862745097e-05, |
| "loss": 0.6632, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.476532376013828, |
| "learning_rate": 2.9901960784313725e-05, |
| "loss": 0.6673, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.4850418827283606, |
| "learning_rate": 2.9411764705882354e-05, |
| "loss": 0.6444, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.49355138944289323, |
| "learning_rate": 2.8921568627450986e-05, |
| "loss": 0.6673, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.5020608961574259, |
| "learning_rate": 2.8431372549019608e-05, |
| "loss": 0.6629, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.5105704028719585, |
| "learning_rate": 2.7941176470588236e-05, |
| "loss": 0.6723, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5190799095864912, |
| "learning_rate": 2.7450980392156865e-05, |
| "loss": 0.6622, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.5275894163010239, |
| "learning_rate": 2.696078431372549e-05, |
| "loss": 0.6606, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.5360989230155564, |
| "learning_rate": 2.647058823529412e-05, |
| "loss": 0.6823, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.544608429730089, |
| "learning_rate": 2.5980392156862747e-05, |
| "loss": 0.6519, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.5531179364446217, |
| "learning_rate": 2.5490196078431373e-05, |
| "loss": 0.6702, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5616274431591544, |
| "learning_rate": 2.5e-05, |
| "loss": 0.6463, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.570136949873687, |
| "learning_rate": 2.4509803921568626e-05, |
| "loss": 0.6604, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.5786464565882197, |
| "learning_rate": 2.401960784313726e-05, |
| "loss": 0.6535, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.5871559633027523, |
| "learning_rate": 2.3529411764705884e-05, |
| "loss": 0.6605, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.595665470017285, |
| "learning_rate": 2.303921568627451e-05, |
| "loss": 0.6514, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6041749767318175, |
| "learning_rate": 2.2549019607843138e-05, |
| "loss": 0.6533, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.6126844834463502, |
| "learning_rate": 2.2058823529411766e-05, |
| "loss": 0.6564, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.6211939901608828, |
| "learning_rate": 2.1568627450980395e-05, |
| "loss": 0.6679, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.6297034968754155, |
| "learning_rate": 2.107843137254902e-05, |
| "loss": 0.6476, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.6382130035899481, |
| "learning_rate": 2.058823529411765e-05, |
| "loss": 0.6917, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.6467225103044808, |
| "learning_rate": 2.0098039215686277e-05, |
| "loss": 0.6565, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.6552320170190135, |
| "learning_rate": 1.9607843137254903e-05, |
| "loss": 0.6329, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.6637415237335461, |
| "learning_rate": 1.9117647058823528e-05, |
| "loss": 0.6149, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.6722510304480788, |
| "learning_rate": 1.862745098039216e-05, |
| "loss": 0.6799, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.6807605371626113, |
| "learning_rate": 1.8137254901960785e-05, |
| "loss": 0.6458, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.689270043877144, |
| "learning_rate": 1.7647058823529414e-05, |
| "loss": 0.6376, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.6977795505916766, |
| "learning_rate": 1.715686274509804e-05, |
| "loss": 0.6688, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.7062890573062093, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.6466, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.7147985640207419, |
| "learning_rate": 1.6176470588235296e-05, |
| "loss": 0.6386, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.7233080707352746, |
| "learning_rate": 1.568627450980392e-05, |
| "loss": 0.6427, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.7318175774498072, |
| "learning_rate": 1.5196078431372548e-05, |
| "loss": 0.6617, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.7403270841643399, |
| "learning_rate": 1.4705882352941177e-05, |
| "loss": 0.6573, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.7488365908788724, |
| "learning_rate": 1.4215686274509804e-05, |
| "loss": 0.6342, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.7573460975934051, |
| "learning_rate": 1.3725490196078432e-05, |
| "loss": 0.6455, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.7658556043079378, |
| "learning_rate": 1.323529411764706e-05, |
| "loss": 0.6042, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.7743651110224704, |
| "learning_rate": 1.2745098039215686e-05, |
| "loss": 0.639, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.7828746177370031, |
| "learning_rate": 1.2254901960784313e-05, |
| "loss": 0.6496, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.7913841244515357, |
| "learning_rate": 1.1764705882352942e-05, |
| "loss": 0.6474, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.7998936311660684, |
| "learning_rate": 1.1274509803921569e-05, |
| "loss": 0.6418, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.808403137880601, |
| "learning_rate": 1.0784313725490197e-05, |
| "loss": 0.6434, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.8169126445951336, |
| "learning_rate": 1.0294117647058824e-05, |
| "loss": 0.659, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.8254221513096662, |
| "learning_rate": 9.803921568627451e-06, |
| "loss": 0.6342, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.8339316580241989, |
| "learning_rate": 9.31372549019608e-06, |
| "loss": 0.647, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.8424411647387315, |
| "learning_rate": 8.823529411764707e-06, |
| "loss": 0.6306, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.8509506714532642, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.6724, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.8594601781677969, |
| "learning_rate": 7.84313725490196e-06, |
| "loss": 0.6455, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.8679696848823295, |
| "learning_rate": 7.3529411764705884e-06, |
| "loss": 0.634, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.8764791915968622, |
| "learning_rate": 6.862745098039216e-06, |
| "loss": 0.6353, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.8849886983113947, |
| "learning_rate": 6.372549019607843e-06, |
| "loss": 0.6632, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.8934982050259274, |
| "learning_rate": 5.882352941176471e-06, |
| "loss": 0.6616, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.90200771174046, |
| "learning_rate": 5.392156862745099e-06, |
| "loss": 0.6312, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.9105172184549927, |
| "learning_rate": 4.901960784313726e-06, |
| "loss": 0.6592, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.9190267251695253, |
| "learning_rate": 4.411764705882353e-06, |
| "loss": 0.6634, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.927536231884058, |
| "learning_rate": 3.92156862745098e-06, |
| "loss": 0.6428, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.9360457385985906, |
| "learning_rate": 3.431372549019608e-06, |
| "loss": 0.6261, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.9445552453131233, |
| "learning_rate": 2.9411764705882355e-06, |
| "loss": 0.6425, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.953064752027656, |
| "learning_rate": 2.450980392156863e-06, |
| "loss": 0.6614, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.9615742587421885, |
| "learning_rate": 1.96078431372549e-06, |
| "loss": 0.6545, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.9700837654567211, |
| "learning_rate": 1.4705882352941177e-06, |
| "loss": 0.6184, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.9785932721712538, |
| "learning_rate": 9.80392156862745e-07, |
| "loss": 0.6671, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.9871027788857865, |
| "learning_rate": 4.901960784313725e-07, |
| "loss": 0.65, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.9956122856003191, |
| "learning_rate": 0.0, |
| "loss": 0.6446, |
| "step": 117 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 117, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.689042619882799e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|