PR-pass-structbert-large / trainer_state.json
Deehan1866's picture
End of training
490fb84 verified
{
"best_metric": 93.7,
"best_model_checkpoint": "../results/phrase_retrieval/PR-pass/qa/mpsDistillbert/finetuned/checkpoint-5000",
"epoch": 2.0,
"eval_steps": 100,
"global_step": 5066,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03947887879984208,
"grad_norm": 36.08970642089844,
"learning_rate": 2.9407816818002367e-05,
"loss": 2.4898,
"step": 100
},
{
"epoch": 0.03947887879984208,
"eval_exact_match": 79.53333333333333,
"eval_f1": 85.07605280322392,
"step": 100
},
{
"epoch": 0.07895775759968417,
"grad_norm": 15.822399139404297,
"learning_rate": 2.8815633636004737e-05,
"loss": 0.7877,
"step": 200
},
{
"epoch": 0.07895775759968417,
"eval_exact_match": 82.16666666666667,
"eval_f1": 86.96412271126349,
"step": 200
},
{
"epoch": 0.11843663639952626,
"grad_norm": 53.009056091308594,
"learning_rate": 2.8223450454007107e-05,
"loss": 0.6493,
"step": 300
},
{
"epoch": 0.11843663639952626,
"eval_exact_match": 86.96666666666667,
"eval_f1": 90.26251606941636,
"step": 300
},
{
"epoch": 0.15791551519936833,
"grad_norm": 33.941368103027344,
"learning_rate": 2.7631267272009477e-05,
"loss": 0.6859,
"step": 400
},
{
"epoch": 0.15791551519936833,
"eval_exact_match": 87.7,
"eval_f1": 90.70352596039436,
"step": 400
},
{
"epoch": 0.1973943939992104,
"grad_norm": 33.127262115478516,
"learning_rate": 2.7039084090011843e-05,
"loss": 0.6207,
"step": 500
},
{
"epoch": 0.1973943939992104,
"eval_exact_match": 88.76666666666667,
"eval_f1": 91.3789169814247,
"step": 500
},
{
"epoch": 0.23687327279905251,
"grad_norm": 73.58187103271484,
"learning_rate": 2.6446900908014213e-05,
"loss": 0.4846,
"step": 600
},
{
"epoch": 0.23687327279905251,
"eval_exact_match": 89.6,
"eval_f1": 92.12643429555197,
"step": 600
},
{
"epoch": 0.2763521515988946,
"grad_norm": 38.15833282470703,
"learning_rate": 2.585471772601658e-05,
"loss": 0.4907,
"step": 700
},
{
"epoch": 0.2763521515988946,
"eval_exact_match": 89.5,
"eval_f1": 92.18318144579082,
"step": 700
},
{
"epoch": 0.31583103039873667,
"grad_norm": 48.2036018371582,
"learning_rate": 2.5262534544018952e-05,
"loss": 0.4449,
"step": 800
},
{
"epoch": 0.31583103039873667,
"eval_exact_match": 89.36666666666666,
"eval_f1": 92.03679853056012,
"step": 800
},
{
"epoch": 0.35530990919857874,
"grad_norm": 48.83040237426758,
"learning_rate": 2.467035136202132e-05,
"loss": 0.4882,
"step": 900
},
{
"epoch": 0.35530990919857874,
"eval_exact_match": 91.0,
"eval_f1": 93.24476194351007,
"step": 900
},
{
"epoch": 0.3947887879984208,
"grad_norm": 33.257354736328125,
"learning_rate": 2.407816818002369e-05,
"loss": 0.4357,
"step": 1000
},
{
"epoch": 0.3947887879984208,
"eval_exact_match": 91.16666666666667,
"eval_f1": 93.38561670865543,
"step": 1000
},
{
"epoch": 0.43426766679826295,
"grad_norm": 36.499168395996094,
"learning_rate": 2.3485984998026055e-05,
"loss": 0.499,
"step": 1100
},
{
"epoch": 0.43426766679826295,
"eval_exact_match": 90.96666666666667,
"eval_f1": 93.31853006155642,
"step": 1100
},
{
"epoch": 0.47374654559810503,
"grad_norm": 9.117573738098145,
"learning_rate": 2.2893801816028428e-05,
"loss": 0.4294,
"step": 1200
},
{
"epoch": 0.47374654559810503,
"eval_exact_match": 90.83333333333333,
"eval_f1": 93.1301494526572,
"step": 1200
},
{
"epoch": 0.513225424397947,
"grad_norm": 47.918365478515625,
"learning_rate": 2.2301618634030794e-05,
"loss": 0.4528,
"step": 1300
},
{
"epoch": 0.513225424397947,
"eval_exact_match": 91.8,
"eval_f1": 93.93781839408155,
"step": 1300
},
{
"epoch": 0.5527043031977892,
"grad_norm": 2.020205020904541,
"learning_rate": 2.1709435452033164e-05,
"loss": 0.3965,
"step": 1400
},
{
"epoch": 0.5527043031977892,
"eval_exact_match": 91.46666666666667,
"eval_f1": 93.66177518095938,
"step": 1400
},
{
"epoch": 0.5921831819976313,
"grad_norm": 16.072200775146484,
"learning_rate": 2.111725227003553e-05,
"loss": 0.3609,
"step": 1500
},
{
"epoch": 0.5921831819976313,
"eval_exact_match": 91.7,
"eval_f1": 93.75143838703438,
"step": 1500
},
{
"epoch": 0.6316620607974733,
"grad_norm": 48.690391540527344,
"learning_rate": 2.05250690880379e-05,
"loss": 0.4318,
"step": 1600
},
{
"epoch": 0.6316620607974733,
"eval_exact_match": 91.46666666666667,
"eval_f1": 93.55887712871922,
"step": 1600
},
{
"epoch": 0.6711409395973155,
"grad_norm": 3.189640998840332,
"learning_rate": 1.993288590604027e-05,
"loss": 0.3602,
"step": 1700
},
{
"epoch": 0.6711409395973155,
"eval_exact_match": 92.43333333333334,
"eval_f1": 94.2249514830154,
"step": 1700
},
{
"epoch": 0.7106198183971575,
"grad_norm": 0.2732882499694824,
"learning_rate": 1.9340702724042637e-05,
"loss": 0.3656,
"step": 1800
},
{
"epoch": 0.7106198183971575,
"eval_exact_match": 92.3,
"eval_f1": 94.17740849673203,
"step": 1800
},
{
"epoch": 0.7500986971969996,
"grad_norm": 25.344755172729492,
"learning_rate": 1.8748519542045006e-05,
"loss": 0.3825,
"step": 1900
},
{
"epoch": 0.7500986971969996,
"eval_exact_match": 92.16666666666667,
"eval_f1": 94.0608726088726,
"step": 1900
},
{
"epoch": 0.7895775759968416,
"grad_norm": 48.43545913696289,
"learning_rate": 1.8156336360047373e-05,
"loss": 0.3447,
"step": 2000
},
{
"epoch": 0.7895775759968416,
"eval_exact_match": 92.53333333333333,
"eval_f1": 94.25784554225729,
"step": 2000
},
{
"epoch": 0.8290564547966838,
"grad_norm": 38.915374755859375,
"learning_rate": 1.7564153178049743e-05,
"loss": 0.3033,
"step": 2100
},
{
"epoch": 0.8290564547966838,
"eval_exact_match": 92.0,
"eval_f1": 94.01127170868344,
"step": 2100
},
{
"epoch": 0.8685353335965259,
"grad_norm": 3.7626020908355713,
"learning_rate": 1.6971969996052112e-05,
"loss": 0.3126,
"step": 2200
},
{
"epoch": 0.8685353335965259,
"eval_exact_match": 92.4,
"eval_f1": 94.20074145705928,
"step": 2200
},
{
"epoch": 0.9080142123963679,
"grad_norm": 11.15784740447998,
"learning_rate": 1.6379786814054482e-05,
"loss": 0.368,
"step": 2300
},
{
"epoch": 0.9080142123963679,
"eval_exact_match": 92.46666666666667,
"eval_f1": 94.47528310578315,
"step": 2300
},
{
"epoch": 0.9474930911962101,
"grad_norm": 55.31396484375,
"learning_rate": 1.578760363205685e-05,
"loss": 0.3398,
"step": 2400
},
{
"epoch": 0.9474930911962101,
"eval_exact_match": 92.46666666666667,
"eval_f1": 94.40168950153159,
"step": 2400
},
{
"epoch": 0.9869719699960521,
"grad_norm": 32.73415756225586,
"learning_rate": 1.5195420450059218e-05,
"loss": 0.3177,
"step": 2500
},
{
"epoch": 0.9869719699960521,
"eval_exact_match": 92.46666666666667,
"eval_f1": 94.35837233137235,
"step": 2500
},
{
"epoch": 1.026450848795894,
"grad_norm": 24.14990234375,
"learning_rate": 1.4603237268061586e-05,
"loss": 0.2105,
"step": 2600
},
{
"epoch": 1.026450848795894,
"eval_exact_match": 92.7,
"eval_f1": 94.4984410208528,
"step": 2600
},
{
"epoch": 1.0659297275957362,
"grad_norm": 0.0023591353092342615,
"learning_rate": 1.4011054086063956e-05,
"loss": 0.1006,
"step": 2700
},
{
"epoch": 1.0659297275957362,
"eval_exact_match": 92.36666666666666,
"eval_f1": 94.37342015392016,
"step": 2700
},
{
"epoch": 1.1054086063955784,
"grad_norm": 20.859468460083008,
"learning_rate": 1.3418870904066324e-05,
"loss": 0.1476,
"step": 2800
},
{
"epoch": 1.1054086063955784,
"eval_exact_match": 92.63333333333334,
"eval_f1": 94.58407021341235,
"step": 2800
},
{
"epoch": 1.1448874851954205,
"grad_norm": 0.3791426420211792,
"learning_rate": 1.2826687722068692e-05,
"loss": 0.1295,
"step": 2900
},
{
"epoch": 1.1448874851954205,
"eval_exact_match": 93.13333333333334,
"eval_f1": 94.95533549783549,
"step": 2900
},
{
"epoch": 1.1843663639952626,
"grad_norm": 4.289775371551514,
"learning_rate": 1.2234504540071062e-05,
"loss": 0.1727,
"step": 3000
},
{
"epoch": 1.1843663639952626,
"eval_exact_match": 92.66666666666667,
"eval_f1": 94.65173544973547,
"step": 3000
},
{
"epoch": 1.2238452427951045,
"grad_norm": 0.0014754978474229574,
"learning_rate": 1.164232135807343e-05,
"loss": 0.1345,
"step": 3100
},
{
"epoch": 1.2238452427951045,
"eval_exact_match": 92.96666666666667,
"eval_f1": 94.9088371686793,
"step": 3100
},
{
"epoch": 1.2633241215949467,
"grad_norm": 43.49968338012695,
"learning_rate": 1.10501381760758e-05,
"loss": 0.202,
"step": 3200
},
{
"epoch": 1.2633241215949467,
"eval_exact_match": 92.66666666666667,
"eval_f1": 94.57915070481636,
"step": 3200
},
{
"epoch": 1.3028030003947888,
"grad_norm": 13.5677490234375,
"learning_rate": 1.0457954994078168e-05,
"loss": 0.1898,
"step": 3300
},
{
"epoch": 1.3028030003947888,
"eval_exact_match": 92.86666666666666,
"eval_f1": 94.8515283790284,
"step": 3300
},
{
"epoch": 1.342281879194631,
"grad_norm": 76.90211486816406,
"learning_rate": 9.865771812080538e-06,
"loss": 0.1433,
"step": 3400
},
{
"epoch": 1.342281879194631,
"eval_exact_match": 93.46666666666667,
"eval_f1": 95.2914963359081,
"step": 3400
},
{
"epoch": 1.3817607579944728,
"grad_norm": 7.809657096862793,
"learning_rate": 9.273588630082906e-06,
"loss": 0.1693,
"step": 3500
},
{
"epoch": 1.3817607579944728,
"eval_exact_match": 93.0,
"eval_f1": 94.87400432900435,
"step": 3500
},
{
"epoch": 1.421239636794315,
"grad_norm": 4.223247051239014,
"learning_rate": 8.681405448085274e-06,
"loss": 0.1827,
"step": 3600
},
{
"epoch": 1.421239636794315,
"eval_exact_match": 93.33333333333333,
"eval_f1": 95.12902597402599,
"step": 3600
},
{
"epoch": 1.460718515594157,
"grad_norm": 27.452770233154297,
"learning_rate": 8.089222266087644e-06,
"loss": 0.1843,
"step": 3700
},
{
"epoch": 1.460718515594157,
"eval_exact_match": 93.36666666666666,
"eval_f1": 95.09415584415585,
"step": 3700
},
{
"epoch": 1.5001973943939992,
"grad_norm": 20.997011184692383,
"learning_rate": 7.497039084090013e-06,
"loss": 0.1656,
"step": 3800
},
{
"epoch": 1.5001973943939992,
"eval_exact_match": 93.7,
"eval_f1": 95.35056277056279,
"step": 3800
},
{
"epoch": 1.5396762731938414,
"grad_norm": 0.005699894856661558,
"learning_rate": 6.90485590209238e-06,
"loss": 0.1013,
"step": 3900
},
{
"epoch": 1.5396762731938414,
"eval_exact_match": 93.4,
"eval_f1": 95.10358730158731,
"step": 3900
},
{
"epoch": 1.5791551519936835,
"grad_norm": 4.07551383972168,
"learning_rate": 6.312672720094749e-06,
"loss": 0.1632,
"step": 4000
},
{
"epoch": 1.5791551519936835,
"eval_exact_match": 93.43333333333334,
"eval_f1": 95.18676190476195,
"step": 4000
},
{
"epoch": 1.6186340307935254,
"grad_norm": 0.002155046910047531,
"learning_rate": 5.720489538097118e-06,
"loss": 0.1702,
"step": 4100
},
{
"epoch": 1.6186340307935254,
"eval_exact_match": 93.36666666666666,
"eval_f1": 95.09804473304477,
"step": 4100
},
{
"epoch": 1.6581129095933675,
"grad_norm": 0.017265846952795982,
"learning_rate": 5.128306356099487e-06,
"loss": 0.1308,
"step": 4200
},
{
"epoch": 1.6581129095933675,
"eval_exact_match": 93.23333333333333,
"eval_f1": 94.99049422799425,
"step": 4200
},
{
"epoch": 1.6975917883932097,
"grad_norm": 0.009932265616953373,
"learning_rate": 4.536123174101856e-06,
"loss": 0.1608,
"step": 4300
},
{
"epoch": 1.6975917883932097,
"eval_exact_match": 93.63333333333334,
"eval_f1": 95.32610028860032,
"step": 4300
},
{
"epoch": 1.7370706671930516,
"grad_norm": 56.21049880981445,
"learning_rate": 3.943939992104225e-06,
"loss": 0.1296,
"step": 4400
},
{
"epoch": 1.7370706671930516,
"eval_exact_match": 93.43333333333334,
"eval_f1": 95.17288023088024,
"step": 4400
},
{
"epoch": 1.7765495459928937,
"grad_norm": 78.09048461914062,
"learning_rate": 3.3517568101065932e-06,
"loss": 0.1752,
"step": 4500
},
{
"epoch": 1.7765495459928937,
"eval_exact_match": 93.66666666666667,
"eval_f1": 95.27616883116887,
"step": 4500
},
{
"epoch": 1.8160284247927359,
"grad_norm": 2.959677219390869,
"learning_rate": 2.7595736281089617e-06,
"loss": 0.1195,
"step": 4600
},
{
"epoch": 1.8160284247927359,
"eval_exact_match": 93.43333333333334,
"eval_f1": 95.0532770562771,
"step": 4600
},
{
"epoch": 1.855507303592578,
"grad_norm": 69.6814193725586,
"learning_rate": 2.1673904461113303e-06,
"loss": 0.1849,
"step": 4700
},
{
"epoch": 1.855507303592578,
"eval_exact_match": 93.5,
"eval_f1": 95.10591774891775,
"step": 4700
},
{
"epoch": 1.8949861823924201,
"grad_norm": 0.19182445108890533,
"learning_rate": 1.5752072641136992e-06,
"loss": 0.09,
"step": 4800
},
{
"epoch": 1.8949861823924201,
"eval_exact_match": 93.46666666666667,
"eval_f1": 95.12782467532469,
"step": 4800
},
{
"epoch": 1.9344650611922622,
"grad_norm": 65.0757827758789,
"learning_rate": 9.83024082116068e-07,
"loss": 0.1393,
"step": 4900
},
{
"epoch": 1.9344650611922622,
"eval_exact_match": 93.7,
"eval_f1": 95.2371066252588,
"step": 4900
},
{
"epoch": 1.9739439399921044,
"grad_norm": 96.50936889648438,
"learning_rate": 3.9084090011843665e-07,
"loss": 0.149,
"step": 5000
},
{
"epoch": 1.9739439399921044,
"eval_exact_match": 93.7,
"eval_f1": 95.25543995859213,
"step": 5000
},
{
"epoch": 2.0,
"step": 5066,
"total_flos": 3.763306473501082e+16,
"train_loss": 0.339278704556278,
"train_runtime": 7807.973,
"train_samples_per_second": 5.19,
"train_steps_per_second": 0.649
}
],
"logging_steps": 100,
"max_steps": 5066,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.763306473501082e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}