{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 2077,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00048155059290916753,
"grad_norm": 9.47897720336914,
"learning_rate": 0.0,
"loss": 2.5979,
"step": 1
},
{
"epoch": 0.0009631011858183351,
"grad_norm": 13.405498504638672,
"learning_rate": 3.1746031746031746e-06,
"loss": 2.8498,
"step": 2
},
{
"epoch": 0.0014446517787275025,
"grad_norm": 6.596500873565674,
"learning_rate": 6.349206349206349e-06,
"loss": 1.8474,
"step": 3
},
{
"epoch": 0.0019262023716366701,
"grad_norm": 8.559901237487793,
"learning_rate": 9.523809523809523e-06,
"loss": 2.3071,
"step": 4
},
{
"epoch": 0.0024077529645458377,
"grad_norm": 7.220107555389404,
"learning_rate": 1.2698412698412699e-05,
"loss": 2.4772,
"step": 5
},
{
"epoch": 0.002889303557455005,
"grad_norm": 9.46921157836914,
"learning_rate": 1.5873015873015872e-05,
"loss": 2.0344,
"step": 6
},
{
"epoch": 0.0033708541503641725,
"grad_norm": 7.574646949768066,
"learning_rate": 1.9047619047619046e-05,
"loss": 2.9056,
"step": 7
},
{
"epoch": 0.0038524047432733403,
"grad_norm": 10.66491985321045,
"learning_rate": 2.2222222222222223e-05,
"loss": 2.5144,
"step": 8
},
{
"epoch": 0.004333955336182508,
"grad_norm": 5.920387268066406,
"learning_rate": 2.5396825396825397e-05,
"loss": 2.1156,
"step": 9
},
{
"epoch": 0.004815505929091675,
"grad_norm": 5.609206676483154,
"learning_rate": 2.857142857142857e-05,
"loss": 2.2243,
"step": 10
},
{
"epoch": 0.005297056522000842,
"grad_norm": 6.140780448913574,
"learning_rate": 3.1746031746031745e-05,
"loss": 2.0492,
"step": 11
},
{
"epoch": 0.00577860711491001,
"grad_norm": 6.842236042022705,
"learning_rate": 3.492063492063492e-05,
"loss": 1.2736,
"step": 12
},
{
"epoch": 0.006260157707819178,
"grad_norm": 9.14393424987793,
"learning_rate": 3.809523809523809e-05,
"loss": 1.8261,
"step": 13
},
{
"epoch": 0.006741708300728345,
"grad_norm": 8.761028289794922,
"learning_rate": 4.126984126984127e-05,
"loss": 1.5156,
"step": 14
},
{
"epoch": 0.007223258893637513,
"grad_norm": 7.065422534942627,
"learning_rate": 4.4444444444444447e-05,
"loss": 2.0811,
"step": 15
},
{
"epoch": 0.0077048094865466805,
"grad_norm": 5.040136814117432,
"learning_rate": 4.761904761904762e-05,
"loss": 1.8507,
"step": 16
},
{
"epoch": 0.008186360079455848,
"grad_norm": 5.030274868011475,
"learning_rate": 5.0793650793650794e-05,
"loss": 1.3307,
"step": 17
},
{
"epoch": 0.008667910672365015,
"grad_norm": 6.842209339141846,
"learning_rate": 5.396825396825397e-05,
"loss": 1.6775,
"step": 18
},
{
"epoch": 0.009149461265274182,
"grad_norm": 3.107877016067505,
"learning_rate": 5.714285714285714e-05,
"loss": 1.6994,
"step": 19
},
{
"epoch": 0.00963101185818335,
"grad_norm": 8.357791900634766,
"learning_rate": 6.0317460317460316e-05,
"loss": 2.017,
"step": 20
},
{
"epoch": 0.010112562451092518,
"grad_norm": 5.375106334686279,
"learning_rate": 6.349206349206349e-05,
"loss": 1.2195,
"step": 21
},
{
"epoch": 0.010594113044001685,
"grad_norm": 8.254910469055176,
"learning_rate": 6.666666666666667e-05,
"loss": 1.7287,
"step": 22
},
{
"epoch": 0.011075663636910853,
"grad_norm": 1.862160086631775,
"learning_rate": 6.984126984126984e-05,
"loss": 0.9883,
"step": 23
},
{
"epoch": 0.01155721422982002,
"grad_norm": 4.388360500335693,
"learning_rate": 7.301587301587302e-05,
"loss": 0.8999,
"step": 24
},
{
"epoch": 0.012038764822729187,
"grad_norm": 3.909996509552002,
"learning_rate": 7.619047619047618e-05,
"loss": 1.0851,
"step": 25
},
{
"epoch": 0.012520315415638356,
"grad_norm": 6.695411205291748,
"learning_rate": 7.936507936507937e-05,
"loss": 1.3302,
"step": 26
},
{
"epoch": 0.013001866008547523,
"grad_norm": 5.960219383239746,
"learning_rate": 8.253968253968255e-05,
"loss": 1.111,
"step": 27
},
{
"epoch": 0.01348341660145669,
"grad_norm": 4.400773525238037,
"learning_rate": 8.571428571428571e-05,
"loss": 0.7887,
"step": 28
},
{
"epoch": 0.013964967194365858,
"grad_norm": 5.381070137023926,
"learning_rate": 8.888888888888889e-05,
"loss": 2.0415,
"step": 29
},
{
"epoch": 0.014446517787275025,
"grad_norm": 2.605618715286255,
"learning_rate": 9.206349206349206e-05,
"loss": 1.0637,
"step": 30
},
{
"epoch": 0.014928068380184192,
"grad_norm": 4.758686065673828,
"learning_rate": 9.523809523809524e-05,
"loss": 0.815,
"step": 31
},
{
"epoch": 0.015409618973093361,
"grad_norm": 3.952995777130127,
"learning_rate": 9.841269841269841e-05,
"loss": 1.5256,
"step": 32
},
{
"epoch": 0.01589116956600253,
"grad_norm": 6.927408695220947,
"learning_rate": 0.00010158730158730159,
"loss": 1.2863,
"step": 33
},
{
"epoch": 0.016372720158911697,
"grad_norm": 3.77854323387146,
"learning_rate": 0.00010476190476190477,
"loss": 1.2303,
"step": 34
},
{
"epoch": 0.016854270751820864,
"grad_norm": 3.4671757221221924,
"learning_rate": 0.00010793650793650794,
"loss": 0.8535,
"step": 35
},
{
"epoch": 0.01733582134473003,
"grad_norm": 3.3394312858581543,
"learning_rate": 0.00011111111111111112,
"loss": 1.7921,
"step": 36
},
{
"epoch": 0.017817371937639197,
"grad_norm": 3.3127732276916504,
"learning_rate": 0.00011428571428571428,
"loss": 1.0765,
"step": 37
},
{
"epoch": 0.018298922530548364,
"grad_norm": 3.850238800048828,
"learning_rate": 0.00011746031746031746,
"loss": 1.2267,
"step": 38
},
{
"epoch": 0.018780473123457535,
"grad_norm": 2.822495460510254,
"learning_rate": 0.00012063492063492063,
"loss": 0.9256,
"step": 39
},
{
"epoch": 0.0192620237163667,
"grad_norm": 3.967958688735962,
"learning_rate": 0.0001238095238095238,
"loss": 1.9019,
"step": 40
},
{
"epoch": 0.01974357430927587,
"grad_norm": 3.5611379146575928,
"learning_rate": 0.00012698412698412698,
"loss": 0.8901,
"step": 41
},
{
"epoch": 0.020225124902185036,
"grad_norm": 40.8494758605957,
"learning_rate": 0.00013015873015873017,
"loss": 1.0527,
"step": 42
},
{
"epoch": 0.020706675495094203,
"grad_norm": 3.99414324760437,
"learning_rate": 0.00013333333333333334,
"loss": 1.1458,
"step": 43
},
{
"epoch": 0.02118822608800337,
"grad_norm": 3.9491536617279053,
"learning_rate": 0.0001365079365079365,
"loss": 1.125,
"step": 44
},
{
"epoch": 0.02166977668091254,
"grad_norm": 2.9987173080444336,
"learning_rate": 0.00013968253968253967,
"loss": 1.0546,
"step": 45
},
{
"epoch": 0.022151327273821707,
"grad_norm": 3.976832628250122,
"learning_rate": 0.00014285714285714287,
"loss": 1.1407,
"step": 46
},
{
"epoch": 0.022632877866730874,
"grad_norm": 10.798648834228516,
"learning_rate": 0.00014603174603174603,
"loss": 0.6169,
"step": 47
},
{
"epoch": 0.02311442845964004,
"grad_norm": 3.4334845542907715,
"learning_rate": 0.00014920634920634923,
"loss": 1.1717,
"step": 48
},
{
"epoch": 0.023595979052549208,
"grad_norm": 4.682596206665039,
"learning_rate": 0.00015238095238095237,
"loss": 0.9471,
"step": 49
},
{
"epoch": 0.024077529645458375,
"grad_norm": 6.495236396789551,
"learning_rate": 0.00015555555555555556,
"loss": 1.2659,
"step": 50
},
{
"epoch": 0.024559080238367545,
"grad_norm": 4.563192367553711,
"learning_rate": 0.00015873015873015873,
"loss": 1.5277,
"step": 51
},
{
"epoch": 0.025040630831276712,
"grad_norm": 2.5679526329040527,
"learning_rate": 0.00016190476190476192,
"loss": 1.5084,
"step": 52
},
{
"epoch": 0.02552218142418588,
"grad_norm": 3.2736315727233887,
"learning_rate": 0.0001650793650793651,
"loss": 0.925,
"step": 53
},
{
"epoch": 0.026003732017095046,
"grad_norm": 3.326249837875366,
"learning_rate": 0.00016825396825396826,
"loss": 0.7348,
"step": 54
},
{
"epoch": 0.026485282610004213,
"grad_norm": 4.137823104858398,
"learning_rate": 0.00017142857142857143,
"loss": 1.5321,
"step": 55
},
{
"epoch": 0.02696683320291338,
"grad_norm": 5.634565353393555,
"learning_rate": 0.00017460317460317462,
"loss": 0.8261,
"step": 56
},
{
"epoch": 0.02744838379582255,
"grad_norm": 5.409355163574219,
"learning_rate": 0.00017777777777777779,
"loss": 1.1813,
"step": 57
},
{
"epoch": 0.027929934388731717,
"grad_norm": 3.174149513244629,
"learning_rate": 0.00018095238095238095,
"loss": 0.979,
"step": 58
},
{
"epoch": 0.028411484981640884,
"grad_norm": 3.773308038711548,
"learning_rate": 0.00018412698412698412,
"loss": 0.9456,
"step": 59
},
{
"epoch": 0.02889303557455005,
"grad_norm": 4.3084306716918945,
"learning_rate": 0.00018730158730158731,
"loss": 1.0249,
"step": 60
},
{
"epoch": 0.029374586167459218,
"grad_norm": 1.6852308511734009,
"learning_rate": 0.00019047619047619048,
"loss": 0.9553,
"step": 61
},
{
"epoch": 0.029856136760368385,
"grad_norm": 4.570802211761475,
"learning_rate": 0.00019365079365079365,
"loss": 0.8408,
"step": 62
},
{
"epoch": 0.030337687353277555,
"grad_norm": 4.909348011016846,
"learning_rate": 0.00019682539682539682,
"loss": 1.1777,
"step": 63
},
{
"epoch": 0.030819237946186722,
"grad_norm": 3.654968500137329,
"learning_rate": 0.0002,
"loss": 1.04,
"step": 64
},
{
"epoch": 0.03130078853909589,
"grad_norm": 3.1528286933898926,
"learning_rate": 0.00019999987833918285,
"loss": 0.824,
"step": 65
},
{
"epoch": 0.03178233913200506,
"grad_norm": 4.7957024574279785,
"learning_rate": 0.00019999951335702735,
"loss": 1.0617,
"step": 66
},
{
"epoch": 0.03226388972491422,
"grad_norm": 3.9064583778381348,
"learning_rate": 0.00019999890505442158,
"loss": 0.854,
"step": 67
},
{
"epoch": 0.03274544031782339,
"grad_norm": 4.40252161026001,
"learning_rate": 0.0001999980534328457,
"loss": 1.219,
"step": 68
},
{
"epoch": 0.03322699091073256,
"grad_norm": 10.936585426330566,
"learning_rate": 0.0001999969584943719,
"loss": 1.3314,
"step": 69
},
{
"epoch": 0.03370854150364173,
"grad_norm": 5.040441036224365,
"learning_rate": 0.00019999562024166438,
"loss": 0.7201,
"step": 70
},
{
"epoch": 0.03419009209655089,
"grad_norm": 3.9674477577209473,
"learning_rate": 0.0001999940386779794,
"loss": 0.6603,
"step": 71
},
{
"epoch": 0.03467164268946006,
"grad_norm": 2.814729928970337,
"learning_rate": 0.00019999221380716527,
"loss": 0.829,
"step": 72
},
{
"epoch": 0.03515319328236923,
"grad_norm": 4.869168281555176,
"learning_rate": 0.00019999014563366226,
"loss": 1.014,
"step": 73
},
{
"epoch": 0.035634743875278395,
"grad_norm": 6.23966121673584,
"learning_rate": 0.00019998783416250268,
"loss": 1.0555,
"step": 74
},
{
"epoch": 0.036116294468187565,
"grad_norm": 4.9502458572387695,
"learning_rate": 0.0001999852793993109,
"loss": 1.2713,
"step": 75
},
{
"epoch": 0.03659784506109673,
"grad_norm": 4.012925148010254,
"learning_rate": 0.00019998248135030315,
"loss": 1.0993,
"step": 76
},
{
"epoch": 0.0370793956540059,
"grad_norm": 7.407429218292236,
"learning_rate": 0.00019997944002228774,
"loss": 1.2082,
"step": 77
},
{
"epoch": 0.03756094624691507,
"grad_norm": 6.095324993133545,
"learning_rate": 0.00019997615542266482,
"loss": 0.9925,
"step": 78
},
{
"epoch": 0.03804249683982423,
"grad_norm": 3.40785813331604,
"learning_rate": 0.00019997262755942655,
"loss": 0.7746,
"step": 79
},
{
"epoch": 0.0385240474327334,
"grad_norm": 9.24764633178711,
"learning_rate": 0.000199968856441157,
"loss": 0.7769,
"step": 80
},
{
"epoch": 0.03900559802564257,
"grad_norm": 7.706933975219727,
"learning_rate": 0.0001999648420770321,
"loss": 1.0516,
"step": 81
},
{
"epoch": 0.03948714861855174,
"grad_norm": 3.0405354499816895,
"learning_rate": 0.0001999605844768197,
"loss": 1.1086,
"step": 82
},
{
"epoch": 0.0399686992114609,
"grad_norm": 5.7575225830078125,
"learning_rate": 0.00019995608365087946,
"loss": 1.1402,
"step": 83
},
{
"epoch": 0.04045024980437007,
"grad_norm": 4.292261600494385,
"learning_rate": 0.0001999513396101628,
"loss": 1.0188,
"step": 84
},
{
"epoch": 0.04093180039727924,
"grad_norm": 4.21382474899292,
"learning_rate": 0.00019994635236621306,
"loss": 0.82,
"step": 85
},
{
"epoch": 0.041413350990188405,
"grad_norm": 2.9905776977539062,
"learning_rate": 0.00019994112193116528,
"loss": 1.0823,
"step": 86
},
{
"epoch": 0.041894901583097575,
"grad_norm": 2.917043685913086,
"learning_rate": 0.00019993564831774618,
"loss": 0.9047,
"step": 87
},
{
"epoch": 0.04237645217600674,
"grad_norm": 2.6178548336029053,
"learning_rate": 0.00019992993153927432,
"loss": 0.8579,
"step": 88
},
{
"epoch": 0.04285800276891591,
"grad_norm": 5.876251220703125,
"learning_rate": 0.00019992397160965982,
"loss": 0.8726,
"step": 89
},
{
"epoch": 0.04333955336182508,
"grad_norm": 15.610270500183105,
"learning_rate": 0.0001999177685434045,
"loss": 1.3112,
"step": 90
},
{
"epoch": 0.04382110395473424,
"grad_norm": 3.1741201877593994,
"learning_rate": 0.00019991132235560176,
"loss": 1.0837,
"step": 91
},
{
"epoch": 0.044302654547643414,
"grad_norm": 3.8556501865386963,
"learning_rate": 0.00019990463306193652,
"loss": 0.947,
"step": 92
},
{
"epoch": 0.04478420514055258,
"grad_norm": 4.010826110839844,
"learning_rate": 0.00019989770067868533,
"loss": 0.726,
"step": 93
},
{
"epoch": 0.04526575573346175,
"grad_norm": 3.4739131927490234,
"learning_rate": 0.00019989052522271622,
"loss": 0.9653,
"step": 94
},
{
"epoch": 0.04574730632637091,
"grad_norm": 2.507850170135498,
"learning_rate": 0.00019988310671148848,
"loss": 0.8077,
"step": 95
},
{
"epoch": 0.04622885691928008,
"grad_norm": 3.4519567489624023,
"learning_rate": 0.00019987544516305311,
"loss": 0.897,
"step": 96
},
{
"epoch": 0.04671040751218925,
"grad_norm": 3.6631219387054443,
"learning_rate": 0.00019986754059605222,
"loss": 0.912,
"step": 97
},
{
"epoch": 0.047191958105098415,
"grad_norm": 3.770810842514038,
"learning_rate": 0.00019985939302971938,
"loss": 0.8759,
"step": 98
},
{
"epoch": 0.047673508698007586,
"grad_norm": 5.926509857177734,
"learning_rate": 0.00019985100248387933,
"loss": 1.2971,
"step": 99
},
{
"epoch": 0.04815505929091675,
"grad_norm": 3.8625011444091797,
"learning_rate": 0.00019984236897894816,
"loss": 0.5941,
"step": 100
},
{
"epoch": 0.04863660988382592,
"grad_norm": 5.259921073913574,
"learning_rate": 0.000199833492535933,
"loss": 0.8503,
"step": 101
},
{
"epoch": 0.04911816047673509,
"grad_norm": 4.988558769226074,
"learning_rate": 0.00019982437317643217,
"loss": 1.1631,
"step": 102
},
{
"epoch": 0.04959971106964425,
"grad_norm": 7.225802898406982,
"learning_rate": 0.00019981501092263503,
"loss": 1.3616,
"step": 103
},
{
"epoch": 0.050081261662553424,
"grad_norm": 3.8858981132507324,
"learning_rate": 0.00019980540579732196,
"loss": 0.6591,
"step": 104
},
{
"epoch": 0.05056281225546259,
"grad_norm": 3.4471282958984375,
"learning_rate": 0.00019979555782386434,
"loss": 0.7594,
"step": 105
},
{
"epoch": 0.05104436284837176,
"grad_norm": 4.011205196380615,
"learning_rate": 0.00019978546702622443,
"loss": 1.0898,
"step": 106
},
{
"epoch": 0.05152591344128093,
"grad_norm": 3.6832756996154785,
"learning_rate": 0.00019977513342895532,
"loss": 0.6194,
"step": 107
},
{
"epoch": 0.05200746403419009,
"grad_norm": 6.4003376960754395,
"learning_rate": 0.00019976455705720083,
"loss": 0.7407,
"step": 108
},
{
"epoch": 0.05248901462709926,
"grad_norm": 2.832329034805298,
"learning_rate": 0.0001997537379366956,
"loss": 1.4024,
"step": 109
},
{
"epoch": 0.052970565220008425,
"grad_norm": 4.148341178894043,
"learning_rate": 0.00019974267609376494,
"loss": 1.3058,
"step": 110
},
{
"epoch": 0.053452115812917596,
"grad_norm": 6.119814872741699,
"learning_rate": 0.00019973137155532462,
"loss": 0.8581,
"step": 111
},
{
"epoch": 0.05393366640582676,
"grad_norm": 8.218791961669922,
"learning_rate": 0.00019971982434888107,
"loss": 0.9871,
"step": 112
},
{
"epoch": 0.05441521699873593,
"grad_norm": 2.336301326751709,
"learning_rate": 0.00019970803450253114,
"loss": 1.2561,
"step": 113
},
{
"epoch": 0.0548967675916451,
"grad_norm": 10.881673812866211,
"learning_rate": 0.0001996960020449621,
"loss": 1.3601,
"step": 114
},
{
"epoch": 0.055378318184554264,
"grad_norm": 1.6918381452560425,
"learning_rate": 0.00019968372700545145,
"loss": 0.9491,
"step": 115
},
{
"epoch": 0.055859868777463434,
"grad_norm": 3.8625855445861816,
"learning_rate": 0.00019967120941386709,
"loss": 1.3005,
"step": 116
},
{
"epoch": 0.0563414193703726,
"grad_norm": 4.829530715942383,
"learning_rate": 0.000199658449300667,
"loss": 0.9137,
"step": 117
},
{
"epoch": 0.05682296996328177,
"grad_norm": 6.8195719718933105,
"learning_rate": 0.00019964544669689928,
"loss": 1.6891,
"step": 118
},
{
"epoch": 0.05730452055619094,
"grad_norm": 4.785418510437012,
"learning_rate": 0.00019963220163420214,
"loss": 1.4208,
"step": 119
},
{
"epoch": 0.0577860711491001,
"grad_norm": 3.4616124629974365,
"learning_rate": 0.0001996187141448036,
"loss": 0.6869,
"step": 120
},
{
"epoch": 0.05826762174200927,
"grad_norm": 4.4392924308776855,
"learning_rate": 0.0001996049842615217,
"loss": 0.7986,
"step": 121
},
{
"epoch": 0.058749172334918436,
"grad_norm": 3.8779265880584717,
"learning_rate": 0.0001995910120177642,
"loss": 0.7087,
"step": 122
},
{
"epoch": 0.059230722927827606,
"grad_norm": 6.253561496734619,
"learning_rate": 0.00019957679744752859,
"loss": 1.2118,
"step": 123
},
{
"epoch": 0.05971227352073677,
"grad_norm": 5.026823043823242,
"learning_rate": 0.00019956234058540195,
"loss": 0.8501,
"step": 124
},
{
"epoch": 0.06019382411364594,
"grad_norm": 4.044717788696289,
"learning_rate": 0.00019954764146656105,
"loss": 0.5874,
"step": 125
},
{
"epoch": 0.06067537470655511,
"grad_norm": 3.3693103790283203,
"learning_rate": 0.00019953270012677195,
"loss": 0.5689,
"step": 126
},
{
"epoch": 0.061156925299464274,
"grad_norm": 3.7752881050109863,
"learning_rate": 0.00019951751660239015,
"loss": 0.9354,
"step": 127
},
{
"epoch": 0.061638475892373444,
"grad_norm": 4.32492208480835,
"learning_rate": 0.00019950209093036052,
"loss": 0.8582,
"step": 128
},
{
"epoch": 0.06212002648528261,
"grad_norm": 4.800130367279053,
"learning_rate": 0.000199486423148217,
"loss": 1.0017,
"step": 129
},
{
"epoch": 0.06260157707819178,
"grad_norm": 2.9463722705841064,
"learning_rate": 0.00019947051329408276,
"loss": 1.076,
"step": 130
},
{
"epoch": 0.06308312767110094,
"grad_norm": 1.6262086629867554,
"learning_rate": 0.00019945436140666981,
"loss": 1.2582,
"step": 131
},
{
"epoch": 0.06356467826401012,
"grad_norm": 7.490293979644775,
"learning_rate": 0.0001994379675252793,
"loss": 1.4111,
"step": 132
},
{
"epoch": 0.06404622885691928,
"grad_norm": 2.8741366863250732,
"learning_rate": 0.00019942133168980103,
"loss": 1.4064,
"step": 133
},
{
"epoch": 0.06452777944982845,
"grad_norm": 4.327383995056152,
"learning_rate": 0.00019940445394071355,
"loss": 1.0118,
"step": 134
},
{
"epoch": 0.06500933004273761,
"grad_norm": 2.9505062103271484,
"learning_rate": 0.0001993873343190842,
"loss": 0.9643,
"step": 135
},
{
"epoch": 0.06549088063564679,
"grad_norm": 3.9040603637695312,
"learning_rate": 0.00019936997286656855,
"loss": 1.075,
"step": 136
},
{
"epoch": 0.06597243122855595,
"grad_norm": 4.2163896560668945,
"learning_rate": 0.00019935236962541092,
"loss": 1.1182,
"step": 137
},
{
"epoch": 0.06645398182146511,
"grad_norm": 3.8160250186920166,
"learning_rate": 0.00019933452463844376,
"loss": 1.1374,
"step": 138
},
{
"epoch": 0.06693553241437429,
"grad_norm": 4.615425109863281,
"learning_rate": 0.00019931643794908772,
"loss": 0.7229,
"step": 139
},
{
"epoch": 0.06741708300728345,
"grad_norm": 5.407428741455078,
"learning_rate": 0.00019929810960135172,
"loss": 1.258,
"step": 140
},
{
"epoch": 0.06789863360019262,
"grad_norm": 2.296764612197876,
"learning_rate": 0.00019927953963983254,
"loss": 0.8528,
"step": 141
},
{
"epoch": 0.06838018419310178,
"grad_norm": 2.644909381866455,
"learning_rate": 0.00019926072810971492,
"loss": 1.2323,
"step": 142
},
{
"epoch": 0.06886173478601096,
"grad_norm": 3.273026943206787,
"learning_rate": 0.00019924167505677137,
"loss": 1.1116,
"step": 143
},
{
"epoch": 0.06934328537892012,
"grad_norm": 2.8930141925811768,
"learning_rate": 0.00019922238052736215,
"loss": 0.5674,
"step": 144
},
{
"epoch": 0.06982483597182929,
"grad_norm": 11.064878463745117,
"learning_rate": 0.00019920284456843498,
"loss": 1.054,
"step": 145
},
{
"epoch": 0.07030638656473846,
"grad_norm": 7.081421375274658,
"learning_rate": 0.00019918306722752505,
"loss": 1.2431,
"step": 146
},
{
"epoch": 0.07078793715764763,
"grad_norm": 7.764263153076172,
"learning_rate": 0.00019916304855275497,
"loss": 1.4613,
"step": 147
},
{
"epoch": 0.07126948775055679,
"grad_norm": 8.821840286254883,
"learning_rate": 0.00019914278859283445,
"loss": 1.0356,
"step": 148
},
{
"epoch": 0.07175103834346597,
"grad_norm": 4.817983150482178,
"learning_rate": 0.0001991222873970604,
"loss": 0.6992,
"step": 149
},
{
"epoch": 0.07223258893637513,
"grad_norm": 2.4728589057922363,
"learning_rate": 0.00019910154501531663,
"loss": 0.7112,
"step": 150
},
{
"epoch": 0.0727141395292843,
"grad_norm": 9.176413536071777,
"learning_rate": 0.0001990805614980739,
"loss": 0.8131,
"step": 151
},
{
"epoch": 0.07319569012219346,
"grad_norm": 4.006031036376953,
"learning_rate": 0.00019905933689638955,
"loss": 0.9721,
"step": 152
},
{
"epoch": 0.07367724071510263,
"grad_norm": 3.409881114959717,
"learning_rate": 0.00019903787126190772,
"loss": 0.9142,
"step": 153
},
{
"epoch": 0.0741587913080118,
"grad_norm": 4.4478373527526855,
"learning_rate": 0.00019901616464685888,
"loss": 0.9991,
"step": 154
},
{
"epoch": 0.07464034190092096,
"grad_norm": 5.569812774658203,
"learning_rate": 0.00019899421710405996,
"loss": 0.9713,
"step": 155
},
{
"epoch": 0.07512189249383014,
"grad_norm": 3.1611955165863037,
"learning_rate": 0.00019897202868691407,
"loss": 0.839,
"step": 156
},
{
"epoch": 0.0756034430867393,
"grad_norm": 3.547825574874878,
"learning_rate": 0.00019894959944941038,
"loss": 0.6568,
"step": 157
},
{
"epoch": 0.07608499367964847,
"grad_norm": 4.14447546005249,
"learning_rate": 0.0001989269294461242,
"loss": 0.7445,
"step": 158
},
{
"epoch": 0.07656654427255763,
"grad_norm": 3.5642924308776855,
"learning_rate": 0.0001989040187322164,
"loss": 1.1176,
"step": 159
},
{
"epoch": 0.0770480948654668,
"grad_norm": 3.2829089164733887,
"learning_rate": 0.00019888086736343384,
"loss": 1.2296,
"step": 160
},
{
"epoch": 0.07752964545837597,
"grad_norm": 3.4885025024414062,
"learning_rate": 0.0001988574753961087,
"loss": 0.827,
"step": 161
},
{
"epoch": 0.07801119605128513,
"grad_norm": 4.431697845458984,
"learning_rate": 0.00019883384288715874,
"loss": 0.9405,
"step": 162
},
{
"epoch": 0.07849274664419431,
"grad_norm": 4.44993782043457,
"learning_rate": 0.000198809969894087,
"loss": 0.9186,
"step": 163
},
{
"epoch": 0.07897429723710347,
"grad_norm": 1.9886109828948975,
"learning_rate": 0.0001987858564749816,
"loss": 1.2175,
"step": 164
},
{
"epoch": 0.07945584783001264,
"grad_norm": 1.904360055923462,
"learning_rate": 0.00019876150268851572,
"loss": 0.9003,
"step": 165
},
{
"epoch": 0.0799373984229218,
"grad_norm": 1.732532024383545,
"learning_rate": 0.00019873690859394737,
"loss": 0.8792,
"step": 166
},
{
"epoch": 0.08041894901583098,
"grad_norm": 5.374270439147949,
"learning_rate": 0.0001987120742511193,
"loss": 1.2873,
"step": 167
},
{
"epoch": 0.08090049960874014,
"grad_norm": 4.2816572189331055,
"learning_rate": 0.0001986869997204589,
"loss": 0.855,
"step": 168
},
{
"epoch": 0.0813820502016493,
"grad_norm": 2.6494359970092773,
"learning_rate": 0.00019866168506297788,
"loss": 0.9457,
"step": 169
},
{
"epoch": 0.08186360079455848,
"grad_norm": 4.345731258392334,
"learning_rate": 0.00019863613034027224,
"loss": 0.8582,
"step": 170
},
{
"epoch": 0.08234515138746765,
"grad_norm": 4.3999342918396,
"learning_rate": 0.00019861033561452223,
"loss": 0.7079,
"step": 171
},
{
"epoch": 0.08282670198037681,
"grad_norm": 4.363504409790039,
"learning_rate": 0.00019858430094849195,
"loss": 0.808,
"step": 172
},
{
"epoch": 0.08330825257328599,
"grad_norm": 2.4355199337005615,
"learning_rate": 0.0001985580264055294,
"loss": 0.833,
"step": 173
},
{
"epoch": 0.08378980316619515,
"grad_norm": 3.622019052505493,
"learning_rate": 0.00019853151204956616,
"loss": 0.8358,
"step": 174
},
{
"epoch": 0.08427135375910431,
"grad_norm": 4.48416805267334,
"learning_rate": 0.00019850475794511749,
"loss": 0.9484,
"step": 175
},
{
"epoch": 0.08475290435201348,
"grad_norm": 4.607672214508057,
"learning_rate": 0.00019847776415728185,
"loss": 0.7357,
"step": 176
},
{
"epoch": 0.08523445494492266,
"grad_norm": 2.3138136863708496,
"learning_rate": 0.000198450530751741,
"loss": 1.2186,
"step": 177
},
{
"epoch": 0.08571600553783182,
"grad_norm": 3.8758018016815186,
"learning_rate": 0.00019842305779475968,
"loss": 0.9932,
"step": 178
},
{
"epoch": 0.08619755613074098,
"grad_norm": 3.34867000579834,
"learning_rate": 0.00019839534535318558,
"loss": 1.1485,
"step": 179
},
{
"epoch": 0.08667910672365016,
"grad_norm": 1.9022514820098877,
"learning_rate": 0.00019836739349444899,
"loss": 0.7949,
"step": 180
},
{
"epoch": 0.08716065731655932,
"grad_norm": 3.0250067710876465,
"learning_rate": 0.00019833920228656292,
"loss": 0.9396,
"step": 181
},
{
"epoch": 0.08764220790946849,
"grad_norm": 3.734602689743042,
"learning_rate": 0.0001983107717981226,
"loss": 1.1539,
"step": 182
},
{
"epoch": 0.08812375850237765,
"grad_norm": 2.532639741897583,
"learning_rate": 0.00019828210209830562,
"loss": 0.836,
"step": 183
},
{
"epoch": 0.08860530909528683,
"grad_norm": 3.711615800857544,
"learning_rate": 0.00019825319325687154,
"loss": 0.9482,
"step": 184
},
{
"epoch": 0.08908685968819599,
"grad_norm": 4.490362167358398,
"learning_rate": 0.00019822404534416182,
"loss": 1.0918,
"step": 185
},
{
"epoch": 0.08956841028110515,
"grad_norm": 2.6988461017608643,
"learning_rate": 0.00019819465843109963,
"loss": 0.9532,
"step": 186
},
{
"epoch": 0.09004996087401433,
"grad_norm": 4.821238040924072,
"learning_rate": 0.00019816503258918969,
"loss": 0.6697,
"step": 187
},
{
"epoch": 0.0905315114669235,
"grad_norm": 5.357283592224121,
"learning_rate": 0.00019813516789051808,
"loss": 0.8587,
"step": 188
},
{
"epoch": 0.09101306205983266,
"grad_norm": 4.648171901702881,
"learning_rate": 0.0001981050644077521,
"loss": 1.11,
"step": 189
},
{
"epoch": 0.09149461265274182,
"grad_norm": 3.6666064262390137,
"learning_rate": 0.00019807472221414002,
"loss": 0.6605,
"step": 190
},
{
"epoch": 0.091976163245651,
"grad_norm": 5.507065773010254,
"learning_rate": 0.00019804414138351094,
"loss": 0.9696,
"step": 191
},
{
"epoch": 0.09245771383856016,
"grad_norm": 2.8789749145507812,
"learning_rate": 0.00019801332199027467,
"loss": 1.1445,
"step": 192
},
{
"epoch": 0.09293926443146933,
"grad_norm": 3.538658380508423,
"learning_rate": 0.00019798226410942146,
"loss": 1.2154,
"step": 193
},
{
"epoch": 0.0934208150243785,
"grad_norm": 5.083014011383057,
"learning_rate": 0.00019795096781652182,
"loss": 1.3403,
"step": 194
},
{
"epoch": 0.09390236561728767,
"grad_norm": 4.016115665435791,
"learning_rate": 0.00019791943318772643,
"loss": 1.0433,
"step": 195
},
{
"epoch": 0.09438391621019683,
"grad_norm": 3.3291447162628174,
"learning_rate": 0.00019788766029976587,
"loss": 1.0221,
"step": 196
},
{
"epoch": 0.09486546680310601,
"grad_norm": 4.776575565338135,
"learning_rate": 0.0001978556492299504,
"loss": 0.9671,
"step": 197
},
{
"epoch": 0.09534701739601517,
"grad_norm": 2.6845924854278564,
"learning_rate": 0.00019782340005616996,
"loss": 0.9646,
"step": 198
},
{
"epoch": 0.09582856798892433,
"grad_norm": 3.3309099674224854,
"learning_rate": 0.0001977909128568937,
"loss": 0.8776,
"step": 199
},
{
"epoch": 0.0963101185818335,
"grad_norm": 5.5021586418151855,
"learning_rate": 0.00019775818771117,
"loss": 1.0165,
"step": 200
},
{
"epoch": 0.09679166917474268,
"grad_norm": 4.549814224243164,
"learning_rate": 0.00019772522469862626,
"loss": 0.803,
"step": 201
},
{
"epoch": 0.09727321976765184,
"grad_norm": 4.1988067626953125,
"learning_rate": 0.00019769202389946863,
"loss": 1.2152,
"step": 202
},
{
"epoch": 0.097754770360561,
"grad_norm": 2.6205949783325195,
"learning_rate": 0.0001976585853944818,
"loss": 0.8637,
"step": 203
},
{
"epoch": 0.09823632095347018,
"grad_norm": 2.774397850036621,
"learning_rate": 0.0001976249092650289,
"loss": 1.0912,
"step": 204
},
{
"epoch": 0.09871787154637934,
"grad_norm": 1.7028182744979858,
"learning_rate": 0.00019759099559305124,
"loss": 0.8919,
"step": 205
},
{
"epoch": 0.0991994221392885,
"grad_norm": 3.3020026683807373,
"learning_rate": 0.00019755684446106812,
"loss": 1.0172,
"step": 206
},
{
"epoch": 0.09968097273219767,
"grad_norm": 3.2804148197174072,
"learning_rate": 0.00019752245595217662,
"loss": 1.3593,
"step": 207
},
{
"epoch": 0.10016252332510685,
"grad_norm": 2.095794677734375,
"learning_rate": 0.00019748783015005144,
"loss": 1.0033,
"step": 208
},
{
"epoch": 0.10064407391801601,
"grad_norm": 5.280991077423096,
"learning_rate": 0.00019745296713894465,
"loss": 0.8974,
"step": 209
},
{
"epoch": 0.10112562451092517,
"grad_norm": 2.9938979148864746,
"learning_rate": 0.00019741786700368548,
"loss": 0.8715,
"step": 210
},
{
"epoch": 0.10160717510383435,
"grad_norm": 6.764945030212402,
"learning_rate": 0.00019738252982968017,
"loss": 0.8115,
"step": 211
},
{
"epoch": 0.10208872569674352,
"grad_norm": 2.588499069213867,
"learning_rate": 0.00019734695570291168,
"loss": 0.9158,
"step": 212
},
{
"epoch": 0.10257027628965268,
"grad_norm": 3.7589855194091797,
"learning_rate": 0.00019731114470993962,
"loss": 1.2896,
"step": 213
},
{
"epoch": 0.10305182688256186,
"grad_norm": 6.584591865539551,
"learning_rate": 0.0001972750969378998,
"loss": 1.2588,
"step": 214
},
{
"epoch": 0.10353337747547102,
"grad_norm": 4.74730110168457,
"learning_rate": 0.00019723881247450434,
"loss": 0.8416,
"step": 215
},
{
"epoch": 0.10401492806838018,
"grad_norm": 3.6451597213745117,
"learning_rate": 0.0001972022914080411,
"loss": 1.3575,
"step": 216
},
{
"epoch": 0.10449647866128935,
"grad_norm": 4.290178298950195,
"learning_rate": 0.00019716553382737379,
"loss": 0.8757,
"step": 217
},
{
"epoch": 0.10497802925419852,
"grad_norm": 4.238255023956299,
"learning_rate": 0.00019712853982194152,
"loss": 0.928,
"step": 218
},
{
"epoch": 0.10545957984710769,
"grad_norm": 4.077386856079102,
"learning_rate": 0.00019709130948175876,
"loss": 1.1232,
"step": 219
},
{
"epoch": 0.10594113044001685,
"grad_norm": 2.5770697593688965,
"learning_rate": 0.0001970538428974149,
"loss": 0.867,
"step": 220
},
{
"epoch": 0.10642268103292603,
"grad_norm": 3.7551944255828857,
"learning_rate": 0.00019701614016007436,
"loss": 1.0874,
"step": 221
},
{
"epoch": 0.10690423162583519,
"grad_norm": 3.312821626663208,
"learning_rate": 0.00019697820136147597,
"loss": 0.7091,
"step": 222
},
{
"epoch": 0.10738578221874436,
"grad_norm": 2.8703348636627197,
"learning_rate": 0.00019694002659393305,
"loss": 0.8432,
"step": 223
},
{
"epoch": 0.10786733281165352,
"grad_norm": 2.452773332595825,
"learning_rate": 0.0001969016159503331,
"loss": 1.2779,
"step": 224
},
{
"epoch": 0.1083488834045627,
"grad_norm": 2.704692840576172,
"learning_rate": 0.00019686296952413747,
"loss": 0.651,
"step": 225
},
{
"epoch": 0.10883043399747186,
"grad_norm": 3.8419394493103027,
"learning_rate": 0.0001968240874093813,
"loss": 0.741,
"step": 226
},
{
"epoch": 0.10931198459038102,
"grad_norm": 4.401157379150391,
"learning_rate": 0.00019678496970067325,
"loss": 0.8972,
"step": 227
},
{
"epoch": 0.1097935351832902,
"grad_norm": 3.6739308834075928,
"learning_rate": 0.0001967456164931951,
"loss": 0.4634,
"step": 228
},
{
"epoch": 0.11027508577619936,
"grad_norm": 5.284419059753418,
"learning_rate": 0.0001967060278827017,
"loss": 0.9738,
"step": 229
},
{
"epoch": 0.11075663636910853,
"grad_norm": 4.1912360191345215,
"learning_rate": 0.00019666620396552076,
"loss": 1.0792,
"step": 230
},
{
"epoch": 0.11123818696201769,
"grad_norm": 3.6482601165771484,
"learning_rate": 0.00019662614483855246,
"loss": 1.3046,
"step": 231
},
{
"epoch": 0.11171973755492687,
"grad_norm": 6.06255578994751,
"learning_rate": 0.00019658585059926934,
"loss": 0.796,
"step": 232
},
{
"epoch": 0.11220128814783603,
"grad_norm": 2.2104992866516113,
"learning_rate": 0.00019654532134571594,
"loss": 0.8634,
"step": 233
},
{
"epoch": 0.1126828387407452,
"grad_norm": 5.140758991241455,
"learning_rate": 0.00019650455717650878,
"loss": 0.6197,
"step": 234
},
{
"epoch": 0.11316438933365437,
"grad_norm": 3.7386419773101807,
"learning_rate": 0.00019646355819083589,
"loss": 0.8885,
"step": 235
},
{
"epoch": 0.11364593992656354,
"grad_norm": 4.816273212432861,
"learning_rate": 0.0001964223244884566,
"loss": 0.9825,
"step": 236
},
{
"epoch": 0.1141274905194727,
"grad_norm": 5.175439834594727,
"learning_rate": 0.00019638085616970153,
"loss": 1.0349,
"step": 237
},
{
"epoch": 0.11460904111238188,
"grad_norm": 7.430823802947998,
"learning_rate": 0.00019633915333547202,
"loss": 0.7472,
"step": 238
},
{
"epoch": 0.11509059170529104,
"grad_norm": 5.103323936462402,
"learning_rate": 0.00019629721608724004,
"loss": 1.0827,
"step": 239
},
{
"epoch": 0.1155721422982002,
"grad_norm": 3.144728660583496,
"learning_rate": 0.0001962550445270481,
"loss": 0.6904,
"step": 240
},
{
"epoch": 0.11605369289110937,
"grad_norm": 3.113306760787964,
"learning_rate": 0.00019621263875750864,
"loss": 1.2587,
"step": 241
},
{
"epoch": 0.11653524348401854,
"grad_norm": 3.700697898864746,
"learning_rate": 0.00019616999888180406,
"loss": 0.8636,
"step": 242
},
{
"epoch": 0.11701679407692771,
"grad_norm": 3.9976747035980225,
"learning_rate": 0.0001961271250036865,
"loss": 0.8097,
"step": 243
},
{
"epoch": 0.11749834466983687,
"grad_norm": 3.022249937057495,
"learning_rate": 0.0001960840172274773,
"loss": 0.616,
"step": 244
},
{
"epoch": 0.11797989526274605,
"grad_norm": 5.003868579864502,
"learning_rate": 0.00019604067565806704,
"loss": 0.9634,
"step": 245
},
{
"epoch": 0.11846144585565521,
"grad_norm": 3.217082977294922,
"learning_rate": 0.00019599710040091512,
"loss": 0.8464,
"step": 246
},
{
"epoch": 0.11894299644856438,
"grad_norm": 3.279885768890381,
"learning_rate": 0.00019595329156204955,
"loss": 0.9137,
"step": 247
},
{
"epoch": 0.11942454704147354,
"grad_norm": 7.89387845993042,
"learning_rate": 0.00019590924924806676,
"loss": 0.7351,
"step": 248
},
{
"epoch": 0.11990609763438272,
"grad_norm": 3.444643020629883,
"learning_rate": 0.0001958649735661312,
"loss": 1.1217,
"step": 249
},
{
"epoch": 0.12038764822729188,
"grad_norm": 3.371429443359375,
"learning_rate": 0.00019582046462397515,
"loss": 0.6736,
"step": 250
},
{
"epoch": 0.12086919882020104,
"grad_norm": 2.9329636096954346,
"learning_rate": 0.00019577572252989854,
"loss": 0.91,
"step": 251
},
{
"epoch": 0.12135074941311022,
"grad_norm": 4.010715007781982,
"learning_rate": 0.00019573074739276858,
"loss": 1.1179,
"step": 252
},
{
"epoch": 0.12183230000601938,
"grad_norm": 3.4689531326293945,
"learning_rate": 0.00019568553932201947,
"loss": 0.8237,
"step": 253
},
{
"epoch": 0.12231385059892855,
"grad_norm": 2.893638849258423,
"learning_rate": 0.00019564009842765225,
"loss": 1.3797,
"step": 254
},
{
"epoch": 0.12279540119183771,
"grad_norm": 3.4823315143585205,
"learning_rate": 0.00019559442482023444,
"loss": 1.0072,
"step": 255
},
{
"epoch": 0.12327695178474689,
"grad_norm": 4.071844100952148,
"learning_rate": 0.0001955485186108998,
"loss": 0.9674,
"step": 256
},
{
"epoch": 0.12375850237765605,
"grad_norm": 3.1438372135162354,
"learning_rate": 0.00019550237991134805,
"loss": 0.8143,
"step": 257
},
{
"epoch": 0.12424005297056522,
"grad_norm": 1.9269888401031494,
"learning_rate": 0.00019545600883384467,
"loss": 0.9445,
"step": 258
},
{
"epoch": 0.12472160356347439,
"grad_norm": 4.249316215515137,
"learning_rate": 0.0001954094054912205,
"loss": 0.9279,
"step": 259
},
{
"epoch": 0.12520315415638356,
"grad_norm": 2.100592851638794,
"learning_rate": 0.00019536256999687157,
"loss": 0.566,
"step": 260
},
{
"epoch": 0.12568470474929272,
"grad_norm": 2.685699462890625,
"learning_rate": 0.00019531550246475876,
"loss": 0.6129,
"step": 261
},
{
"epoch": 0.12616625534220188,
"grad_norm": 4.864408493041992,
"learning_rate": 0.00019526820300940756,
"loss": 1.4781,
"step": 262
},
{
"epoch": 0.12664780593511105,
"grad_norm": 5.040529727935791,
"learning_rate": 0.00019522067174590778,
"loss": 0.9867,
"step": 263
},
{
"epoch": 0.12712935652802024,
"grad_norm": 4.969310760498047,
"learning_rate": 0.00019517290878991324,
"loss": 0.8467,
"step": 264
},
{
"epoch": 0.1276109071209294,
"grad_norm": 4.031760215759277,
"learning_rate": 0.0001951249142576416,
"loss": 1.3096,
"step": 265
},
{
"epoch": 0.12809245771383856,
"grad_norm": 4.020783424377441,
"learning_rate": 0.00019507668826587387,
"loss": 1.1767,
"step": 266
},
{
"epoch": 0.12857400830674773,
"grad_norm": 4.138341426849365,
"learning_rate": 0.0001950282309319544,
"loss": 1.2766,
"step": 267
},
{
"epoch": 0.1290555588996569,
"grad_norm": 2.7963201999664307,
"learning_rate": 0.0001949795423737903,
"loss": 0.9197,
"step": 268
},
{
"epoch": 0.12953710949256605,
"grad_norm": 4.995326042175293,
"learning_rate": 0.00019493062270985144,
"loss": 1.0874,
"step": 269
},
{
"epoch": 0.13001866008547522,
"grad_norm": 2.315774917602539,
"learning_rate": 0.00019488147205916985,
"loss": 0.7577,
"step": 270
},
{
"epoch": 0.1305002106783844,
"grad_norm": 6.237738132476807,
"learning_rate": 0.00019483209054133976,
"loss": 1.0659,
"step": 271
},
{
"epoch": 0.13098176127129357,
"grad_norm": 2.8713154792785645,
"learning_rate": 0.00019478247827651708,
"loss": 0.965,
"step": 272
},
{
"epoch": 0.13146331186420274,
"grad_norm": 3.7494618892669678,
"learning_rate": 0.00019473263538541914,
"loss": 0.708,
"step": 273
},
{
"epoch": 0.1319448624571119,
"grad_norm": 5.115126132965088,
"learning_rate": 0.00019468256198932455,
"loss": 1.1001,
"step": 274
},
{
"epoch": 0.13242641305002106,
"grad_norm": 1.991768479347229,
"learning_rate": 0.00019463225821007268,
"loss": 1.5559,
"step": 275
},
{
"epoch": 0.13290796364293023,
"grad_norm": 2.3148343563079834,
"learning_rate": 0.00019458172417006347,
"loss": 0.7299,
"step": 276
},
{
"epoch": 0.1333895142358394,
"grad_norm": 5.019519329071045,
"learning_rate": 0.00019453095999225726,
"loss": 0.9006,
"step": 277
},
{
"epoch": 0.13387106482874858,
"grad_norm": 2.933354616165161,
"learning_rate": 0.0001944799658001742,
"loss": 0.8045,
"step": 278
},
{
"epoch": 0.13435261542165775,
"grad_norm": 3.3015851974487305,
"learning_rate": 0.00019442874171789418,
"loss": 0.8641,
"step": 279
},
{
"epoch": 0.1348341660145669,
"grad_norm": 3.103376626968384,
"learning_rate": 0.00019437728787005657,
"loss": 1.2227,
"step": 280
},
{
"epoch": 0.13531571660747607,
"grad_norm": 3.754467725753784,
"learning_rate": 0.00019432560438185963,
"loss": 0.6292,
"step": 281
},
{
"epoch": 0.13579726720038524,
"grad_norm": 3.083435297012329,
"learning_rate": 0.00019427369137906046,
"loss": 1.0215,
"step": 282
},
{
"epoch": 0.1362788177932944,
"grad_norm": 2.066274404525757,
"learning_rate": 0.00019422154898797472,
"loss": 0.9194,
"step": 283
},
{
"epoch": 0.13676036838620356,
"grad_norm": 2.431302785873413,
"learning_rate": 0.00019416917733547603,
"loss": 1.2826,
"step": 284
},
{
"epoch": 0.13724191897911275,
"grad_norm": 2.993353843688965,
"learning_rate": 0.00019411657654899597,
"loss": 0.8271,
"step": 285
},
{
"epoch": 0.13772346957202192,
"grad_norm": 5.987199783325195,
"learning_rate": 0.0001940637467565237,
"loss": 1.0192,
"step": 286
},
{
"epoch": 0.13820502016493108,
"grad_norm": 1.7869144678115845,
"learning_rate": 0.00019401068808660546,
"loss": 1.1415,
"step": 287
},
{
"epoch": 0.13868657075784024,
"grad_norm": 1.75114905834198,
"learning_rate": 0.0001939574006683445,
"loss": 0.9718,
"step": 288
},
{
"epoch": 0.1391681213507494,
"grad_norm": 3.034210443496704,
"learning_rate": 0.00019390388463140065,
"loss": 0.4041,
"step": 289
},
{
"epoch": 0.13964967194365857,
"grad_norm": 7.397146224975586,
"learning_rate": 0.00019385014010598998,
"loss": 0.9913,
"step": 290
},
{
"epoch": 0.14013122253656773,
"grad_norm": 2.123467206954956,
"learning_rate": 0.00019379616722288456,
"loss": 0.8688,
"step": 291
},
{
"epoch": 0.14061277312947693,
"grad_norm": 3.545257329940796,
"learning_rate": 0.0001937419661134121,
"loss": 1.1841,
"step": 292
},
{
"epoch": 0.1410943237223861,
"grad_norm": 3.6970181465148926,
"learning_rate": 0.0001936875369094556,
"loss": 1.241,
"step": 293
},
{
"epoch": 0.14157587431529525,
"grad_norm": 2.9844279289245605,
"learning_rate": 0.0001936328797434531,
"loss": 0.9453,
"step": 294
},
{
"epoch": 0.14205742490820442,
"grad_norm": 1.6827529668807983,
"learning_rate": 0.00019357799474839735,
"loss": 0.9734,
"step": 295
},
{
"epoch": 0.14253897550111358,
"grad_norm": 3.7799909114837646,
"learning_rate": 0.00019352288205783536,
"loss": 0.7606,
"step": 296
},
{
"epoch": 0.14302052609402274,
"grad_norm": 3.0586202144622803,
"learning_rate": 0.00019346754180586825,
"loss": 0.6152,
"step": 297
},
{
"epoch": 0.14350207668693193,
"grad_norm": 1.6172605752944946,
"learning_rate": 0.00019341197412715082,
"loss": 0.7054,
"step": 298
},
{
"epoch": 0.1439836272798411,
"grad_norm": 2.8775248527526855,
"learning_rate": 0.00019335617915689128,
"loss": 1.1397,
"step": 299
},
{
"epoch": 0.14446517787275026,
"grad_norm": 3.2396507263183594,
"learning_rate": 0.00019330015703085082,
"loss": 0.6854,
"step": 300
},
{
"epoch": 0.14494672846565942,
"grad_norm": 4.7253289222717285,
"learning_rate": 0.00019324390788534343,
"loss": 0.9446,
"step": 301
},
{
"epoch": 0.1454282790585686,
"grad_norm": 5.362252712249756,
"learning_rate": 0.00019318743185723546,
"loss": 0.8447,
"step": 302
},
{
"epoch": 0.14590982965147775,
"grad_norm": 2.6400959491729736,
"learning_rate": 0.00019313072908394525,
"loss": 0.8309,
"step": 303
},
{
"epoch": 0.14639138024438691,
"grad_norm": 2.0676944255828857,
"learning_rate": 0.00019307379970344294,
"loss": 1.0386,
"step": 304
},
{
"epoch": 0.1468729308372961,
"grad_norm": 2.806190013885498,
"learning_rate": 0.00019301664385425004,
"loss": 0.8906,
"step": 305
},
{
"epoch": 0.14735448143020527,
"grad_norm": 2.275996446609497,
"learning_rate": 0.0001929592616754391,
"loss": 0.6389,
"step": 306
},
{
"epoch": 0.14783603202311443,
"grad_norm": 1.520571231842041,
"learning_rate": 0.00019290165330663336,
"loss": 1.0456,
"step": 307
},
{
"epoch": 0.1483175826160236,
"grad_norm": 2.8603453636169434,
"learning_rate": 0.00019284381888800647,
"loss": 0.9411,
"step": 308
},
{
"epoch": 0.14879913320893276,
"grad_norm": 3.3105409145355225,
"learning_rate": 0.00019278575856028206,
"loss": 0.9477,
"step": 309
},
{
"epoch": 0.14928068380184192,
"grad_norm": 2.5645644664764404,
"learning_rate": 0.00019272747246473345,
"loss": 0.54,
"step": 310
},
{
"epoch": 0.1497622343947511,
"grad_norm": 10.714997291564941,
"learning_rate": 0.00019266896074318334,
"loss": 1.3273,
"step": 311
},
{
"epoch": 0.15024378498766028,
"grad_norm": 3.5276308059692383,
"learning_rate": 0.00019261022353800344,
"loss": 0.9848,
"step": 312
},
{
"epoch": 0.15072533558056944,
"grad_norm": 4.048092365264893,
"learning_rate": 0.00019255126099211402,
"loss": 0.7335,
"step": 313
},
{
"epoch": 0.1512068861734786,
"grad_norm": 1.8157590627670288,
"learning_rate": 0.00019249207324898376,
"loss": 1.0381,
"step": 314
},
{
"epoch": 0.15168843676638777,
"grad_norm": 3.7452712059020996,
"learning_rate": 0.0001924326604526292,
"loss": 0.6327,
"step": 315
},
{
"epoch": 0.15216998735929693,
"grad_norm": 3.773587942123413,
"learning_rate": 0.00019237302274761458,
"loss": 0.5525,
"step": 316
},
{
"epoch": 0.1526515379522061,
"grad_norm": 3.6880061626434326,
"learning_rate": 0.0001923131602790513,
"loss": 1.2273,
"step": 317
},
{
"epoch": 0.15313308854511526,
"grad_norm": 3.083782196044922,
"learning_rate": 0.00019225307319259768,
"loss": 1.0963,
"step": 318
},
{
"epoch": 0.15361463913802445,
"grad_norm": 5.022973537445068,
"learning_rate": 0.00019219276163445862,
"loss": 0.7697,
"step": 319
},
{
"epoch": 0.1540961897309336,
"grad_norm": 3.2828280925750732,
"learning_rate": 0.00019213222575138522,
"loss": 1.069,
"step": 320
},
{
"epoch": 0.15457774032384278,
"grad_norm": 4.293641567230225,
"learning_rate": 0.00019207146569067435,
"loss": 0.6866,
"step": 321
},
{
"epoch": 0.15505929091675194,
"grad_norm": 4.453739166259766,
"learning_rate": 0.00019201048160016838,
"loss": 0.939,
"step": 322
},
{
"epoch": 0.1555408415096611,
"grad_norm": 2.3290882110595703,
"learning_rate": 0.00019194927362825478,
"loss": 0.8467,
"step": 323
},
{
"epoch": 0.15602239210257027,
"grad_norm": 4.609375476837158,
"learning_rate": 0.00019188784192386587,
"loss": 1.064,
"step": 324
},
{
"epoch": 0.15650394269547943,
"grad_norm": 2.483145236968994,
"learning_rate": 0.00019182618663647817,
"loss": 0.6174,
"step": 325
},
{
"epoch": 0.15698549328838862,
"grad_norm": 4.485541820526123,
"learning_rate": 0.0001917643079161124,
"loss": 0.9137,
"step": 326
},
{
"epoch": 0.15746704388129779,
"grad_norm": 4.135148525238037,
"learning_rate": 0.00019170220591333283,
"loss": 0.7697,
"step": 327
},
{
"epoch": 0.15794859447420695,
"grad_norm": 4.6411333084106445,
"learning_rate": 0.00019163988077924713,
"loss": 1.1936,
"step": 328
},
{
"epoch": 0.1584301450671161,
"grad_norm": 3.8218069076538086,
"learning_rate": 0.00019157733266550575,
"loss": 0.7865,
"step": 329
},
{
"epoch": 0.15891169566002528,
"grad_norm": 5.044341564178467,
"learning_rate": 0.00019151456172430183,
"loss": 1.0328,
"step": 330
},
{
"epoch": 0.15939324625293444,
"grad_norm": 5.211885929107666,
"learning_rate": 0.0001914515681083707,
"loss": 1.096,
"step": 331
},
{
"epoch": 0.1598747968458436,
"grad_norm": 4.778816223144531,
"learning_rate": 0.00019138835197098937,
"loss": 0.7164,
"step": 332
},
{
"epoch": 0.1603563474387528,
"grad_norm": 3.1588540077209473,
"learning_rate": 0.00019132491346597643,
"loss": 1.1062,
"step": 333
},
{
"epoch": 0.16083789803166196,
"grad_norm": 2.6961734294891357,
"learning_rate": 0.00019126125274769145,
"loss": 0.7453,
"step": 334
},
{
"epoch": 0.16131944862457112,
"grad_norm": 2.4815406799316406,
"learning_rate": 0.00019119736997103476,
"loss": 0.9451,
"step": 335
},
{
"epoch": 0.16180099921748028,
"grad_norm": 1.7432652711868286,
"learning_rate": 0.000191133265291447,
"loss": 0.8675,
"step": 336
},
{
"epoch": 0.16228254981038945,
"grad_norm": 2.9783449172973633,
"learning_rate": 0.00019106893886490864,
"loss": 1.1438,
"step": 337
},
{
"epoch": 0.1627641004032986,
"grad_norm": 3.031538248062134,
"learning_rate": 0.00019100439084793989,
"loss": 0.7219,
"step": 338
},
{
"epoch": 0.1632456509962078,
"grad_norm": 6.661623001098633,
"learning_rate": 0.00019093962139759998,
"loss": 1.3528,
"step": 339
},
{
"epoch": 0.16372720158911697,
"grad_norm": 3.402763843536377,
"learning_rate": 0.000190874630671487,
"loss": 0.9113,
"step": 340
},
{
"epoch": 0.16420875218202613,
"grad_norm": 3.233682632446289,
"learning_rate": 0.00019080941882773745,
"loss": 1.1422,
"step": 341
},
{
"epoch": 0.1646903027749353,
"grad_norm": 3.8870816230773926,
"learning_rate": 0.00019074398602502584,
"loss": 0.6947,
"step": 342
},
{
"epoch": 0.16517185336784446,
"grad_norm": 4.976253509521484,
"learning_rate": 0.00019067833242256442,
"loss": 1.066,
"step": 343
},
{
"epoch": 0.16565340396075362,
"grad_norm": 3.8390371799468994,
"learning_rate": 0.0001906124581801025,
"loss": 0.7719,
"step": 344
},
{
"epoch": 0.16613495455366278,
"grad_norm": 6.181506633758545,
"learning_rate": 0.0001905463634579264,
"loss": 0.6594,
"step": 345
},
{
"epoch": 0.16661650514657197,
"grad_norm": 2.2047243118286133,
"learning_rate": 0.00019048004841685888,
"loss": 0.8504,
"step": 346
},
{
"epoch": 0.16709805573948114,
"grad_norm": 7.163036346435547,
"learning_rate": 0.00019041351321825883,
"loss": 1.1279,
"step": 347
},
{
"epoch": 0.1675796063323903,
"grad_norm": 3.1026716232299805,
"learning_rate": 0.00019034675802402068,
"loss": 1.1557,
"step": 348
},
{
"epoch": 0.16806115692529947,
"grad_norm": 2.942558526992798,
"learning_rate": 0.00019027978299657436,
"loss": 0.4541,
"step": 349
},
{
"epoch": 0.16854270751820863,
"grad_norm": 2.2551536560058594,
"learning_rate": 0.00019021258829888456,
"loss": 0.7569,
"step": 350
},
{
"epoch": 0.1690242581111178,
"grad_norm": 3.777118682861328,
"learning_rate": 0.00019014517409445052,
"loss": 1.0268,
"step": 351
},
{
"epoch": 0.16950580870402696,
"grad_norm": 2.533811330795288,
"learning_rate": 0.00019007754054730554,
"loss": 0.6828,
"step": 352
},
{
"epoch": 0.16998735929693615,
"grad_norm": 3.8600101470947266,
"learning_rate": 0.00019000968782201675,
"loss": 0.5744,
"step": 353
},
{
"epoch": 0.1704689098898453,
"grad_norm": 3.22756028175354,
"learning_rate": 0.00018994161608368448,
"loss": 1.3307,
"step": 354
},
{
"epoch": 0.17095046048275447,
"grad_norm": 2.4903550148010254,
"learning_rate": 0.00018987332549794196,
"loss": 1.0393,
"step": 355
},
{
"epoch": 0.17143201107566364,
"grad_norm": 2.447472333908081,
"learning_rate": 0.00018980481623095502,
"loss": 0.7863,
"step": 356
},
{
"epoch": 0.1719135616685728,
"grad_norm": 4.140078544616699,
"learning_rate": 0.00018973608844942148,
"loss": 0.8217,
"step": 357
},
{
"epoch": 0.17239511226148196,
"grad_norm": 2.152505874633789,
"learning_rate": 0.00018966714232057094,
"loss": 1.1535,
"step": 358
},
{
"epoch": 0.17287666285439113,
"grad_norm": 3.840864419937134,
"learning_rate": 0.00018959797801216418,
"loss": 0.6864,
"step": 359
},
{
"epoch": 0.17335821344730032,
"grad_norm": 4.183706283569336,
"learning_rate": 0.000189528595692493,
"loss": 0.7509,
"step": 360
},
{
"epoch": 0.17383976404020948,
"grad_norm": 3.3056836128234863,
"learning_rate": 0.00018945899553037956,
"loss": 0.8942,
"step": 361
},
{
"epoch": 0.17432131463311865,
"grad_norm": 4.057851314544678,
"learning_rate": 0.00018938917769517613,
"loss": 1.4257,
"step": 362
},
{
"epoch": 0.1748028652260278,
"grad_norm": 2.97546648979187,
"learning_rate": 0.00018931914235676458,
"loss": 0.9936,
"step": 363
},
{
"epoch": 0.17528441581893697,
"grad_norm": 0.8970528841018677,
"learning_rate": 0.00018924888968555606,
"loss": 0.6008,
"step": 364
},
{
"epoch": 0.17576596641184614,
"grad_norm": 4.94012975692749,
"learning_rate": 0.00018917841985249055,
"loss": 0.985,
"step": 365
},
{
"epoch": 0.1762475170047553,
"grad_norm": 3.416455030441284,
"learning_rate": 0.0001891077330290363,
"loss": 0.9864,
"step": 366
},
{
"epoch": 0.1767290675976645,
"grad_norm": 2.498899221420288,
"learning_rate": 0.00018903682938718977,
"loss": 0.8499,
"step": 367
},
{
"epoch": 0.17721061819057365,
"grad_norm": 3.770181655883789,
"learning_rate": 0.00018896570909947475,
"loss": 0.8731,
"step": 368
},
{
"epoch": 0.17769216878348282,
"grad_norm": 4.896841049194336,
"learning_rate": 0.00018889437233894234,
"loss": 1.0454,
"step": 369
},
{
"epoch": 0.17817371937639198,
"grad_norm": 2.264261245727539,
"learning_rate": 0.0001888228192791703,
"loss": 1.0061,
"step": 370
},
{
"epoch": 0.17865526996930114,
"grad_norm": 3.097073793411255,
"learning_rate": 0.00018875105009426272,
"loss": 0.6645,
"step": 371
},
{
"epoch": 0.1791368205622103,
"grad_norm": 1.4488924741744995,
"learning_rate": 0.00018867906495884955,
"loss": 1.1148,
"step": 372
},
{
"epoch": 0.17961837115511947,
"grad_norm": 3.17714524269104,
"learning_rate": 0.0001886068640480862,
"loss": 0.7299,
"step": 373
},
{
"epoch": 0.18009992174802866,
"grad_norm": 5.30600118637085,
"learning_rate": 0.00018853444753765306,
"loss": 0.797,
"step": 374
},
{
"epoch": 0.18058147234093783,
"grad_norm": 2.218719959259033,
"learning_rate": 0.00018846181560375525,
"loss": 0.6802,
"step": 375
},
{
"epoch": 0.181063022933847,
"grad_norm": 1.5951119661331177,
"learning_rate": 0.0001883889684231219,
"loss": 0.9983,
"step": 376
},
{
"epoch": 0.18154457352675615,
"grad_norm": 4.742170810699463,
"learning_rate": 0.000188315906173006,
"loss": 1.3679,
"step": 377
},
{
"epoch": 0.18202612411966532,
"grad_norm": 4.083590507507324,
"learning_rate": 0.0001882426290311838,
"loss": 0.6986,
"step": 378
},
{
"epoch": 0.18250767471257448,
"grad_norm": 4.625772476196289,
"learning_rate": 0.00018816913717595445,
"loss": 0.9917,
"step": 379
},
{
"epoch": 0.18298922530548364,
"grad_norm": 3.811450958251953,
"learning_rate": 0.00018809543078613953,
"loss": 0.7278,
"step": 380
},
{
"epoch": 0.18347077589839283,
"grad_norm": 2.0530614852905273,
"learning_rate": 0.00018802151004108263,
"loss": 0.574,
"step": 381
},
{
"epoch": 0.183952326491302,
"grad_norm": 3.4029970169067383,
"learning_rate": 0.0001879473751206489,
"loss": 0.8849,
"step": 382
},
{
"epoch": 0.18443387708421116,
"grad_norm": 5.864663124084473,
"learning_rate": 0.00018787302620522467,
"loss": 0.5902,
"step": 383
},
{
"epoch": 0.18491542767712033,
"grad_norm": 2.628844976425171,
"learning_rate": 0.00018779846347571693,
"loss": 0.9543,
"step": 384
},
{
"epoch": 0.1853969782700295,
"grad_norm": 5.040539741516113,
"learning_rate": 0.0001877236871135529,
"loss": 0.7266,
"step": 385
},
{
"epoch": 0.18587852886293865,
"grad_norm": 3.538259983062744,
"learning_rate": 0.00018764869730067968,
"loss": 1.0102,
"step": 386
},
{
"epoch": 0.18636007945584784,
"grad_norm": 3.6347954273223877,
"learning_rate": 0.0001875734942195637,
"loss": 0.4102,
"step": 387
},
{
"epoch": 0.186841630048757,
"grad_norm": 3.6020264625549316,
"learning_rate": 0.0001874980780531903,
"loss": 1.0311,
"step": 388
},
{
"epoch": 0.18732318064166617,
"grad_norm": 3.3939337730407715,
"learning_rate": 0.00018742244898506337,
"loss": 0.8185,
"step": 389
},
{
"epoch": 0.18780473123457533,
"grad_norm": 3.194336175918579,
"learning_rate": 0.00018734660719920475,
"loss": 1.0069,
"step": 390
},
{
"epoch": 0.1882862818274845,
"grad_norm": 3.444998025894165,
"learning_rate": 0.00018727055288015397,
"loss": 1.0042,
"step": 391
},
{
"epoch": 0.18876783242039366,
"grad_norm": 1.6734000444412231,
"learning_rate": 0.00018719428621296764,
"loss": 1.1157,
"step": 392
},
{
"epoch": 0.18924938301330282,
"grad_norm": 3.978752374649048,
"learning_rate": 0.00018711780738321897,
"loss": 0.5787,
"step": 393
},
{
"epoch": 0.18973093360621202,
"grad_norm": 3.7722957134246826,
"learning_rate": 0.00018704111657699758,
"loss": 0.7111,
"step": 394
},
{
"epoch": 0.19021248419912118,
"grad_norm": 1.650201678276062,
"learning_rate": 0.0001869642139809088,
"loss": 1.1438,
"step": 395
},
{
"epoch": 0.19069403479203034,
"grad_norm": 8.692386627197266,
"learning_rate": 0.00018688709978207323,
"loss": 0.8174,
"step": 396
},
{
"epoch": 0.1911755853849395,
"grad_norm": 1.9491546154022217,
"learning_rate": 0.00018680977416812644,
"loss": 1.0687,
"step": 397
},
{
"epoch": 0.19165713597784867,
"grad_norm": 2.265296459197998,
"learning_rate": 0.00018673223732721837,
"loss": 0.739,
"step": 398
},
{
"epoch": 0.19213868657075783,
"grad_norm": 3.7249464988708496,
"learning_rate": 0.0001866544894480129,
"loss": 1.0551,
"step": 399
},
{
"epoch": 0.192620237163667,
"grad_norm": 2.0633394718170166,
"learning_rate": 0.00018657653071968747,
"loss": 0.8448,
"step": 400
},
{
"epoch": 0.1931017877565762,
"grad_norm": 3.448591947555542,
"learning_rate": 0.00018649836133193253,
"loss": 0.7965,
"step": 401
},
{
"epoch": 0.19358333834948535,
"grad_norm": 1.4229322671890259,
"learning_rate": 0.00018641998147495112,
"loss": 0.4359,
"step": 402
},
{
"epoch": 0.19406488894239451,
"grad_norm": 2.978297233581543,
"learning_rate": 0.00018634139133945837,
"loss": 0.632,
"step": 403
},
{
"epoch": 0.19454643953530368,
"grad_norm": 2.407181978225708,
"learning_rate": 0.00018626259111668105,
"loss": 1.0891,
"step": 404
},
{
"epoch": 0.19502799012821284,
"grad_norm": 3.066446542739868,
"learning_rate": 0.00018618358099835723,
"loss": 1.3327,
"step": 405
},
{
"epoch": 0.195509540721122,
"grad_norm": 1.5488284826278687,
"learning_rate": 0.00018610436117673555,
"loss": 0.42,
"step": 406
},
{
"epoch": 0.19599109131403117,
"grad_norm": 1.5752339363098145,
"learning_rate": 0.00018602493184457505,
"loss": 0.6942,
"step": 407
},
{
"epoch": 0.19647264190694036,
"grad_norm": 3.5753307342529297,
"learning_rate": 0.00018594529319514437,
"loss": 0.9352,
"step": 408
},
{
"epoch": 0.19695419249984952,
"grad_norm": 2.1090338230133057,
"learning_rate": 0.00018586544542222169,
"loss": 0.8635,
"step": 409
},
{
"epoch": 0.1974357430927587,
"grad_norm": 4.924689292907715,
"learning_rate": 0.00018578538872009384,
"loss": 0.6627,
"step": 410
},
{
"epoch": 0.19791729368566785,
"grad_norm": 0.7094942331314087,
"learning_rate": 0.00018570512328355612,
"loss": 0.4438,
"step": 411
},
{
"epoch": 0.198398844278577,
"grad_norm": 3.230691909790039,
"learning_rate": 0.00018562464930791167,
"loss": 0.7968,
"step": 412
},
{
"epoch": 0.19888039487148618,
"grad_norm": 3.516850709915161,
"learning_rate": 0.00018554396698897116,
"loss": 0.7121,
"step": 413
},
{
"epoch": 0.19936194546439534,
"grad_norm": 2.1137967109680176,
"learning_rate": 0.00018546307652305205,
"loss": 0.8463,
"step": 414
},
{
"epoch": 0.19984349605730453,
"grad_norm": 2.3233392238616943,
"learning_rate": 0.00018538197810697842,
"loss": 0.8193,
"step": 415
},
{
"epoch": 0.2003250466502137,
"grad_norm": 6.277778625488281,
"learning_rate": 0.0001853006719380802,
"loss": 0.8697,
"step": 416
},
{
"epoch": 0.20080659724312286,
"grad_norm": 3.4568729400634766,
"learning_rate": 0.00018521915821419284,
"loss": 0.5947,
"step": 417
},
{
"epoch": 0.20128814783603202,
"grad_norm": 2.8246490955352783,
"learning_rate": 0.00018513743713365698,
"loss": 0.8121,
"step": 418
},
{
"epoch": 0.20176969842894119,
"grad_norm": 3.8738162517547607,
"learning_rate": 0.00018505550889531765,
"loss": 0.7239,
"step": 419
},
{
"epoch": 0.20225124902185035,
"grad_norm": 3.2353687286376953,
"learning_rate": 0.00018497337369852395,
"loss": 0.6751,
"step": 420
},
{
"epoch": 0.2027327996147595,
"grad_norm": 5.0770039558410645,
"learning_rate": 0.0001848910317431286,
"loss": 1.2165,
"step": 421
},
{
"epoch": 0.2032143502076687,
"grad_norm": 3.751051902770996,
"learning_rate": 0.00018480848322948739,
"loss": 0.661,
"step": 422
},
{
"epoch": 0.20369590080057787,
"grad_norm": 3.767159938812256,
"learning_rate": 0.00018472572835845873,
"loss": 0.7486,
"step": 423
},
{
"epoch": 0.20417745139348703,
"grad_norm": 2.8906075954437256,
"learning_rate": 0.00018464276733140306,
"loss": 0.7135,
"step": 424
},
{
"epoch": 0.2046590019863962,
"grad_norm": 2.5436739921569824,
"learning_rate": 0.0001845596003501826,
"loss": 0.7165,
"step": 425
},
{
"epoch": 0.20514055257930536,
"grad_norm": 2.0007944107055664,
"learning_rate": 0.00018447622761716057,
"loss": 0.8495,
"step": 426
},
{
"epoch": 0.20562210317221452,
"grad_norm": 1.651319146156311,
"learning_rate": 0.00018439264933520084,
"loss": 0.9067,
"step": 427
},
{
"epoch": 0.2061036537651237,
"grad_norm": 1.483290433883667,
"learning_rate": 0.00018430886570766747,
"loss": 0.7198,
"step": 428
},
{
"epoch": 0.20658520435803288,
"grad_norm": 4.108277797698975,
"learning_rate": 0.0001842248769384242,
"loss": 1.5626,
"step": 429
},
{
"epoch": 0.20706675495094204,
"grad_norm": 2.4852449893951416,
"learning_rate": 0.00018414068323183375,
"loss": 0.7769,
"step": 430
},
{
"epoch": 0.2075483055438512,
"grad_norm": 4.069541931152344,
"learning_rate": 0.00018405628479275775,
"loss": 0.663,
"step": 431
},
{
"epoch": 0.20802985613676037,
"grad_norm": 2.137789011001587,
"learning_rate": 0.00018397168182655583,
"loss": 0.5468,
"step": 432
},
{
"epoch": 0.20851140672966953,
"grad_norm": 3.0423202514648438,
"learning_rate": 0.00018388687453908527,
"loss": 0.8064,
"step": 433
},
{
"epoch": 0.2089929573225787,
"grad_norm": 2.8734021186828613,
"learning_rate": 0.00018380186313670058,
"loss": 1.0275,
"step": 434
},
{
"epoch": 0.20947450791548788,
"grad_norm": 2.459599733352661,
"learning_rate": 0.00018371664782625287,
"loss": 0.7337,
"step": 435
},
{
"epoch": 0.20995605850839705,
"grad_norm": 4.528045177459717,
"learning_rate": 0.00018363122881508945,
"loss": 1.0966,
"step": 436
},
{
"epoch": 0.2104376091013062,
"grad_norm": 2.942962169647217,
"learning_rate": 0.00018354560631105328,
"loss": 0.6714,
"step": 437
},
{
"epoch": 0.21091915969421537,
"grad_norm": 5.867403507232666,
"learning_rate": 0.00018345978052248233,
"loss": 0.9835,
"step": 438
},
{
"epoch": 0.21140071028712454,
"grad_norm": 1.642223834991455,
"learning_rate": 0.00018337375165820944,
"loss": 0.8807,
"step": 439
},
{
"epoch": 0.2118822608800337,
"grad_norm": 5.436519145965576,
"learning_rate": 0.00018328751992756137,
"loss": 0.8824,
"step": 440
},
{
"epoch": 0.21236381147294286,
"grad_norm": 5.564542770385742,
"learning_rate": 0.0001832010855403586,
"loss": 1.0045,
"step": 441
},
{
"epoch": 0.21284536206585206,
"grad_norm": 2.1191842555999756,
"learning_rate": 0.0001831144487069147,
"loss": 0.7834,
"step": 442
},
{
"epoch": 0.21332691265876122,
"grad_norm": 2.1237781047821045,
"learning_rate": 0.0001830276096380358,
"loss": 0.3141,
"step": 443
},
{
"epoch": 0.21380846325167038,
"grad_norm": 4.568416118621826,
"learning_rate": 0.0001829405685450202,
"loss": 1.2394,
"step": 444
},
{
"epoch": 0.21429001384457955,
"grad_norm": 2.3887364864349365,
"learning_rate": 0.00018285332563965765,
"loss": 1.1355,
"step": 445
},
{
"epoch": 0.2147715644374887,
"grad_norm": 4.617615222930908,
"learning_rate": 0.00018276588113422905,
"loss": 0.8803,
"step": 446
},
{
"epoch": 0.21525311503039787,
"grad_norm": 2.1679558753967285,
"learning_rate": 0.00018267823524150575,
"loss": 0.9606,
"step": 447
},
{
"epoch": 0.21573466562330704,
"grad_norm": 2.1338422298431396,
"learning_rate": 0.00018259038817474923,
"loss": 0.9403,
"step": 448
},
{
"epoch": 0.21621621621621623,
"grad_norm": 2.041907548904419,
"learning_rate": 0.0001825023401477104,
"loss": 0.6543,
"step": 449
},
{
"epoch": 0.2166977668091254,
"grad_norm": 1.8490098714828491,
"learning_rate": 0.0001824140913746291,
"loss": 0.9038,
"step": 450
},
{
"epoch": 0.21717931740203456,
"grad_norm": 3.1015329360961914,
"learning_rate": 0.00018232564207023376,
"loss": 0.6252,
"step": 451
},
{
"epoch": 0.21766086799494372,
"grad_norm": 2.5276334285736084,
"learning_rate": 0.00018223699244974064,
"loss": 0.653,
"step": 452
},
{
"epoch": 0.21814241858785288,
"grad_norm": 2.063218116760254,
"learning_rate": 0.00018214814272885343,
"loss": 0.8085,
"step": 453
},
{
"epoch": 0.21862396918076205,
"grad_norm": 2.227787494659424,
"learning_rate": 0.00018205909312376276,
"loss": 0.9719,
"step": 454
},
{
"epoch": 0.2191055197736712,
"grad_norm": 2.54950213432312,
"learning_rate": 0.00018196984385114554,
"loss": 0.9854,
"step": 455
},
{
"epoch": 0.2195870703665804,
"grad_norm": 2.7499783039093018,
"learning_rate": 0.0001818803951281646,
"loss": 0.7189,
"step": 456
},
{
"epoch": 0.22006862095948956,
"grad_norm": 4.1418070793151855,
"learning_rate": 0.000181790747172468,
"loss": 0.7065,
"step": 457
},
{
"epoch": 0.22055017155239873,
"grad_norm": 1.8730262517929077,
"learning_rate": 0.00018170090020218864,
"loss": 1.164,
"step": 458
},
{
"epoch": 0.2210317221453079,
"grad_norm": 2.4339802265167236,
"learning_rate": 0.00018161085443594365,
"loss": 0.614,
"step": 459
},
{
"epoch": 0.22151327273821705,
"grad_norm": 1.8113713264465332,
"learning_rate": 0.00018152061009283382,
"loss": 0.5136,
"step": 460
},
{
"epoch": 0.22199482333112622,
"grad_norm": 4.9851250648498535,
"learning_rate": 0.00018143016739244314,
"loss": 0.9962,
"step": 461
},
{
"epoch": 0.22247637392403538,
"grad_norm": 3.5249273777008057,
"learning_rate": 0.0001813395265548383,
"loss": 0.7784,
"step": 462
},
{
"epoch": 0.22295792451694457,
"grad_norm": 4.574691295623779,
"learning_rate": 0.00018124868780056814,
"loss": 0.9796,
"step": 463
},
{
"epoch": 0.22343947510985374,
"grad_norm": 4.666108131408691,
"learning_rate": 0.0001811576513506629,
"loss": 0.749,
"step": 464
},
{
"epoch": 0.2239210257027629,
"grad_norm": 2.94307541847229,
"learning_rate": 0.00018106641742663397,
"loss": 0.9141,
"step": 465
},
{
"epoch": 0.22440257629567206,
"grad_norm": 3.553006887435913,
"learning_rate": 0.00018097498625047328,
"loss": 0.7818,
"step": 466
},
{
"epoch": 0.22488412688858123,
"grad_norm": 3.156838893890381,
"learning_rate": 0.00018088335804465258,
"loss": 0.8416,
"step": 467
},
{
"epoch": 0.2253656774814904,
"grad_norm": 3.0316860675811768,
"learning_rate": 0.00018079153303212318,
"loss": 0.777,
"step": 468
},
{
"epoch": 0.22584722807439955,
"grad_norm": 4.4010443687438965,
"learning_rate": 0.0001806995114363152,
"loss": 1.5936,
"step": 469
},
{
"epoch": 0.22632877866730874,
"grad_norm": 2.7999866008758545,
"learning_rate": 0.00018060729348113707,
"loss": 0.662,
"step": 470
},
{
"epoch": 0.2268103292602179,
"grad_norm": 1.6529977321624756,
"learning_rate": 0.00018051487939097505,
"loss": 0.6979,
"step": 471
},
{
"epoch": 0.22729187985312707,
"grad_norm": 9.427469253540039,
"learning_rate": 0.00018042226939069255,
"loss": 1.0829,
"step": 472
},
{
"epoch": 0.22777343044603623,
"grad_norm": 3.0257017612457275,
"learning_rate": 0.00018032946370562982,
"loss": 0.8094,
"step": 473
},
{
"epoch": 0.2282549810389454,
"grad_norm": 3.00710391998291,
"learning_rate": 0.00018023646256160313,
"loss": 0.4987,
"step": 474
},
{
"epoch": 0.22873653163185456,
"grad_norm": 3.28983473777771,
"learning_rate": 0.00018014326618490437,
"loss": 0.7542,
"step": 475
},
{
"epoch": 0.22921808222476375,
"grad_norm": 4.3024210929870605,
"learning_rate": 0.0001800498748023005,
"loss": 1.2465,
"step": 476
},
{
"epoch": 0.22969963281767292,
"grad_norm": 1.8468772172927856,
"learning_rate": 0.000179956288641033,
"loss": 0.5073,
"step": 477
},
{
"epoch": 0.23018118341058208,
"grad_norm": 6.304190635681152,
"learning_rate": 0.00017986250792881718,
"loss": 0.4624,
"step": 478
},
{
"epoch": 0.23066273400349124,
"grad_norm": 2.0171825885772705,
"learning_rate": 0.00017976853289384184,
"loss": 0.886,
"step": 479
},
{
"epoch": 0.2311442845964004,
"grad_norm": 3.8221256732940674,
"learning_rate": 0.00017967436376476855,
"loss": 1.182,
"step": 480
},
{
"epoch": 0.23162583518930957,
"grad_norm": 2.4394781589508057,
"learning_rate": 0.0001795800007707312,
"loss": 0.8758,
"step": 481
},
{
"epoch": 0.23210738578221873,
"grad_norm": 3.8338496685028076,
"learning_rate": 0.00017948544414133534,
"loss": 0.4596,
"step": 482
},
{
"epoch": 0.23258893637512792,
"grad_norm": 6.3291239738464355,
"learning_rate": 0.00017939069410665773,
"loss": 0.7862,
"step": 483
},
{
"epoch": 0.2330704869680371,
"grad_norm": 3.5340969562530518,
"learning_rate": 0.0001792957508972457,
"loss": 0.857,
"step": 484
},
{
"epoch": 0.23355203756094625,
"grad_norm": 3.2382187843322754,
"learning_rate": 0.00017920061474411658,
"loss": 0.4476,
"step": 485
},
{
"epoch": 0.23403358815385542,
"grad_norm": 2.6530380249023438,
"learning_rate": 0.00017910528587875729,
"loss": 0.7092,
"step": 486
},
{
"epoch": 0.23451513874676458,
"grad_norm": 2.1890785694122314,
"learning_rate": 0.00017900976453312352,
"loss": 0.6607,
"step": 487
},
{
"epoch": 0.23499668933967374,
"grad_norm": 3.7793309688568115,
"learning_rate": 0.00017891405093963938,
"loss": 0.805,
"step": 488
},
{
"epoch": 0.2354782399325829,
"grad_norm": 3.355741500854492,
"learning_rate": 0.00017881814533119675,
"loss": 1.1384,
"step": 489
},
{
"epoch": 0.2359597905254921,
"grad_norm": 4.145727157592773,
"learning_rate": 0.00017872204794115474,
"loss": 0.8834,
"step": 490
},
{
"epoch": 0.23644134111840126,
"grad_norm": 2.3569817543029785,
"learning_rate": 0.0001786257590033391,
"loss": 0.678,
"step": 491
},
{
"epoch": 0.23692289171131042,
"grad_norm": 3.5754706859588623,
"learning_rate": 0.00017852927875204163,
"loss": 1.1182,
"step": 492
},
{
"epoch": 0.2374044423042196,
"grad_norm": 4.857983112335205,
"learning_rate": 0.00017843260742201963,
"loss": 0.8987,
"step": 493
},
{
"epoch": 0.23788599289712875,
"grad_norm": 1.9549206495285034,
"learning_rate": 0.00017833574524849535,
"loss": 0.8174,
"step": 494
},
{
"epoch": 0.23836754349003791,
"grad_norm": 6.188207626342773,
"learning_rate": 0.00017823869246715553,
"loss": 1.0608,
"step": 495
},
{
"epoch": 0.23884909408294708,
"grad_norm": 5.574507713317871,
"learning_rate": 0.00017814144931415043,
"loss": 1.1371,
"step": 496
},
{
"epoch": 0.23933064467585627,
"grad_norm": 4.148524284362793,
"learning_rate": 0.0001780440160260938,
"loss": 1.213,
"step": 497
},
{
"epoch": 0.23981219526876543,
"grad_norm": 1.1119147539138794,
"learning_rate": 0.00017794639284006184,
"loss": 0.9442,
"step": 498
},
{
"epoch": 0.2402937458616746,
"grad_norm": 2.266169548034668,
"learning_rate": 0.0001778485799935929,
"loss": 0.8459,
"step": 499
},
{
"epoch": 0.24077529645458376,
"grad_norm": 3.1551809310913086,
"learning_rate": 0.00017775057772468679,
"loss": 1.3422,
"step": 500
},
{
"epoch": 0.24125684704749292,
"grad_norm": 1.7529497146606445,
"learning_rate": 0.00017765238627180424,
"loss": 0.5388,
"step": 501
},
{
"epoch": 0.2417383976404021,
"grad_norm": 1.8274682760238647,
"learning_rate": 0.00017755400587386632,
"loss": 0.679,
"step": 502
},
{
"epoch": 0.24221994823331125,
"grad_norm": 3.1747913360595703,
"learning_rate": 0.00017745543677025378,
"loss": 1.0639,
"step": 503
},
{
"epoch": 0.24270149882622044,
"grad_norm": 3.80859637260437,
"learning_rate": 0.00017735667920080661,
"loss": 0.9085,
"step": 504
},
{
"epoch": 0.2431830494191296,
"grad_norm": 3.507260322570801,
"learning_rate": 0.0001772577334058233,
"loss": 1.2649,
"step": 505
},
{
"epoch": 0.24366460001203877,
"grad_norm": 2.216651201248169,
"learning_rate": 0.00017715859962606043,
"loss": 0.6616,
"step": 506
},
{
"epoch": 0.24414615060494793,
"grad_norm": 2.3983209133148193,
"learning_rate": 0.00017705927810273187,
"loss": 0.5882,
"step": 507
},
{
"epoch": 0.2446277011978571,
"grad_norm": 2.6623141765594482,
"learning_rate": 0.00017695976907750844,
"loss": 0.5488,
"step": 508
},
{
"epoch": 0.24510925179076626,
"grad_norm": 2.3083388805389404,
"learning_rate": 0.00017686007279251706,
"loss": 0.5942,
"step": 509
},
{
"epoch": 0.24559080238367542,
"grad_norm": 2.294562578201294,
"learning_rate": 0.00017676018949034045,
"loss": 0.9633,
"step": 510
},
{
"epoch": 0.2460723529765846,
"grad_norm": 3.9046053886413574,
"learning_rate": 0.0001766601194140162,
"loss": 1.0614,
"step": 511
},
{
"epoch": 0.24655390356949378,
"grad_norm": 2.0208675861358643,
"learning_rate": 0.0001765598628070365,
"loss": 0.8143,
"step": 512
},
{
"epoch": 0.24703545416240294,
"grad_norm": 4.851849555969238,
"learning_rate": 0.00017645941991334732,
"loss": 1.0167,
"step": 513
},
{
"epoch": 0.2475170047553121,
"grad_norm": 2.7783102989196777,
"learning_rate": 0.00017635879097734804,
"loss": 0.9836,
"step": 514
},
{
"epoch": 0.24799855534822127,
"grad_norm": 3.420109510421753,
"learning_rate": 0.00017625797624389055,
"loss": 0.8772,
"step": 515
},
{
"epoch": 0.24848010594113043,
"grad_norm": 2.0368940830230713,
"learning_rate": 0.00017615697595827897,
"loss": 0.6702,
"step": 516
},
{
"epoch": 0.2489616565340396,
"grad_norm": 3.138749599456787,
"learning_rate": 0.0001760557903662688,
"loss": 0.8044,
"step": 517
},
{
"epoch": 0.24944320712694878,
"grad_norm": 3.4814579486846924,
"learning_rate": 0.00017595441971406648,
"loss": 1.1824,
"step": 518
},
{
"epoch": 0.24992475771985795,
"grad_norm": 2.961376190185547,
"learning_rate": 0.00017585286424832874,
"loss": 0.9286,
"step": 519
},
{
"epoch": 0.2504063083127671,
"grad_norm": 2.2734553813934326,
"learning_rate": 0.00017575112421616202,
"loss": 0.8274,
"step": 520
},
{
"epoch": 0.2508878589056763,
"grad_norm": 2.522198438644409,
"learning_rate": 0.0001756491998651218,
"loss": 0.8287,
"step": 521
},
{
"epoch": 0.25136940949858544,
"grad_norm": 3.1421101093292236,
"learning_rate": 0.0001755470914432121,
"loss": 1.0712,
"step": 522
},
{
"epoch": 0.2518509600914946,
"grad_norm": 2.7851064205169678,
"learning_rate": 0.0001754447991988848,
"loss": 0.7893,
"step": 523
},
{
"epoch": 0.25233251068440377,
"grad_norm": 3.5308754444122314,
"learning_rate": 0.00017534232338103903,
"loss": 1.0271,
"step": 524
},
{
"epoch": 0.25281406127731293,
"grad_norm": 2.5698599815368652,
"learning_rate": 0.0001752396642390207,
"loss": 1.2494,
"step": 525
},
{
"epoch": 0.2532956118702221,
"grad_norm": 3.077817440032959,
"learning_rate": 0.00017513682202262163,
"loss": 1.2176,
"step": 526
},
{
"epoch": 0.25377716246313126,
"grad_norm": 2.7685060501098633,
"learning_rate": 0.00017503379698207918,
"loss": 1.017,
"step": 527
},
{
"epoch": 0.2542587130560405,
"grad_norm": 1.3066401481628418,
"learning_rate": 0.00017493058936807562,
"loss": 0.9528,
"step": 528
},
{
"epoch": 0.25474026364894964,
"grad_norm": 2.54870343208313,
"learning_rate": 0.00017482719943173739,
"loss": 0.8665,
"step": 529
},
{
"epoch": 0.2552218142418588,
"grad_norm": 1.8381497859954834,
"learning_rate": 0.00017472362742463455,
"loss": 0.6502,
"step": 530
},
{
"epoch": 0.25570336483476797,
"grad_norm": 3.0634922981262207,
"learning_rate": 0.0001746198735987802,
"loss": 0.7654,
"step": 531
},
{
"epoch": 0.25618491542767713,
"grad_norm": 2.2630629539489746,
"learning_rate": 0.00017451593820662988,
"loss": 0.6992,
"step": 532
},
{
"epoch": 0.2566664660205863,
"grad_norm": 2.220201015472412,
"learning_rate": 0.00017441182150108086,
"loss": 1.0391,
"step": 533
},
{
"epoch": 0.25714801661349546,
"grad_norm": 1.3255223035812378,
"learning_rate": 0.0001743075237354716,
"loss": 1.1717,
"step": 534
},
{
"epoch": 0.2576295672064046,
"grad_norm": 0.7325202822685242,
"learning_rate": 0.00017420304516358113,
"loss": 0.6,
"step": 535
},
{
"epoch": 0.2581111177993138,
"grad_norm": 2.993429660797119,
"learning_rate": 0.00017409838603962843,
"loss": 0.8133,
"step": 536
},
{
"epoch": 0.25859266839222295,
"grad_norm": 1.611694097518921,
"learning_rate": 0.00017399354661827178,
"loss": 0.8367,
"step": 537
},
{
"epoch": 0.2590742189851321,
"grad_norm": 4.344441890716553,
"learning_rate": 0.00017388852715460819,
"loss": 0.7335,
"step": 538
},
{
"epoch": 0.2595557695780413,
"grad_norm": 2.118799924850464,
"learning_rate": 0.00017378332790417273,
"loss": 0.8942,
"step": 539
},
{
"epoch": 0.26003732017095044,
"grad_norm": 3.4612903594970703,
"learning_rate": 0.00017367794912293794,
"loss": 0.997,
"step": 540
},
{
"epoch": 0.26051887076385966,
"grad_norm": 2.6911776065826416,
"learning_rate": 0.00017357239106731317,
"loss": 0.59,
"step": 541
},
{
"epoch": 0.2610004213567688,
"grad_norm": 3.0154871940612793,
"learning_rate": 0.00017346665399414405,
"loss": 0.8628,
"step": 542
},
{
"epoch": 0.261481971949678,
"grad_norm": 2.9015610218048096,
"learning_rate": 0.00017336073816071168,
"loss": 0.7398,
"step": 543
},
{
"epoch": 0.26196352254258715,
"grad_norm": 3.7456789016723633,
"learning_rate": 0.00017325464382473226,
"loss": 1.2309,
"step": 544
},
{
"epoch": 0.2624450731354963,
"grad_norm": 3.10002064704895,
"learning_rate": 0.00017314837124435622,
"loss": 0.8035,
"step": 545
},
{
"epoch": 0.2629266237284055,
"grad_norm": 3.767289876937866,
"learning_rate": 0.00017304192067816782,
"loss": 0.9462,
"step": 546
},
{
"epoch": 0.26340817432131464,
"grad_norm": 1.6517798900604248,
"learning_rate": 0.00017293529238518422,
"loss": 0.7511,
"step": 547
},
{
"epoch": 0.2638897249142238,
"grad_norm": 3.3719449043273926,
"learning_rate": 0.0001728284866248552,
"loss": 0.8092,
"step": 548
},
{
"epoch": 0.26437127550713296,
"grad_norm": 2.3722736835479736,
"learning_rate": 0.00017272150365706224,
"loss": 0.8951,
"step": 549
},
{
"epoch": 0.2648528261000421,
"grad_norm": 4.863512992858887,
"learning_rate": 0.00017261434374211802,
"loss": 0.6823,
"step": 550
},
{
"epoch": 0.2653343766929513,
"grad_norm": 2.9136412143707275,
"learning_rate": 0.00017250700714076586,
"loss": 0.7394,
"step": 551
},
{
"epoch": 0.26581592728586045,
"grad_norm": 1.59123957157135,
"learning_rate": 0.00017239949411417888,
"loss": 0.9012,
"step": 552
},
{
"epoch": 0.2662974778787696,
"grad_norm": 3.566239356994629,
"learning_rate": 0.0001722918049239596,
"loss": 1.3038,
"step": 553
},
{
"epoch": 0.2667790284716788,
"grad_norm": 1.6607437133789062,
"learning_rate": 0.00017218393983213902,
"loss": 0.5825,
"step": 554
},
{
"epoch": 0.267260579064588,
"grad_norm": 2.2083866596221924,
"learning_rate": 0.00017207589910117634,
"loss": 0.6033,
"step": 555
},
{
"epoch": 0.26774212965749716,
"grad_norm": 6.2903523445129395,
"learning_rate": 0.00017196768299395797,
"loss": 0.9105,
"step": 556
},
{
"epoch": 0.2682236802504063,
"grad_norm": 1.9703174829483032,
"learning_rate": 0.00017185929177379714,
"loss": 0.7654,
"step": 557
},
{
"epoch": 0.2687052308433155,
"grad_norm": 3.087989568710327,
"learning_rate": 0.00017175072570443312,
"loss": 0.9272,
"step": 558
},
{
"epoch": 0.26918678143622465,
"grad_norm": 4.169714450836182,
"learning_rate": 0.00017164198505003066,
"loss": 0.6082,
"step": 559
},
{
"epoch": 0.2696683320291338,
"grad_norm": 2.0831711292266846,
"learning_rate": 0.0001715330700751793,
"loss": 0.5273,
"step": 560
},
{
"epoch": 0.270149882622043,
"grad_norm": 2.3498497009277344,
"learning_rate": 0.00017142398104489273,
"loss": 0.6253,
"step": 561
},
{
"epoch": 0.27063143321495214,
"grad_norm": 3.0425565242767334,
"learning_rate": 0.00017131471822460814,
"loss": 0.8767,
"step": 562
},
{
"epoch": 0.2711129838078613,
"grad_norm": 3.1900765895843506,
"learning_rate": 0.00017120528188018565,
"loss": 0.8782,
"step": 563
},
{
"epoch": 0.27159453440077047,
"grad_norm": 4.279084205627441,
"learning_rate": 0.00017109567227790754,
"loss": 0.5321,
"step": 564
},
{
"epoch": 0.27207608499367963,
"grad_norm": 1.9123119115829468,
"learning_rate": 0.00017098588968447766,
"loss": 1.0175,
"step": 565
},
{
"epoch": 0.2725576355865888,
"grad_norm": 3.689532995223999,
"learning_rate": 0.00017087593436702084,
"loss": 0.9917,
"step": 566
},
{
"epoch": 0.27303918617949796,
"grad_norm": 2.9351844787597656,
"learning_rate": 0.00017076580659308222,
"loss": 1.1911,
"step": 567
},
{
"epoch": 0.2735207367724071,
"grad_norm": 1.8407542705535889,
"learning_rate": 0.00017065550663062634,
"loss": 0.6958,
"step": 568
},
{
"epoch": 0.27400228736531634,
"grad_norm": 3.2096939086914062,
"learning_rate": 0.00017054503474803702,
"loss": 1.0163,
"step": 569
},
{
"epoch": 0.2744838379582255,
"grad_norm": 4.783024311065674,
"learning_rate": 0.00017043439121411618,
"loss": 0.9086,
"step": 570
},
{
"epoch": 0.27496538855113467,
"grad_norm": 3.336801290512085,
"learning_rate": 0.0001703235762980835,
"loss": 0.6395,
"step": 571
},
{
"epoch": 0.27544693914404383,
"grad_norm": 6.958205223083496,
"learning_rate": 0.00017021259026957567,
"loss": 1.0467,
"step": 572
},
{
"epoch": 0.275928489736953,
"grad_norm": 3.0236618518829346,
"learning_rate": 0.00017010143339864562,
"loss": 0.8194,
"step": 573
},
{
"epoch": 0.27641004032986216,
"grad_norm": 2.2934484481811523,
"learning_rate": 0.0001699901059557621,
"loss": 0.7926,
"step": 574
},
{
"epoch": 0.2768915909227713,
"grad_norm": 3.157127618789673,
"learning_rate": 0.00016987860821180895,
"loss": 0.849,
"step": 575
},
{
"epoch": 0.2773731415156805,
"grad_norm": 2.1495730876922607,
"learning_rate": 0.00016976694043808416,
"loss": 0.9138,
"step": 576
},
{
"epoch": 0.27785469210858965,
"grad_norm": 2.7366628646850586,
"learning_rate": 0.00016965510290629972,
"loss": 0.7798,
"step": 577
},
{
"epoch": 0.2783362427014988,
"grad_norm": 1.7467342615127563,
"learning_rate": 0.00016954309588858044,
"loss": 0.5792,
"step": 578
},
{
"epoch": 0.278817793294408,
"grad_norm": 2.1908276081085205,
"learning_rate": 0.00016943091965746366,
"loss": 0.6596,
"step": 579
},
{
"epoch": 0.27929934388731714,
"grad_norm": 4.674194812774658,
"learning_rate": 0.00016931857448589845,
"loss": 0.9044,
"step": 580
},
{
"epoch": 0.2797808944802263,
"grad_norm": 1.4668383598327637,
"learning_rate": 0.00016920606064724488,
"loss": 0.5977,
"step": 581
},
{
"epoch": 0.28026244507313547,
"grad_norm": 1.8525112867355347,
"learning_rate": 0.00016909337841527344,
"loss": 0.7542,
"step": 582
},
{
"epoch": 0.2807439956660447,
"grad_norm": 2.2113821506500244,
"learning_rate": 0.00016898052806416444,
"loss": 0.8215,
"step": 583
},
{
"epoch": 0.28122554625895385,
"grad_norm": 3.6429834365844727,
"learning_rate": 0.00016886750986850718,
"loss": 1.0825,
"step": 584
},
{
"epoch": 0.281707096851863,
"grad_norm": 2.043597459793091,
"learning_rate": 0.00016875432410329934,
"loss": 0.5168,
"step": 585
},
{
"epoch": 0.2821886474447722,
"grad_norm": 3.5646705627441406,
"learning_rate": 0.0001686409710439464,
"loss": 0.8117,
"step": 586
},
{
"epoch": 0.28267019803768134,
"grad_norm": 2.080101251602173,
"learning_rate": 0.00016852745096626088,
"loss": 1.0391,
"step": 587
},
{
"epoch": 0.2831517486305905,
"grad_norm": 4.150976181030273,
"learning_rate": 0.0001684137641464617,
"loss": 1.3853,
"step": 588
},
{
"epoch": 0.28363329922349967,
"grad_norm": 3.107977867126465,
"learning_rate": 0.0001682999108611735,
"loss": 0.6732,
"step": 589
},
{
"epoch": 0.28411484981640883,
"grad_norm": 1.3921109437942505,
"learning_rate": 0.00016818589138742587,
"loss": 0.5883,
"step": 590
},
{
"epoch": 0.284596400409318,
"grad_norm": 3.5491769313812256,
"learning_rate": 0.00016807170600265296,
"loss": 0.7489,
"step": 591
},
{
"epoch": 0.28507795100222716,
"grad_norm": 2.4986155033111572,
"learning_rate": 0.00016795735498469246,
"loss": 0.9951,
"step": 592
},
{
"epoch": 0.2855595015951363,
"grad_norm": 2.9514119625091553,
"learning_rate": 0.00016784283861178513,
"loss": 0.9067,
"step": 593
},
{
"epoch": 0.2860410521880455,
"grad_norm": 2.5975046157836914,
"learning_rate": 0.00016772815716257412,
"loss": 0.6998,
"step": 594
},
{
"epoch": 0.28652260278095465,
"grad_norm": 3.2922167778015137,
"learning_rate": 0.00016761331091610416,
"loss": 1.1803,
"step": 595
},
{
"epoch": 0.28700415337386387,
"grad_norm": 2.982027053833008,
"learning_rate": 0.00016749830015182107,
"loss": 0.9353,
"step": 596
},
{
"epoch": 0.28748570396677303,
"grad_norm": 1.3692221641540527,
"learning_rate": 0.00016738312514957086,
"loss": 0.5095,
"step": 597
},
{
"epoch": 0.2879672545596822,
"grad_norm": 2.186056613922119,
"learning_rate": 0.00016726778618959926,
"loss": 0.8501,
"step": 598
},
{
"epoch": 0.28844880515259136,
"grad_norm": 1.2103948593139648,
"learning_rate": 0.00016715228355255093,
"loss": 0.5927,
"step": 599
},
{
"epoch": 0.2889303557455005,
"grad_norm": 2.340742349624634,
"learning_rate": 0.00016703661751946874,
"loss": 0.7416,
"step": 600
},
{
"epoch": 0.2894119063384097,
"grad_norm": 3.421393871307373,
"learning_rate": 0.00016692078837179318,
"loss": 1.001,
"step": 601
},
{
"epoch": 0.28989345693131885,
"grad_norm": 2.6955082416534424,
"learning_rate": 0.00016680479639136163,
"loss": 0.7825,
"step": 602
},
{
"epoch": 0.290375007524228,
"grad_norm": 2.7069554328918457,
"learning_rate": 0.0001666886418604077,
"loss": 0.5969,
"step": 603
},
{
"epoch": 0.2908565581171372,
"grad_norm": 2.855012893676758,
"learning_rate": 0.0001665723250615604,
"loss": 0.6626,
"step": 604
},
{
"epoch": 0.29133810871004634,
"grad_norm": 3.311091423034668,
"learning_rate": 0.00016645584627784381,
"loss": 1.0357,
"step": 605
},
{
"epoch": 0.2918196593029555,
"grad_norm": 3.6477015018463135,
"learning_rate": 0.0001663392057926759,
"loss": 0.8369,
"step": 606
},
{
"epoch": 0.29230120989586467,
"grad_norm": 4.4921345710754395,
"learning_rate": 0.00016622240388986824,
"loss": 0.6446,
"step": 607
},
{
"epoch": 0.29278276048877383,
"grad_norm": 7.8272013664245605,
"learning_rate": 0.0001661054408536251,
"loss": 1.0073,
"step": 608
},
{
"epoch": 0.293264311081683,
"grad_norm": 3.6183552742004395,
"learning_rate": 0.00016598831696854288,
"loss": 1.0384,
"step": 609
},
{
"epoch": 0.2937458616745922,
"grad_norm": 3.8852734565734863,
"learning_rate": 0.00016587103251960937,
"loss": 0.9137,
"step": 610
},
{
"epoch": 0.2942274122675014,
"grad_norm": 4.892505645751953,
"learning_rate": 0.00016575358779220294,
"loss": 0.8421,
"step": 611
},
{
"epoch": 0.29470896286041054,
"grad_norm": 5.816843032836914,
"learning_rate": 0.00016563598307209204,
"loss": 0.9884,
"step": 612
},
{
"epoch": 0.2951905134533197,
"grad_norm": 3.2219488620758057,
"learning_rate": 0.0001655182186454344,
"loss": 0.549,
"step": 613
},
{
"epoch": 0.29567206404622887,
"grad_norm": 1.8293180465698242,
"learning_rate": 0.00016540029479877638,
"loss": 1.0032,
"step": 614
},
{
"epoch": 0.29615361463913803,
"grad_norm": 2.564122200012207,
"learning_rate": 0.00016528221181905217,
"loss": 0.8074,
"step": 615
},
{
"epoch": 0.2966351652320472,
"grad_norm": 2.1164186000823975,
"learning_rate": 0.00016516396999358322,
"loss": 0.7476,
"step": 616
},
{
"epoch": 0.29711671582495636,
"grad_norm": 1.6424773931503296,
"learning_rate": 0.00016504556961007748,
"loss": 0.505,
"step": 617
},
{
"epoch": 0.2975982664178655,
"grad_norm": 2.669262170791626,
"learning_rate": 0.00016492701095662866,
"loss": 0.5681,
"step": 618
},
{
"epoch": 0.2980798170107747,
"grad_norm": 3.94612455368042,
"learning_rate": 0.00016480829432171564,
"loss": 1.0503,
"step": 619
},
{
"epoch": 0.29856136760368385,
"grad_norm": 1.6237881183624268,
"learning_rate": 0.0001646894199942017,
"loss": 0.7969,
"step": 620
},
{
"epoch": 0.299042918196593,
"grad_norm": 1.9715096950531006,
"learning_rate": 0.0001645703882633338,
"loss": 0.9146,
"step": 621
},
{
"epoch": 0.2995244687895022,
"grad_norm": 4.666619300842285,
"learning_rate": 0.00016445119941874183,
"loss": 0.4521,
"step": 622
},
{
"epoch": 0.30000601938241134,
"grad_norm": 5.420642375946045,
"learning_rate": 0.00016433185375043809,
"loss": 0.8339,
"step": 623
},
{
"epoch": 0.30048756997532056,
"grad_norm": 3.869925022125244,
"learning_rate": 0.00016421235154881638,
"loss": 0.9278,
"step": 624
},
{
"epoch": 0.3009691205682297,
"grad_norm": 2.435767889022827,
"learning_rate": 0.00016409269310465146,
"loss": 0.4431,
"step": 625
},
{
"epoch": 0.3014506711611389,
"grad_norm": 3.733633041381836,
"learning_rate": 0.00016397287870909813,
"loss": 1.1122,
"step": 626
},
{
"epoch": 0.30193222175404805,
"grad_norm": 2.7327606678009033,
"learning_rate": 0.00016385290865369079,
"loss": 1.8192,
"step": 627
},
{
"epoch": 0.3024137723469572,
"grad_norm": 2.777545213699341,
"learning_rate": 0.00016373278323034255,
"loss": 0.762,
"step": 628
},
{
"epoch": 0.3028953229398664,
"grad_norm": 3.036078691482544,
"learning_rate": 0.0001636125027313445,
"loss": 0.9722,
"step": 629
},
{
"epoch": 0.30337687353277554,
"grad_norm": 1.9548509120941162,
"learning_rate": 0.00016349206744936518,
"loss": 0.909,
"step": 630
},
{
"epoch": 0.3038584241256847,
"grad_norm": 1.862438678741455,
"learning_rate": 0.00016337147767744967,
"loss": 0.9632,
"step": 631
},
{
"epoch": 0.30433997471859386,
"grad_norm": 3.5347414016723633,
"learning_rate": 0.0001632507337090189,
"loss": 0.6586,
"step": 632
},
{
"epoch": 0.304821525311503,
"grad_norm": 3.137190818786621,
"learning_rate": 0.0001631298358378692,
"loss": 0.8476,
"step": 633
},
{
"epoch": 0.3053030759044122,
"grad_norm": 4.065056800842285,
"learning_rate": 0.00016300878435817113,
"loss": 0.9919,
"step": 634
},
{
"epoch": 0.30578462649732135,
"grad_norm": 3.0076253414154053,
"learning_rate": 0.00016288757956446918,
"loss": 0.8693,
"step": 635
},
{
"epoch": 0.3062661770902305,
"grad_norm": 4.386903762817383,
"learning_rate": 0.00016276622175168083,
"loss": 0.5319,
"step": 636
},
{
"epoch": 0.30674772768313974,
"grad_norm": 2.739208459854126,
"learning_rate": 0.0001626447112150959,
"loss": 1.0968,
"step": 637
},
{
"epoch": 0.3072292782760489,
"grad_norm": 2.428248882293701,
"learning_rate": 0.00016252304825037576,
"loss": 0.9611,
"step": 638
},
{
"epoch": 0.30771082886895806,
"grad_norm": 4.173031330108643,
"learning_rate": 0.0001624012331535528,
"loss": 1.0045,
"step": 639
},
{
"epoch": 0.3081923794618672,
"grad_norm": 2.0858750343322754,
"learning_rate": 0.00016227926622102947,
"loss": 0.4524,
"step": 640
},
{
"epoch": 0.3086739300547764,
"grad_norm": 3.544163703918457,
"learning_rate": 0.00016215714774957772,
"loss": 1.0543,
"step": 641
},
{
"epoch": 0.30915548064768555,
"grad_norm": 5.18956995010376,
"learning_rate": 0.00016203487803633822,
"loss": 1.7808,
"step": 642
},
{
"epoch": 0.3096370312405947,
"grad_norm": 4.177835464477539,
"learning_rate": 0.00016191245737881956,
"loss": 0.6678,
"step": 643
},
{
"epoch": 0.3101185818335039,
"grad_norm": 2.31697678565979,
"learning_rate": 0.00016178988607489777,
"loss": 0.8606,
"step": 644
},
{
"epoch": 0.31060013242641304,
"grad_norm": 3.908705949783325,
"learning_rate": 0.00016166716442281528,
"loss": 0.9769,
"step": 645
},
{
"epoch": 0.3110816830193222,
"grad_norm": 3.6292724609375,
"learning_rate": 0.0001615442927211805,
"loss": 0.7383,
"step": 646
},
{
"epoch": 0.31156323361223137,
"grad_norm": 1.9753597974777222,
"learning_rate": 0.0001614212712689668,
"loss": 0.6893,
"step": 647
},
{
"epoch": 0.31204478420514054,
"grad_norm": 2.6053380966186523,
"learning_rate": 0.00016129810036551198,
"loss": 0.8903,
"step": 648
},
{
"epoch": 0.3125263347980497,
"grad_norm": 4.439826965332031,
"learning_rate": 0.00016117478031051755,
"loss": 0.6233,
"step": 649
},
{
"epoch": 0.31300788539095886,
"grad_norm": 2.590153694152832,
"learning_rate": 0.00016105131140404787,
"loss": 0.8171,
"step": 650
},
{
"epoch": 0.3134894359838681,
"grad_norm": 2.094128370285034,
"learning_rate": 0.00016092769394652947,
"loss": 0.9461,
"step": 651
},
{
"epoch": 0.31397098657677724,
"grad_norm": 2.2599637508392334,
"learning_rate": 0.0001608039282387504,
"loss": 0.7661,
"step": 652
},
{
"epoch": 0.3144525371696864,
"grad_norm": 2.008302927017212,
"learning_rate": 0.00016068001458185936,
"loss": 0.7038,
"step": 653
},
{
"epoch": 0.31493408776259557,
"grad_norm": 1.2377398014068604,
"learning_rate": 0.0001605559532773651,
"loss": 0.7293,
"step": 654
},
{
"epoch": 0.31541563835550473,
"grad_norm": 1.3407477140426636,
"learning_rate": 0.00016043174462713566,
"loss": 0.4857,
"step": 655
},
{
"epoch": 0.3158971889484139,
"grad_norm": 1.853411078453064,
"learning_rate": 0.00016030738893339753,
"loss": 0.8002,
"step": 656
},
{
"epoch": 0.31637873954132306,
"grad_norm": 2.6167731285095215,
"learning_rate": 0.00016018288649873497,
"loss": 0.8817,
"step": 657
},
{
"epoch": 0.3168602901342322,
"grad_norm": 2.0071139335632324,
"learning_rate": 0.0001600582376260894,
"loss": 0.7619,
"step": 658
},
{
"epoch": 0.3173418407271414,
"grad_norm": 1.7096420526504517,
"learning_rate": 0.00015993344261875847,
"loss": 0.3737,
"step": 659
},
{
"epoch": 0.31782339132005055,
"grad_norm": 1.3771498203277588,
"learning_rate": 0.00015980850178039547,
"loss": 0.8174,
"step": 660
},
{
"epoch": 0.3183049419129597,
"grad_norm": 1.803589105606079,
"learning_rate": 0.00015968341541500842,
"loss": 0.886,
"step": 661
},
{
"epoch": 0.3187864925058689,
"grad_norm": 2.595926523208618,
"learning_rate": 0.00015955818382695953,
"loss": 0.6081,
"step": 662
},
{
"epoch": 0.31926804309877804,
"grad_norm": 3.571012496948242,
"learning_rate": 0.00015943280732096438,
"loss": 0.8304,
"step": 663
},
{
"epoch": 0.3197495936916872,
"grad_norm": 3.7733047008514404,
"learning_rate": 0.00015930728620209113,
"loss": 0.9931,
"step": 664
},
{
"epoch": 0.3202311442845964,
"grad_norm": 2.0570201873779297,
"learning_rate": 0.00015918162077575976,
"loss": 0.6084,
"step": 665
},
{
"epoch": 0.3207126948775056,
"grad_norm": 2.1481802463531494,
"learning_rate": 0.00015905581134774153,
"loss": 0.637,
"step": 666
},
{
"epoch": 0.32119424547041475,
"grad_norm": 2.172410488128662,
"learning_rate": 0.0001589298582241579,
"loss": 0.8891,
"step": 667
},
{
"epoch": 0.3216757960633239,
"grad_norm": 1.3450566530227661,
"learning_rate": 0.00015880376171148014,
"loss": 0.743,
"step": 668
},
{
"epoch": 0.3221573466562331,
"grad_norm": 2.8985812664031982,
"learning_rate": 0.00015867752211652831,
"loss": 1.0136,
"step": 669
},
{
"epoch": 0.32263889724914224,
"grad_norm": 2.9778859615325928,
"learning_rate": 0.00015855113974647068,
"loss": 1.1762,
"step": 670
},
{
"epoch": 0.3231204478420514,
"grad_norm": 2.839142322540283,
"learning_rate": 0.0001584246149088229,
"loss": 0.8616,
"step": 671
},
{
"epoch": 0.32360199843496057,
"grad_norm": 2.5461061000823975,
"learning_rate": 0.0001582979479114472,
"loss": 0.883,
"step": 672
},
{
"epoch": 0.32408354902786973,
"grad_norm": 2.5526607036590576,
"learning_rate": 0.0001581711390625519,
"loss": 1.0749,
"step": 673
},
{
"epoch": 0.3245650996207789,
"grad_norm": 1.8686296939849854,
"learning_rate": 0.0001580441886706903,
"loss": 0.5283,
"step": 674
},
{
"epoch": 0.32504665021368806,
"grad_norm": 3.071974277496338,
"learning_rate": 0.00015791709704476015,
"loss": 0.8488,
"step": 675
},
{
"epoch": 0.3255282008065972,
"grad_norm": 2.5691075325012207,
"learning_rate": 0.00015778986449400292,
"loss": 0.6839,
"step": 676
},
{
"epoch": 0.3260097513995064,
"grad_norm": 3.5297348499298096,
"learning_rate": 0.00015766249132800292,
"loss": 0.7555,
"step": 677
},
{
"epoch": 0.3264913019924156,
"grad_norm": 1.9047622680664062,
"learning_rate": 0.00015753497785668663,
"loss": 0.8286,
"step": 678
},
{
"epoch": 0.32697285258532477,
"grad_norm": 1.899084210395813,
"learning_rate": 0.00015740732439032187,
"loss": 0.8663,
"step": 679
},
{
"epoch": 0.32745440317823393,
"grad_norm": 1.7221845388412476,
"learning_rate": 0.00015727953123951716,
"loss": 1.0307,
"step": 680
},
{
"epoch": 0.3279359537711431,
"grad_norm": 2.8478286266326904,
"learning_rate": 0.00015715159871522086,
"loss": 0.9206,
"step": 681
},
{
"epoch": 0.32841750436405226,
"grad_norm": 2.7343266010284424,
"learning_rate": 0.00015702352712872056,
"loss": 0.7745,
"step": 682
},
{
"epoch": 0.3288990549569614,
"grad_norm": 2.272082567214966,
"learning_rate": 0.00015689531679164204,
"loss": 0.7247,
"step": 683
},
{
"epoch": 0.3293806055498706,
"grad_norm": 3.404125690460205,
"learning_rate": 0.00015676696801594886,
"loss": 1.185,
"step": 684
},
{
"epoch": 0.32986215614277975,
"grad_norm": 2.005213499069214,
"learning_rate": 0.00015663848111394132,
"loss": 1.0382,
"step": 685
},
{
"epoch": 0.3303437067356889,
"grad_norm": 3.435062885284424,
"learning_rate": 0.00015650985639825585,
"loss": 0.937,
"step": 686
},
{
"epoch": 0.3308252573285981,
"grad_norm": 2.289677858352661,
"learning_rate": 0.00015638109418186424,
"loss": 0.5748,
"step": 687
},
{
"epoch": 0.33130680792150724,
"grad_norm": 3.6423025131225586,
"learning_rate": 0.00015625219477807277,
"loss": 0.9767,
"step": 688
},
{
"epoch": 0.3317883585144164,
"grad_norm": 2.872910737991333,
"learning_rate": 0.00015612315850052166,
"loss": 0.6958,
"step": 689
},
{
"epoch": 0.33226990910732557,
"grad_norm": 3.16129469871521,
"learning_rate": 0.00015599398566318396,
"loss": 1.0489,
"step": 690
},
{
"epoch": 0.33275145970023473,
"grad_norm": 2.4782161712646484,
"learning_rate": 0.00015586467658036524,
"loss": 0.6644,
"step": 691
},
{
"epoch": 0.33323301029314395,
"grad_norm": 4.447420597076416,
"learning_rate": 0.00015573523156670244,
"loss": 1.2536,
"step": 692
},
{
"epoch": 0.3337145608860531,
"grad_norm": 4.194264888763428,
"learning_rate": 0.0001556056509371633,
"loss": 0.9997,
"step": 693
},
{
"epoch": 0.3341961114789623,
"grad_norm": 3.0863115787506104,
"learning_rate": 0.00015547593500704547,
"loss": 0.9827,
"step": 694
},
{
"epoch": 0.33467766207187144,
"grad_norm": 7.232437610626221,
"learning_rate": 0.00015534608409197592,
"loss": 0.5336,
"step": 695
},
{
"epoch": 0.3351592126647806,
"grad_norm": 2.40484881401062,
"learning_rate": 0.00015521609850791004,
"loss": 0.4391,
"step": 696
},
{
"epoch": 0.33564076325768977,
"grad_norm": 2.6389102935791016,
"learning_rate": 0.0001550859785711308,
"loss": 0.8676,
"step": 697
},
{
"epoch": 0.33612231385059893,
"grad_norm": 1.548851490020752,
"learning_rate": 0.0001549557245982482,
"loss": 0.8892,
"step": 698
},
{
"epoch": 0.3366038644435081,
"grad_norm": 1.7563083171844482,
"learning_rate": 0.00015482533690619837,
"loss": 0.755,
"step": 699
},
{
"epoch": 0.33708541503641726,
"grad_norm": 4.792996883392334,
"learning_rate": 0.00015469481581224272,
"loss": 0.6721,
"step": 700
},
{
"epoch": 0.3375669656293264,
"grad_norm": 2.4070699214935303,
"learning_rate": 0.0001545641616339673,
"loss": 0.8127,
"step": 701
},
{
"epoch": 0.3380485162222356,
"grad_norm": 3.3054072856903076,
"learning_rate": 0.00015443337468928206,
"loss": 0.5389,
"step": 702
},
{
"epoch": 0.33853006681514475,
"grad_norm": 2.826061725616455,
"learning_rate": 0.00015430245529641986,
"loss": 0.58,
"step": 703
},
{
"epoch": 0.3390116174080539,
"grad_norm": 2.2573275566101074,
"learning_rate": 0.00015417140377393596,
"loss": 0.9465,
"step": 704
},
{
"epoch": 0.3394931680009631,
"grad_norm": 2.5977699756622314,
"learning_rate": 0.00015404022044070704,
"loss": 0.707,
"step": 705
},
{
"epoch": 0.3399747185938723,
"grad_norm": 4.191378116607666,
"learning_rate": 0.00015390890561593052,
"loss": 0.8705,
"step": 706
},
{
"epoch": 0.34045626918678146,
"grad_norm": 2.229558229446411,
"learning_rate": 0.0001537774596191238,
"loss": 0.6722,
"step": 707
},
{
"epoch": 0.3409378197796906,
"grad_norm": 3.5231106281280518,
"learning_rate": 0.00015364588277012344,
"loss": 0.6765,
"step": 708
},
{
"epoch": 0.3414193703725998,
"grad_norm": 2.406374216079712,
"learning_rate": 0.00015351417538908435,
"loss": 1.1367,
"step": 709
},
{
"epoch": 0.34190092096550895,
"grad_norm": 2.4399731159210205,
"learning_rate": 0.0001533823377964791,
"loss": 0.8311,
"step": 710
},
{
"epoch": 0.3423824715584181,
"grad_norm": 3.170764207839966,
"learning_rate": 0.00015325037031309704,
"loss": 1.1677,
"step": 711
},
{
"epoch": 0.3428640221513273,
"grad_norm": 2.4215619564056396,
"learning_rate": 0.00015311827326004363,
"loss": 0.7897,
"step": 712
},
{
"epoch": 0.34334557274423644,
"grad_norm": 2.092327117919922,
"learning_rate": 0.0001529860469587396,
"loss": 0.6389,
"step": 713
},
{
"epoch": 0.3438271233371456,
"grad_norm": 2.5935378074645996,
"learning_rate": 0.00015285369173092015,
"loss": 0.6437,
"step": 714
},
{
"epoch": 0.34430867393005476,
"grad_norm": 2.7213246822357178,
"learning_rate": 0.00015272120789863413,
"loss": 1.2337,
"step": 715
},
{
"epoch": 0.34479022452296393,
"grad_norm": 2.3775582313537598,
"learning_rate": 0.00015258859578424342,
"loss": 0.9354,
"step": 716
},
{
"epoch": 0.3452717751158731,
"grad_norm": 2.3401012420654297,
"learning_rate": 0.00015245585571042194,
"loss": 0.573,
"step": 717
},
{
"epoch": 0.34575332570878226,
"grad_norm": 2.1838347911834717,
"learning_rate": 0.00015232298800015506,
"loss": 0.5484,
"step": 718
},
{
"epoch": 0.3462348763016914,
"grad_norm": 2.1311140060424805,
"learning_rate": 0.00015218999297673862,
"loss": 0.695,
"step": 719
},
{
"epoch": 0.34671642689460064,
"grad_norm": 3.2175323963165283,
"learning_rate": 0.0001520568709637783,
"loss": 0.8665,
"step": 720
},
{
"epoch": 0.3471979774875098,
"grad_norm": 2.6574606895446777,
"learning_rate": 0.00015192362228518875,
"loss": 0.6068,
"step": 721
},
{
"epoch": 0.34767952808041896,
"grad_norm": 3.131312847137451,
"learning_rate": 0.00015179024726519284,
"loss": 1.0317,
"step": 722
},
{
"epoch": 0.34816107867332813,
"grad_norm": 2.886371612548828,
"learning_rate": 0.00015165674622832085,
"loss": 0.6881,
"step": 723
},
{
"epoch": 0.3486426292662373,
"grad_norm": 2.9966607093811035,
"learning_rate": 0.0001515231194994097,
"loss": 1.6059,
"step": 724
},
{
"epoch": 0.34912417985914646,
"grad_norm": 5.8844404220581055,
"learning_rate": 0.00015138936740360207,
"loss": 0.8733,
"step": 725
},
{
"epoch": 0.3496057304520556,
"grad_norm": 2.432682752609253,
"learning_rate": 0.00015125549026634585,
"loss": 0.4045,
"step": 726
},
{
"epoch": 0.3500872810449648,
"grad_norm": 2.9346506595611572,
"learning_rate": 0.00015112148841339295,
"loss": 0.6577,
"step": 727
},
{
"epoch": 0.35056883163787395,
"grad_norm": 3.6856017112731934,
"learning_rate": 0.000150987362170799,
"loss": 0.7203,
"step": 728
},
{
"epoch": 0.3510503822307831,
"grad_norm": 3.772768974304199,
"learning_rate": 0.00015085311186492206,
"loss": 0.961,
"step": 729
},
{
"epoch": 0.3515319328236923,
"grad_norm": 2.937117576599121,
"learning_rate": 0.00015071873782242223,
"loss": 0.5519,
"step": 730
},
{
"epoch": 0.35201348341660144,
"grad_norm": 3.9652099609375,
"learning_rate": 0.0001505842403702606,
"loss": 0.9024,
"step": 731
},
{
"epoch": 0.3524950340095106,
"grad_norm": 2.1614396572113037,
"learning_rate": 0.00015044961983569856,
"loss": 0.6737,
"step": 732
},
{
"epoch": 0.3529765846024198,
"grad_norm": 2.625931978225708,
"learning_rate": 0.00015031487654629702,
"loss": 0.6265,
"step": 733
},
{
"epoch": 0.353458135195329,
"grad_norm": 3.378445863723755,
"learning_rate": 0.00015018001082991553,
"loss": 0.6916,
"step": 734
},
{
"epoch": 0.35393968578823815,
"grad_norm": 1.6671521663665771,
"learning_rate": 0.0001500450230147116,
"loss": 0.5809,
"step": 735
},
{
"epoch": 0.3544212363811473,
"grad_norm": 2.095771074295044,
"learning_rate": 0.00014990991342913974,
"loss": 1.0634,
"step": 736
},
{
"epoch": 0.35490278697405647,
"grad_norm": 2.0476694107055664,
"learning_rate": 0.00014977468240195084,
"loss": 0.7652,
"step": 737
},
{
"epoch": 0.35538433756696564,
"grad_norm": 2.9106428623199463,
"learning_rate": 0.0001496393302621912,
"loss": 1.1553,
"step": 738
},
{
"epoch": 0.3558658881598748,
"grad_norm": 2.1478304862976074,
"learning_rate": 0.00014950385733920188,
"loss": 0.7608,
"step": 739
},
{
"epoch": 0.35634743875278396,
"grad_norm": 1.8038551807403564,
"learning_rate": 0.00014936826396261783,
"loss": 0.6694,
"step": 740
},
{
"epoch": 0.3568289893456931,
"grad_norm": 3.3769569396972656,
"learning_rate": 0.00014923255046236705,
"loss": 1.2689,
"step": 741
},
{
"epoch": 0.3573105399386023,
"grad_norm": 4.4860334396362305,
"learning_rate": 0.00014909671716866984,
"loss": 0.852,
"step": 742
},
{
"epoch": 0.35779209053151145,
"grad_norm": 4.017233371734619,
"learning_rate": 0.00014896076441203802,
"loss": 0.8332,
"step": 743
},
{
"epoch": 0.3582736411244206,
"grad_norm": 2.824586868286133,
"learning_rate": 0.000148824692523274,
"loss": 1.215,
"step": 744
},
{
"epoch": 0.3587551917173298,
"grad_norm": 3.6129872798919678,
"learning_rate": 0.0001486885018334702,
"loss": 1.1055,
"step": 745
},
{
"epoch": 0.35923674231023894,
"grad_norm": 3.1985294818878174,
"learning_rate": 0.00014855219267400797,
"loss": 0.8963,
"step": 746
},
{
"epoch": 0.35971829290314816,
"grad_norm": 3.0053601264953613,
"learning_rate": 0.00014841576537655705,
"loss": 0.8728,
"step": 747
},
{
"epoch": 0.3601998434960573,
"grad_norm": 2.2497479915618896,
"learning_rate": 0.00014827922027307451,
"loss": 0.9084,
"step": 748
},
{
"epoch": 0.3606813940889665,
"grad_norm": 3.9402804374694824,
"learning_rate": 0.00014814255769580415,
"loss": 0.609,
"step": 749
},
{
"epoch": 0.36116294468187565,
"grad_norm": 2.3622281551361084,
"learning_rate": 0.00014800577797727558,
"loss": 1.0189,
"step": 750
},
{
"epoch": 0.3616444952747848,
"grad_norm": 1.9683716297149658,
"learning_rate": 0.00014786888145030343,
"loss": 0.8275,
"step": 751
},
{
"epoch": 0.362126045867694,
"grad_norm": 0.9872303009033203,
"learning_rate": 0.0001477318684479866,
"loss": 0.3827,
"step": 752
},
{
"epoch": 0.36260759646060314,
"grad_norm": 3.7244014739990234,
"learning_rate": 0.00014759473930370736,
"loss": 0.6359,
"step": 753
},
{
"epoch": 0.3630891470535123,
"grad_norm": 1.6438435316085815,
"learning_rate": 0.0001474574943511306,
"loss": 0.879,
"step": 754
},
{
"epoch": 0.36357069764642147,
"grad_norm": 2.410429000854492,
"learning_rate": 0.0001473201339242029,
"loss": 1.2406,
"step": 755
},
{
"epoch": 0.36405224823933063,
"grad_norm": 5.812607765197754,
"learning_rate": 0.000147182658357152,
"loss": 0.4273,
"step": 756
},
{
"epoch": 0.3645337988322398,
"grad_norm": 3.4380412101745605,
"learning_rate": 0.00014704506798448566,
"loss": 0.5765,
"step": 757
},
{
"epoch": 0.36501534942514896,
"grad_norm": 10.264464378356934,
"learning_rate": 0.00014690736314099101,
"loss": 0.8553,
"step": 758
},
{
"epoch": 0.3654969000180581,
"grad_norm": 2.479084014892578,
"learning_rate": 0.00014676954416173373,
"loss": 0.8062,
"step": 759
},
{
"epoch": 0.3659784506109673,
"grad_norm": 3.285261392593384,
"learning_rate": 0.00014663161138205724,
"loss": 0.9467,
"step": 760
},
{
"epoch": 0.3664600012038765,
"grad_norm": 2.4864413738250732,
"learning_rate": 0.00014649356513758176,
"loss": 0.8893,
"step": 761
},
{
"epoch": 0.36694155179678567,
"grad_norm": 1.9362248182296753,
"learning_rate": 0.00014635540576420374,
"loss": 0.6744,
"step": 762
},
{
"epoch": 0.36742310238969483,
"grad_norm": 2.7063558101654053,
"learning_rate": 0.0001462171335980948,
"loss": 0.4627,
"step": 763
},
{
"epoch": 0.367904652982604,
"grad_norm": 1.6518278121948242,
"learning_rate": 0.00014607874897570105,
"loss": 0.6235,
"step": 764
},
{
"epoch": 0.36838620357551316,
"grad_norm": 1.9559590816497803,
"learning_rate": 0.0001459402522337422,
"loss": 0.6709,
"step": 765
},
{
"epoch": 0.3688677541684223,
"grad_norm": 2.930201292037964,
"learning_rate": 0.00014580164370921078,
"loss": 0.5976,
"step": 766
},
{
"epoch": 0.3693493047613315,
"grad_norm": 2.146150588989258,
"learning_rate": 0.0001456629237393713,
"loss": 0.7809,
"step": 767
},
{
"epoch": 0.36983085535424065,
"grad_norm": 3.9922261238098145,
"learning_rate": 0.00014552409266175952,
"loss": 0.6659,
"step": 768
},
{
"epoch": 0.3703124059471498,
"grad_norm": 1.5614209175109863,
"learning_rate": 0.00014538515081418142,
"loss": 0.6743,
"step": 769
},
{
"epoch": 0.370793956540059,
"grad_norm": 4.6989970207214355,
"learning_rate": 0.00014524609853471264,
"loss": 0.8936,
"step": 770
},
{
"epoch": 0.37127550713296814,
"grad_norm": 3.576082229614258,
"learning_rate": 0.00014510693616169741,
"loss": 0.9577,
"step": 771
},
{
"epoch": 0.3717570577258773,
"grad_norm": 0.9595773816108704,
"learning_rate": 0.0001449676640337479,
"loss": 0.7148,
"step": 772
},
{
"epoch": 0.37223860831878647,
"grad_norm": 4.762831687927246,
"learning_rate": 0.00014482828248974335,
"loss": 0.4595,
"step": 773
},
{
"epoch": 0.3727201589116957,
"grad_norm": 3.842872381210327,
"learning_rate": 0.00014468879186882916,
"loss": 1.3252,
"step": 774
},
{
"epoch": 0.37320170950460485,
"grad_norm": 1.4984766244888306,
"learning_rate": 0.00014454919251041622,
"loss": 0.5666,
"step": 775
},
{
"epoch": 0.373683260097514,
"grad_norm": 2.2089576721191406,
"learning_rate": 0.00014440948475418,
"loss": 1.341,
"step": 776
},
{
"epoch": 0.3741648106904232,
"grad_norm": 1.6905694007873535,
"learning_rate": 0.00014426966894005966,
"loss": 0.6712,
"step": 777
},
{
"epoch": 0.37464636128333234,
"grad_norm": 2.767066478729248,
"learning_rate": 0.0001441297454082573,
"loss": 0.9909,
"step": 778
},
{
"epoch": 0.3751279118762415,
"grad_norm": 2.3299660682678223,
"learning_rate": 0.00014398971449923722,
"loss": 0.5103,
"step": 779
},
{
"epoch": 0.37560946246915067,
"grad_norm": 1.6824946403503418,
"learning_rate": 0.00014384957655372483,
"loss": 0.6759,
"step": 780
},
{
"epoch": 0.37609101306205983,
"grad_norm": 1.1821125745773315,
"learning_rate": 0.00014370933191270617,
"loss": 0.5259,
"step": 781
},
{
"epoch": 0.376572563654969,
"grad_norm": 1.904584288597107,
"learning_rate": 0.0001435689809174267,
"loss": 0.9894,
"step": 782
},
{
"epoch": 0.37705411424787816,
"grad_norm": 2.2008063793182373,
"learning_rate": 0.0001434285239093908,
"loss": 1.3456,
"step": 783
},
{
"epoch": 0.3775356648407873,
"grad_norm": 2.202802896499634,
"learning_rate": 0.00014328796123036071,
"loss": 0.4833,
"step": 784
},
{
"epoch": 0.3780172154336965,
"grad_norm": 1.7282605171203613,
"learning_rate": 0.0001431472932223559,
"loss": 0.9194,
"step": 785
},
{
"epoch": 0.37849876602660565,
"grad_norm": 1.2373805046081543,
"learning_rate": 0.00014300652022765207,
"loss": 0.466,
"step": 786
},
{
"epoch": 0.3789803166195148,
"grad_norm": 4.87825870513916,
"learning_rate": 0.00014286564258878033,
"loss": 0.9176,
"step": 787
},
{
"epoch": 0.37946186721242403,
"grad_norm": 3.325873613357544,
"learning_rate": 0.00014272466064852644,
"loss": 0.4595,
"step": 788
},
{
"epoch": 0.3799434178053332,
"grad_norm": 5.119507789611816,
"learning_rate": 0.00014258357474993,
"loss": 0.8462,
"step": 789
},
{
"epoch": 0.38042496839824236,
"grad_norm": 4.062798976898193,
"learning_rate": 0.0001424423852362835,
"loss": 0.7553,
"step": 790
},
{
"epoch": 0.3809065189911515,
"grad_norm": 1.8997843265533447,
"learning_rate": 0.00014230109245113158,
"loss": 0.968,
"step": 791
},
{
"epoch": 0.3813880695840607,
"grad_norm": 3.648345470428467,
"learning_rate": 0.00014215969673827018,
"loss": 0.7866,
"step": 792
},
{
"epoch": 0.38186962017696985,
"grad_norm": 3.1891438961029053,
"learning_rate": 0.00014201819844174564,
"loss": 0.7841,
"step": 793
},
{
"epoch": 0.382351170769879,
"grad_norm": 3.960712432861328,
"learning_rate": 0.0001418765979058539,
"loss": 0.8922,
"step": 794
},
{
"epoch": 0.3828327213627882,
"grad_norm": 1.958216667175293,
"learning_rate": 0.00014173489547513973,
"loss": 0.9929,
"step": 795
},
{
"epoch": 0.38331427195569734,
"grad_norm": 2.851674795150757,
"learning_rate": 0.00014159309149439582,
"loss": 0.7668,
"step": 796
},
{
"epoch": 0.3837958225486065,
"grad_norm": 2.4043354988098145,
"learning_rate": 0.00014145118630866187,
"loss": 0.5076,
"step": 797
},
{
"epoch": 0.38427737314151567,
"grad_norm": 2.548334836959839,
"learning_rate": 0.000141309180263224,
"loss": 0.5664,
"step": 798
},
{
"epoch": 0.38475892373442483,
"grad_norm": 7.332959175109863,
"learning_rate": 0.0001411670737036135,
"loss": 0.6663,
"step": 799
},
{
"epoch": 0.385240474327334,
"grad_norm": 2.5418202877044678,
"learning_rate": 0.0001410248669756065,
"loss": 0.6912,
"step": 800
},
{
"epoch": 0.38572202492024316,
"grad_norm": 1.9166046380996704,
"learning_rate": 0.00014088256042522264,
"loss": 0.5785,
"step": 801
},
{
"epoch": 0.3862035755131524,
"grad_norm": 1.9484670162200928,
"learning_rate": 0.00014074015439872458,
"loss": 0.7789,
"step": 802
},
{
"epoch": 0.38668512610606154,
"grad_norm": 1.2977544069290161,
"learning_rate": 0.00014059764924261703,
"loss": 0.448,
"step": 803
},
{
"epoch": 0.3871666766989707,
"grad_norm": 3.7243142127990723,
"learning_rate": 0.00014045504530364584,
"loss": 0.3638,
"step": 804
},
{
"epoch": 0.38764822729187987,
"grad_norm": 2.931234836578369,
"learning_rate": 0.00014031234292879725,
"loss": 0.6048,
"step": 805
},
{
"epoch": 0.38812977788478903,
"grad_norm": 2.235635757446289,
"learning_rate": 0.00014016954246529696,
"loss": 0.741,
"step": 806
},
{
"epoch": 0.3886113284776982,
"grad_norm": 2.4995760917663574,
"learning_rate": 0.00014002664426060942,
"loss": 0.8794,
"step": 807
},
{
"epoch": 0.38909287907060736,
"grad_norm": 5.880919456481934,
"learning_rate": 0.00013988364866243693,
"loss": 0.8441,
"step": 808
},
{
"epoch": 0.3895744296635165,
"grad_norm": 2.27232027053833,
"learning_rate": 0.00013974055601871868,
"loss": 0.5837,
"step": 809
},
{
"epoch": 0.3900559802564257,
"grad_norm": 1.846911907196045,
"learning_rate": 0.00013959736667762998,
"loss": 0.9346,
"step": 810
},
{
"epoch": 0.39053753084933485,
"grad_norm": 1.3983654975891113,
"learning_rate": 0.00013945408098758156,
"loss": 1.0296,
"step": 811
},
{
"epoch": 0.391019081442244,
"grad_norm": 1.4359188079833984,
"learning_rate": 0.0001393106992972184,
"loss": 0.5791,
"step": 812
},
{
"epoch": 0.3915006320351532,
"grad_norm": 2.0418739318847656,
"learning_rate": 0.00013916722195541926,
"loss": 0.5045,
"step": 813
},
{
"epoch": 0.39198218262806234,
"grad_norm": 3.6216964721679688,
"learning_rate": 0.00013902364931129557,
"loss": 0.748,
"step": 814
},
{
"epoch": 0.39246373322097156,
"grad_norm": 2.5726840496063232,
"learning_rate": 0.00013887998171419058,
"loss": 0.8588,
"step": 815
},
{
"epoch": 0.3929452838138807,
"grad_norm": 2.4166088104248047,
"learning_rate": 0.00013873621951367862,
"loss": 0.8306,
"step": 816
},
{
"epoch": 0.3934268344067899,
"grad_norm": 2.8156697750091553,
"learning_rate": 0.00013859236305956425,
"loss": 0.7893,
"step": 817
},
{
"epoch": 0.39390838499969905,
"grad_norm": 2.6449344158172607,
"learning_rate": 0.00013844841270188132,
"loss": 0.7843,
"step": 818
},
{
"epoch": 0.3943899355926082,
"grad_norm": 1.73700749874115,
"learning_rate": 0.00013830436879089228,
"loss": 0.7855,
"step": 819
},
{
"epoch": 0.3948714861855174,
"grad_norm": 2.2804300785064697,
"learning_rate": 0.00013816023167708704,
"loss": 0.6568,
"step": 820
},
{
"epoch": 0.39535303677842654,
"grad_norm": 2.1226370334625244,
"learning_rate": 0.00013801600171118244,
"loss": 0.5294,
"step": 821
},
{
"epoch": 0.3958345873713357,
"grad_norm": 2.894469738006592,
"learning_rate": 0.00013787167924412112,
"loss": 0.8773,
"step": 822
},
{
"epoch": 0.39631613796424486,
"grad_norm": 2.5202245712280273,
"learning_rate": 0.0001377272646270709,
"loss": 0.381,
"step": 823
},
{
"epoch": 0.396797688557154,
"grad_norm": 3.8328442573547363,
"learning_rate": 0.00013758275821142382,
"loss": 0.5329,
"step": 824
},
{
"epoch": 0.3972792391500632,
"grad_norm": 2.541353940963745,
"learning_rate": 0.00013743816034879523,
"loss": 0.5578,
"step": 825
},
{
"epoch": 0.39776078974297235,
"grad_norm": 2.1383888721466064,
"learning_rate": 0.000137293471391023,
"loss": 0.8876,
"step": 826
},
{
"epoch": 0.3982423403358815,
"grad_norm": 2.633044719696045,
"learning_rate": 0.00013714869169016667,
"loss": 0.4708,
"step": 827
},
{
"epoch": 0.3987238909287907,
"grad_norm": 4.365309715270996,
"learning_rate": 0.00013700382159850656,
"loss": 0.4944,
"step": 828
},
{
"epoch": 0.3992054415216999,
"grad_norm": 3.652635097503662,
"learning_rate": 0.00013685886146854297,
"loss": 0.8842,
"step": 829
},
{
"epoch": 0.39968699211460906,
"grad_norm": 2.1585693359375,
"learning_rate": 0.00013671381165299525,
"loss": 0.7996,
"step": 830
},
{
"epoch": 0.4001685427075182,
"grad_norm": 2.429353952407837,
"learning_rate": 0.00013656867250480098,
"loss": 0.8529,
"step": 831
},
{
"epoch": 0.4006500933004274,
"grad_norm": 2.0044384002685547,
"learning_rate": 0.00013642344437711512,
"loss": 0.5831,
"step": 832
},
{
"epoch": 0.40113164389333655,
"grad_norm": 3.845720052719116,
"learning_rate": 0.00013627812762330912,
"loss": 1.0989,
"step": 833
},
{
"epoch": 0.4016131944862457,
"grad_norm": 2.4020707607269287,
"learning_rate": 0.00013613272259697007,
"loss": 0.734,
"step": 834
},
{
"epoch": 0.4020947450791549,
"grad_norm": 1.2454372644424438,
"learning_rate": 0.00013598722965189986,
"loss": 1.1921,
"step": 835
},
{
"epoch": 0.40257629567206404,
"grad_norm": 2.901397228240967,
"learning_rate": 0.0001358416491421143,
"loss": 0.919,
"step": 836
},
{
"epoch": 0.4030578462649732,
"grad_norm": 2.4206631183624268,
"learning_rate": 0.00013569598142184225,
"loss": 0.8408,
"step": 837
},
{
"epoch": 0.40353939685788237,
"grad_norm": 3.595640182495117,
"learning_rate": 0.00013555022684552483,
"loss": 1.245,
"step": 838
},
{
"epoch": 0.40402094745079153,
"grad_norm": 1.4770573377609253,
"learning_rate": 0.00013540438576781441,
"loss": 0.4859,
"step": 839
},
{
"epoch": 0.4045024980437007,
"grad_norm": 2.7039146423339844,
"learning_rate": 0.0001352584585435739,
"loss": 0.9747,
"step": 840
},
{
"epoch": 0.40498404863660986,
"grad_norm": 2.615344524383545,
"learning_rate": 0.00013511244552787583,
"loss": 0.6801,
"step": 841
},
{
"epoch": 0.405465599229519,
"grad_norm": 3.7409796714782715,
"learning_rate": 0.00013496634707600147,
"loss": 1.1876,
"step": 842
},
{
"epoch": 0.40594714982242824,
"grad_norm": 2.508939743041992,
"learning_rate": 0.0001348201635434399,
"loss": 0.7794,
"step": 843
},
{
"epoch": 0.4064287004153374,
"grad_norm": 3.5549421310424805,
"learning_rate": 0.0001346738952858873,
"loss": 1.2157,
"step": 844
},
{
"epoch": 0.40691025100824657,
"grad_norm": 0.9292539954185486,
"learning_rate": 0.000134527542659246,
"loss": 1.4636,
"step": 845
},
{
"epoch": 0.40739180160115573,
"grad_norm": 2.5853211879730225,
"learning_rate": 0.00013438110601962362,
"loss": 0.6864,
"step": 846
},
{
"epoch": 0.4078733521940649,
"grad_norm": 2.0483453273773193,
"learning_rate": 0.00013423458572333214,
"loss": 0.816,
"step": 847
},
{
"epoch": 0.40835490278697406,
"grad_norm": 1.1379551887512207,
"learning_rate": 0.0001340879821268872,
"loss": 0.3914,
"step": 848
},
{
"epoch": 0.4088364533798832,
"grad_norm": 4.26398229598999,
"learning_rate": 0.000133941295587007,
"loss": 0.7028,
"step": 849
},
{
"epoch": 0.4093180039727924,
"grad_norm": 2.3732380867004395,
"learning_rate": 0.00013379452646061164,
"loss": 0.5584,
"step": 850
},
{
"epoch": 0.40979955456570155,
"grad_norm": 2.4338433742523193,
"learning_rate": 0.0001336476751048222,
"loss": 0.9941,
"step": 851
},
{
"epoch": 0.4102811051586107,
"grad_norm": 2.9080970287323,
"learning_rate": 0.00013350074187695979,
"loss": 1.3025,
"step": 852
},
{
"epoch": 0.4107626557515199,
"grad_norm": 2.6538901329040527,
"learning_rate": 0.00013335372713454467,
"loss": 0.7301,
"step": 853
},
{
"epoch": 0.41124420634442904,
"grad_norm": 2.5008554458618164,
"learning_rate": 0.0001332066312352956,
"loss": 0.7566,
"step": 854
},
{
"epoch": 0.4117257569373382,
"grad_norm": 2.0874619483947754,
"learning_rate": 0.00013305945453712868,
"loss": 0.9365,
"step": 855
},
{
"epoch": 0.4122073075302474,
"grad_norm": 2.2397348880767822,
"learning_rate": 0.0001329121973981567,
"loss": 0.9617,
"step": 856
},
{
"epoch": 0.4126888581231566,
"grad_norm": 2.2722036838531494,
"learning_rate": 0.00013276486017668807,
"loss": 0.4104,
"step": 857
},
{
"epoch": 0.41317040871606575,
"grad_norm": 2.11865234375,
"learning_rate": 0.0001326174432312262,
"loss": 0.7596,
"step": 858
},
{
"epoch": 0.4136519593089749,
"grad_norm": 2.2710089683532715,
"learning_rate": 0.00013246994692046836,
"loss": 0.9763,
"step": 859
},
{
"epoch": 0.4141335099018841,
"grad_norm": 3.1512913703918457,
"learning_rate": 0.000132322371603305,
"loss": 0.8637,
"step": 860
},
{
"epoch": 0.41461506049479324,
"grad_norm": 5.3608622550964355,
"learning_rate": 0.0001321747176388188,
"loss": 0.4573,
"step": 861
},
{
"epoch": 0.4150966110877024,
"grad_norm": 1.7726064920425415,
"learning_rate": 0.00013202698538628376,
"loss": 1.0072,
"step": 862
},
{
"epoch": 0.41557816168061157,
"grad_norm": 1.9994593858718872,
"learning_rate": 0.00013187917520516448,
"loss": 0.6097,
"step": 863
},
{
"epoch": 0.41605971227352073,
"grad_norm": 2.665196418762207,
"learning_rate": 0.00013173128745511508,
"loss": 0.8823,
"step": 864
},
{
"epoch": 0.4165412628664299,
"grad_norm": 1.9170902967453003,
"learning_rate": 0.0001315833224959784,
"loss": 0.7834,
"step": 865
},
{
"epoch": 0.41702281345933906,
"grad_norm": 1.7230511903762817,
"learning_rate": 0.00013143528068778525,
"loss": 1.2682,
"step": 866
},
{
"epoch": 0.4175043640522482,
"grad_norm": 1.5421873331069946,
"learning_rate": 0.00013128716239075338,
"loss": 0.4533,
"step": 867
},
{
"epoch": 0.4179859146451574,
"grad_norm": 2.3257434368133545,
"learning_rate": 0.00013113896796528664,
"loss": 0.7117,
"step": 868
},
{
"epoch": 0.41846746523806655,
"grad_norm": 2.053032398223877,
"learning_rate": 0.00013099069777197412,
"loss": 0.8121,
"step": 869
},
{
"epoch": 0.41894901583097577,
"grad_norm": 5.06425142288208,
"learning_rate": 0.0001308423521715893,
"loss": 0.7408,
"step": 870
},
{
"epoch": 0.41943056642388493,
"grad_norm": 1.0767431259155273,
"learning_rate": 0.00013069393152508906,
"loss": 0.6687,
"step": 871
},
{
"epoch": 0.4199121170167941,
"grad_norm": 1.6607890129089355,
"learning_rate": 0.00013054543619361303,
"loss": 0.6322,
"step": 872
},
{
"epoch": 0.42039366760970326,
"grad_norm": 1.7440071105957031,
"learning_rate": 0.0001303968665384824,
"loss": 0.889,
"step": 873
},
{
"epoch": 0.4208752182026124,
"grad_norm": 1.4886302947998047,
"learning_rate": 0.00013024822292119934,
"loss": 0.7009,
"step": 874
},
{
"epoch": 0.4213567687955216,
"grad_norm": 3.8570821285247803,
"learning_rate": 0.0001300995057034459,
"loss": 0.7772,
"step": 875
},
{
"epoch": 0.42183831938843075,
"grad_norm": 2.4680871963500977,
"learning_rate": 0.00012995071524708325,
"loss": 0.7877,
"step": 876
},
{
"epoch": 0.4223198699813399,
"grad_norm": 2.5244038105010986,
"learning_rate": 0.00012980185191415074,
"loss": 0.5928,
"step": 877
},
{
"epoch": 0.4228014205742491,
"grad_norm": 1.7207748889923096,
"learning_rate": 0.0001296529160668651,
"loss": 0.7075,
"step": 878
},
{
"epoch": 0.42328297116715824,
"grad_norm": 1.7171252965927124,
"learning_rate": 0.00012950390806761944,
"loss": 0.8689,
"step": 879
},
{
"epoch": 0.4237645217600674,
"grad_norm": 1.759418249130249,
"learning_rate": 0.0001293548282789825,
"loss": 0.4545,
"step": 880
},
{
"epoch": 0.42424607235297657,
"grad_norm": 1.7909297943115234,
"learning_rate": 0.00012920567706369758,
"loss": 1.3034,
"step": 881
},
{
"epoch": 0.42472762294588573,
"grad_norm": 1.4877880811691284,
"learning_rate": 0.00012905645478468192,
"loss": 0.3629,
"step": 882
},
{
"epoch": 0.4252091735387949,
"grad_norm": 3.6452713012695312,
"learning_rate": 0.00012890716180502564,
"loss": 0.6314,
"step": 883
},
{
"epoch": 0.4256907241317041,
"grad_norm": 2.424837589263916,
"learning_rate": 0.00012875779848799078,
"loss": 0.9437,
"step": 884
},
{
"epoch": 0.4261722747246133,
"grad_norm": 2.7232539653778076,
"learning_rate": 0.00012860836519701063,
"loss": 0.9839,
"step": 885
},
{
"epoch": 0.42665382531752244,
"grad_norm": 1.2569257020950317,
"learning_rate": 0.00012845886229568873,
"loss": 0.8196,
"step": 886
},
{
"epoch": 0.4271353759104316,
"grad_norm": 2.5531201362609863,
"learning_rate": 0.00012830929014779797,
"loss": 0.8545,
"step": 887
},
{
"epoch": 0.42761692650334077,
"grad_norm": 6.38144588470459,
"learning_rate": 0.0001281596491172797,
"loss": 0.5451,
"step": 888
},
{
"epoch": 0.42809847709624993,
"grad_norm": 1.4901047945022583,
"learning_rate": 0.00012800993956824303,
"loss": 0.9357,
"step": 889
},
{
"epoch": 0.4285800276891591,
"grad_norm": 2.5083794593811035,
"learning_rate": 0.00012786016186496358,
"loss": 0.9034,
"step": 890
},
{
"epoch": 0.42906157828206826,
"grad_norm": 2.4690587520599365,
"learning_rate": 0.000127710316371883,
"loss": 0.7518,
"step": 891
},
{
"epoch": 0.4295431288749774,
"grad_norm": 1.5724667310714722,
"learning_rate": 0.0001275604034536077,
"loss": 0.4883,
"step": 892
},
{
"epoch": 0.4300246794678866,
"grad_norm": 1.8662641048431396,
"learning_rate": 0.0001274104234749083,
"loss": 0.6713,
"step": 893
},
{
"epoch": 0.43050623006079575,
"grad_norm": 2.4959278106689453,
"learning_rate": 0.00012726037680071853,
"loss": 0.6975,
"step": 894
},
{
"epoch": 0.4309877806537049,
"grad_norm": 2.854491710662842,
"learning_rate": 0.00012711026379613434,
"loss": 0.5982,
"step": 895
},
{
"epoch": 0.4314693312466141,
"grad_norm": 2.513932704925537,
"learning_rate": 0.00012696008482641325,
"loss": 0.6691,
"step": 896
},
{
"epoch": 0.43195088183952324,
"grad_norm": 2.1765429973602295,
"learning_rate": 0.00012680984025697313,
"loss": 0.4283,
"step": 897
},
{
"epoch": 0.43243243243243246,
"grad_norm": 3.172271966934204,
"learning_rate": 0.00012665953045339152,
"loss": 1.1573,
"step": 898
},
{
"epoch": 0.4329139830253416,
"grad_norm": 1.5411570072174072,
"learning_rate": 0.0001265091557814047,
"loss": 0.603,
"step": 899
},
{
"epoch": 0.4333955336182508,
"grad_norm": 4.4379496574401855,
"learning_rate": 0.00012635871660690676,
"loss": 0.4462,
"step": 900
},
{
"epoch": 0.43387708421115995,
"grad_norm": 4.627134799957275,
"learning_rate": 0.0001262082132959488,
"loss": 0.7033,
"step": 901
},
{
"epoch": 0.4343586348040691,
"grad_norm": 5.416947364807129,
"learning_rate": 0.00012605764621473792,
"loss": 1.0499,
"step": 902
},
{
"epoch": 0.4348401853969783,
"grad_norm": 2.7960314750671387,
"learning_rate": 0.00012590701572963642,
"loss": 1.0619,
"step": 903
},
{
"epoch": 0.43532173598988744,
"grad_norm": 4.81326961517334,
"learning_rate": 0.00012575632220716078,
"loss": 0.925,
"step": 904
},
{
"epoch": 0.4358032865827966,
"grad_norm": 1.967151403427124,
"learning_rate": 0.000125605566013981,
"loss": 0.8593,
"step": 905
},
{
"epoch": 0.43628483717570576,
"grad_norm": 3.3491618633270264,
"learning_rate": 0.00012545474751691953,
"loss": 0.884,
"step": 906
},
{
"epoch": 0.4367663877686149,
"grad_norm": 1.8660701513290405,
"learning_rate": 0.00012530386708295036,
"loss": 0.7831,
"step": 907
},
{
"epoch": 0.4372479383615241,
"grad_norm": 2.808755874633789,
"learning_rate": 0.00012515292507919829,
"loss": 0.8822,
"step": 908
},
{
"epoch": 0.43772948895443325,
"grad_norm": 1.8173489570617676,
"learning_rate": 0.0001250019218729378,
"loss": 0.6888,
"step": 909
},
{
"epoch": 0.4382110395473424,
"grad_norm": 2.033569097518921,
"learning_rate": 0.00012485085783159238,
"loss": 0.3951,
"step": 910
},
{
"epoch": 0.43869259014025164,
"grad_norm": 2.698009729385376,
"learning_rate": 0.00012469973332273354,
"loss": 0.5259,
"step": 911
},
{
"epoch": 0.4391741407331608,
"grad_norm": 1.5774399042129517,
"learning_rate": 0.00012454854871407994,
"loss": 0.4619,
"step": 912
},
{
"epoch": 0.43965569132606996,
"grad_norm": 1.6870514154434204,
"learning_rate": 0.00012439730437349635,
"loss": 0.4305,
"step": 913
},
{
"epoch": 0.4401372419189791,
"grad_norm": 1.871408224105835,
"learning_rate": 0.00012424600066899302,
"loss": 0.6,
"step": 914
},
{
"epoch": 0.4406187925118883,
"grad_norm": 2.036029577255249,
"learning_rate": 0.00012409463796872464,
"loss": 1.0179,
"step": 915
},
{
"epoch": 0.44110034310479745,
"grad_norm": 1.8108471632003784,
"learning_rate": 0.0001239432166409893,
"loss": 0.8426,
"step": 916
},
{
"epoch": 0.4415818936977066,
"grad_norm": 3.0200490951538086,
"learning_rate": 0.00012379173705422795,
"loss": 0.9675,
"step": 917
},
{
"epoch": 0.4420634442906158,
"grad_norm": 2.3675003051757812,
"learning_rate": 0.00012364019957702315,
"loss": 0.6689,
"step": 918
},
{
"epoch": 0.44254499488352494,
"grad_norm": 3.1368930339813232,
"learning_rate": 0.00012348860457809838,
"loss": 0.8873,
"step": 919
},
{
"epoch": 0.4430265454764341,
"grad_norm": 1.4245840311050415,
"learning_rate": 0.00012333695242631705,
"loss": 0.8424,
"step": 920
},
{
"epoch": 0.44350809606934327,
"grad_norm": 3.4238197803497314,
"learning_rate": 0.0001231852434906817,
"loss": 1.2067,
"step": 921
},
{
"epoch": 0.44398964666225244,
"grad_norm": 2.398881196975708,
"learning_rate": 0.00012303347814033292,
"loss": 0.7952,
"step": 922
},
{
"epoch": 0.4444711972551616,
"grad_norm": 2.153909206390381,
"learning_rate": 0.0001228816567445487,
"loss": 0.9368,
"step": 923
},
{
"epoch": 0.44495274784807076,
"grad_norm": 2.978900194168091,
"learning_rate": 0.0001227297796727433,
"loss": 0.6853,
"step": 924
},
{
"epoch": 0.44543429844098,
"grad_norm": 1.9314982891082764,
"learning_rate": 0.00012257784729446656,
"loss": 0.7932,
"step": 925
},
{
"epoch": 0.44591584903388914,
"grad_norm": 2.3480403423309326,
"learning_rate": 0.00012242585997940275,
"loss": 0.7998,
"step": 926
},
{
"epoch": 0.4463973996267983,
"grad_norm": 4.056048393249512,
"learning_rate": 0.0001222738180973699,
"loss": 0.6552,
"step": 927
},
{
"epoch": 0.44687895021970747,
"grad_norm": 3.2654383182525635,
"learning_rate": 0.00012212172201831885,
"loss": 0.6561,
"step": 928
},
{
"epoch": 0.44736050081261663,
"grad_norm": 3.2897989749908447,
"learning_rate": 0.00012196957211233222,
"loss": 1.0814,
"step": 929
},
{
"epoch": 0.4478420514055258,
"grad_norm": 1.9905271530151367,
"learning_rate": 0.00012181736874962371,
"loss": 1.0158,
"step": 930
},
{
"epoch": 0.44832360199843496,
"grad_norm": 2.3979315757751465,
"learning_rate": 0.00012166511230053696,
"loss": 1.0173,
"step": 931
},
{
"epoch": 0.4488051525913441,
"grad_norm": 2.0289697647094727,
"learning_rate": 0.00012151280313554486,
"loss": 0.9401,
"step": 932
},
{
"epoch": 0.4492867031842533,
"grad_norm": 2.8876144886016846,
"learning_rate": 0.00012136044162524858,
"loss": 0.8686,
"step": 933
},
{
"epoch": 0.44976825377716245,
"grad_norm": 3.0945162773132324,
"learning_rate": 0.00012120802814037663,
"loss": 0.3943,
"step": 934
},
{
"epoch": 0.4502498043700716,
"grad_norm": 2.6051065921783447,
"learning_rate": 0.00012105556305178399,
"loss": 0.5688,
"step": 935
},
{
"epoch": 0.4507313549629808,
"grad_norm": 3.5704925060272217,
"learning_rate": 0.00012090304673045123,
"loss": 0.627,
"step": 936
},
{
"epoch": 0.45121290555588994,
"grad_norm": 4.448431491851807,
"learning_rate": 0.00012075047954748353,
"loss": 1.0867,
"step": 937
},
{
"epoch": 0.4516944561487991,
"grad_norm": 3.0634100437164307,
"learning_rate": 0.00012059786187410984,
"loss": 0.7893,
"step": 938
},
{
"epoch": 0.4521760067417083,
"grad_norm": 2.6939537525177,
"learning_rate": 0.000120445194081682,
"loss": 0.4777,
"step": 939
},
{
"epoch": 0.4526575573346175,
"grad_norm": 1.9854243993759155,
"learning_rate": 0.00012029247654167379,
"loss": 0.4645,
"step": 940
},
{
"epoch": 0.45313910792752665,
"grad_norm": 2.8124136924743652,
"learning_rate": 0.00012013970962568002,
"loss": 0.6528,
"step": 941
},
{
"epoch": 0.4536206585204358,
"grad_norm": 2.101633310317993,
"learning_rate": 0.00011998689370541562,
"loss": 0.72,
"step": 942
},
{
"epoch": 0.454102209113345,
"grad_norm": 2.0931015014648438,
"learning_rate": 0.00011983402915271478,
"loss": 0.6483,
"step": 943
},
{
"epoch": 0.45458375970625414,
"grad_norm": 3.0055315494537354,
"learning_rate": 0.00011968111633953007,
"loss": 0.9383,
"step": 944
},
{
"epoch": 0.4550653102991633,
"grad_norm": 2.681931734085083,
"learning_rate": 0.0001195281556379314,
"loss": 0.7556,
"step": 945
},
{
"epoch": 0.45554686089207247,
"grad_norm": 7.055514335632324,
"learning_rate": 0.0001193751474201053,
"loss": 0.4541,
"step": 946
},
{
"epoch": 0.45602841148498163,
"grad_norm": 2.157724618911743,
"learning_rate": 0.00011922209205835382,
"loss": 0.5006,
"step": 947
},
{
"epoch": 0.4565099620778908,
"grad_norm": 1.7895299196243286,
"learning_rate": 0.0001190689899250938,
"loss": 0.8087,
"step": 948
},
{
"epoch": 0.45699151267079996,
"grad_norm": 2.361053228378296,
"learning_rate": 0.00011891584139285582,
"loss": 1.002,
"step": 949
},
{
"epoch": 0.4574730632637091,
"grad_norm": 2.3107917308807373,
"learning_rate": 0.00011876264683428344,
"loss": 0.5038,
"step": 950
},
{
"epoch": 0.4579546138566183,
"grad_norm": 2.193125009536743,
"learning_rate": 0.00011860940662213211,
"loss": 0.5385,
"step": 951
},
{
"epoch": 0.4584361644495275,
"grad_norm": 2.572331190109253,
"learning_rate": 0.00011845612112926843,
"loss": 0.7153,
"step": 952
},
{
"epoch": 0.45891771504243667,
"grad_norm": 4.683573246002197,
"learning_rate": 0.00011830279072866921,
"loss": 0.7073,
"step": 953
},
{
"epoch": 0.45939926563534583,
"grad_norm": 2.2095165252685547,
"learning_rate": 0.00011814941579342044,
"loss": 0.706,
"step": 954
},
{
"epoch": 0.459880816228255,
"grad_norm": 1.5698320865631104,
"learning_rate": 0.00011799599669671654,
"loss": 0.364,
"step": 955
},
{
"epoch": 0.46036236682116416,
"grad_norm": 1.8080724477767944,
"learning_rate": 0.00011784253381185937,
"loss": 0.8959,
"step": 956
},
{
"epoch": 0.4608439174140733,
"grad_norm": 1.1480696201324463,
"learning_rate": 0.0001176890275122573,
"loss": 0.522,
"step": 957
},
{
"epoch": 0.4613254680069825,
"grad_norm": 2.714405059814453,
"learning_rate": 0.0001175354781714244,
"loss": 0.4145,
"step": 958
},
{
"epoch": 0.46180701859989165,
"grad_norm": 3.001786470413208,
"learning_rate": 0.0001173818861629794,
"loss": 0.9095,
"step": 959
},
{
"epoch": 0.4622885691928008,
"grad_norm": 1.477941870689392,
"learning_rate": 0.00011722825186064494,
"loss": 0.3998,
"step": 960
},
{
"epoch": 0.46277011978571,
"grad_norm": 2.0230369567871094,
"learning_rate": 0.00011707457563824646,
"loss": 0.7196,
"step": 961
},
{
"epoch": 0.46325167037861914,
"grad_norm": 2.4260199069976807,
"learning_rate": 0.00011692085786971149,
"loss": 0.5469,
"step": 962
},
{
"epoch": 0.4637332209715283,
"grad_norm": 2.5478789806365967,
"learning_rate": 0.00011676709892906858,
"loss": 0.4603,
"step": 963
},
{
"epoch": 0.46421477156443747,
"grad_norm": 2.9565882682800293,
"learning_rate": 0.00011661329919044656,
"loss": 0.8948,
"step": 964
},
{
"epoch": 0.46469632215734663,
"grad_norm": 2.064451217651367,
"learning_rate": 0.00011645945902807341,
"loss": 0.4803,
"step": 965
},
{
"epoch": 0.46517787275025585,
"grad_norm": 2.4505929946899414,
"learning_rate": 0.00011630557881627553,
"loss": 0.6063,
"step": 966
},
{
"epoch": 0.465659423343165,
"grad_norm": 2.06998348236084,
"learning_rate": 0.0001161516589294768,
"loss": 1.0691,
"step": 967
},
{
"epoch": 0.4661409739360742,
"grad_norm": 2.934462785720825,
"learning_rate": 0.00011599769974219757,
"loss": 0.5514,
"step": 968
},
{
"epoch": 0.46662252452898334,
"grad_norm": 3.6781392097473145,
"learning_rate": 0.0001158437016290539,
"loss": 0.7883,
"step": 969
},
{
"epoch": 0.4671040751218925,
"grad_norm": 5.928903102874756,
"learning_rate": 0.00011568966496475649,
"loss": 0.7908,
"step": 970
},
{
"epoch": 0.46758562571480167,
"grad_norm": 4.225213050842285,
"learning_rate": 0.00011553559012410984,
"loss": 0.642,
"step": 971
},
{
"epoch": 0.46806717630771083,
"grad_norm": 4.230667591094971,
"learning_rate": 0.00011538147748201138,
"loss": 1.0245,
"step": 972
},
{
"epoch": 0.46854872690062,
"grad_norm": 2.045747995376587,
"learning_rate": 0.00011522732741345053,
"loss": 0.8693,
"step": 973
},
{
"epoch": 0.46903027749352916,
"grad_norm": 2.93965744972229,
"learning_rate": 0.00011507314029350776,
"loss": 0.7032,
"step": 974
},
{
"epoch": 0.4695118280864383,
"grad_norm": 2.0694057941436768,
"learning_rate": 0.00011491891649735366,
"loss": 0.7536,
"step": 975
},
{
"epoch": 0.4699933786793475,
"grad_norm": 2.6590757369995117,
"learning_rate": 0.00011476465640024814,
"loss": 0.8599,
"step": 976
},
{
"epoch": 0.47047492927225665,
"grad_norm": 2.7925920486450195,
"learning_rate": 0.00011461036037753934,
"loss": 0.4626,
"step": 977
},
{
"epoch": 0.4709564798651658,
"grad_norm": 1.8391474485397339,
"learning_rate": 0.00011445602880466288,
"loss": 0.5219,
"step": 978
},
{
"epoch": 0.471438030458075,
"grad_norm": 1.3266628980636597,
"learning_rate": 0.00011430166205714088,
"loss": 0.6874,
"step": 979
},
{
"epoch": 0.4719195810509842,
"grad_norm": 2.141636848449707,
"learning_rate": 0.00011414726051058102,
"loss": 0.6873,
"step": 980
},
{
"epoch": 0.47240113164389336,
"grad_norm": 2.55141019821167,
"learning_rate": 0.0001139928245406757,
"loss": 0.6919,
"step": 981
},
{
"epoch": 0.4728826822368025,
"grad_norm": 1.6124935150146484,
"learning_rate": 0.00011383835452320097,
"loss": 0.982,
"step": 982
},
{
"epoch": 0.4733642328297117,
"grad_norm": 2.0726158618927,
"learning_rate": 0.00011368385083401585,
"loss": 1.022,
"step": 983
},
{
"epoch": 0.47384578342262085,
"grad_norm": 3.476106882095337,
"learning_rate": 0.00011352931384906125,
"loss": 0.5655,
"step": 984
},
{
"epoch": 0.47432733401553,
"grad_norm": 2.5833818912506104,
"learning_rate": 0.00011337474394435908,
"loss": 0.7119,
"step": 985
},
{
"epoch": 0.4748088846084392,
"grad_norm": 1.4606103897094727,
"learning_rate": 0.00011322014149601136,
"loss": 0.6343,
"step": 986
},
{
"epoch": 0.47529043520134834,
"grad_norm": 3.8119499683380127,
"learning_rate": 0.00011306550688019926,
"loss": 0.7238,
"step": 987
},
{
"epoch": 0.4757719857942575,
"grad_norm": 2.314828872680664,
"learning_rate": 0.0001129108404731823,
"loss": 0.7181,
"step": 988
},
{
"epoch": 0.47625353638716666,
"grad_norm": 1.842475175857544,
"learning_rate": 0.0001127561426512973,
"loss": 0.7928,
"step": 989
},
{
"epoch": 0.47673508698007583,
"grad_norm": 2.3919920921325684,
"learning_rate": 0.0001126014137909575,
"loss": 0.6528,
"step": 990
},
{
"epoch": 0.477216637572985,
"grad_norm": 1.3240762948989868,
"learning_rate": 0.00011244665426865174,
"loss": 0.543,
"step": 991
},
{
"epoch": 0.47769818816589416,
"grad_norm": 2.4185304641723633,
"learning_rate": 0.00011229186446094338,
"loss": 0.5988,
"step": 992
},
{
"epoch": 0.4781797387588034,
"grad_norm": 1.5180091857910156,
"learning_rate": 0.00011213704474446951,
"loss": 0.8106,
"step": 993
},
{
"epoch": 0.47866128935171254,
"grad_norm": 3.8423268795013428,
"learning_rate": 0.00011198219549594,
"loss": 0.7134,
"step": 994
},
{
"epoch": 0.4791428399446217,
"grad_norm": 4.835480213165283,
"learning_rate": 0.00011182731709213659,
"loss": 0.5784,
"step": 995
},
{
"epoch": 0.47962439053753086,
"grad_norm": 4.3305511474609375,
"learning_rate": 0.00011167240990991192,
"loss": 0.6444,
"step": 996
},
{
"epoch": 0.48010594113044003,
"grad_norm": 1.8703162670135498,
"learning_rate": 0.00011151747432618871,
"loss": 0.6062,
"step": 997
},
{
"epoch": 0.4805874917233492,
"grad_norm": 3.5100109577178955,
"learning_rate": 0.00011136251071795871,
"loss": 0.5488,
"step": 998
},
{
"epoch": 0.48106904231625836,
"grad_norm": 2.39043927192688,
"learning_rate": 0.00011120751946228197,
"loss": 0.7438,
"step": 999
},
{
"epoch": 0.4815505929091675,
"grad_norm": 3.630051612854004,
"learning_rate": 0.00011105250093628565,
"loss": 0.8574,
"step": 1000
},
{
"epoch": 0.4820321435020767,
"grad_norm": 2.519382953643799,
"learning_rate": 0.00011089745551716344,
"loss": 0.8414,
"step": 1001
},
{
"epoch": 0.48251369409498585,
"grad_norm": 2.2886648178100586,
"learning_rate": 0.00011074238358217437,
"loss": 0.9677,
"step": 1002
},
{
"epoch": 0.482995244687895,
"grad_norm": 1.639683723449707,
"learning_rate": 0.00011058728550864197,
"loss": 0.4151,
"step": 1003
},
{
"epoch": 0.4834767952808042,
"grad_norm": 2.246243715286255,
"learning_rate": 0.00011043216167395344,
"loss": 0.8334,
"step": 1004
},
{
"epoch": 0.48395834587371334,
"grad_norm": 4.043992519378662,
"learning_rate": 0.00011027701245555865,
"loss": 1.1405,
"step": 1005
},
{
"epoch": 0.4844398964666225,
"grad_norm": 5.069822788238525,
"learning_rate": 0.00011012183823096917,
"loss": 0.7535,
"step": 1006
},
{
"epoch": 0.4849214470595317,
"grad_norm": 1.6714848279953003,
"learning_rate": 0.00010996663937775751,
"loss": 0.4408,
"step": 1007
},
{
"epoch": 0.4854029976524409,
"grad_norm": 1.6782582998275757,
"learning_rate": 0.000109811416273556,
"loss": 1.055,
"step": 1008
},
{
"epoch": 0.48588454824535005,
"grad_norm": 2.3483331203460693,
"learning_rate": 0.00010965616929605609,
"loss": 0.7248,
"step": 1009
},
{
"epoch": 0.4863660988382592,
"grad_norm": 2.866668701171875,
"learning_rate": 0.0001095008988230072,
"loss": 0.8629,
"step": 1010
},
{
"epoch": 0.48684764943116837,
"grad_norm": 3.7616584300994873,
"learning_rate": 0.00010934560523221602,
"loss": 0.952,
"step": 1011
},
{
"epoch": 0.48732920002407754,
"grad_norm": 5.05987548828125,
"learning_rate": 0.00010919028890154543,
"loss": 0.7482,
"step": 1012
},
{
"epoch": 0.4878107506169867,
"grad_norm": 3.6084094047546387,
"learning_rate": 0.00010903495020891375,
"loss": 0.8013,
"step": 1013
},
{
"epoch": 0.48829230120989586,
"grad_norm": 2.3544795513153076,
"learning_rate": 0.00010887958953229349,
"loss": 0.9513,
"step": 1014
},
{
"epoch": 0.488773851802805,
"grad_norm": 4.078423500061035,
"learning_rate": 0.00010872420724971088,
"loss": 0.8901,
"step": 1015
},
{
"epoch": 0.4892554023957142,
"grad_norm": 3.262572765350342,
"learning_rate": 0.0001085688037392446,
"loss": 0.7107,
"step": 1016
},
{
"epoch": 0.48973695298862335,
"grad_norm": 3.4895589351654053,
"learning_rate": 0.000108413379379025,
"loss": 0.5975,
"step": 1017
},
{
"epoch": 0.4902185035815325,
"grad_norm": 2.3548641204833984,
"learning_rate": 0.00010825793454723325,
"loss": 0.71,
"step": 1018
},
{
"epoch": 0.4907000541744417,
"grad_norm": 2.5070619583129883,
"learning_rate": 0.00010810246962210018,
"loss": 0.8754,
"step": 1019
},
{
"epoch": 0.49118160476735084,
"grad_norm": 2.6222572326660156,
"learning_rate": 0.00010794698498190557,
"loss": 0.7779,
"step": 1020
},
{
"epoch": 0.49166315536026006,
"grad_norm": 2.8594443798065186,
"learning_rate": 0.00010779148100497722,
"loss": 0.5911,
"step": 1021
},
{
"epoch": 0.4921447059531692,
"grad_norm": 3.380793333053589,
"learning_rate": 0.00010763595806968996,
"loss": 0.8463,
"step": 1022
},
{
"epoch": 0.4926262565460784,
"grad_norm": 3.048558235168457,
"learning_rate": 0.00010748041655446473,
"loss": 1.1503,
"step": 1023
},
{
"epoch": 0.49310780713898755,
"grad_norm": 3.162221670150757,
"learning_rate": 0.00010732485683776768,
"loss": 0.9634,
"step": 1024
},
{
"epoch": 0.4935893577318967,
"grad_norm": 1.7662273645401,
"learning_rate": 0.00010716927929810925,
"loss": 0.9218,
"step": 1025
},
{
"epoch": 0.4940709083248059,
"grad_norm": 6.701080322265625,
"learning_rate": 0.00010701368431404326,
"loss": 0.6088,
"step": 1026
},
{
"epoch": 0.49455245891771504,
"grad_norm": 1.6572067737579346,
"learning_rate": 0.00010685807226416598,
"loss": 0.4006,
"step": 1027
},
{
"epoch": 0.4950340095106242,
"grad_norm": 2.3362746238708496,
"learning_rate": 0.00010670244352711518,
"loss": 0.4711,
"step": 1028
},
{
"epoch": 0.49551556010353337,
"grad_norm": 3.27119779586792,
"learning_rate": 0.00010654679848156925,
"loss": 0.5751,
"step": 1029
},
{
"epoch": 0.49599711069644253,
"grad_norm": 2.6703665256500244,
"learning_rate": 0.00010639113750624625,
"loss": 0.3203,
"step": 1030
},
{
"epoch": 0.4964786612893517,
"grad_norm": 2.749845027923584,
"learning_rate": 0.00010623546097990303,
"loss": 0.7552,
"step": 1031
},
{
"epoch": 0.49696021188226086,
"grad_norm": 1.693564772605896,
"learning_rate": 0.00010607976928133423,
"loss": 0.3451,
"step": 1032
},
{
"epoch": 0.49744176247517,
"grad_norm": 2.492354154586792,
"learning_rate": 0.00010592406278937144,
"loss": 0.6278,
"step": 1033
},
{
"epoch": 0.4979233130680792,
"grad_norm": 3.982508897781372,
"learning_rate": 0.00010576834188288226,
"loss": 0.9494,
"step": 1034
},
{
"epoch": 0.4984048636609884,
"grad_norm": 3.6745517253875732,
"learning_rate": 0.00010561260694076935,
"loss": 0.8115,
"step": 1035
},
{
"epoch": 0.49888641425389757,
"grad_norm": 1.9711278676986694,
"learning_rate": 0.00010545685834196948,
"loss": 0.7224,
"step": 1036
},
{
"epoch": 0.49936796484680673,
"grad_norm": 3.948199510574341,
"learning_rate": 0.00010530109646545272,
"loss": 0.7509,
"step": 1037
},
{
"epoch": 0.4998495154397159,
"grad_norm": 4.0536041259765625,
"learning_rate": 0.0001051453216902214,
"loss": 0.8095,
"step": 1038
},
{
"epoch": 0.500331066032625,
"grad_norm": 3.6049885749816895,
"learning_rate": 0.00010498953439530925,
"loss": 0.8699,
"step": 1039
},
{
"epoch": 0.5008126166255342,
"grad_norm": 1.3765301704406738,
"learning_rate": 0.00010483373495978046,
"loss": 0.6613,
"step": 1040
},
{
"epoch": 0.5012941672184434,
"grad_norm": 1.5237274169921875,
"learning_rate": 0.00010467792376272877,
"loss": 0.8436,
"step": 1041
},
{
"epoch": 0.5017757178113526,
"grad_norm": 2.1992526054382324,
"learning_rate": 0.00010452210118327652,
"loss": 0.429,
"step": 1042
},
{
"epoch": 0.5022572684042618,
"grad_norm": 4.125129222869873,
"learning_rate": 0.00010436626760057378,
"loss": 0.7708,
"step": 1043
},
{
"epoch": 0.5027388189971709,
"grad_norm": 2.204009771347046,
"learning_rate": 0.00010421042339379732,
"loss": 0.5653,
"step": 1044
},
{
"epoch": 0.5032203695900801,
"grad_norm": 4.470865726470947,
"learning_rate": 0.00010405456894214987,
"loss": 0.7858,
"step": 1045
},
{
"epoch": 0.5037019201829892,
"grad_norm": 1.1038165092468262,
"learning_rate": 0.00010389870462485902,
"loss": 1.4328,
"step": 1046
},
{
"epoch": 0.5041834707758984,
"grad_norm": 1.9314682483673096,
"learning_rate": 0.00010374283082117635,
"loss": 0.3706,
"step": 1047
},
{
"epoch": 0.5046650213688075,
"grad_norm": 2.6393468379974365,
"learning_rate": 0.00010358694791037653,
"loss": 1.1257,
"step": 1048
},
{
"epoch": 0.5051465719617168,
"grad_norm": 3.338649034500122,
"learning_rate": 0.00010343105627175644,
"loss": 0.8054,
"step": 1049
},
{
"epoch": 0.5056281225546259,
"grad_norm": 1.6628873348236084,
"learning_rate": 0.00010327515628463415,
"loss": 0.4518,
"step": 1050
},
{
"epoch": 0.5061096731475351,
"grad_norm": 3.3070363998413086,
"learning_rate": 0.00010311924832834808,
"loss": 1.2035,
"step": 1051
},
{
"epoch": 0.5065912237404442,
"grad_norm": 2.4879815578460693,
"learning_rate": 0.00010296333278225599,
"loss": 0.5938,
"step": 1052
},
{
"epoch": 0.5070727743333534,
"grad_norm": 3.5677437782287598,
"learning_rate": 0.00010280741002573413,
"loss": 0.3152,
"step": 1053
},
{
"epoch": 0.5075543249262625,
"grad_norm": 2.475534200668335,
"learning_rate": 0.00010265148043817632,
"loss": 0.789,
"step": 1054
},
{
"epoch": 0.5080358755191717,
"grad_norm": 3.422375202178955,
"learning_rate": 0.00010249554439899298,
"loss": 0.8623,
"step": 1055
},
{
"epoch": 0.508517426112081,
"grad_norm": 1.3005175590515137,
"learning_rate": 0.00010233960228761022,
"loss": 0.6675,
"step": 1056
},
{
"epoch": 0.5089989767049901,
"grad_norm": 1.275846004486084,
"learning_rate": 0.00010218365448346893,
"loss": 0.7612,
"step": 1057
},
{
"epoch": 0.5094805272978993,
"grad_norm": 3.3997249603271484,
"learning_rate": 0.00010202770136602388,
"loss": 0.839,
"step": 1058
},
{
"epoch": 0.5099620778908084,
"grad_norm": 1.7658668756484985,
"learning_rate": 0.00010187174331474271,
"loss": 0.4518,
"step": 1059
},
{
"epoch": 0.5104436284837176,
"grad_norm": 2.3334131240844727,
"learning_rate": 0.00010171578070910512,
"loss": 0.4001,
"step": 1060
},
{
"epoch": 0.5109251790766267,
"grad_norm": 2.203070878982544,
"learning_rate": 0.00010155981392860185,
"loss": 0.8666,
"step": 1061
},
{
"epoch": 0.5114067296695359,
"grad_norm": 1.5210875272750854,
"learning_rate": 0.00010140384335273386,
"loss": 0.8547,
"step": 1062
},
{
"epoch": 0.511888280262445,
"grad_norm": 2.5150206089019775,
"learning_rate": 0.00010124786936101127,
"loss": 0.6131,
"step": 1063
},
{
"epoch": 0.5123698308553543,
"grad_norm": 2.087355852127075,
"learning_rate": 0.00010109189233295255,
"loss": 0.7018,
"step": 1064
},
{
"epoch": 0.5128513814482634,
"grad_norm": 3.2802398204803467,
"learning_rate": 0.00010093591264808358,
"loss": 0.6533,
"step": 1065
},
{
"epoch": 0.5133329320411726,
"grad_norm": 2.5115907192230225,
"learning_rate": 0.00010077993068593663,
"loss": 0.8199,
"step": 1066
},
{
"epoch": 0.5138144826340817,
"grad_norm": 3.236037015914917,
"learning_rate": 0.00010062394682604963,
"loss": 0.649,
"step": 1067
},
{
"epoch": 0.5142960332269909,
"grad_norm": 2.0290400981903076,
"learning_rate": 0.00010046796144796497,
"loss": 0.5048,
"step": 1068
},
{
"epoch": 0.5147775838199001,
"grad_norm": 2.3132944107055664,
"learning_rate": 0.0001003119749312289,
"loss": 0.8111,
"step": 1069
},
{
"epoch": 0.5152591344128092,
"grad_norm": 3.8110101222991943,
"learning_rate": 0.00010015598765539031,
"loss": 0.831,
"step": 1070
},
{
"epoch": 0.5157406850057185,
"grad_norm": 2.6409425735473633,
"learning_rate": 0.0001,
"loss": 0.5713,
"step": 1071
},
{
"epoch": 0.5162222355986276,
"grad_norm": 2.540348768234253,
"learning_rate": 9.984401234460971e-05,
"loss": 0.896,
"step": 1072
},
{
"epoch": 0.5167037861915368,
"grad_norm": 1.605286717414856,
"learning_rate": 9.968802506877111e-05,
"loss": 0.7826,
"step": 1073
},
{
"epoch": 0.5171853367844459,
"grad_norm": 1.9852914810180664,
"learning_rate": 9.953203855203504e-05,
"loss": 0.484,
"step": 1074
},
{
"epoch": 0.5176668873773551,
"grad_norm": 2.697453498840332,
"learning_rate": 9.93760531739504e-05,
"loss": 0.5764,
"step": 1075
},
{
"epoch": 0.5181484379702642,
"grad_norm": 3.257183074951172,
"learning_rate": 9.922006931406338e-05,
"loss": 0.8809,
"step": 1076
},
{
"epoch": 0.5186299885631734,
"grad_norm": 1.4868934154510498,
"learning_rate": 9.906408735191643e-05,
"loss": 0.3895,
"step": 1077
},
{
"epoch": 0.5191115391560825,
"grad_norm": 2.801379919052124,
"learning_rate": 9.890810766704745e-05,
"loss": 0.6476,
"step": 1078
},
{
"epoch": 0.5195930897489918,
"grad_norm": 5.3302321434021,
"learning_rate": 9.875213063898875e-05,
"loss": 0.7303,
"step": 1079
},
{
"epoch": 0.5200746403419009,
"grad_norm": 8.286264419555664,
"learning_rate": 9.859615664726615e-05,
"loss": 0.8864,
"step": 1080
},
{
"epoch": 0.5205561909348101,
"grad_norm": 2.5525264739990234,
"learning_rate": 9.844018607139818e-05,
"loss": 1.2073,
"step": 1081
},
{
"epoch": 0.5210377415277193,
"grad_norm": 3.0127081871032715,
"learning_rate": 9.828421929089493e-05,
"loss": 0.7991,
"step": 1082
},
{
"epoch": 0.5215192921206284,
"grad_norm": 3.983294725418091,
"learning_rate": 9.812825668525733e-05,
"loss": 0.821,
"step": 1083
},
{
"epoch": 0.5220008427135376,
"grad_norm": 7.565732479095459,
"learning_rate": 9.797229863397615e-05,
"loss": 1.2835,
"step": 1084
},
{
"epoch": 0.5224823933064467,
"grad_norm": 2.560930013656616,
"learning_rate": 9.781634551653108e-05,
"loss": 0.7872,
"step": 1085
},
{
"epoch": 0.522963943899356,
"grad_norm": 3.7823336124420166,
"learning_rate": 9.766039771238982e-05,
"loss": 0.9539,
"step": 1086
},
{
"epoch": 0.5234454944922651,
"grad_norm": 1.7851648330688477,
"learning_rate": 9.750445560100706e-05,
"loss": 0.7338,
"step": 1087
},
{
"epoch": 0.5239270450851743,
"grad_norm": 2.3627817630767822,
"learning_rate": 9.73485195618237e-05,
"loss": 0.87,
"step": 1088
},
{
"epoch": 0.5244085956780834,
"grad_norm": 3.479341745376587,
"learning_rate": 9.719258997426588e-05,
"loss": 0.872,
"step": 1089
},
{
"epoch": 0.5248901462709926,
"grad_norm": 3.8782670497894287,
"learning_rate": 9.703666721774402e-05,
"loss": 0.4405,
"step": 1090
},
{
"epoch": 0.5253716968639017,
"grad_norm": 1.5596513748168945,
"learning_rate": 9.688075167165194e-05,
"loss": 0.5061,
"step": 1091
},
{
"epoch": 0.525853247456811,
"grad_norm": 2.221703290939331,
"learning_rate": 9.672484371536586e-05,
"loss": 0.4747,
"step": 1092
},
{
"epoch": 0.52633479804972,
"grad_norm": 5.022744178771973,
"learning_rate": 9.656894372824358e-05,
"loss": 1.0149,
"step": 1093
},
{
"epoch": 0.5268163486426293,
"grad_norm": 1.9501501321792603,
"learning_rate": 9.64130520896235e-05,
"loss": 0.7204,
"step": 1094
},
{
"epoch": 0.5272978992355384,
"grad_norm": 1.2803010940551758,
"learning_rate": 9.625716917882367e-05,
"loss": 0.5088,
"step": 1095
},
{
"epoch": 0.5277794498284476,
"grad_norm": 1.8832592964172363,
"learning_rate": 9.6101295375141e-05,
"loss": 0.921,
"step": 1096
},
{
"epoch": 0.5282610004213568,
"grad_norm": 2.0987727642059326,
"learning_rate": 9.594543105785013e-05,
"loss": 0.8486,
"step": 1097
},
{
"epoch": 0.5287425510142659,
"grad_norm": 3.9266583919525146,
"learning_rate": 9.578957660620267e-05,
"loss": 0.5983,
"step": 1098
},
{
"epoch": 0.5292241016071751,
"grad_norm": 2.6706717014312744,
"learning_rate": 9.563373239942623e-05,
"loss": 0.617,
"step": 1099
},
{
"epoch": 0.5297056522000843,
"grad_norm": 1.8419418334960938,
"learning_rate": 9.547789881672348e-05,
"loss": 0.4538,
"step": 1100
},
{
"epoch": 0.5301872027929935,
"grad_norm": 1.9119439125061035,
"learning_rate": 9.532207623727126e-05,
"loss": 0.7275,
"step": 1101
},
{
"epoch": 0.5306687533859026,
"grad_norm": 3.1396830081939697,
"learning_rate": 9.516626504021957e-05,
"loss": 0.6206,
"step": 1102
},
{
"epoch": 0.5311503039788118,
"grad_norm": 2.5384531021118164,
"learning_rate": 9.501046560469079e-05,
"loss": 1.0057,
"step": 1103
},
{
"epoch": 0.5316318545717209,
"grad_norm": 3.143725872039795,
"learning_rate": 9.485467830977864e-05,
"loss": 1.1685,
"step": 1104
},
{
"epoch": 0.5321134051646301,
"grad_norm": 4.282426357269287,
"learning_rate": 9.469890353454732e-05,
"loss": 0.6259,
"step": 1105
},
{
"epoch": 0.5325949557575392,
"grad_norm": 2.5603525638580322,
"learning_rate": 9.454314165803054e-05,
"loss": 0.6818,
"step": 1106
},
{
"epoch": 0.5330765063504485,
"grad_norm": 2.4884443283081055,
"learning_rate": 9.438739305923067e-05,
"loss": 0.7338,
"step": 1107
},
{
"epoch": 0.5335580569433576,
"grad_norm": 3.4453368186950684,
"learning_rate": 9.423165811711777e-05,
"loss": 0.8649,
"step": 1108
},
{
"epoch": 0.5340396075362668,
"grad_norm": 2.136265516281128,
"learning_rate": 9.407593721062859e-05,
"loss": 0.5932,
"step": 1109
},
{
"epoch": 0.534521158129176,
"grad_norm": 2.1778738498687744,
"learning_rate": 9.39202307186658e-05,
"loss": 0.3037,
"step": 1110
},
{
"epoch": 0.5350027087220851,
"grad_norm": 1.9080350399017334,
"learning_rate": 9.3764539020097e-05,
"loss": 0.7092,
"step": 1111
},
{
"epoch": 0.5354842593149943,
"grad_norm": 2.198676824569702,
"learning_rate": 9.360886249375376e-05,
"loss": 1.0817,
"step": 1112
},
{
"epoch": 0.5359658099079034,
"grad_norm": 1.1177189350128174,
"learning_rate": 9.345320151843078e-05,
"loss": 0.5078,
"step": 1113
},
{
"epoch": 0.5364473605008127,
"grad_norm": 3.8491134643554688,
"learning_rate": 9.329755647288485e-05,
"loss": 0.9873,
"step": 1114
},
{
"epoch": 0.5369289110937218,
"grad_norm": 4.839039325714111,
"learning_rate": 9.314192773583403e-05,
"loss": 0.8585,
"step": 1115
},
{
"epoch": 0.537410461686631,
"grad_norm": 3.628781795501709,
"learning_rate": 9.298631568595674e-05,
"loss": 0.9069,
"step": 1116
},
{
"epoch": 0.5378920122795401,
"grad_norm": 1.936665415763855,
"learning_rate": 9.283072070189075e-05,
"loss": 0.6665,
"step": 1117
},
{
"epoch": 0.5383735628724493,
"grad_norm": 1.807746171951294,
"learning_rate": 9.267514316223234e-05,
"loss": 0.7337,
"step": 1118
},
{
"epoch": 0.5388551134653584,
"grad_norm": 2.178152322769165,
"learning_rate": 9.251958344553528e-05,
"loss": 0.701,
"step": 1119
},
{
"epoch": 0.5393366640582676,
"grad_norm": 2.263169765472412,
"learning_rate": 9.23640419303101e-05,
"loss": 0.6482,
"step": 1120
},
{
"epoch": 0.5398182146511767,
"grad_norm": 2.1158978939056396,
"learning_rate": 9.220851899502283e-05,
"loss": 0.9083,
"step": 1121
},
{
"epoch": 0.540299765244086,
"grad_norm": 2.0844359397888184,
"learning_rate": 9.205301501809448e-05,
"loss": 0.9297,
"step": 1122
},
{
"epoch": 0.5407813158369951,
"grad_norm": 1.7951438426971436,
"learning_rate": 9.189753037789987e-05,
"loss": 0.7921,
"step": 1123
},
{
"epoch": 0.5412628664299043,
"grad_norm": 2.2726521492004395,
"learning_rate": 9.174206545276677e-05,
"loss": 0.8874,
"step": 1124
},
{
"epoch": 0.5417444170228135,
"grad_norm": 3.393622875213623,
"learning_rate": 9.158662062097501e-05,
"loss": 1.0911,
"step": 1125
},
{
"epoch": 0.5422259676157226,
"grad_norm": 1.4040405750274658,
"learning_rate": 9.143119626075542e-05,
"loss": 0.5292,
"step": 1126
},
{
"epoch": 0.5427075182086318,
"grad_norm": 1.302949070930481,
"learning_rate": 9.127579275028914e-05,
"loss": 1.026,
"step": 1127
},
{
"epoch": 0.5431890688015409,
"grad_norm": 2.1794188022613525,
"learning_rate": 9.112041046770653e-05,
"loss": 0.8072,
"step": 1128
},
{
"epoch": 0.5436706193944502,
"grad_norm": 2.3347835540771484,
"learning_rate": 9.096504979108629e-05,
"loss": 0.8512,
"step": 1129
},
{
"epoch": 0.5441521699873593,
"grad_norm": 2.353959083557129,
"learning_rate": 9.080971109845458e-05,
"loss": 0.9363,
"step": 1130
},
{
"epoch": 0.5446337205802685,
"grad_norm": 2.1609857082366943,
"learning_rate": 9.0654394767784e-05,
"loss": 0.3391,
"step": 1131
},
{
"epoch": 0.5451152711731776,
"grad_norm": 3.1173667907714844,
"learning_rate": 9.049910117699281e-05,
"loss": 0.5835,
"step": 1132
},
{
"epoch": 0.5455968217660868,
"grad_norm": 3.2934017181396484,
"learning_rate": 9.034383070394393e-05,
"loss": 0.9396,
"step": 1133
},
{
"epoch": 0.5460783723589959,
"grad_norm": 3.2750277519226074,
"learning_rate": 9.0188583726444e-05,
"loss": 0.8517,
"step": 1134
},
{
"epoch": 0.5465599229519051,
"grad_norm": 0.8598925471305847,
"learning_rate": 9.00333606222425e-05,
"loss": 0.5358,
"step": 1135
},
{
"epoch": 0.5470414735448142,
"grad_norm": 2.2244086265563965,
"learning_rate": 8.987816176903082e-05,
"loss": 0.3203,
"step": 1136
},
{
"epoch": 0.5475230241377235,
"grad_norm": 2.2185556888580322,
"learning_rate": 8.972298754444136e-05,
"loss": 1.0547,
"step": 1137
},
{
"epoch": 0.5480045747306327,
"grad_norm": 1.47505784034729,
"learning_rate": 8.956783832604654e-05,
"loss": 0.4243,
"step": 1138
},
{
"epoch": 0.5484861253235418,
"grad_norm": 3.940340757369995,
"learning_rate": 8.941271449135806e-05,
"loss": 0.8955,
"step": 1139
},
{
"epoch": 0.548967675916451,
"grad_norm": 2.3822591304779053,
"learning_rate": 8.925761641782567e-05,
"loss": 0.6393,
"step": 1140
},
{
"epoch": 0.5494492265093601,
"grad_norm": 1.8161604404449463,
"learning_rate": 8.910254448283659e-05,
"loss": 0.4928,
"step": 1141
},
{
"epoch": 0.5499307771022693,
"grad_norm": 3.2381865978240967,
"learning_rate": 8.894749906371439e-05,
"loss": 0.862,
"step": 1142
},
{
"epoch": 0.5504123276951784,
"grad_norm": 2.2119295597076416,
"learning_rate": 8.87924805377181e-05,
"loss": 0.4778,
"step": 1143
},
{
"epoch": 0.5508938782880877,
"grad_norm": 2.175503730773926,
"learning_rate": 8.863748928204131e-05,
"loss": 0.3811,
"step": 1144
},
{
"epoch": 0.5513754288809968,
"grad_norm": 3.0040910243988037,
"learning_rate": 8.848252567381131e-05,
"loss": 0.5659,
"step": 1145
},
{
"epoch": 0.551856979473906,
"grad_norm": 2.7777600288391113,
"learning_rate": 8.83275900900881e-05,
"loss": 0.5106,
"step": 1146
},
{
"epoch": 0.5523385300668151,
"grad_norm": 4.878298282623291,
"learning_rate": 8.817268290786343e-05,
"loss": 0.5554,
"step": 1147
},
{
"epoch": 0.5528200806597243,
"grad_norm": 5.596711158752441,
"learning_rate": 8.801780450406002e-05,
"loss": 0.4911,
"step": 1148
},
{
"epoch": 0.5533016312526334,
"grad_norm": 3.1435718536376953,
"learning_rate": 8.786295525553053e-05,
"loss": 0.3324,
"step": 1149
},
{
"epoch": 0.5537831818455426,
"grad_norm": 3.044595956802368,
"learning_rate": 8.770813553905664e-05,
"loss": 0.6101,
"step": 1150
},
{
"epoch": 0.5542647324384519,
"grad_norm": 2.739715576171875,
"learning_rate": 8.755334573134829e-05,
"loss": 0.6972,
"step": 1151
},
{
"epoch": 0.554746283031361,
"grad_norm": 1.3641911745071411,
"learning_rate": 8.739858620904251e-05,
"loss": 0.4947,
"step": 1152
},
{
"epoch": 0.5552278336242702,
"grad_norm": 1.8812917470932007,
"learning_rate": 8.724385734870271e-05,
"loss": 0.8228,
"step": 1153
},
{
"epoch": 0.5557093842171793,
"grad_norm": 3.0910966396331787,
"learning_rate": 8.708915952681769e-05,
"loss": 0.5776,
"step": 1154
},
{
"epoch": 0.5561909348100885,
"grad_norm": 2.192817449569702,
"learning_rate": 8.693449311980074e-05,
"loss": 0.935,
"step": 1155
},
{
"epoch": 0.5566724854029976,
"grad_norm": 2.3270866870880127,
"learning_rate": 8.677985850398866e-05,
"loss": 0.5251,
"step": 1156
},
{
"epoch": 0.5571540359959068,
"grad_norm": 3.0047972202301025,
"learning_rate": 8.662525605564093e-05,
"loss": 0.9796,
"step": 1157
},
{
"epoch": 0.557635586588816,
"grad_norm": 2.3164725303649902,
"learning_rate": 8.647068615093875e-05,
"loss": 1.551,
"step": 1158
},
{
"epoch": 0.5581171371817252,
"grad_norm": 3.4601895809173584,
"learning_rate": 8.631614916598419e-05,
"loss": 0.7455,
"step": 1159
},
{
"epoch": 0.5585986877746343,
"grad_norm": 3.388256549835205,
"learning_rate": 8.616164547679906e-05,
"loss": 0.5484,
"step": 1160
},
{
"epoch": 0.5590802383675435,
"grad_norm": 2.2302229404449463,
"learning_rate": 8.600717545932435e-05,
"loss": 0.5789,
"step": 1161
},
{
"epoch": 0.5595617889604526,
"grad_norm": 2.5507445335388184,
"learning_rate": 8.5852739489419e-05,
"loss": 0.2962,
"step": 1162
},
{
"epoch": 0.5600433395533618,
"grad_norm": 2.2931394577026367,
"learning_rate": 8.569833794285915e-05,
"loss": 0.9057,
"step": 1163
},
{
"epoch": 0.5605248901462709,
"grad_norm": 2.3694357872009277,
"learning_rate": 8.554397119533714e-05,
"loss": 0.9051,
"step": 1164
},
{
"epoch": 0.5610064407391802,
"grad_norm": 2.3861167430877686,
"learning_rate": 8.538963962246069e-05,
"loss": 0.6481,
"step": 1165
},
{
"epoch": 0.5614879913320894,
"grad_norm": 3.3635268211364746,
"learning_rate": 8.523534359975189e-05,
"loss": 0.6873,
"step": 1166
},
{
"epoch": 0.5619695419249985,
"grad_norm": 2.3280465602874756,
"learning_rate": 8.508108350264635e-05,
"loss": 0.4409,
"step": 1167
},
{
"epoch": 0.5624510925179077,
"grad_norm": 2.3025784492492676,
"learning_rate": 8.492685970649228e-05,
"loss": 0.3629,
"step": 1168
},
{
"epoch": 0.5629326431108168,
"grad_norm": 4.259292125701904,
"learning_rate": 8.477267258654949e-05,
"loss": 0.7646,
"step": 1169
},
{
"epoch": 0.563414193703726,
"grad_norm": 3.045973777770996,
"learning_rate": 8.461852251798866e-05,
"loss": 0.8309,
"step": 1170
},
{
"epoch": 0.5638957442966351,
"grad_norm": 2.590165376663208,
"learning_rate": 8.44644098758902e-05,
"loss": 0.4435,
"step": 1171
},
{
"epoch": 0.5643772948895444,
"grad_norm": 2.0724105834960938,
"learning_rate": 8.431033503524354e-05,
"loss": 0.4976,
"step": 1172
},
{
"epoch": 0.5648588454824535,
"grad_norm": 3.144411087036133,
"learning_rate": 8.415629837094611e-05,
"loss": 0.9775,
"step": 1173
},
{
"epoch": 0.5653403960753627,
"grad_norm": 2.584644079208374,
"learning_rate": 8.400230025780243e-05,
"loss": 0.6065,
"step": 1174
},
{
"epoch": 0.5658219466682718,
"grad_norm": 1.8154007196426392,
"learning_rate": 8.384834107052321e-05,
"loss": 0.3035,
"step": 1175
},
{
"epoch": 0.566303497261181,
"grad_norm": 3.097371816635132,
"learning_rate": 8.369442118372447e-05,
"loss": 0.6747,
"step": 1176
},
{
"epoch": 0.5667850478540901,
"grad_norm": 1.322751522064209,
"learning_rate": 8.35405409719266e-05,
"loss": 0.6194,
"step": 1177
},
{
"epoch": 0.5672665984469993,
"grad_norm": 2.8619985580444336,
"learning_rate": 8.338670080955349e-05,
"loss": 0.9159,
"step": 1178
},
{
"epoch": 0.5677481490399086,
"grad_norm": 1.4597111940383911,
"learning_rate": 8.323290107093143e-05,
"loss": 0.5528,
"step": 1179
},
{
"epoch": 0.5682296996328177,
"grad_norm": 1.321386694908142,
"learning_rate": 8.307914213028856e-05,
"loss": 0.5454,
"step": 1180
},
{
"epoch": 0.5687112502257269,
"grad_norm": 2.653350591659546,
"learning_rate": 8.292542436175356e-05,
"loss": 0.6959,
"step": 1181
},
{
"epoch": 0.569192800818636,
"grad_norm": 3.2664124965667725,
"learning_rate": 8.277174813935508e-05,
"loss": 0.9298,
"step": 1182
},
{
"epoch": 0.5696743514115452,
"grad_norm": 3.0547754764556885,
"learning_rate": 8.261811383702061e-05,
"loss": 0.7422,
"step": 1183
},
{
"epoch": 0.5701559020044543,
"grad_norm": 2.19242000579834,
"learning_rate": 8.246452182857562e-05,
"loss": 0.7436,
"step": 1184
},
{
"epoch": 0.5706374525973635,
"grad_norm": 4.479813098907471,
"learning_rate": 8.231097248774274e-05,
"loss": 1.1304,
"step": 1185
},
{
"epoch": 0.5711190031902726,
"grad_norm": 2.662180185317993,
"learning_rate": 8.215746618814067e-05,
"loss": 0.6066,
"step": 1186
},
{
"epoch": 0.5716005537831819,
"grad_norm": 3.7930872440338135,
"learning_rate": 8.200400330328348e-05,
"loss": 0.7421,
"step": 1187
},
{
"epoch": 0.572082104376091,
"grad_norm": 2.9955811500549316,
"learning_rate": 8.185058420657957e-05,
"loss": 1.1659,
"step": 1188
},
{
"epoch": 0.5725636549690002,
"grad_norm": 1.8238601684570312,
"learning_rate": 8.16972092713308e-05,
"loss": 0.636,
"step": 1189
},
{
"epoch": 0.5730452055619093,
"grad_norm": 2.3906164169311523,
"learning_rate": 8.154387887073158e-05,
"loss": 0.4951,
"step": 1190
},
{
"epoch": 0.5735267561548185,
"grad_norm": 1.853758692741394,
"learning_rate": 8.139059337786792e-05,
"loss": 0.7715,
"step": 1191
},
{
"epoch": 0.5740083067477277,
"grad_norm": 2.8323585987091064,
"learning_rate": 8.12373531657166e-05,
"loss": 1.0706,
"step": 1192
},
{
"epoch": 0.5744898573406368,
"grad_norm": 1.3406877517700195,
"learning_rate": 8.108415860714418e-05,
"loss": 0.3461,
"step": 1193
},
{
"epoch": 0.5749714079335461,
"grad_norm": 2.031278371810913,
"learning_rate": 8.093101007490622e-05,
"loss": 0.8868,
"step": 1194
},
{
"epoch": 0.5754529585264552,
"grad_norm": 3.345834255218506,
"learning_rate": 8.077790794164619e-05,
"loss": 0.4278,
"step": 1195
},
{
"epoch": 0.5759345091193644,
"grad_norm": 2.130840301513672,
"learning_rate": 8.062485257989471e-05,
"loss": 1.0242,
"step": 1196
},
{
"epoch": 0.5764160597122735,
"grad_norm": 2.4846746921539307,
"learning_rate": 8.047184436206864e-05,
"loss": 0.7,
"step": 1197
},
{
"epoch": 0.5768976103051827,
"grad_norm": 2.193743944168091,
"learning_rate": 8.031888366046998e-05,
"loss": 0.6467,
"step": 1198
},
{
"epoch": 0.5773791608980918,
"grad_norm": 1.9895037412643433,
"learning_rate": 8.016597084728526e-05,
"loss": 0.7244,
"step": 1199
},
{
"epoch": 0.577860711491001,
"grad_norm": 2.5619122982025146,
"learning_rate": 8.001310629458443e-05,
"loss": 0.9385,
"step": 1200
},
{
"epoch": 0.5783422620839102,
"grad_norm": 1.9781352281570435,
"learning_rate": 7.986029037432002e-05,
"loss": 0.8172,
"step": 1201
},
{
"epoch": 0.5788238126768194,
"grad_norm": 3.2591843605041504,
"learning_rate": 7.970752345832623e-05,
"loss": 0.7278,
"step": 1202
},
{
"epoch": 0.5793053632697285,
"grad_norm": 1.6107450723648071,
"learning_rate": 7.9554805918318e-05,
"loss": 0.3799,
"step": 1203
},
{
"epoch": 0.5797869138626377,
"grad_norm": 3.175673484802246,
"learning_rate": 7.940213812589018e-05,
"loss": 0.5979,
"step": 1204
},
{
"epoch": 0.5802684644555468,
"grad_norm": 2.0690531730651855,
"learning_rate": 7.92495204525165e-05,
"loss": 0.6842,
"step": 1205
},
{
"epoch": 0.580750015048456,
"grad_norm": 3.2871673107147217,
"learning_rate": 7.909695326954878e-05,
"loss": 1.0002,
"step": 1206
},
{
"epoch": 0.5812315656413652,
"grad_norm": 1.0341432094573975,
"learning_rate": 7.894443694821602e-05,
"loss": 0.516,
"step": 1207
},
{
"epoch": 0.5817131162342744,
"grad_norm": 2.580730676651001,
"learning_rate": 7.879197185962339e-05,
"loss": 0.7898,
"step": 1208
},
{
"epoch": 0.5821946668271836,
"grad_norm": 4.864838600158691,
"learning_rate": 7.863955837475144e-05,
"loss": 0.8172,
"step": 1209
},
{
"epoch": 0.5826762174200927,
"grad_norm": 1.060434341430664,
"learning_rate": 7.848719686445515e-05,
"loss": 0.3784,
"step": 1210
},
{
"epoch": 0.5831577680130019,
"grad_norm": 3.191971778869629,
"learning_rate": 7.833488769946306e-05,
"loss": 0.8063,
"step": 1211
},
{
"epoch": 0.583639318605911,
"grad_norm": 2.525768518447876,
"learning_rate": 7.818263125037633e-05,
"loss": 0.6985,
"step": 1212
},
{
"epoch": 0.5841208691988202,
"grad_norm": 2.8149242401123047,
"learning_rate": 7.803042788766777e-05,
"loss": 1.064,
"step": 1213
},
{
"epoch": 0.5846024197917293,
"grad_norm": 3.0168797969818115,
"learning_rate": 7.787827798168115e-05,
"loss": 0.387,
"step": 1214
},
{
"epoch": 0.5850839703846386,
"grad_norm": 2.1874630451202393,
"learning_rate": 7.772618190263009e-05,
"loss": 0.5811,
"step": 1215
},
{
"epoch": 0.5855655209775477,
"grad_norm": 1.8578369617462158,
"learning_rate": 7.757414002059726e-05,
"loss": 0.5424,
"step": 1216
},
{
"epoch": 0.5860470715704569,
"grad_norm": 3.507887840270996,
"learning_rate": 7.742215270553349e-05,
"loss": 0.5704,
"step": 1217
},
{
"epoch": 0.586528622163366,
"grad_norm": 1.8217120170593262,
"learning_rate": 7.727022032725672e-05,
"loss": 0.72,
"step": 1218
},
{
"epoch": 0.5870101727562752,
"grad_norm": 1.2904176712036133,
"learning_rate": 7.711834325545135e-05,
"loss": 0.4966,
"step": 1219
},
{
"epoch": 0.5874917233491844,
"grad_norm": 1.9854986667633057,
"learning_rate": 7.696652185966711e-05,
"loss": 0.6202,
"step": 1220
},
{
"epoch": 0.5879732739420935,
"grad_norm": 2.831481456756592,
"learning_rate": 7.681475650931834e-05,
"loss": 0.568,
"step": 1221
},
{
"epoch": 0.5884548245350028,
"grad_norm": 2.528315305709839,
"learning_rate": 7.666304757368297e-05,
"loss": 0.9762,
"step": 1222
},
{
"epoch": 0.5889363751279119,
"grad_norm": 1.8668657541275024,
"learning_rate": 7.651139542190164e-05,
"loss": 0.7539,
"step": 1223
},
{
"epoch": 0.5894179257208211,
"grad_norm": 2.6514816284179688,
"learning_rate": 7.635980042297687e-05,
"loss": 0.6104,
"step": 1224
},
{
"epoch": 0.5898994763137302,
"grad_norm": 2.8228659629821777,
"learning_rate": 7.620826294577208e-05,
"loss": 0.5398,
"step": 1225
},
{
"epoch": 0.5903810269066394,
"grad_norm": 1.8238624334335327,
"learning_rate": 7.605678335901071e-05,
"loss": 0.4965,
"step": 1226
},
{
"epoch": 0.5908625774995485,
"grad_norm": 2.332958221435547,
"learning_rate": 7.59053620312754e-05,
"loss": 1.0528,
"step": 1227
},
{
"epoch": 0.5913441280924577,
"grad_norm": 3.5963058471679688,
"learning_rate": 7.575399933100697e-05,
"loss": 0.5706,
"step": 1228
},
{
"epoch": 0.5918256786853668,
"grad_norm": 3.345517873764038,
"learning_rate": 7.560269562650368e-05,
"loss": 1.0137,
"step": 1229
},
{
"epoch": 0.5923072292782761,
"grad_norm": 5.635433673858643,
"learning_rate": 7.54514512859201e-05,
"loss": 0.4088,
"step": 1230
},
{
"epoch": 0.5927887798711852,
"grad_norm": 2.0128109455108643,
"learning_rate": 7.530026667726645e-05,
"loss": 0.5574,
"step": 1231
},
{
"epoch": 0.5932703304640944,
"grad_norm": 2.09451961517334,
"learning_rate": 7.51491421684076e-05,
"loss": 0.9239,
"step": 1232
},
{
"epoch": 0.5937518810570036,
"grad_norm": 3.1210248470306396,
"learning_rate": 7.49980781270622e-05,
"loss": 0.9972,
"step": 1233
},
{
"epoch": 0.5942334316499127,
"grad_norm": 2.9423022270202637,
"learning_rate": 7.484707492080172e-05,
"loss": 0.9545,
"step": 1234
},
{
"epoch": 0.5947149822428219,
"grad_norm": 8.651620864868164,
"learning_rate": 7.469613291704962e-05,
"loss": 0.9859,
"step": 1235
},
{
"epoch": 0.595196532835731,
"grad_norm": 2.376633644104004,
"learning_rate": 7.45452524830805e-05,
"loss": 0.6038,
"step": 1236
},
{
"epoch": 0.5956780834286403,
"grad_norm": 1.0200681686401367,
"learning_rate": 7.439443398601903e-05,
"loss": 0.4735,
"step": 1237
},
{
"epoch": 0.5961596340215494,
"grad_norm": 3.08100962638855,
"learning_rate": 7.424367779283926e-05,
"loss": 0.9614,
"step": 1238
},
{
"epoch": 0.5966411846144586,
"grad_norm": 1.8693490028381348,
"learning_rate": 7.409298427036364e-05,
"loss": 0.4885,
"step": 1239
},
{
"epoch": 0.5971227352073677,
"grad_norm": 3.25297474861145,
"learning_rate": 7.39423537852621e-05,
"loss": 0.7068,
"step": 1240
},
{
"epoch": 0.5976042858002769,
"grad_norm": 2.2627036571502686,
"learning_rate": 7.379178670405123e-05,
"loss": 0.9651,
"step": 1241
},
{
"epoch": 0.598085836393186,
"grad_norm": 3.4235429763793945,
"learning_rate": 7.364128339309326e-05,
"loss": 0.9293,
"step": 1242
},
{
"epoch": 0.5985673869860952,
"grad_norm": 2.710484743118286,
"learning_rate": 7.349084421859533e-05,
"loss": 0.6263,
"step": 1243
},
{
"epoch": 0.5990489375790043,
"grad_norm": 2.2872800827026367,
"learning_rate": 7.334046954660852e-05,
"loss": 0.4224,
"step": 1244
},
{
"epoch": 0.5995304881719136,
"grad_norm": 1.7476857900619507,
"learning_rate": 7.31901597430269e-05,
"loss": 0.7634,
"step": 1245
},
{
"epoch": 0.6000120387648227,
"grad_norm": 2.7267417907714844,
"learning_rate": 7.303991517358678e-05,
"loss": 0.845,
"step": 1246
},
{
"epoch": 0.6004935893577319,
"grad_norm": 2.053980827331543,
"learning_rate": 7.288973620386568e-05,
"loss": 0.8618,
"step": 1247
},
{
"epoch": 0.6009751399506411,
"grad_norm": 1.9945694208145142,
"learning_rate": 7.273962319928151e-05,
"loss": 0.7425,
"step": 1248
},
{
"epoch": 0.6014566905435502,
"grad_norm": 1.3418025970458984,
"learning_rate": 7.258957652509171e-05,
"loss": 0.6352,
"step": 1249
},
{
"epoch": 0.6019382411364594,
"grad_norm": 2.392909288406372,
"learning_rate": 7.24395965463923e-05,
"loss": 0.4533,
"step": 1250
},
{
"epoch": 0.6024197917293685,
"grad_norm": 2.3010342121124268,
"learning_rate": 7.228968362811702e-05,
"loss": 0.4342,
"step": 1251
},
{
"epoch": 0.6029013423222778,
"grad_norm": 3.485913038253784,
"learning_rate": 7.21398381350364e-05,
"loss": 0.9649,
"step": 1252
},
{
"epoch": 0.6033828929151869,
"grad_norm": 0.9633323550224304,
"learning_rate": 7.199006043175698e-05,
"loss": 0.7225,
"step": 1253
},
{
"epoch": 0.6038644435080961,
"grad_norm": 2.767479658126831,
"learning_rate": 7.184035088272028e-05,
"loss": 0.5086,
"step": 1254
},
{
"epoch": 0.6043459941010052,
"grad_norm": 3.417263984680176,
"learning_rate": 7.169070985220208e-05,
"loss": 0.7542,
"step": 1255
},
{
"epoch": 0.6048275446939144,
"grad_norm": 1.0457793474197388,
"learning_rate": 7.154113770431132e-05,
"loss": 1.0051,
"step": 1256
},
{
"epoch": 0.6053090952868235,
"grad_norm": 1.780932068824768,
"learning_rate": 7.13916348029894e-05,
"loss": 0.7171,
"step": 1257
},
{
"epoch": 0.6057906458797327,
"grad_norm": 3.2504794597625732,
"learning_rate": 7.124220151200926e-05,
"loss": 0.4477,
"step": 1258
},
{
"epoch": 0.6062721964726419,
"grad_norm": 3.2658979892730713,
"learning_rate": 7.10928381949744e-05,
"loss": 0.5208,
"step": 1259
},
{
"epoch": 0.6067537470655511,
"grad_norm": 2.36083984375,
"learning_rate": 7.094354521531807e-05,
"loss": 0.8187,
"step": 1260
},
{
"epoch": 0.6072352976584603,
"grad_norm": 1.9257503747940063,
"learning_rate": 7.079432293630244e-05,
"loss": 0.9669,
"step": 1261
},
{
"epoch": 0.6077168482513694,
"grad_norm": 3.070887804031372,
"learning_rate": 7.064517172101753e-05,
"loss": 0.8842,
"step": 1262
},
{
"epoch": 0.6081983988442786,
"grad_norm": 2.950284481048584,
"learning_rate": 7.04960919323806e-05,
"loss": 0.6997,
"step": 1263
},
{
"epoch": 0.6086799494371877,
"grad_norm": 3.656165838241577,
"learning_rate": 7.034708393313493e-05,
"loss": 0.7774,
"step": 1264
},
{
"epoch": 0.609161500030097,
"grad_norm": 3.879746198654175,
"learning_rate": 7.019814808584928e-05,
"loss": 0.6871,
"step": 1265
},
{
"epoch": 0.609643050623006,
"grad_norm": 2.684112310409546,
"learning_rate": 7.004928475291678e-05,
"loss": 0.36,
"step": 1266
},
{
"epoch": 0.6101246012159153,
"grad_norm": 4.9579644203186035,
"learning_rate": 6.990049429655412e-05,
"loss": 0.888,
"step": 1267
},
{
"epoch": 0.6106061518088244,
"grad_norm": 2.2652103900909424,
"learning_rate": 6.97517770788007e-05,
"loss": 0.6242,
"step": 1268
},
{
"epoch": 0.6110877024017336,
"grad_norm": 2.9428718090057373,
"learning_rate": 6.960313346151761e-05,
"loss": 0.5431,
"step": 1269
},
{
"epoch": 0.6115692529946427,
"grad_norm": 3.530306339263916,
"learning_rate": 6.9454563806387e-05,
"loss": 1.0434,
"step": 1270
},
{
"epoch": 0.6120508035875519,
"grad_norm": 1.2338889837265015,
"learning_rate": 6.930606847491094e-05,
"loss": 0.7309,
"step": 1271
},
{
"epoch": 0.612532354180461,
"grad_norm": 2.873732328414917,
"learning_rate": 6.915764782841072e-05,
"loss": 0.8321,
"step": 1272
},
{
"epoch": 0.6130139047733703,
"grad_norm": 2.2155025005340576,
"learning_rate": 6.900930222802588e-05,
"loss": 0.3917,
"step": 1273
},
{
"epoch": 0.6134954553662795,
"grad_norm": 1.7441500425338745,
"learning_rate": 6.886103203471337e-05,
"loss": 0.587,
"step": 1274
},
{
"epoch": 0.6139770059591886,
"grad_norm": 3.1032984256744385,
"learning_rate": 6.871283760924665e-05,
"loss": 0.6219,
"step": 1275
},
{
"epoch": 0.6144585565520978,
"grad_norm": 1.5018118619918823,
"learning_rate": 6.856471931221478e-05,
"loss": 0.8532,
"step": 1276
},
{
"epoch": 0.6149401071450069,
"grad_norm": 2.236863851547241,
"learning_rate": 6.841667750402162e-05,
"loss": 0.4704,
"step": 1277
},
{
"epoch": 0.6154216577379161,
"grad_norm": 2.4868059158325195,
"learning_rate": 6.826871254488496e-05,
"loss": 0.688,
"step": 1278
},
{
"epoch": 0.6159032083308252,
"grad_norm": 1.9790339469909668,
"learning_rate": 6.812082479483553e-05,
"loss": 0.2572,
"step": 1279
},
{
"epoch": 0.6163847589237345,
"grad_norm": 8.326330184936523,
"learning_rate": 6.797301461371625e-05,
"loss": 0.7398,
"step": 1280
},
{
"epoch": 0.6168663095166436,
"grad_norm": 2.0423824787139893,
"learning_rate": 6.782528236118124e-05,
"loss": 0.6242,
"step": 1281
},
{
"epoch": 0.6173478601095528,
"grad_norm": 5.033036708831787,
"learning_rate": 6.767762839669503e-05,
"loss": 0.9255,
"step": 1282
},
{
"epoch": 0.6178294107024619,
"grad_norm": 1.7515184879302979,
"learning_rate": 6.753005307953167e-05,
"loss": 0.536,
"step": 1283
},
{
"epoch": 0.6183109612953711,
"grad_norm": 3.3410611152648926,
"learning_rate": 6.738255676877381e-05,
"loss": 0.6655,
"step": 1284
},
{
"epoch": 0.6187925118882802,
"grad_norm": 2.5926554203033447,
"learning_rate": 6.723513982331195e-05,
"loss": 0.7555,
"step": 1285
},
{
"epoch": 0.6192740624811894,
"grad_norm": 3.253159761428833,
"learning_rate": 6.708780260184333e-05,
"loss": 0.5316,
"step": 1286
},
{
"epoch": 0.6197556130740985,
"grad_norm": 3.1270864009857178,
"learning_rate": 6.694054546287132e-05,
"loss": 0.6255,
"step": 1287
},
{
"epoch": 0.6202371636670078,
"grad_norm": 5.128495216369629,
"learning_rate": 6.679336876470441e-05,
"loss": 0.7771,
"step": 1288
},
{
"epoch": 0.620718714259917,
"grad_norm": 1.7940768003463745,
"learning_rate": 6.664627286545535e-05,
"loss": 0.7788,
"step": 1289
},
{
"epoch": 0.6212002648528261,
"grad_norm": 2.9516167640686035,
"learning_rate": 6.649925812304025e-05,
"loss": 0.3909,
"step": 1290
},
{
"epoch": 0.6216818154457353,
"grad_norm": 1.452250599861145,
"learning_rate": 6.635232489517782e-05,
"loss": 0.6476,
"step": 1291
},
{
"epoch": 0.6221633660386444,
"grad_norm": 1.570677638053894,
"learning_rate": 6.620547353938836e-05,
"loss": 0.4986,
"step": 1292
},
{
"epoch": 0.6226449166315536,
"grad_norm": 2.5657029151916504,
"learning_rate": 6.605870441299302e-05,
"loss": 0.7346,
"step": 1293
},
{
"epoch": 0.6231264672244627,
"grad_norm": 3.6876044273376465,
"learning_rate": 6.591201787311285e-05,
"loss": 1.2753,
"step": 1294
},
{
"epoch": 0.623608017817372,
"grad_norm": 2.211846113204956,
"learning_rate": 6.57654142766679e-05,
"loss": 0.8497,
"step": 1295
},
{
"epoch": 0.6240895684102811,
"grad_norm": 2.1840100288391113,
"learning_rate": 6.561889398037643e-05,
"loss": 0.4188,
"step": 1296
},
{
"epoch": 0.6245711190031903,
"grad_norm": 1.7685606479644775,
"learning_rate": 6.547245734075403e-05,
"loss": 0.6529,
"step": 1297
},
{
"epoch": 0.6250526695960994,
"grad_norm": 1.6937798261642456,
"learning_rate": 6.532610471411274e-05,
"loss": 0.5592,
"step": 1298
},
{
"epoch": 0.6255342201890086,
"grad_norm": 2.8329896926879883,
"learning_rate": 6.517983645656014e-05,
"loss": 0.6343,
"step": 1299
},
{
"epoch": 0.6260157707819177,
"grad_norm": 2.330113172531128,
"learning_rate": 6.503365292399857e-05,
"loss": 1.0539,
"step": 1300
},
{
"epoch": 0.6264973213748269,
"grad_norm": 2.074939489364624,
"learning_rate": 6.488755447212418e-05,
"loss": 0.7005,
"step": 1301
},
{
"epoch": 0.6269788719677362,
"grad_norm": 1.091862678527832,
"learning_rate": 6.474154145642612e-05,
"loss": 0.9088,
"step": 1302
},
{
"epoch": 0.6274604225606453,
"grad_norm": 2.839646577835083,
"learning_rate": 6.459561423218561e-05,
"loss": 0.8255,
"step": 1303
},
{
"epoch": 0.6279419731535545,
"grad_norm": 2.919734477996826,
"learning_rate": 6.444977315447521e-05,
"loss": 0.5693,
"step": 1304
},
{
"epoch": 0.6284235237464636,
"grad_norm": 3.9232397079467773,
"learning_rate": 6.430401857815776e-05,
"loss": 0.9091,
"step": 1305
},
{
"epoch": 0.6289050743393728,
"grad_norm": 2.6297950744628906,
"learning_rate": 6.415835085788575e-05,
"loss": 0.6015,
"step": 1306
},
{
"epoch": 0.6293866249322819,
"grad_norm": 4.350391387939453,
"learning_rate": 6.401277034810017e-05,
"loss": 0.4089,
"step": 1307
},
{
"epoch": 0.6298681755251911,
"grad_norm": 2.0582656860351562,
"learning_rate": 6.386727740302994e-05,
"loss": 0.5737,
"step": 1308
},
{
"epoch": 0.6303497261181003,
"grad_norm": 2.8593883514404297,
"learning_rate": 6.37218723766909e-05,
"loss": 0.5349,
"step": 1309
},
{
"epoch": 0.6308312767110095,
"grad_norm": 2.048414945602417,
"learning_rate": 6.357655562288488e-05,
"loss": 0.928,
"step": 1310
},
{
"epoch": 0.6313128273039186,
"grad_norm": 2.7405006885528564,
"learning_rate": 6.343132749519902e-05,
"loss": 0.9519,
"step": 1311
},
{
"epoch": 0.6317943778968278,
"grad_norm": 1.9664356708526611,
"learning_rate": 6.328618834700474e-05,
"loss": 0.5531,
"step": 1312
},
{
"epoch": 0.6322759284897369,
"grad_norm": 3.6196768283843994,
"learning_rate": 6.314113853145703e-05,
"loss": 0.9089,
"step": 1313
},
{
"epoch": 0.6327574790826461,
"grad_norm": 2.040229082107544,
"learning_rate": 6.299617840149349e-05,
"loss": 0.7539,
"step": 1314
},
{
"epoch": 0.6332390296755553,
"grad_norm": 3.340404748916626,
"learning_rate": 6.285130830983339e-05,
"loss": 0.4569,
"step": 1315
},
{
"epoch": 0.6337205802684645,
"grad_norm": 1.9362350702285767,
"learning_rate": 6.270652860897704e-05,
"loss": 0.6094,
"step": 1316
},
{
"epoch": 0.6342021308613737,
"grad_norm": 3.0248935222625732,
"learning_rate": 6.25618396512048e-05,
"loss": 0.9306,
"step": 1317
},
{
"epoch": 0.6346836814542828,
"grad_norm": 4.514108657836914,
"learning_rate": 6.24172417885762e-05,
"loss": 0.672,
"step": 1318
},
{
"epoch": 0.635165232047192,
"grad_norm": 1.5764905214309692,
"learning_rate": 6.227273537292911e-05,
"loss": 0.8099,
"step": 1319
},
{
"epoch": 0.6356467826401011,
"grad_norm": 1.775439739227295,
"learning_rate": 6.212832075587891e-05,
"loss": 0.6518,
"step": 1320
},
{
"epoch": 0.6361283332330103,
"grad_norm": 2.8492040634155273,
"learning_rate": 6.19839982888176e-05,
"loss": 0.6318,
"step": 1321
},
{
"epoch": 0.6366098838259194,
"grad_norm": 3.787897825241089,
"learning_rate": 6.183976832291296e-05,
"loss": 0.546,
"step": 1322
},
{
"epoch": 0.6370914344188287,
"grad_norm": 1.812030553817749,
"learning_rate": 6.169563120910775e-05,
"loss": 0.9272,
"step": 1323
},
{
"epoch": 0.6375729850117378,
"grad_norm": 3.6685822010040283,
"learning_rate": 6.155158729811867e-05,
"loss": 0.9627,
"step": 1324
},
{
"epoch": 0.638054535604647,
"grad_norm": 2.6122891902923584,
"learning_rate": 6.140763694043578e-05,
"loss": 0.6109,
"step": 1325
},
{
"epoch": 0.6385360861975561,
"grad_norm": 1.1181974411010742,
"learning_rate": 6.126378048632139e-05,
"loss": 0.6805,
"step": 1326
},
{
"epoch": 0.6390176367904653,
"grad_norm": 2.0275444984436035,
"learning_rate": 6.112001828580944e-05,
"loss": 0.9841,
"step": 1327
},
{
"epoch": 0.6394991873833744,
"grad_norm": 2.3112661838531494,
"learning_rate": 6.0976350688704455e-05,
"loss": 0.4051,
"step": 1328
},
{
"epoch": 0.6399807379762836,
"grad_norm": 2.2878177165985107,
"learning_rate": 6.083277804458072e-05,
"loss": 0.6933,
"step": 1329
},
{
"epoch": 0.6404622885691929,
"grad_norm": 2.3939578533172607,
"learning_rate": 6.068930070278159e-05,
"loss": 0.7104,
"step": 1330
},
{
"epoch": 0.640943839162102,
"grad_norm": 2.7695722579956055,
"learning_rate": 6.054591901241846e-05,
"loss": 0.592,
"step": 1331
},
{
"epoch": 0.6414253897550112,
"grad_norm": 1.4362547397613525,
"learning_rate": 6.040263332237002e-05,
"loss": 0.7355,
"step": 1332
},
{
"epoch": 0.6419069403479203,
"grad_norm": 1.8200515508651733,
"learning_rate": 6.025944398128137e-05,
"loss": 0.6226,
"step": 1333
},
{
"epoch": 0.6423884909408295,
"grad_norm": 3.0382542610168457,
"learning_rate": 6.011635133756309e-05,
"loss": 0.5577,
"step": 1334
},
{
"epoch": 0.6428700415337386,
"grad_norm": 1.6607255935668945,
"learning_rate": 5.99733557393906e-05,
"loss": 0.7285,
"step": 1335
},
{
"epoch": 0.6433515921266478,
"grad_norm": 5.5869951248168945,
"learning_rate": 5.983045753470308e-05,
"loss": 1.0281,
"step": 1336
},
{
"epoch": 0.6438331427195569,
"grad_norm": 1.662786841392517,
"learning_rate": 5.96876570712028e-05,
"loss": 0.9,
"step": 1337
},
{
"epoch": 0.6443146933124662,
"grad_norm": 1.6657735109329224,
"learning_rate": 5.954495469635417e-05,
"loss": 0.3676,
"step": 1338
},
{
"epoch": 0.6447962439053753,
"grad_norm": 2.297683000564575,
"learning_rate": 5.940235075738296e-05,
"loss": 0.8609,
"step": 1339
},
{
"epoch": 0.6452777944982845,
"grad_norm": 3.4080722332000732,
"learning_rate": 5.925984560127542e-05,
"loss": 1.11,
"step": 1340
},
{
"epoch": 0.6457593450911936,
"grad_norm": 5.633896350860596,
"learning_rate": 5.911743957477739e-05,
"loss": 1.1069,
"step": 1341
},
{
"epoch": 0.6462408956841028,
"grad_norm": 3.4594554901123047,
"learning_rate": 5.897513302439355e-05,
"loss": 0.5313,
"step": 1342
},
{
"epoch": 0.646722446277012,
"grad_norm": 2.593113660812378,
"learning_rate": 5.883292629638651e-05,
"loss": 0.7902,
"step": 1343
},
{
"epoch": 0.6472039968699211,
"grad_norm": 1.7572481632232666,
"learning_rate": 5.869081973677604e-05,
"loss": 0.6139,
"step": 1344
},
{
"epoch": 0.6476855474628304,
"grad_norm": 2.4023494720458984,
"learning_rate": 5.8548813691338134e-05,
"loss": 0.9859,
"step": 1345
},
{
"epoch": 0.6481670980557395,
"grad_norm": 2.5072319507598877,
"learning_rate": 5.84069085056042e-05,
"loss": 1.0116,
"step": 1346
},
{
"epoch": 0.6486486486486487,
"grad_norm": 4.375543117523193,
"learning_rate": 5.826510452486027e-05,
"loss": 0.4556,
"step": 1347
},
{
"epoch": 0.6491301992415578,
"grad_norm": 2.4621849060058594,
"learning_rate": 5.81234020941461e-05,
"loss": 0.422,
"step": 1348
},
{
"epoch": 0.649611749834467,
"grad_norm": 3.9239776134490967,
"learning_rate": 5.798180155825437e-05,
"loss": 0.8455,
"step": 1349
},
{
"epoch": 0.6500933004273761,
"grad_norm": 2.0162253379821777,
"learning_rate": 5.784030326172981e-05,
"loss": 0.9106,
"step": 1350
},
{
"epoch": 0.6505748510202853,
"grad_norm": 2.1763408184051514,
"learning_rate": 5.7698907548868395e-05,
"loss": 0.3975,
"step": 1351
},
{
"epoch": 0.6510564016131944,
"grad_norm": 1.5609049797058105,
"learning_rate": 5.755761476371653e-05,
"loss": 0.4149,
"step": 1352
},
{
"epoch": 0.6515379522061037,
"grad_norm": 2.1389899253845215,
"learning_rate": 5.741642525007003e-05,
"loss": 1.0683,
"step": 1353
},
{
"epoch": 0.6520195027990128,
"grad_norm": 2.574646472930908,
"learning_rate": 5.727533935147359e-05,
"loss": 0.6677,
"step": 1354
},
{
"epoch": 0.652501053391922,
"grad_norm": 3.0155136585235596,
"learning_rate": 5.713435741121975e-05,
"loss": 0.5586,
"step": 1355
},
{
"epoch": 0.6529826039848312,
"grad_norm": 2.190906524658203,
"learning_rate": 5.699347977234799e-05,
"loss": 0.7389,
"step": 1356
},
{
"epoch": 0.6534641545777403,
"grad_norm": 3.7313098907470703,
"learning_rate": 5.685270677764412e-05,
"loss": 0.6318,
"step": 1357
},
{
"epoch": 0.6539457051706495,
"grad_norm": 1.6453399658203125,
"learning_rate": 5.671203876963931e-05,
"loss": 0.6455,
"step": 1358
},
{
"epoch": 0.6544272557635586,
"grad_norm": 2.063249111175537,
"learning_rate": 5.657147609060924e-05,
"loss": 0.7916,
"step": 1359
},
{
"epoch": 0.6549088063564679,
"grad_norm": 2.5297508239746094,
"learning_rate": 5.643101908257333e-05,
"loss": 0.7939,
"step": 1360
},
{
"epoch": 0.655390356949377,
"grad_norm": 3.1960248947143555,
"learning_rate": 5.629066808729385e-05,
"loss": 0.4917,
"step": 1361
},
{
"epoch": 0.6558719075422862,
"grad_norm": 2.5569260120391846,
"learning_rate": 5.6150423446275144e-05,
"loss": 0.54,
"step": 1362
},
{
"epoch": 0.6563534581351953,
"grad_norm": 2.1092185974121094,
"learning_rate": 5.601028550076277e-05,
"loss": 0.5214,
"step": 1363
},
{
"epoch": 0.6568350087281045,
"grad_norm": 1.965410828590393,
"learning_rate": 5.587025459174271e-05,
"loss": 0.5952,
"step": 1364
},
{
"epoch": 0.6573165593210136,
"grad_norm": 1.7591605186462402,
"learning_rate": 5.573033105994038e-05,
"loss": 0.7113,
"step": 1365
},
{
"epoch": 0.6577981099139228,
"grad_norm": 1.566158652305603,
"learning_rate": 5.559051524582002e-05,
"loss": 0.7087,
"step": 1366
},
{
"epoch": 0.658279660506832,
"grad_norm": 2.3267264366149902,
"learning_rate": 5.5450807489583777e-05,
"loss": 0.673,
"step": 1367
},
{
"epoch": 0.6587612110997412,
"grad_norm": 2.1096274852752686,
"learning_rate": 5.531120813117085e-05,
"loss": 0.6511,
"step": 1368
},
{
"epoch": 0.6592427616926503,
"grad_norm": 2.117785692214966,
"learning_rate": 5.517171751025667e-05,
"loss": 0.6863,
"step": 1369
},
{
"epoch": 0.6597243122855595,
"grad_norm": 2.8077008724212646,
"learning_rate": 5.5032335966252103e-05,
"loss": 0.3785,
"step": 1370
},
{
"epoch": 0.6602058628784687,
"grad_norm": 3.291800022125244,
"learning_rate": 5.489306383830258e-05,
"loss": 0.4787,
"step": 1371
},
{
"epoch": 0.6606874134713778,
"grad_norm": 2.1516401767730713,
"learning_rate": 5.475390146528738e-05,
"loss": 0.6011,
"step": 1372
},
{
"epoch": 0.661168964064287,
"grad_norm": 1.5693684816360474,
"learning_rate": 5.461484918581858e-05,
"loss": 0.4216,
"step": 1373
},
{
"epoch": 0.6616505146571962,
"grad_norm": 3.2931742668151855,
"learning_rate": 5.4475907338240494e-05,
"loss": 0.4253,
"step": 1374
},
{
"epoch": 0.6621320652501054,
"grad_norm": 4.190717697143555,
"learning_rate": 5.43370762606287e-05,
"loss": 1.0326,
"step": 1375
},
{
"epoch": 0.6626136158430145,
"grad_norm": 2.5259127616882324,
"learning_rate": 5.4198356290789276e-05,
"loss": 0.738,
"step": 1376
},
{
"epoch": 0.6630951664359237,
"grad_norm": 3.3138790130615234,
"learning_rate": 5.405974776625785e-05,
"loss": 0.4473,
"step": 1377
},
{
"epoch": 0.6635767170288328,
"grad_norm": 1.9713718891143799,
"learning_rate": 5.392125102429899e-05,
"loss": 0.6931,
"step": 1378
},
{
"epoch": 0.664058267621742,
"grad_norm": 1.5703426599502563,
"learning_rate": 5.378286640190522e-05,
"loss": 0.6073,
"step": 1379
},
{
"epoch": 0.6645398182146511,
"grad_norm": 2.653319835662842,
"learning_rate": 5.364459423579629e-05,
"loss": 0.7751,
"step": 1380
},
{
"epoch": 0.6650213688075604,
"grad_norm": 1.566805124282837,
"learning_rate": 5.350643486241825e-05,
"loss": 0.4636,
"step": 1381
},
{
"epoch": 0.6655029194004695,
"grad_norm": 1.8555259704589844,
"learning_rate": 5.33683886179428e-05,
"loss": 0.4216,
"step": 1382
},
{
"epoch": 0.6659844699933787,
"grad_norm": 5.087174892425537,
"learning_rate": 5.3230455838266266e-05,
"loss": 0.5842,
"step": 1383
},
{
"epoch": 0.6664660205862879,
"grad_norm": 3.006080150604248,
"learning_rate": 5.309263685900898e-05,
"loss": 0.4825,
"step": 1384
},
{
"epoch": 0.666947571179197,
"grad_norm": 1.3487999439239502,
"learning_rate": 5.295493201551433e-05,
"loss": 0.4206,
"step": 1385
},
{
"epoch": 0.6674291217721062,
"grad_norm": 3.11458683013916,
"learning_rate": 5.281734164284802e-05,
"loss": 0.7871,
"step": 1386
},
{
"epoch": 0.6679106723650153,
"grad_norm": 2.8389225006103516,
"learning_rate": 5.26798660757971e-05,
"loss": 0.7236,
"step": 1387
},
{
"epoch": 0.6683922229579246,
"grad_norm": 2.2241663932800293,
"learning_rate": 5.2542505648869434e-05,
"loss": 0.7008,
"step": 1388
},
{
"epoch": 0.6688737735508337,
"grad_norm": 1.7703624963760376,
"learning_rate": 5.240526069629265e-05,
"loss": 0.7416,
"step": 1389
},
{
"epoch": 0.6693553241437429,
"grad_norm": 2.581017017364502,
"learning_rate": 5.22681315520134e-05,
"loss": 1.014,
"step": 1390
},
{
"epoch": 0.669836874736652,
"grad_norm": 1.8126165866851807,
"learning_rate": 5.213111854969661e-05,
"loss": 0.7268,
"step": 1391
},
{
"epoch": 0.6703184253295612,
"grad_norm": 2.5835635662078857,
"learning_rate": 5.199422202272448e-05,
"loss": 0.6623,
"step": 1392
},
{
"epoch": 0.6707999759224703,
"grad_norm": 1.6041021347045898,
"learning_rate": 5.185744230419589e-05,
"loss": 0.6665,
"step": 1393
},
{
"epoch": 0.6712815265153795,
"grad_norm": 1.2473118305206299,
"learning_rate": 5.172077972692553e-05,
"loss": 0.5992,
"step": 1394
},
{
"epoch": 0.6717630771082886,
"grad_norm": 2.415090560913086,
"learning_rate": 5.1584234623442974e-05,
"loss": 0.7947,
"step": 1395
},
{
"epoch": 0.6722446277011979,
"grad_norm": 1.156660795211792,
"learning_rate": 5.1447807325992025e-05,
"loss": 0.4969,
"step": 1396
},
{
"epoch": 0.6727261782941071,
"grad_norm": 1.7031502723693848,
"learning_rate": 5.13114981665298e-05,
"loss": 0.7214,
"step": 1397
},
{
"epoch": 0.6732077288870162,
"grad_norm": 1.3150161504745483,
"learning_rate": 5.117530747672603e-05,
"loss": 0.6689,
"step": 1398
},
{
"epoch": 0.6736892794799254,
"grad_norm": 4.105159282684326,
"learning_rate": 5.103923558796203e-05,
"loss": 0.8401,
"step": 1399
},
{
"epoch": 0.6741708300728345,
"grad_norm": 1.6519443988800049,
"learning_rate": 5.090328283133019e-05,
"loss": 0.603,
"step": 1400
},
{
"epoch": 0.6746523806657437,
"grad_norm": 2.9713187217712402,
"learning_rate": 5.0767449537632986e-05,
"loss": 0.5331,
"step": 1401
},
{
"epoch": 0.6751339312586528,
"grad_norm": 2.6898117065429688,
"learning_rate": 5.06317360373822e-05,
"loss": 0.7912,
"step": 1402
},
{
"epoch": 0.6756154818515621,
"grad_norm": 2.4432663917541504,
"learning_rate": 5.049614266079813e-05,
"loss": 0.5751,
"step": 1403
},
{
"epoch": 0.6760970324444712,
"grad_norm": 2.470055103302002,
"learning_rate": 5.036066973780882e-05,
"loss": 0.483,
"step": 1404
},
{
"epoch": 0.6765785830373804,
"grad_norm": 1.4328922033309937,
"learning_rate": 5.022531759804918e-05,
"loss": 0.6776,
"step": 1405
},
{
"epoch": 0.6770601336302895,
"grad_norm": 3.8581573963165283,
"learning_rate": 5.009008657086025e-05,
"loss": 1.1248,
"step": 1406
},
{
"epoch": 0.6775416842231987,
"grad_norm": 2.339750289916992,
"learning_rate": 4.9954976985288395e-05,
"loss": 0.644,
"step": 1407
},
{
"epoch": 0.6780232348161078,
"grad_norm": 4.777864456176758,
"learning_rate": 4.981998917008448e-05,
"loss": 0.6807,
"step": 1408
},
{
"epoch": 0.678504785409017,
"grad_norm": 1.9307043552398682,
"learning_rate": 4.9685123453703e-05,
"loss": 0.6934,
"step": 1409
},
{
"epoch": 0.6789863360019261,
"grad_norm": 2.9564454555511475,
"learning_rate": 4.955038016430149e-05,
"loss": 0.6737,
"step": 1410
},
{
"epoch": 0.6794678865948354,
"grad_norm": 2.540689468383789,
"learning_rate": 4.9415759629739455e-05,
"loss": 0.5258,
"step": 1411
},
{
"epoch": 0.6799494371877446,
"grad_norm": 1.9452462196350098,
"learning_rate": 4.928126217757782e-05,
"loss": 0.9632,
"step": 1412
},
{
"epoch": 0.6804309877806537,
"grad_norm": 2.1210763454437256,
"learning_rate": 4.914688813507797e-05,
"loss": 0.981,
"step": 1413
},
{
"epoch": 0.6809125383735629,
"grad_norm": 2.85506010055542,
"learning_rate": 4.901263782920105e-05,
"loss": 0.6188,
"step": 1414
},
{
"epoch": 0.681394088966472,
"grad_norm": 2.3669497966766357,
"learning_rate": 4.887851158660706e-05,
"loss": 0.4394,
"step": 1415
},
{
"epoch": 0.6818756395593812,
"grad_norm": 1.358422040939331,
"learning_rate": 4.8744509733654184e-05,
"loss": 0.6346,
"step": 1416
},
{
"epoch": 0.6823571901522903,
"grad_norm": 1.982836127281189,
"learning_rate": 4.861063259639793e-05,
"loss": 0.5481,
"step": 1417
},
{
"epoch": 0.6828387407451996,
"grad_norm": 2.6741561889648438,
"learning_rate": 4.847688050059033e-05,
"loss": 0.7687,
"step": 1418
},
{
"epoch": 0.6833202913381087,
"grad_norm": 2.1041994094848633,
"learning_rate": 4.8343253771679155e-05,
"loss": 0.6466,
"step": 1419
},
{
"epoch": 0.6838018419310179,
"grad_norm": 1.9289573431015015,
"learning_rate": 4.82097527348072e-05,
"loss": 0.8381,
"step": 1420
},
{
"epoch": 0.684283392523927,
"grad_norm": 1.3552838563919067,
"learning_rate": 4.8076377714811284e-05,
"loss": 0.654,
"step": 1421
},
{
"epoch": 0.6847649431168362,
"grad_norm": 2.0205588340759277,
"learning_rate": 4.7943129036221735e-05,
"loss": 0.6172,
"step": 1422
},
{
"epoch": 0.6852464937097453,
"grad_norm": 1.8615128993988037,
"learning_rate": 4.781000702326142e-05,
"loss": 0.502,
"step": 1423
},
{
"epoch": 0.6857280443026545,
"grad_norm": 3.413642406463623,
"learning_rate": 4.767701199984497e-05,
"loss": 0.7401,
"step": 1424
},
{
"epoch": 0.6862095948955638,
"grad_norm": 1.818594217300415,
"learning_rate": 4.7544144289578066e-05,
"loss": 0.3476,
"step": 1425
},
{
"epoch": 0.6866911454884729,
"grad_norm": 2.0355210304260254,
"learning_rate": 4.7411404215756594e-05,
"loss": 0.6143,
"step": 1426
},
{
"epoch": 0.6871726960813821,
"grad_norm": 2.3590846061706543,
"learning_rate": 4.7278792101365866e-05,
"loss": 0.7234,
"step": 1427
},
{
"epoch": 0.6876542466742912,
"grad_norm": 4.059364318847656,
"learning_rate": 4.714630826907985e-05,
"loss": 0.8293,
"step": 1428
},
{
"epoch": 0.6881357972672004,
"grad_norm": 5.450283050537109,
"learning_rate": 4.701395304126038e-05,
"loss": 0.9174,
"step": 1429
},
{
"epoch": 0.6886173478601095,
"grad_norm": 1.6425899267196655,
"learning_rate": 4.6881726739956375e-05,
"loss": 0.482,
"step": 1430
},
{
"epoch": 0.6890988984530187,
"grad_norm": 1.5643469095230103,
"learning_rate": 4.6749629686902984e-05,
"loss": 0.8827,
"step": 1431
},
{
"epoch": 0.6895804490459279,
"grad_norm": 4.321664333343506,
"learning_rate": 4.661766220352097e-05,
"loss": 0.5119,
"step": 1432
},
{
"epoch": 0.6900619996388371,
"grad_norm": 3.4666759967803955,
"learning_rate": 4.64858246109157e-05,
"loss": 1.1991,
"step": 1433
},
{
"epoch": 0.6905435502317462,
"grad_norm": 4.904367923736572,
"learning_rate": 4.63541172298766e-05,
"loss": 0.8087,
"step": 1434
},
{
"epoch": 0.6910251008246554,
"grad_norm": 2.414597272872925,
"learning_rate": 4.622254038087622e-05,
"loss": 0.9569,
"step": 1435
},
{
"epoch": 0.6915066514175645,
"grad_norm": 2.7692532539367676,
"learning_rate": 4.60910943840695e-05,
"loss": 0.705,
"step": 1436
},
{
"epoch": 0.6919882020104737,
"grad_norm": 1.4934767484664917,
"learning_rate": 4.5959779559292985e-05,
"loss": 0.3923,
"step": 1437
},
{
"epoch": 0.6924697526033828,
"grad_norm": 1.7524209022521973,
"learning_rate": 4.582859622606406e-05,
"loss": 0.4808,
"step": 1438
},
{
"epoch": 0.6929513031962921,
"grad_norm": 1.5376161336898804,
"learning_rate": 4.569754470358014e-05,
"loss": 0.7108,
"step": 1439
},
{
"epoch": 0.6934328537892013,
"grad_norm": 2.8887248039245605,
"learning_rate": 4.556662531071796e-05,
"loss": 0.7365,
"step": 1440
},
{
"epoch": 0.6939144043821104,
"grad_norm": 1.2393244504928589,
"learning_rate": 4.54358383660327e-05,
"loss": 0.6929,
"step": 1441
},
{
"epoch": 0.6943959549750196,
"grad_norm": 1.984318494796753,
"learning_rate": 4.530518418775733e-05,
"loss": 0.5759,
"step": 1442
},
{
"epoch": 0.6948775055679287,
"grad_norm": 3.4806067943573,
"learning_rate": 4.5174663093801674e-05,
"loss": 0.963,
"step": 1443
},
{
"epoch": 0.6953590561608379,
"grad_norm": 0.9544948935508728,
"learning_rate": 4.504427540175181e-05,
"loss": 0.4253,
"step": 1444
},
{
"epoch": 0.695840606753747,
"grad_norm": 1.5310953855514526,
"learning_rate": 4.491402142886922e-05,
"loss": 0.6396,
"step": 1445
},
{
"epoch": 0.6963221573466563,
"grad_norm": 1.5412194728851318,
"learning_rate": 4.4783901492089984e-05,
"loss": 0.8048,
"step": 1446
},
{
"epoch": 0.6968037079395654,
"grad_norm": 0.9687153100967407,
"learning_rate": 4.465391590802407e-05,
"loss": 0.3689,
"step": 1447
},
{
"epoch": 0.6972852585324746,
"grad_norm": 2.8478314876556396,
"learning_rate": 4.4524064992954516e-05,
"loss": 0.6788,
"step": 1448
},
{
"epoch": 0.6977668091253837,
"grad_norm": 4.530721187591553,
"learning_rate": 4.4394349062836736e-05,
"loss": 0.7302,
"step": 1449
},
{
"epoch": 0.6982483597182929,
"grad_norm": 2.6478817462921143,
"learning_rate": 4.4264768433297565e-05,
"loss": 0.8899,
"step": 1450
},
{
"epoch": 0.698729910311202,
"grad_norm": 2.0213184356689453,
"learning_rate": 4.4135323419634766e-05,
"loss": 0.9633,
"step": 1451
},
{
"epoch": 0.6992114609041112,
"grad_norm": 3.5041468143463135,
"learning_rate": 4.4006014336816035e-05,
"loss": 1.0225,
"step": 1452
},
{
"epoch": 0.6996930114970205,
"grad_norm": 1.1038386821746826,
"learning_rate": 4.387684149947837e-05,
"loss": 0.3247,
"step": 1453
},
{
"epoch": 0.7001745620899296,
"grad_norm": 2.288525342941284,
"learning_rate": 4.374780522192726e-05,
"loss": 1.0528,
"step": 1454
},
{
"epoch": 0.7006561126828388,
"grad_norm": 4.361599922180176,
"learning_rate": 4.3618905818135805e-05,
"loss": 0.5694,
"step": 1455
},
{
"epoch": 0.7011376632757479,
"grad_norm": 2.766280174255371,
"learning_rate": 4.349014360174417e-05,
"loss": 0.5461,
"step": 1456
},
{
"epoch": 0.7016192138686571,
"grad_norm": 2.252498149871826,
"learning_rate": 4.336151888605871e-05,
"loss": 0.6858,
"step": 1457
},
{
"epoch": 0.7021007644615662,
"grad_norm": 2.4805188179016113,
"learning_rate": 4.323303198405117e-05,
"loss": 0.9368,
"step": 1458
},
{
"epoch": 0.7025823150544754,
"grad_norm": 3.281759738922119,
"learning_rate": 4.310468320835796e-05,
"loss": 0.9059,
"step": 1459
},
{
"epoch": 0.7030638656473845,
"grad_norm": 1.8752919435501099,
"learning_rate": 4.297647287127946e-05,
"loss": 0.3884,
"step": 1460
},
{
"epoch": 0.7035454162402938,
"grad_norm": 2.1307055950164795,
"learning_rate": 4.284840128477913e-05,
"loss": 0.8951,
"step": 1461
},
{
"epoch": 0.7040269668332029,
"grad_norm": 1.1077980995178223,
"learning_rate": 4.2720468760482854e-05,
"loss": 0.5871,
"step": 1462
},
{
"epoch": 0.7045085174261121,
"grad_norm": 1.9439555406570435,
"learning_rate": 4.2592675609678135e-05,
"loss": 0.5813,
"step": 1463
},
{
"epoch": 0.7049900680190212,
"grad_norm": 2.4993746280670166,
"learning_rate": 4.24650221433134e-05,
"loss": 0.671,
"step": 1464
},
{
"epoch": 0.7054716186119304,
"grad_norm": 2.181760787963867,
"learning_rate": 4.2337508671997086e-05,
"loss": 0.4199,
"step": 1465
},
{
"epoch": 0.7059531692048396,
"grad_norm": 2.025681495666504,
"learning_rate": 4.221013550599707e-05,
"loss": 0.4202,
"step": 1466
},
{
"epoch": 0.7064347197977487,
"grad_norm": 3.3956892490386963,
"learning_rate": 4.208290295523984e-05,
"loss": 0.8027,
"step": 1467
},
{
"epoch": 0.706916270390658,
"grad_norm": 2.7422797679901123,
"learning_rate": 4.1955811329309746e-05,
"loss": 1.2046,
"step": 1468
},
{
"epoch": 0.7073978209835671,
"grad_norm": 2.5726048946380615,
"learning_rate": 4.182886093744813e-05,
"loss": 1.1736,
"step": 1469
},
{
"epoch": 0.7078793715764763,
"grad_norm": 2.2286531925201416,
"learning_rate": 4.170205208855281e-05,
"loss": 0.5392,
"step": 1470
},
{
"epoch": 0.7083609221693854,
"grad_norm": 1.7045278549194336,
"learning_rate": 4.157538509117714e-05,
"loss": 0.7592,
"step": 1471
},
{
"epoch": 0.7088424727622946,
"grad_norm": 3.2489068508148193,
"learning_rate": 4.144886025352934e-05,
"loss": 0.6095,
"step": 1472
},
{
"epoch": 0.7093240233552037,
"grad_norm": 2.089620351791382,
"learning_rate": 4.13224778834717e-05,
"loss": 0.4248,
"step": 1473
},
{
"epoch": 0.7098055739481129,
"grad_norm": 1.5423088073730469,
"learning_rate": 4.1196238288519874e-05,
"loss": 0.2669,
"step": 1474
},
{
"epoch": 0.710287124541022,
"grad_norm": 1.412553071975708,
"learning_rate": 4.107014177584211e-05,
"loss": 0.3754,
"step": 1475
},
{
"epoch": 0.7107686751339313,
"grad_norm": 1.9968864917755127,
"learning_rate": 4.094418865225853e-05,
"loss": 0.5111,
"step": 1476
},
{
"epoch": 0.7112502257268404,
"grad_norm": 2.4750101566314697,
"learning_rate": 4.081837922424027e-05,
"loss": 0.6448,
"step": 1477
},
{
"epoch": 0.7117317763197496,
"grad_norm": 2.239208221435547,
"learning_rate": 4.069271379790891e-05,
"loss": 0.4287,
"step": 1478
},
{
"epoch": 0.7122133269126587,
"grad_norm": 7.368317604064941,
"learning_rate": 4.0567192679035636e-05,
"loss": 1.0325,
"step": 1479
},
{
"epoch": 0.7126948775055679,
"grad_norm": 1.8279064893722534,
"learning_rate": 4.044181617304048e-05,
"loss": 0.2988,
"step": 1480
},
{
"epoch": 0.7131764280984771,
"grad_norm": 1.781420111656189,
"learning_rate": 4.03165845849916e-05,
"loss": 0.5195,
"step": 1481
},
{
"epoch": 0.7136579786913863,
"grad_norm": 4.007209777832031,
"learning_rate": 4.019149821960455e-05,
"loss": 0.6838,
"step": 1482
},
{
"epoch": 0.7141395292842955,
"grad_norm": 2.4389944076538086,
"learning_rate": 4.006655738124152e-05,
"loss": 0.773,
"step": 1483
},
{
"epoch": 0.7146210798772046,
"grad_norm": 1.686699628829956,
"learning_rate": 3.9941762373910586e-05,
"loss": 0.3119,
"step": 1484
},
{
"epoch": 0.7151026304701138,
"grad_norm": 3.3148396015167236,
"learning_rate": 3.9817113501265016e-05,
"loss": 0.5543,
"step": 1485
},
{
"epoch": 0.7155841810630229,
"grad_norm": 1.4443938732147217,
"learning_rate": 3.9692611066602516e-05,
"loss": 0.6294,
"step": 1486
},
{
"epoch": 0.7160657316559321,
"grad_norm": 1.5491441488265991,
"learning_rate": 3.956825537286436e-05,
"loss": 0.312,
"step": 1487
},
{
"epoch": 0.7165472822488412,
"grad_norm": 2.059058427810669,
"learning_rate": 3.944404672263494e-05,
"loss": 0.7961,
"step": 1488
},
{
"epoch": 0.7170288328417505,
"grad_norm": 2.7234976291656494,
"learning_rate": 3.931998541814069e-05,
"loss": 0.7906,
"step": 1489
},
{
"epoch": 0.7175103834346596,
"grad_norm": 3.2691609859466553,
"learning_rate": 3.919607176124966e-05,
"loss": 0.9895,
"step": 1490
},
{
"epoch": 0.7179919340275688,
"grad_norm": 4.118697166442871,
"learning_rate": 3.9072306053470566e-05,
"loss": 0.5686,
"step": 1491
},
{
"epoch": 0.7184734846204779,
"grad_norm": 0.7744289040565491,
"learning_rate": 3.8948688595952164e-05,
"loss": 0.3498,
"step": 1492
},
{
"epoch": 0.7189550352133871,
"grad_norm": 2.5896894931793213,
"learning_rate": 3.882521968948246e-05,
"loss": 0.9491,
"step": 1493
},
{
"epoch": 0.7194365858062963,
"grad_norm": 3.2697811126708984,
"learning_rate": 3.8701899634488014e-05,
"loss": 0.6669,
"step": 1494
},
{
"epoch": 0.7199181363992054,
"grad_norm": 1.5220059156417847,
"learning_rate": 3.857872873103322e-05,
"loss": 0.5544,
"step": 1495
},
{
"epoch": 0.7203996869921147,
"grad_norm": 2.07112717628479,
"learning_rate": 3.8455707278819507e-05,
"loss": 0.5567,
"step": 1496
},
{
"epoch": 0.7208812375850238,
"grad_norm": 2.2222588062286377,
"learning_rate": 3.833283557718471e-05,
"loss": 0.4158,
"step": 1497
},
{
"epoch": 0.721362788177933,
"grad_norm": 2.1287026405334473,
"learning_rate": 3.821011392510228e-05,
"loss": 0.4907,
"step": 1498
},
{
"epoch": 0.7218443387708421,
"grad_norm": 3.9196484088897705,
"learning_rate": 3.808754262118046e-05,
"loss": 0.3054,
"step": 1499
},
{
"epoch": 0.7223258893637513,
"grad_norm": 1.4229110479354858,
"learning_rate": 3.796512196366182e-05,
"loss": 0.7671,
"step": 1500
},
{
"epoch": 0.7228074399566604,
"grad_norm": 1.968087911605835,
"learning_rate": 3.784285225042229e-05,
"loss": 0.9449,
"step": 1501
},
{
"epoch": 0.7232889905495696,
"grad_norm": 3.0822694301605225,
"learning_rate": 3.772073377897052e-05,
"loss": 0.8556,
"step": 1502
},
{
"epoch": 0.7237705411424787,
"grad_norm": 2.186847686767578,
"learning_rate": 3.7598766846447184e-05,
"loss": 0.364,
"step": 1503
},
{
"epoch": 0.724252091735388,
"grad_norm": 2.3544540405273438,
"learning_rate": 3.747695174962423e-05,
"loss": 0.9461,
"step": 1504
},
{
"epoch": 0.7247336423282971,
"grad_norm": 2.437185525894165,
"learning_rate": 3.7355288784904116e-05,
"loss": 0.6636,
"step": 1505
},
{
"epoch": 0.7252151929212063,
"grad_norm": 4.94728422164917,
"learning_rate": 3.7233778248319176e-05,
"loss": 0.4558,
"step": 1506
},
{
"epoch": 0.7256967435141155,
"grad_norm": 1.2178354263305664,
"learning_rate": 3.7112420435530845e-05,
"loss": 0.4775,
"step": 1507
},
{
"epoch": 0.7261782941070246,
"grad_norm": 2.4430301189422607,
"learning_rate": 3.69912156418289e-05,
"loss": 0.385,
"step": 1508
},
{
"epoch": 0.7266598446999338,
"grad_norm": 1.444451093673706,
"learning_rate": 3.687016416213084e-05,
"loss": 0.5812,
"step": 1509
},
{
"epoch": 0.7271413952928429,
"grad_norm": 1.2562270164489746,
"learning_rate": 3.674926629098113e-05,
"loss": 0.2545,
"step": 1510
},
{
"epoch": 0.7276229458857522,
"grad_norm": 3.6411561965942383,
"learning_rate": 3.6628522322550394e-05,
"loss": 0.4228,
"step": 1511
},
{
"epoch": 0.7281044964786613,
"grad_norm": 2.335496425628662,
"learning_rate": 3.6507932550634846e-05,
"loss": 0.4863,
"step": 1512
},
{
"epoch": 0.7285860470715705,
"grad_norm": 2.558220386505127,
"learning_rate": 3.638749726865552e-05,
"loss": 0.31,
"step": 1513
},
{
"epoch": 0.7290675976644796,
"grad_norm": 1.8757025003433228,
"learning_rate": 3.6267216769657485e-05,
"loss": 0.722,
"step": 1514
},
{
"epoch": 0.7295491482573888,
"grad_norm": 1.8420372009277344,
"learning_rate": 3.6147091346309224e-05,
"loss": 0.7818,
"step": 1515
},
{
"epoch": 0.7300306988502979,
"grad_norm": 3.078178644180298,
"learning_rate": 3.602712129090189e-05,
"loss": 0.5627,
"step": 1516
},
{
"epoch": 0.7305122494432071,
"grad_norm": 1.5147814750671387,
"learning_rate": 3.590730689534857e-05,
"loss": 0.6291,
"step": 1517
},
{
"epoch": 0.7309938000361162,
"grad_norm": 1.8767939805984497,
"learning_rate": 3.578764845118362e-05,
"loss": 0.3796,
"step": 1518
},
{
"epoch": 0.7314753506290255,
"grad_norm": 3.034921646118164,
"learning_rate": 3.566814624956194e-05,
"loss": 0.5662,
"step": 1519
},
{
"epoch": 0.7319569012219346,
"grad_norm": 0.8887065649032593,
"learning_rate": 3.554880058125819e-05,
"loss": 0.4554,
"step": 1520
},
{
"epoch": 0.7324384518148438,
"grad_norm": 3.390536308288574,
"learning_rate": 3.5429611736666235e-05,
"loss": 0.543,
"step": 1521
},
{
"epoch": 0.732920002407753,
"grad_norm": 1.751465082168579,
"learning_rate": 3.53105800057983e-05,
"loss": 0.4838,
"step": 1522
},
{
"epoch": 0.7334015530006621,
"grad_norm": 1.9569932222366333,
"learning_rate": 3.519170567828435e-05,
"loss": 0.557,
"step": 1523
},
{
"epoch": 0.7338831035935713,
"grad_norm": 1.79653799533844,
"learning_rate": 3.507298904337134e-05,
"loss": 0.7246,
"step": 1524
},
{
"epoch": 0.7343646541864804,
"grad_norm": 1.8159929513931274,
"learning_rate": 3.495443038992253e-05,
"loss": 0.3555,
"step": 1525
},
{
"epoch": 0.7348462047793897,
"grad_norm": 3.199507474899292,
"learning_rate": 3.4836030006416775e-05,
"loss": 0.5046,
"step": 1526
},
{
"epoch": 0.7353277553722988,
"grad_norm": 2.1682991981506348,
"learning_rate": 3.471778818094785e-05,
"loss": 0.7456,
"step": 1527
},
{
"epoch": 0.735809305965208,
"grad_norm": 1.0989952087402344,
"learning_rate": 3.459970520122364e-05,
"loss": 0.5804,
"step": 1528
},
{
"epoch": 0.7362908565581171,
"grad_norm": 1.0416721105575562,
"learning_rate": 3.44817813545656e-05,
"loss": 0.3529,
"step": 1529
},
{
"epoch": 0.7367724071510263,
"grad_norm": 4.703090667724609,
"learning_rate": 3.4364016927907974e-05,
"loss": 0.5961,
"step": 1530
},
{
"epoch": 0.7372539577439354,
"grad_norm": 2.4348576068878174,
"learning_rate": 3.424641220779711e-05,
"loss": 1.0376,
"step": 1531
},
{
"epoch": 0.7377355083368446,
"grad_norm": 1.9386546611785889,
"learning_rate": 3.412896748039067e-05,
"loss": 0.5634,
"step": 1532
},
{
"epoch": 0.7382170589297538,
"grad_norm": 1.232994556427002,
"learning_rate": 3.401168303145713e-05,
"loss": 0.4579,
"step": 1533
},
{
"epoch": 0.738698609522663,
"grad_norm": 1.5528676509857178,
"learning_rate": 3.3894559146374924e-05,
"loss": 0.4419,
"step": 1534
},
{
"epoch": 0.7391801601155722,
"grad_norm": 2.1310155391693115,
"learning_rate": 3.37775961101318e-05,
"loss": 0.5981,
"step": 1535
},
{
"epoch": 0.7396617107084813,
"grad_norm": 3.4211032390594482,
"learning_rate": 3.366079420732413e-05,
"loss": 1.0065,
"step": 1536
},
{
"epoch": 0.7401432613013905,
"grad_norm": 2.1246776580810547,
"learning_rate": 3.3544153722156216e-05,
"loss": 0.4723,
"step": 1537
},
{
"epoch": 0.7406248118942996,
"grad_norm": 3.7902305126190186,
"learning_rate": 3.3427674938439594e-05,
"loss": 0.7686,
"step": 1538
},
{
"epoch": 0.7411063624872088,
"grad_norm": 1.1903222799301147,
"learning_rate": 3.3311358139592317e-05,
"loss": 0.425,
"step": 1539
},
{
"epoch": 0.741587913080118,
"grad_norm": 2.2395949363708496,
"learning_rate": 3.319520360863837e-05,
"loss": 0.511,
"step": 1540
},
{
"epoch": 0.7420694636730272,
"grad_norm": 3.4029898643493652,
"learning_rate": 3.3079211628206854e-05,
"loss": 0.3296,
"step": 1541
},
{
"epoch": 0.7425510142659363,
"grad_norm": 2.042520523071289,
"learning_rate": 3.296338248053129e-05,
"loss": 0.3447,
"step": 1542
},
{
"epoch": 0.7430325648588455,
"grad_norm": 1.8394999504089355,
"learning_rate": 3.2847716447449096e-05,
"loss": 0.8341,
"step": 1543
},
{
"epoch": 0.7435141154517546,
"grad_norm": 1.8122001886367798,
"learning_rate": 3.2732213810400745e-05,
"loss": 0.5026,
"step": 1544
},
{
"epoch": 0.7439956660446638,
"grad_norm": 1.094519853591919,
"learning_rate": 3.261687485042915e-05,
"loss": 0.3282,
"step": 1545
},
{
"epoch": 0.7444772166375729,
"grad_norm": 1.8451745510101318,
"learning_rate": 3.250169984817897e-05,
"loss": 0.598,
"step": 1546
},
{
"epoch": 0.7449587672304822,
"grad_norm": 2.1285202503204346,
"learning_rate": 3.238668908389586e-05,
"loss": 0.5509,
"step": 1547
},
{
"epoch": 0.7454403178233914,
"grad_norm": 5.398340702056885,
"learning_rate": 3.227184283742591e-05,
"loss": 0.8054,
"step": 1548
},
{
"epoch": 0.7459218684163005,
"grad_norm": 4.164507865905762,
"learning_rate": 3.215716138821488e-05,
"loss": 0.6616,
"step": 1549
},
{
"epoch": 0.7464034190092097,
"grad_norm": 2.449247360229492,
"learning_rate": 3.204264501530756e-05,
"loss": 0.5928,
"step": 1550
},
{
"epoch": 0.7468849696021188,
"grad_norm": 2.73966121673584,
"learning_rate": 3.192829399734706e-05,
"loss": 0.8957,
"step": 1551
},
{
"epoch": 0.747366520195028,
"grad_norm": 1.7563470602035522,
"learning_rate": 3.181410861257413e-05,
"loss": 0.7951,
"step": 1552
},
{
"epoch": 0.7478480707879371,
"grad_norm": 2.797558307647705,
"learning_rate": 3.170008913882656e-05,
"loss": 0.4921,
"step": 1553
},
{
"epoch": 0.7483296213808464,
"grad_norm": 0.7847899794578552,
"learning_rate": 3.1586235853538325e-05,
"loss": 0.6741,
"step": 1554
},
{
"epoch": 0.7488111719737555,
"grad_norm": 3.5709075927734375,
"learning_rate": 3.1472549033739126e-05,
"loss": 0.3847,
"step": 1555
},
{
"epoch": 0.7492927225666647,
"grad_norm": 2.81365704536438,
"learning_rate": 3.1359028956053615e-05,
"loss": 0.4534,
"step": 1556
},
{
"epoch": 0.7497742731595738,
"grad_norm": 2.762085199356079,
"learning_rate": 3.1245675896700685e-05,
"loss": 1.0397,
"step": 1557
},
{
"epoch": 0.750255823752483,
"grad_norm": 1.423474907875061,
"learning_rate": 3.113249013149284e-05,
"loss": 0.3242,
"step": 1558
},
{
"epoch": 0.7507373743453921,
"grad_norm": 2.8285672664642334,
"learning_rate": 3.101947193583557e-05,
"loss": 0.7633,
"step": 1559
},
{
"epoch": 0.7512189249383013,
"grad_norm": 1.8334670066833496,
"learning_rate": 3.0906621584726546e-05,
"loss": 0.9668,
"step": 1560
},
{
"epoch": 0.7517004755312104,
"grad_norm": 4.472631454467773,
"learning_rate": 3.079393935275513e-05,
"loss": 0.752,
"step": 1561
},
{
"epoch": 0.7521820261241197,
"grad_norm": 1.2868741750717163,
"learning_rate": 3.068142551410155e-05,
"loss": 0.2786,
"step": 1562
},
{
"epoch": 0.7526635767170289,
"grad_norm": 1.9108686447143555,
"learning_rate": 3.0569080342536347e-05,
"loss": 0.4188,
"step": 1563
},
{
"epoch": 0.753145127309938,
"grad_norm": 3.5402438640594482,
"learning_rate": 3.0456904111419572e-05,
"loss": 0.7682,
"step": 1564
},
{
"epoch": 0.7536266779028472,
"grad_norm": 2.0236620903015137,
"learning_rate": 3.034489709370033e-05,
"loss": 0.4578,
"step": 1565
},
{
"epoch": 0.7541082284957563,
"grad_norm": 1.6213475465774536,
"learning_rate": 3.0233059561915855e-05,
"loss": 0.625,
"step": 1566
},
{
"epoch": 0.7545897790886655,
"grad_norm": 2.4623217582702637,
"learning_rate": 3.01213917881911e-05,
"loss": 0.6995,
"step": 1567
},
{
"epoch": 0.7550713296815746,
"grad_norm": 7.693182468414307,
"learning_rate": 3.0009894044237907e-05,
"loss": 0.5778,
"step": 1568
},
{
"epoch": 0.7555528802744839,
"grad_norm": 1.2127306461334229,
"learning_rate": 2.9898566601354418e-05,
"loss": 0.7593,
"step": 1569
},
{
"epoch": 0.756034430867393,
"grad_norm": 2.333127021789551,
"learning_rate": 2.9787409730424374e-05,
"loss": 0.5861,
"step": 1570
},
{
"epoch": 0.7565159814603022,
"grad_norm": 2.391322612762451,
"learning_rate": 2.96764237019165e-05,
"loss": 0.6207,
"step": 1571
},
{
"epoch": 0.7569975320532113,
"grad_norm": 2.2498581409454346,
"learning_rate": 2.9565608785883815e-05,
"loss": 0.7908,
"step": 1572
},
{
"epoch": 0.7574790826461205,
"grad_norm": 1.9790698289871216,
"learning_rate": 2.9454965251962973e-05,
"loss": 0.9975,
"step": 1573
},
{
"epoch": 0.7579606332390296,
"grad_norm": 1.974587321281433,
"learning_rate": 2.9344493369373637e-05,
"loss": 0.4825,
"step": 1574
},
{
"epoch": 0.7584421838319388,
"grad_norm": 4.309758186340332,
"learning_rate": 2.9234193406917833e-05,
"loss": 0.6722,
"step": 1575
},
{
"epoch": 0.7589237344248481,
"grad_norm": 8.978368759155273,
"learning_rate": 2.912406563297916e-05,
"loss": 1.2436,
"step": 1576
},
{
"epoch": 0.7594052850177572,
"grad_norm": 2.3713653087615967,
"learning_rate": 2.901411031552236e-05,
"loss": 0.6931,
"step": 1577
},
{
"epoch": 0.7598868356106664,
"grad_norm": 2.390103340148926,
"learning_rate": 2.8904327722092495e-05,
"loss": 0.7875,
"step": 1578
},
{
"epoch": 0.7603683862035755,
"grad_norm": 2.7292089462280273,
"learning_rate": 2.879471811981437e-05,
"loss": 0.9226,
"step": 1579
},
{
"epoch": 0.7608499367964847,
"grad_norm": 1.6569634675979614,
"learning_rate": 2.868528177539187e-05,
"loss": 0.9006,
"step": 1580
},
{
"epoch": 0.7613314873893938,
"grad_norm": 2.9300882816314697,
"learning_rate": 2.8576018955107285e-05,
"loss": 0.7848,
"step": 1581
},
{
"epoch": 0.761813037982303,
"grad_norm": 3.6729085445404053,
"learning_rate": 2.8466929924820705e-05,
"loss": 0.9429,
"step": 1582
},
{
"epoch": 0.7622945885752122,
"grad_norm": 2.4138500690460205,
"learning_rate": 2.8358014949969334e-05,
"loss": 0.8423,
"step": 1583
},
{
"epoch": 0.7627761391681214,
"grad_norm": 2.518306255340576,
"learning_rate": 2.8249274295566864e-05,
"loss": 0.5066,
"step": 1584
},
{
"epoch": 0.7632576897610305,
"grad_norm": 2.244164228439331,
"learning_rate": 2.8140708226202884e-05,
"loss": 0.5005,
"step": 1585
},
{
"epoch": 0.7637392403539397,
"grad_norm": 2.3682243824005127,
"learning_rate": 2.803231700604204e-05,
"loss": 0.5431,
"step": 1586
},
{
"epoch": 0.7642207909468488,
"grad_norm": 2.6888134479522705,
"learning_rate": 2.7924100898823702e-05,
"loss": 0.6596,
"step": 1587
},
{
"epoch": 0.764702341539758,
"grad_norm": 2.3632571697235107,
"learning_rate": 2.7816060167861002e-05,
"loss": 0.6924,
"step": 1588
},
{
"epoch": 0.7651838921326672,
"grad_norm": 1.6576805114746094,
"learning_rate": 2.7708195076040445e-05,
"loss": 0.5694,
"step": 1589
},
{
"epoch": 0.7656654427255764,
"grad_norm": 1.40910005569458,
"learning_rate": 2.760050588582114e-05,
"loss": 0.6316,
"step": 1590
},
{
"epoch": 0.7661469933184856,
"grad_norm": 4.426036834716797,
"learning_rate": 2.749299285923417e-05,
"loss": 0.7936,
"step": 1591
},
{
"epoch": 0.7666285439113947,
"grad_norm": 2.89945387840271,
"learning_rate": 2.7385656257881997e-05,
"loss": 0.4305,
"step": 1592
},
{
"epoch": 0.7671100945043039,
"grad_norm": 7.421449184417725,
"learning_rate": 2.7278496342937788e-05,
"loss": 0.4538,
"step": 1593
},
{
"epoch": 0.767591645097213,
"grad_norm": 1.8523188829421997,
"learning_rate": 2.717151337514482e-05,
"loss": 0.4911,
"step": 1594
},
{
"epoch": 0.7680731956901222,
"grad_norm": 1.7590643167495728,
"learning_rate": 2.7064707614815776e-05,
"loss": 0.5798,
"step": 1595
},
{
"epoch": 0.7685547462830313,
"grad_norm": 2.223667860031128,
"learning_rate": 2.6958079321832185e-05,
"loss": 0.4897,
"step": 1596
},
{
"epoch": 0.7690362968759406,
"grad_norm": 2.7905259132385254,
"learning_rate": 2.6851628755643776e-05,
"loss": 0.5537,
"step": 1597
},
{
"epoch": 0.7695178474688497,
"grad_norm": 4.461797714233398,
"learning_rate": 2.6745356175267765e-05,
"loss": 0.4501,
"step": 1598
},
{
"epoch": 0.7699993980617589,
"grad_norm": 4.621416091918945,
"learning_rate": 2.6639261839288343e-05,
"loss": 0.5464,
"step": 1599
},
{
"epoch": 0.770480948654668,
"grad_norm": 6.821358680725098,
"learning_rate": 2.6533346005855987e-05,
"loss": 0.824,
"step": 1600
},
{
"epoch": 0.7709624992475772,
"grad_norm": 1.811643123626709,
"learning_rate": 2.6427608932686843e-05,
"loss": 0.43,
"step": 1601
},
{
"epoch": 0.7714440498404863,
"grad_norm": 4.649533271789551,
"learning_rate": 2.6322050877062064e-05,
"loss": 0.7568,
"step": 1602
},
{
"epoch": 0.7719256004333955,
"grad_norm": 4.929988861083984,
"learning_rate": 2.6216672095827266e-05,
"loss": 0.8517,
"step": 1603
},
{
"epoch": 0.7724071510263048,
"grad_norm": 1.7828702926635742,
"learning_rate": 2.6111472845391827e-05,
"loss": 0.5364,
"step": 1604
},
{
"epoch": 0.7728887016192139,
"grad_norm": 0.7335327863693237,
"learning_rate": 2.6006453381728236e-05,
"loss": 0.306,
"step": 1605
},
{
"epoch": 0.7733702522121231,
"grad_norm": 2.5753631591796875,
"learning_rate": 2.5901613960371585e-05,
"loss": 0.3027,
"step": 1606
},
{
"epoch": 0.7738518028050322,
"grad_norm": 2.3606786727905273,
"learning_rate": 2.5796954836418884e-05,
"loss": 0.6378,
"step": 1607
},
{
"epoch": 0.7743333533979414,
"grad_norm": 2.3741252422332764,
"learning_rate": 2.569247626452842e-05,
"loss": 0.7027,
"step": 1608
},
{
"epoch": 0.7748149039908505,
"grad_norm": 3.609069347381592,
"learning_rate": 2.558817849891918e-05,
"loss": 0.5579,
"step": 1609
},
{
"epoch": 0.7752964545837597,
"grad_norm": 3.4135854244232178,
"learning_rate": 2.548406179337015e-05,
"loss": 0.6868,
"step": 1610
},
{
"epoch": 0.7757780051766688,
"grad_norm": 1.7354921102523804,
"learning_rate": 2.5380126401219807e-05,
"loss": 0.4444,
"step": 1611
},
{
"epoch": 0.7762595557695781,
"grad_norm": 3.9232022762298584,
"learning_rate": 2.527637257536547e-05,
"loss": 0.5952,
"step": 1612
},
{
"epoch": 0.7767411063624872,
"grad_norm": 3.5635814666748047,
"learning_rate": 2.517280056826262e-05,
"loss": 0.6536,
"step": 1613
},
{
"epoch": 0.7772226569553964,
"grad_norm": 2.9686269760131836,
"learning_rate": 2.5069410631924385e-05,
"loss": 0.8749,
"step": 1614
},
{
"epoch": 0.7777042075483055,
"grad_norm": 3.6469061374664307,
"learning_rate": 2.4966203017920818e-05,
"loss": 0.6617,
"step": 1615
},
{
"epoch": 0.7781857581412147,
"grad_norm": 2.2301876544952393,
"learning_rate": 2.4863177977378392e-05,
"loss": 0.5759,
"step": 1616
},
{
"epoch": 0.7786673087341239,
"grad_norm": 1.627908706665039,
"learning_rate": 2.4760335760979312e-05,
"loss": 0.6987,
"step": 1617
},
{
"epoch": 0.779148859327033,
"grad_norm": 3.955803632736206,
"learning_rate": 2.4657676618960944e-05,
"loss": 0.8698,
"step": 1618
},
{
"epoch": 0.7796304099199423,
"grad_norm": 2.231527328491211,
"learning_rate": 2.455520080111522e-05,
"loss": 0.996,
"step": 1619
},
{
"epoch": 0.7801119605128514,
"grad_norm": 3.9988443851470947,
"learning_rate": 2.4452908556787912e-05,
"loss": 0.771,
"step": 1620
},
{
"epoch": 0.7805935111057606,
"grad_norm": 2.5822057723999023,
"learning_rate": 2.4350800134878203e-05,
"loss": 0.6595,
"step": 1621
},
{
"epoch": 0.7810750616986697,
"grad_norm": 2.1049022674560547,
"learning_rate": 2.4248875783837987e-05,
"loss": 0.2905,
"step": 1622
},
{
"epoch": 0.7815566122915789,
"grad_norm": 3.057828426361084,
"learning_rate": 2.414713575167129e-05,
"loss": 0.455,
"step": 1623
},
{
"epoch": 0.782038162884488,
"grad_norm": 3.8854563236236572,
"learning_rate": 2.4045580285933557e-05,
"loss": 0.5088,
"step": 1624
},
{
"epoch": 0.7825197134773972,
"grad_norm": 3.774930715560913,
"learning_rate": 2.3944209633731242e-05,
"loss": 0.828,
"step": 1625
},
{
"epoch": 0.7830012640703063,
"grad_norm": 2.168914794921875,
"learning_rate": 2.3843024041721053e-05,
"loss": 0.499,
"step": 1626
},
{
"epoch": 0.7834828146632156,
"grad_norm": 4.615421772003174,
"learning_rate": 2.3742023756109456e-05,
"loss": 0.579,
"step": 1627
},
{
"epoch": 0.7839643652561247,
"grad_norm": 2.246866226196289,
"learning_rate": 2.3641209022651976e-05,
"loss": 0.7131,
"step": 1628
},
{
"epoch": 0.7844459158490339,
"grad_norm": 2.1677560806274414,
"learning_rate": 2.3540580086652675e-05,
"loss": 0.7653,
"step": 1629
},
{
"epoch": 0.7849274664419431,
"grad_norm": 2.1894803047180176,
"learning_rate": 2.344013719296353e-05,
"loss": 0.5755,
"step": 1630
},
{
"epoch": 0.7854090170348522,
"grad_norm": 1.5638082027435303,
"learning_rate": 2.3339880585983842e-05,
"loss": 0.8477,
"step": 1631
},
{
"epoch": 0.7858905676277614,
"grad_norm": 0.8193626999855042,
"learning_rate": 2.3239810509659597e-05,
"loss": 0.5981,
"step": 1632
},
{
"epoch": 0.7863721182206705,
"grad_norm": 2.181163787841797,
"learning_rate": 2.313992720748295e-05,
"loss": 0.6006,
"step": 1633
},
{
"epoch": 0.7868536688135798,
"grad_norm": 2.4740288257598877,
"learning_rate": 2.304023092249159e-05,
"loss": 0.7376,
"step": 1634
},
{
"epoch": 0.7873352194064889,
"grad_norm": 3.2138454914093018,
"learning_rate": 2.2940721897268136e-05,
"loss": 1.0772,
"step": 1635
},
{
"epoch": 0.7878167699993981,
"grad_norm": 3.062891960144043,
"learning_rate": 2.2841400373939592e-05,
"loss": 0.9387,
"step": 1636
},
{
"epoch": 0.7882983205923072,
"grad_norm": 1.856158971786499,
"learning_rate": 2.274226659417671e-05,
"loss": 0.891,
"step": 1637
},
{
"epoch": 0.7887798711852164,
"grad_norm": 1.4928967952728271,
"learning_rate": 2.2643320799193402e-05,
"loss": 0.3832,
"step": 1638
},
{
"epoch": 0.7892614217781255,
"grad_norm": 2.0665640830993652,
"learning_rate": 2.2544563229746218e-05,
"loss": 0.5602,
"step": 1639
},
{
"epoch": 0.7897429723710347,
"grad_norm": 2.119544506072998,
"learning_rate": 2.2445994126133708e-05,
"loss": 0.8366,
"step": 1640
},
{
"epoch": 0.7902245229639439,
"grad_norm": 1.5489791631698608,
"learning_rate": 2.234761372819577e-05,
"loss": 0.5582,
"step": 1641
},
{
"epoch": 0.7907060735568531,
"grad_norm": 1.4361308813095093,
"learning_rate": 2.2249422275313214e-05,
"loss": 0.3052,
"step": 1642
},
{
"epoch": 0.7911876241497622,
"grad_norm": 2.6924610137939453,
"learning_rate": 2.215142000640714e-05,
"loss": 0.4776,
"step": 1643
},
{
"epoch": 0.7916691747426714,
"grad_norm": 2.889003038406372,
"learning_rate": 2.2053607159938195e-05,
"loss": 0.5702,
"step": 1644
},
{
"epoch": 0.7921507253355806,
"grad_norm": 3.6985273361206055,
"learning_rate": 2.1955983973906236e-05,
"loss": 0.6528,
"step": 1645
},
{
"epoch": 0.7926322759284897,
"grad_norm": 1.6268264055252075,
"learning_rate": 2.1858550685849578e-05,
"loss": 0.7206,
"step": 1646
},
{
"epoch": 0.793113826521399,
"grad_norm": 2.64440655708313,
"learning_rate": 2.17613075328445e-05,
"loss": 0.742,
"step": 1647
},
{
"epoch": 0.793595377114308,
"grad_norm": 2.0996243953704834,
"learning_rate": 2.1664254751504642e-05,
"loss": 0.6661,
"step": 1648
},
{
"epoch": 0.7940769277072173,
"grad_norm": 1.764198660850525,
"learning_rate": 2.1567392577980393e-05,
"loss": 0.3963,
"step": 1649
},
{
"epoch": 0.7945584783001264,
"grad_norm": 2.0087742805480957,
"learning_rate": 2.1470721247958404e-05,
"loss": 0.906,
"step": 1650
},
{
"epoch": 0.7950400288930356,
"grad_norm": 2.0296401977539062,
"learning_rate": 2.137424099666091e-05,
"loss": 0.7582,
"step": 1651
},
{
"epoch": 0.7955215794859447,
"grad_norm": 4.00960636138916,
"learning_rate": 2.1277952058845284e-05,
"loss": 0.5171,
"step": 1652
},
{
"epoch": 0.7960031300788539,
"grad_norm": 1.5805654525756836,
"learning_rate": 2.118185466880327e-05,
"loss": 0.867,
"step": 1653
},
{
"epoch": 0.796484680671763,
"grad_norm": 1.1289174556732178,
"learning_rate": 2.1085949060360654e-05,
"loss": 0.7591,
"step": 1654
},
{
"epoch": 0.7969662312646723,
"grad_norm": 2.465733528137207,
"learning_rate": 2.0990235466876517e-05,
"loss": 0.7738,
"step": 1655
},
{
"epoch": 0.7974477818575814,
"grad_norm": 1.9576550722122192,
"learning_rate": 2.089471412124274e-05,
"loss": 0.5989,
"step": 1656
},
{
"epoch": 0.7979293324504906,
"grad_norm": 2.245087146759033,
"learning_rate": 2.079938525588342e-05,
"loss": 0.6204,
"step": 1657
},
{
"epoch": 0.7984108830433998,
"grad_norm": 2.1866295337677,
"learning_rate": 2.0704249102754324e-05,
"loss": 0.986,
"step": 1658
},
{
"epoch": 0.7988924336363089,
"grad_norm": 2.3913660049438477,
"learning_rate": 2.0609305893342278e-05,
"loss": 0.4221,
"step": 1659
},
{
"epoch": 0.7993739842292181,
"grad_norm": 2.4920084476470947,
"learning_rate": 2.0514555858664663e-05,
"loss": 0.7775,
"step": 1660
},
{
"epoch": 0.7998555348221272,
"grad_norm": 1.5726318359375,
"learning_rate": 2.0419999229268805e-05,
"loss": 0.3526,
"step": 1661
},
{
"epoch": 0.8003370854150365,
"grad_norm": 3.00834584236145,
"learning_rate": 2.032563623523147e-05,
"loss": 0.7506,
"step": 1662
},
{
"epoch": 0.8008186360079456,
"grad_norm": 2.3384523391723633,
"learning_rate": 2.0231467106158186e-05,
"loss": 0.5321,
"step": 1663
},
{
"epoch": 0.8013001866008548,
"grad_norm": 2.5204219818115234,
"learning_rate": 2.0137492071182863e-05,
"loss": 0.8753,
"step": 1664
},
{
"epoch": 0.8017817371937639,
"grad_norm": 1.7203079462051392,
"learning_rate": 2.0043711358967043e-05,
"loss": 1.6074,
"step": 1665
},
{
"epoch": 0.8022632877866731,
"grad_norm": 3.3237717151641846,
"learning_rate": 1.9950125197699508e-05,
"loss": 0.5971,
"step": 1666
},
{
"epoch": 0.8027448383795822,
"grad_norm": 3.3281543254852295,
"learning_rate": 1.985673381509565e-05,
"loss": 0.6587,
"step": 1667
},
{
"epoch": 0.8032263889724914,
"grad_norm": 3.3492562770843506,
"learning_rate": 1.9763537438396894e-05,
"loss": 0.9115,
"step": 1668
},
{
"epoch": 0.8037079395654005,
"grad_norm": 1.2501789331436157,
"learning_rate": 1.96705362943702e-05,
"loss": 0.4328,
"step": 1669
},
{
"epoch": 0.8041894901583098,
"grad_norm": 1.199196219444275,
"learning_rate": 1.9577730609307454e-05,
"loss": 0.283,
"step": 1670
},
{
"epoch": 0.804671040751219,
"grad_norm": 2.56499981880188,
"learning_rate": 1.9485120609024975e-05,
"loss": 0.6122,
"step": 1671
},
{
"epoch": 0.8051525913441281,
"grad_norm": 2.273665189743042,
"learning_rate": 1.9392706518862935e-05,
"loss": 0.9137,
"step": 1672
},
{
"epoch": 0.8056341419370373,
"grad_norm": 2.3677797317504883,
"learning_rate": 1.9300488563684804e-05,
"loss": 0.398,
"step": 1673
},
{
"epoch": 0.8061156925299464,
"grad_norm": 1.3238352537155151,
"learning_rate": 1.920846696787684e-05,
"loss": 0.8935,
"step": 1674
},
{
"epoch": 0.8065972431228556,
"grad_norm": 2.1932382583618164,
"learning_rate": 1.9116641955347446e-05,
"loss": 0.5614,
"step": 1675
},
{
"epoch": 0.8070787937157647,
"grad_norm": 1.0041841268539429,
"learning_rate": 1.9025013749526767e-05,
"loss": 0.5811,
"step": 1676
},
{
"epoch": 0.807560344308674,
"grad_norm": 3.299774169921875,
"learning_rate": 1.8933582573366036e-05,
"loss": 0.8145,
"step": 1677
},
{
"epoch": 0.8080418949015831,
"grad_norm": 2.4343202114105225,
"learning_rate": 1.8842348649337116e-05,
"loss": 0.6614,
"step": 1678
},
{
"epoch": 0.8085234454944923,
"grad_norm": 2.8237218856811523,
"learning_rate": 1.875131219943187e-05,
"loss": 1.1075,
"step": 1679
},
{
"epoch": 0.8090049960874014,
"grad_norm": 2.5553195476531982,
"learning_rate": 1.8660473445161663e-05,
"loss": 0.604,
"step": 1680
},
{
"epoch": 0.8094865466803106,
"grad_norm": 1.5066719055175781,
"learning_rate": 1.856983260755686e-05,
"loss": 0.6099,
"step": 1681
},
{
"epoch": 0.8099680972732197,
"grad_norm": 1.4844105243682861,
"learning_rate": 1.8479389907166223e-05,
"loss": 0.2163,
"step": 1682
},
{
"epoch": 0.8104496478661289,
"grad_norm": 2.3299903869628906,
"learning_rate": 1.8389145564056387e-05,
"loss": 0.4654,
"step": 1683
},
{
"epoch": 0.810931198459038,
"grad_norm": 1.9507116079330444,
"learning_rate": 1.829909979781137e-05,
"loss": 0.3359,
"step": 1684
},
{
"epoch": 0.8114127490519473,
"grad_norm": 2.243999481201172,
"learning_rate": 1.820925282753201e-05,
"loss": 0.5519,
"step": 1685
},
{
"epoch": 0.8118942996448565,
"grad_norm": 1.5406464338302612,
"learning_rate": 1.8119604871835437e-05,
"loss": 0.6571,
"step": 1686
},
{
"epoch": 0.8123758502377656,
"grad_norm": 2.3631114959716797,
"learning_rate": 1.8030156148854492e-05,
"loss": 0.7404,
"step": 1687
},
{
"epoch": 0.8128574008306748,
"grad_norm": 5.411351680755615,
"learning_rate": 1.7940906876237284e-05,
"loss": 0.4588,
"step": 1688
},
{
"epoch": 0.8133389514235839,
"grad_norm": 1.9725611209869385,
"learning_rate": 1.78518572711466e-05,
"loss": 0.6191,
"step": 1689
},
{
"epoch": 0.8138205020164931,
"grad_norm": 2.150035858154297,
"learning_rate": 1.776300755025939e-05,
"loss": 1.0107,
"step": 1690
},
{
"epoch": 0.8143020526094022,
"grad_norm": 1.634321928024292,
"learning_rate": 1.767435792976626e-05,
"loss": 0.42,
"step": 1691
},
{
"epoch": 0.8147836032023115,
"grad_norm": 1.756266474723816,
"learning_rate": 1.7585908625370905e-05,
"loss": 0.7629,
"step": 1692
},
{
"epoch": 0.8152651537952206,
"grad_norm": 2.3073647022247314,
"learning_rate": 1.749765985228963e-05,
"loss": 0.8333,
"step": 1693
},
{
"epoch": 0.8157467043881298,
"grad_norm": 2.3972604274749756,
"learning_rate": 1.740961182525077e-05,
"loss": 0.4132,
"step": 1694
},
{
"epoch": 0.8162282549810389,
"grad_norm": 2.476473569869995,
"learning_rate": 1.7321764758494252e-05,
"loss": 0.8872,
"step": 1695
},
{
"epoch": 0.8167098055739481,
"grad_norm": 2.487661361694336,
"learning_rate": 1.7234118865770987e-05,
"loss": 0.685,
"step": 1696
},
{
"epoch": 0.8171913561668572,
"grad_norm": 1.8796521425247192,
"learning_rate": 1.7146674360342373e-05,
"loss": 0.6886,
"step": 1697
},
{
"epoch": 0.8176729067597664,
"grad_norm": 1.8261488676071167,
"learning_rate": 1.7059431454979824e-05,
"loss": 0.8278,
"step": 1698
},
{
"epoch": 0.8181544573526757,
"grad_norm": 4.842952728271484,
"learning_rate": 1.6972390361964195e-05,
"loss": 1.1315,
"step": 1699
},
{
"epoch": 0.8186360079455848,
"grad_norm": 1.3907809257507324,
"learning_rate": 1.688555129308531e-05,
"loss": 0.5094,
"step": 1700
},
{
"epoch": 0.819117558538494,
"grad_norm": 2.8094334602355957,
"learning_rate": 1.6798914459641434e-05,
"loss": 0.6765,
"step": 1701
},
{
"epoch": 0.8195991091314031,
"grad_norm": 1.7565284967422485,
"learning_rate": 1.6712480072438662e-05,
"loss": 0.8474,
"step": 1702
},
{
"epoch": 0.8200806597243123,
"grad_norm": 3.5661885738372803,
"learning_rate": 1.6626248341790596e-05,
"loss": 1.1639,
"step": 1703
},
{
"epoch": 0.8205622103172214,
"grad_norm": 2.280489683151245,
"learning_rate": 1.6540219477517684e-05,
"loss": 0.6265,
"step": 1704
},
{
"epoch": 0.8210437609101306,
"grad_norm": 1.485849380493164,
"learning_rate": 1.6454393688946767e-05,
"loss": 0.4715,
"step": 1705
},
{
"epoch": 0.8215253115030398,
"grad_norm": 2.863246202468872,
"learning_rate": 1.6368771184910557e-05,
"loss": 0.4076,
"step": 1706
},
{
"epoch": 0.822006862095949,
"grad_norm": 1.4689639806747437,
"learning_rate": 1.6283352173747145e-05,
"loss": 0.6784,
"step": 1707
},
{
"epoch": 0.8224884126888581,
"grad_norm": 1.777126431465149,
"learning_rate": 1.619813686329946e-05,
"loss": 0.6577,
"step": 1708
},
{
"epoch": 0.8229699632817673,
"grad_norm": 2.7305054664611816,
"learning_rate": 1.611312546091476e-05,
"loss": 0.8134,
"step": 1709
},
{
"epoch": 0.8234515138746764,
"grad_norm": 1.6367037296295166,
"learning_rate": 1.6028318173444202e-05,
"loss": 0.7774,
"step": 1710
},
{
"epoch": 0.8239330644675856,
"grad_norm": 1.054276943206787,
"learning_rate": 1.594371520724226e-05,
"loss": 0.6494,
"step": 1711
},
{
"epoch": 0.8244146150604948,
"grad_norm": 1.8237320184707642,
"learning_rate": 1.5859316768166244e-05,
"loss": 0.784,
"step": 1712
},
{
"epoch": 0.824896165653404,
"grad_norm": 2.3219659328460693,
"learning_rate": 1.5775123061575836e-05,
"loss": 0.8381,
"step": 1713
},
{
"epoch": 0.8253777162463132,
"grad_norm": 2.7865233421325684,
"learning_rate": 1.569113429233252e-05,
"loss": 0.4768,
"step": 1714
},
{
"epoch": 0.8258592668392223,
"grad_norm": 2.3359994888305664,
"learning_rate": 1.5607350664799157e-05,
"loss": 0.7649,
"step": 1715
},
{
"epoch": 0.8263408174321315,
"grad_norm": 4.821800708770752,
"learning_rate": 1.552377238283943e-05,
"loss": 0.5414,
"step": 1716
},
{
"epoch": 0.8268223680250406,
"grad_norm": 1.802925944328308,
"learning_rate": 1.5440399649817385e-05,
"loss": 0.2233,
"step": 1717
},
{
"epoch": 0.8273039186179498,
"grad_norm": 3.2591664791107178,
"learning_rate": 1.5357232668596933e-05,
"loss": 1.2976,
"step": 1718
},
{
"epoch": 0.8277854692108589,
"grad_norm": 2.02854585647583,
"learning_rate": 1.5274271641541295e-05,
"loss": 0.5666,
"step": 1719
},
{
"epoch": 0.8282670198037682,
"grad_norm": 2.4530251026153564,
"learning_rate": 1.5191516770512649e-05,
"loss": 0.7718,
"step": 1720
},
{
"epoch": 0.8287485703966773,
"grad_norm": 2.288060188293457,
"learning_rate": 1.5108968256871437e-05,
"loss": 0.333,
"step": 1721
},
{
"epoch": 0.8292301209895865,
"grad_norm": 1.380737543106079,
"learning_rate": 1.5026626301476087e-05,
"loss": 0.4997,
"step": 1722
},
{
"epoch": 0.8297116715824956,
"grad_norm": 3.533025026321411,
"learning_rate": 1.4944491104682379e-05,
"loss": 0.7909,
"step": 1723
},
{
"epoch": 0.8301932221754048,
"grad_norm": 2.4573423862457275,
"learning_rate": 1.4862562866343034e-05,
"loss": 0.6396,
"step": 1724
},
{
"epoch": 0.8306747727683139,
"grad_norm": 2.839277744293213,
"learning_rate": 1.4780841785807164e-05,
"loss": 0.6966,
"step": 1725
},
{
"epoch": 0.8311563233612231,
"grad_norm": 1.8692930936813354,
"learning_rate": 1.4699328061919848e-05,
"loss": 0.5262,
"step": 1726
},
{
"epoch": 0.8316378739541324,
"grad_norm": 2.2407124042510986,
"learning_rate": 1.4618021893021605e-05,
"loss": 0.3409,
"step": 1727
},
{
"epoch": 0.8321194245470415,
"grad_norm": 1.3630995750427246,
"learning_rate": 1.453692347694794e-05,
"loss": 0.3524,
"step": 1728
},
{
"epoch": 0.8326009751399507,
"grad_norm": 2.1543772220611572,
"learning_rate": 1.4456033011028835e-05,
"loss": 0.442,
"step": 1729
},
{
"epoch": 0.8330825257328598,
"grad_norm": 3.9747824668884277,
"learning_rate": 1.437535069208833e-05,
"loss": 0.8306,
"step": 1730
},
{
"epoch": 0.833564076325769,
"grad_norm": 2.9707400798797607,
"learning_rate": 1.4294876716443906e-05,
"loss": 0.3712,
"step": 1731
},
{
"epoch": 0.8340456269186781,
"grad_norm": 1.9354028701782227,
"learning_rate": 1.4214611279906187e-05,
"loss": 0.2021,
"step": 1732
},
{
"epoch": 0.8345271775115873,
"grad_norm": 2.2027204036712646,
"learning_rate": 1.4134554577778337e-05,
"loss": 0.7172,
"step": 1733
},
{
"epoch": 0.8350087281044964,
"grad_norm": 1.4204658269882202,
"learning_rate": 1.4054706804855634e-05,
"loss": 0.8734,
"step": 1734
},
{
"epoch": 0.8354902786974057,
"grad_norm": 2.0968925952911377,
"learning_rate": 1.3975068155424976e-05,
"loss": 0.9967,
"step": 1735
},
{
"epoch": 0.8359718292903148,
"grad_norm": 2.840298891067505,
"learning_rate": 1.3895638823264446e-05,
"loss": 0.6381,
"step": 1736
},
{
"epoch": 0.836453379883224,
"grad_norm": 1.5930904150009155,
"learning_rate": 1.3816419001642777e-05,
"loss": 0.5605,
"step": 1737
},
{
"epoch": 0.8369349304761331,
"grad_norm": 4.177980899810791,
"learning_rate": 1.3737408883318948e-05,
"loss": 0.5922,
"step": 1738
},
{
"epoch": 0.8374164810690423,
"grad_norm": 1.7408493757247925,
"learning_rate": 1.365860866054165e-05,
"loss": 0.4055,
"step": 1739
},
{
"epoch": 0.8378980316619515,
"grad_norm": 1.257311224937439,
"learning_rate": 1.358001852504891e-05,
"loss": 0.2734,
"step": 1740
},
{
"epoch": 0.8383795822548606,
"grad_norm": 1.8963124752044678,
"learning_rate": 1.3501638668067485e-05,
"loss": 0.7453,
"step": 1741
},
{
"epoch": 0.8388611328477699,
"grad_norm": 1.7414535284042358,
"learning_rate": 1.3423469280312562e-05,
"loss": 0.6258,
"step": 1742
},
{
"epoch": 0.839342683440679,
"grad_norm": 1.7837656736373901,
"learning_rate": 1.3345510551987128e-05,
"loss": 0.3573,
"step": 1743
},
{
"epoch": 0.8398242340335882,
"grad_norm": 2.218170404434204,
"learning_rate": 1.326776267278167e-05,
"loss": 0.6641,
"step": 1744
},
{
"epoch": 0.8403057846264973,
"grad_norm": 4.239348411560059,
"learning_rate": 1.3190225831873581e-05,
"loss": 0.7345,
"step": 1745
},
{
"epoch": 0.8407873352194065,
"grad_norm": 1.7612202167510986,
"learning_rate": 1.3112900217926782e-05,
"loss": 0.6602,
"step": 1746
},
{
"epoch": 0.8412688858123156,
"grad_norm": 5.180617332458496,
"learning_rate": 1.3035786019091223e-05,
"loss": 0.7354,
"step": 1747
},
{
"epoch": 0.8417504364052248,
"grad_norm": 2.2071621417999268,
"learning_rate": 1.2958883423002422e-05,
"loss": 0.93,
"step": 1748
},
{
"epoch": 0.842231986998134,
"grad_norm": 2.929159164428711,
"learning_rate": 1.288219261678103e-05,
"loss": 0.7676,
"step": 1749
},
{
"epoch": 0.8427135375910432,
"grad_norm": 1.524143934249878,
"learning_rate": 1.2805713787032381e-05,
"loss": 0.269,
"step": 1750
},
{
"epoch": 0.8431950881839523,
"grad_norm": 1.5855472087860107,
"learning_rate": 1.2729447119846016e-05,
"loss": 0.4037,
"step": 1751
},
{
"epoch": 0.8436766387768615,
"grad_norm": 2.4679388999938965,
"learning_rate": 1.265339280079525e-05,
"loss": 0.7235,
"step": 1752
},
{
"epoch": 0.8441581893697706,
"grad_norm": 3.3254940509796143,
"learning_rate": 1.257755101493665e-05,
"loss": 0.6291,
"step": 1753
},
{
"epoch": 0.8446397399626798,
"grad_norm": 1.8408324718475342,
"learning_rate": 1.2501921946809714e-05,
"loss": 0.3552,
"step": 1754
},
{
"epoch": 0.845121290555589,
"grad_norm": 2.5367562770843506,
"learning_rate": 1.2426505780436326e-05,
"loss": 0.8439,
"step": 1755
},
{
"epoch": 0.8456028411484982,
"grad_norm": 2.7886507511138916,
"learning_rate": 1.2351302699320332e-05,
"loss": 0.7676,
"step": 1756
},
{
"epoch": 0.8460843917414074,
"grad_norm": 4.4448628425598145,
"learning_rate": 1.2276312886447106e-05,
"loss": 0.8199,
"step": 1757
},
{
"epoch": 0.8465659423343165,
"grad_norm": 2.484957218170166,
"learning_rate": 1.2201536524283074e-05,
"loss": 0.6567,
"step": 1758
},
{
"epoch": 0.8470474929272257,
"grad_norm": 1.730948805809021,
"learning_rate": 1.2126973794775343e-05,
"loss": 0.4775,
"step": 1759
},
{
"epoch": 0.8475290435201348,
"grad_norm": 3.7568891048431396,
"learning_rate": 1.2052624879351104e-05,
"loss": 0.8885,
"step": 1760
},
{
"epoch": 0.848010594113044,
"grad_norm": 2.2398736476898193,
"learning_rate": 1.1978489958917382e-05,
"loss": 0.6513,
"step": 1761
},
{
"epoch": 0.8484921447059531,
"grad_norm": 2.391688346862793,
"learning_rate": 1.1904569213860472e-05,
"loss": 0.7705,
"step": 1762
},
{
"epoch": 0.8489736952988624,
"grad_norm": 1.2668508291244507,
"learning_rate": 1.1830862824045552e-05,
"loss": 0.7412,
"step": 1763
},
{
"epoch": 0.8494552458917715,
"grad_norm": 2.4677586555480957,
"learning_rate": 1.1757370968816217e-05,
"loss": 0.4662,
"step": 1764
},
{
"epoch": 0.8499367964846807,
"grad_norm": 1.866142988204956,
"learning_rate": 1.1684093826994024e-05,
"loss": 0.4521,
"step": 1765
},
{
"epoch": 0.8504183470775898,
"grad_norm": 2.4763362407684326,
"learning_rate": 1.1611031576878117e-05,
"loss": 0.601,
"step": 1766
},
{
"epoch": 0.850899897670499,
"grad_norm": 1.6049933433532715,
"learning_rate": 1.1538184396244778e-05,
"loss": 0.2667,
"step": 1767
},
{
"epoch": 0.8513814482634082,
"grad_norm": 2.2135348320007324,
"learning_rate": 1.146555246234694e-05,
"loss": 0.6749,
"step": 1768
},
{
"epoch": 0.8518629988563173,
"grad_norm": 3.2478649616241455,
"learning_rate": 1.1393135951913824e-05,
"loss": 0.6464,
"step": 1769
},
{
"epoch": 0.8523445494492266,
"grad_norm": 1.7736784219741821,
"learning_rate": 1.132093504115046e-05,
"loss": 0.5814,
"step": 1770
},
{
"epoch": 0.8528261000421357,
"grad_norm": 4.978511333465576,
"learning_rate": 1.1248949905737283e-05,
"loss": 0.5157,
"step": 1771
},
{
"epoch": 0.8533076506350449,
"grad_norm": 0.9166672229766846,
"learning_rate": 1.1177180720829694e-05,
"loss": 0.1709,
"step": 1772
},
{
"epoch": 0.853789201227954,
"grad_norm": 1.008035659790039,
"learning_rate": 1.1105627661057671e-05,
"loss": 0.4628,
"step": 1773
},
{
"epoch": 0.8542707518208632,
"grad_norm": 2.2385506629943848,
"learning_rate": 1.103429090052528e-05,
"loss": 1.1356,
"step": 1774
},
{
"epoch": 0.8547523024137723,
"grad_norm": 3.872480630874634,
"learning_rate": 1.096317061281027e-05,
"loss": 0.5905,
"step": 1775
},
{
"epoch": 0.8552338530066815,
"grad_norm": 1.684135913848877,
"learning_rate": 1.0892266970963704e-05,
"loss": 0.5081,
"step": 1776
},
{
"epoch": 0.8557154035995906,
"grad_norm": 3.901571035385132,
"learning_rate": 1.082158014750948e-05,
"loss": 0.9633,
"step": 1777
},
{
"epoch": 0.8561969541924999,
"grad_norm": 2.208216905593872,
"learning_rate": 1.0751110314443958e-05,
"loss": 0.7731,
"step": 1778
},
{
"epoch": 0.856678504785409,
"grad_norm": 2.4418656826019287,
"learning_rate": 1.0680857643235431e-05,
"loss": 0.6918,
"step": 1779
},
{
"epoch": 0.8571600553783182,
"grad_norm": 1.6305257081985474,
"learning_rate": 1.0610822304823887e-05,
"loss": 0.5908,
"step": 1780
},
{
"epoch": 0.8576416059712274,
"grad_norm": 1.6165392398834229,
"learning_rate": 1.0541004469620452e-05,
"loss": 0.6767,
"step": 1781
},
{
"epoch": 0.8581231565641365,
"grad_norm": 2.7876946926116943,
"learning_rate": 1.0471404307507016e-05,
"loss": 1.0515,
"step": 1782
},
{
"epoch": 0.8586047071570457,
"grad_norm": 3.3915517330169678,
"learning_rate": 1.0402021987835831e-05,
"loss": 0.8213,
"step": 1783
},
{
"epoch": 0.8590862577499548,
"grad_norm": 3.31449031829834,
"learning_rate": 1.0332857679429098e-05,
"loss": 0.4672,
"step": 1784
},
{
"epoch": 0.8595678083428641,
"grad_norm": 3.6324501037597656,
"learning_rate": 1.0263911550578531e-05,
"loss": 1.0045,
"step": 1785
},
{
"epoch": 0.8600493589357732,
"grad_norm": 1.995388150215149,
"learning_rate": 1.0195183769045013e-05,
"loss": 0.3615,
"step": 1786
},
{
"epoch": 0.8605309095286824,
"grad_norm": 3.063302993774414,
"learning_rate": 1.0126674502058054e-05,
"loss": 0.8128,
"step": 1787
},
{
"epoch": 0.8610124601215915,
"grad_norm": 1.1554100513458252,
"learning_rate": 1.005838391631555e-05,
"loss": 0.6989,
"step": 1788
},
{
"epoch": 0.8614940107145007,
"grad_norm": 1.5388157367706299,
"learning_rate": 9.990312177983263e-06,
"loss": 0.6449,
"step": 1789
},
{
"epoch": 0.8619755613074098,
"grad_norm": 1.3487037420272827,
"learning_rate": 9.922459452694466e-06,
"loss": 0.7874,
"step": 1790
},
{
"epoch": 0.862457111900319,
"grad_norm": 1.2846475839614868,
"learning_rate": 9.854825905549503e-06,
"loss": 0.6292,
"step": 1791
},
{
"epoch": 0.8629386624932281,
"grad_norm": 2.74332594871521,
"learning_rate": 9.787411701115456e-06,
"loss": 0.5169,
"step": 1792
},
{
"epoch": 0.8634202130861374,
"grad_norm": 1.1050007343292236,
"learning_rate": 9.720217003425647e-06,
"loss": 0.6624,
"step": 1793
},
{
"epoch": 0.8639017636790465,
"grad_norm": 2.6142866611480713,
"learning_rate": 9.65324197597931e-06,
"loss": 0.5766,
"step": 1794
},
{
"epoch": 0.8643833142719557,
"grad_norm": 1.3400239944458008,
"learning_rate": 9.58648678174121e-06,
"loss": 0.8115,
"step": 1795
},
{
"epoch": 0.8648648648648649,
"grad_norm": 1.4871549606323242,
"learning_rate": 9.51995158314113e-06,
"loss": 0.7247,
"step": 1796
},
{
"epoch": 0.865346415457774,
"grad_norm": 3.412703037261963,
"learning_rate": 9.45363654207363e-06,
"loss": 0.4651,
"step": 1797
},
{
"epoch": 0.8658279660506832,
"grad_norm": 1.186317801475525,
"learning_rate": 9.387541819897549e-06,
"loss": 0.504,
"step": 1798
},
{
"epoch": 0.8663095166435923,
"grad_norm": 3.1554412841796875,
"learning_rate": 9.321667577435634e-06,
"loss": 0.6253,
"step": 1799
},
{
"epoch": 0.8667910672365016,
"grad_norm": 2.272794246673584,
"learning_rate": 9.256013974974175e-06,
"loss": 0.5426,
"step": 1800
},
{
"epoch": 0.8672726178294107,
"grad_norm": 1.4032080173492432,
"learning_rate": 9.19058117226258e-06,
"loss": 0.4761,
"step": 1801
},
{
"epoch": 0.8677541684223199,
"grad_norm": 2.6849613189697266,
"learning_rate": 9.125369328513034e-06,
"loss": 0.7514,
"step": 1802
},
{
"epoch": 0.868235719015229,
"grad_norm": 4.494041442871094,
"learning_rate": 9.060378602400054e-06,
"loss": 0.5857,
"step": 1803
},
{
"epoch": 0.8687172696081382,
"grad_norm": 1.7596466541290283,
"learning_rate": 8.995609152060136e-06,
"loss": 0.7958,
"step": 1804
},
{
"epoch": 0.8691988202010473,
"grad_norm": 1.3782743215560913,
"learning_rate": 8.931061135091357e-06,
"loss": 0.7378,
"step": 1805
},
{
"epoch": 0.8696803707939565,
"grad_norm": 2.1466805934906006,
"learning_rate": 8.866734708553015e-06,
"loss": 1.0608,
"step": 1806
},
{
"epoch": 0.8701619213868657,
"grad_norm": 3.3685004711151123,
"learning_rate": 8.802630028965242e-06,
"loss": 0.4598,
"step": 1807
},
{
"epoch": 0.8706434719797749,
"grad_norm": 2.506319999694824,
"learning_rate": 8.738747252308555e-06,
"loss": 0.5106,
"step": 1808
},
{
"epoch": 0.8711250225726841,
"grad_norm": 0.8574779629707336,
"learning_rate": 8.675086534023591e-06,
"loss": 0.3669,
"step": 1809
},
{
"epoch": 0.8716065731655932,
"grad_norm": 2.1670174598693848,
"learning_rate": 8.611648029010643e-06,
"loss": 0.33,
"step": 1810
},
{
"epoch": 0.8720881237585024,
"grad_norm": 3.5678937435150146,
"learning_rate": 8.548431891629316e-06,
"loss": 0.7334,
"step": 1811
},
{
"epoch": 0.8725696743514115,
"grad_norm": 2.3840737342834473,
"learning_rate": 8.485438275698154e-06,
"loss": 0.3852,
"step": 1812
},
{
"epoch": 0.8730512249443207,
"grad_norm": 7.21331262588501,
"learning_rate": 8.422667334494249e-06,
"loss": 0.5615,
"step": 1813
},
{
"epoch": 0.8735327755372299,
"grad_norm": 5.849119186401367,
"learning_rate": 8.360119220752893e-06,
"loss": 0.4217,
"step": 1814
},
{
"epoch": 0.8740143261301391,
"grad_norm": 2.1622002124786377,
"learning_rate": 8.297794086667165e-06,
"loss": 0.8654,
"step": 1815
},
{
"epoch": 0.8744958767230482,
"grad_norm": 1.9863747358322144,
"learning_rate": 8.235692083887613e-06,
"loss": 0.5413,
"step": 1816
},
{
"epoch": 0.8749774273159574,
"grad_norm": 2.883000135421753,
"learning_rate": 8.173813363521843e-06,
"loss": 1.2884,
"step": 1817
},
{
"epoch": 0.8754589779088665,
"grad_norm": 2.683244466781616,
"learning_rate": 8.112158076134157e-06,
"loss": 0.6079,
"step": 1818
},
{
"epoch": 0.8759405285017757,
"grad_norm": 1.6069995164871216,
"learning_rate": 8.05072637174522e-06,
"loss": 0.56,
"step": 1819
},
{
"epoch": 0.8764220790946848,
"grad_norm": 3.1434903144836426,
"learning_rate": 7.989518399831641e-06,
"loss": 0.5649,
"step": 1820
},
{
"epoch": 0.876903629687594,
"grad_norm": 3.7238409519195557,
"learning_rate": 7.928534309325675e-06,
"loss": 0.729,
"step": 1821
},
{
"epoch": 0.8773851802805033,
"grad_norm": 1.1204873323440552,
"learning_rate": 7.8677742486148e-06,
"loss": 0.4625,
"step": 1822
},
{
"epoch": 0.8778667308734124,
"grad_norm": 2.0625314712524414,
"learning_rate": 7.807238365541391e-06,
"loss": 0.4157,
"step": 1823
},
{
"epoch": 0.8783482814663216,
"grad_norm": 2.398089647293091,
"learning_rate": 7.746926807402344e-06,
"loss": 0.611,
"step": 1824
},
{
"epoch": 0.8788298320592307,
"grad_norm": 4.007481098175049,
"learning_rate": 7.686839720948736e-06,
"loss": 1.2355,
"step": 1825
},
{
"epoch": 0.8793113826521399,
"grad_norm": 3.5721206665039062,
"learning_rate": 7.6269772523854365e-06,
"loss": 0.4283,
"step": 1826
},
{
"epoch": 0.879792933245049,
"grad_norm": 2.9199283123016357,
"learning_rate": 7.567339547370789e-06,
"loss": 0.4685,
"step": 1827
},
{
"epoch": 0.8802744838379583,
"grad_norm": 1.7368232011795044,
"learning_rate": 7.507926751016248e-06,
"loss": 0.5865,
"step": 1828
},
{
"epoch": 0.8807560344308674,
"grad_norm": 2.2206578254699707,
"learning_rate": 7.4487390078859855e-06,
"loss": 0.7996,
"step": 1829
},
{
"epoch": 0.8812375850237766,
"grad_norm": 1.2094279527664185,
"learning_rate": 7.389776461996578e-06,
"loss": 0.5491,
"step": 1830
},
{
"epoch": 0.8817191356166857,
"grad_norm": 1.2842280864715576,
"learning_rate": 7.331039256816663e-06,
"loss": 0.9658,
"step": 1831
},
{
"epoch": 0.8822006862095949,
"grad_norm": 3.4786460399627686,
"learning_rate": 7.27252753526656e-06,
"loss": 1.0196,
"step": 1832
},
{
"epoch": 0.882682236802504,
"grad_norm": 3.5382659435272217,
"learning_rate": 7.214241439717962e-06,
"loss": 1.0331,
"step": 1833
},
{
"epoch": 0.8831637873954132,
"grad_norm": 1.174157738685608,
"learning_rate": 7.1561811119935425e-06,
"loss": 0.4535,
"step": 1834
},
{
"epoch": 0.8836453379883223,
"grad_norm": 2.7539329528808594,
"learning_rate": 7.098346693366642e-06,
"loss": 0.5103,
"step": 1835
},
{
"epoch": 0.8841268885812316,
"grad_norm": 2.5821847915649414,
"learning_rate": 7.0407383245609136e-06,
"loss": 0.4046,
"step": 1836
},
{
"epoch": 0.8846084391741408,
"grad_norm": 1.5927815437316895,
"learning_rate": 6.983356145749975e-06,
"loss": 0.3623,
"step": 1837
},
{
"epoch": 0.8850899897670499,
"grad_norm": 2.2626142501831055,
"learning_rate": 6.9262002965570835e-06,
"loss": 0.6639,
"step": 1838
},
{
"epoch": 0.8855715403599591,
"grad_norm": 3.217414617538452,
"learning_rate": 6.869270916054782e-06,
"loss": 0.519,
"step": 1839
},
{
"epoch": 0.8860530909528682,
"grad_norm": 1.9122174978256226,
"learning_rate": 6.812568142764575e-06,
"loss": 0.5984,
"step": 1840
},
{
"epoch": 0.8865346415457774,
"grad_norm": 2.481517791748047,
"learning_rate": 6.756092114656587e-06,
"loss": 0.8017,
"step": 1841
},
{
"epoch": 0.8870161921386865,
"grad_norm": 2.714883327484131,
"learning_rate": 6.699842969149195e-06,
"loss": 0.5422,
"step": 1842
},
{
"epoch": 0.8874977427315958,
"grad_norm": 2.3089113235473633,
"learning_rate": 6.64382084310875e-06,
"loss": 0.5783,
"step": 1843
},
{
"epoch": 0.8879792933245049,
"grad_norm": 2.165722608566284,
"learning_rate": 6.5880258728491905e-06,
"loss": 0.354,
"step": 1844
},
{
"epoch": 0.8884608439174141,
"grad_norm": 4.5781426429748535,
"learning_rate": 6.532458194131763e-06,
"loss": 0.8101,
"step": 1845
},
{
"epoch": 0.8889423945103232,
"grad_norm": 3.533600091934204,
"learning_rate": 6.477117942164657e-06,
"loss": 0.9167,
"step": 1846
},
{
"epoch": 0.8894239451032324,
"grad_norm": 1.031320333480835,
"learning_rate": 6.422005251602658e-06,
"loss": 0.4298,
"step": 1847
},
{
"epoch": 0.8899054956961415,
"grad_norm": 2.306194543838501,
"learning_rate": 6.367120256546888e-06,
"loss": 0.4655,
"step": 1848
},
{
"epoch": 0.8903870462890507,
"grad_norm": 1.633102297782898,
"learning_rate": 6.312463090544396e-06,
"loss": 0.4393,
"step": 1849
},
{
"epoch": 0.89086859688196,
"grad_norm": 1.7181764841079712,
"learning_rate": 6.258033886587911e-06,
"loss": 0.8858,
"step": 1850
},
{
"epoch": 0.8913501474748691,
"grad_norm": 2.9164364337921143,
"learning_rate": 6.2038327771154485e-06,
"loss": 0.4769,
"step": 1851
},
{
"epoch": 0.8918316980677783,
"grad_norm": 3.113100290298462,
"learning_rate": 6.1498598940100346e-06,
"loss": 0.7217,
"step": 1852
},
{
"epoch": 0.8923132486606874,
"grad_norm": 2.327969551086426,
"learning_rate": 6.0961153685993646e-06,
"loss": 0.5315,
"step": 1853
},
{
"epoch": 0.8927947992535966,
"grad_norm": 2.7124183177948,
"learning_rate": 6.0425993316554965e-06,
"loss": 0.5386,
"step": 1854
},
{
"epoch": 0.8932763498465057,
"grad_norm": 2.0107133388519287,
"learning_rate": 5.989311913394546e-06,
"loss": 0.4306,
"step": 1855
},
{
"epoch": 0.8937579004394149,
"grad_norm": 1.8808348178863525,
"learning_rate": 5.93625324347632e-06,
"loss": 0.4791,
"step": 1856
},
{
"epoch": 0.894239451032324,
"grad_norm": 3.451119899749756,
"learning_rate": 5.8834234510040335e-06,
"loss": 0.8056,
"step": 1857
},
{
"epoch": 0.8947210016252333,
"grad_norm": 3.1976735591888428,
"learning_rate": 5.830822664523994e-06,
"loss": 0.3994,
"step": 1858
},
{
"epoch": 0.8952025522181424,
"grad_norm": 2.1719510555267334,
"learning_rate": 5.77845101202531e-06,
"loss": 0.738,
"step": 1859
},
{
"epoch": 0.8956841028110516,
"grad_norm": 4.383519172668457,
"learning_rate": 5.726308620939536e-06,
"loss": 0.5899,
"step": 1860
},
{
"epoch": 0.8961656534039607,
"grad_norm": 2.9527339935302734,
"learning_rate": 5.674395618140393e-06,
"loss": 0.3893,
"step": 1861
},
{
"epoch": 0.8966472039968699,
"grad_norm": 4.863363742828369,
"learning_rate": 5.622712129943453e-06,
"loss": 0.6031,
"step": 1862
},
{
"epoch": 0.8971287545897791,
"grad_norm": 2.3513472080230713,
"learning_rate": 5.571258282105829e-06,
"loss": 0.8987,
"step": 1863
},
{
"epoch": 0.8976103051826883,
"grad_norm": 4.060399055480957,
"learning_rate": 5.520034199825841e-06,
"loss": 0.9195,
"step": 1864
},
{
"epoch": 0.8980918557755975,
"grad_norm": 3.192730188369751,
"learning_rate": 5.469040007742776e-06,
"loss": 0.6854,
"step": 1865
},
{
"epoch": 0.8985734063685066,
"grad_norm": 3.811521530151367,
"learning_rate": 5.418275829936537e-06,
"loss": 1.2028,
"step": 1866
},
{
"epoch": 0.8990549569614158,
"grad_norm": 2.9288651943206787,
"learning_rate": 5.36774178992735e-06,
"loss": 0.6378,
"step": 1867
},
{
"epoch": 0.8995365075543249,
"grad_norm": 2.91579008102417,
"learning_rate": 5.317438010675469e-06,
"loss": 0.5374,
"step": 1868
},
{
"epoch": 0.9000180581472341,
"grad_norm": 2.687274217605591,
"learning_rate": 5.267364614580861e-06,
"loss": 0.4775,
"step": 1869
},
{
"epoch": 0.9004996087401432,
"grad_norm": 6.526017189025879,
"learning_rate": 5.217521723482943e-06,
"loss": 0.6156,
"step": 1870
},
{
"epoch": 0.9009811593330525,
"grad_norm": 2.754613161087036,
"learning_rate": 5.167909458660258e-06,
"loss": 0.9845,
"step": 1871
},
{
"epoch": 0.9014627099259616,
"grad_norm": 3.1940438747406006,
"learning_rate": 5.118527940830165e-06,
"loss": 1.0082,
"step": 1872
},
{
"epoch": 0.9019442605188708,
"grad_norm": 1.7706068754196167,
"learning_rate": 5.069377290148602e-06,
"loss": 0.3283,
"step": 1873
},
{
"epoch": 0.9024258111117799,
"grad_norm": 1.1937077045440674,
"learning_rate": 5.020457626209707e-06,
"loss": 0.252,
"step": 1874
},
{
"epoch": 0.9029073617046891,
"grad_norm": 1.5468496084213257,
"learning_rate": 4.971769068045628e-06,
"loss": 0.6309,
"step": 1875
},
{
"epoch": 0.9033889122975982,
"grad_norm": 2.2661030292510986,
"learning_rate": 4.923311734126135e-06,
"loss": 0.6594,
"step": 1876
},
{
"epoch": 0.9038704628905074,
"grad_norm": 1.7421146631240845,
"learning_rate": 4.875085742358432e-06,
"loss": 0.6087,
"step": 1877
},
{
"epoch": 0.9043520134834167,
"grad_norm": 2.4468908309936523,
"learning_rate": 4.827091210086776e-06,
"loss": 0.7757,
"step": 1878
},
{
"epoch": 0.9048335640763258,
"grad_norm": 2.273754596710205,
"learning_rate": 4.779328254092252e-06,
"loss": 1.082,
"step": 1879
},
{
"epoch": 0.905315114669235,
"grad_norm": 2.547562837600708,
"learning_rate": 4.731796990592452e-06,
"loss": 0.5908,
"step": 1880
},
{
"epoch": 0.9057966652621441,
"grad_norm": 1.9437663555145264,
"learning_rate": 4.68449753524125e-06,
"loss": 0.9443,
"step": 1881
},
{
"epoch": 0.9062782158550533,
"grad_norm": 1.4457284212112427,
"learning_rate": 4.637430003128429e-06,
"loss": 0.5939,
"step": 1882
},
{
"epoch": 0.9067597664479624,
"grad_norm": 4.274806976318359,
"learning_rate": 4.5905945087794996e-06,
"loss": 0.756,
"step": 1883
},
{
"epoch": 0.9072413170408716,
"grad_norm": 1.607393503189087,
"learning_rate": 4.543991166155337e-06,
"loss": 0.6922,
"step": 1884
},
{
"epoch": 0.9077228676337807,
"grad_norm": 3.3161227703094482,
"learning_rate": 4.497620088651966e-06,
"loss": 0.7247,
"step": 1885
},
{
"epoch": 0.90820441822669,
"grad_norm": 3.3778584003448486,
"learning_rate": 4.451481389100232e-06,
"loss": 0.5756,
"step": 1886
},
{
"epoch": 0.9086859688195991,
"grad_norm": 4.653063774108887,
"learning_rate": 4.405575179765586e-06,
"loss": 0.6268,
"step": 1887
},
{
"epoch": 0.9091675194125083,
"grad_norm": 1.695256233215332,
"learning_rate": 4.359901572347758e-06,
"loss": 0.8092,
"step": 1888
},
{
"epoch": 0.9096490700054174,
"grad_norm": 2.5265443325042725,
"learning_rate": 4.314460677980537e-06,
"loss": 0.5014,
"step": 1889
},
{
"epoch": 0.9101306205983266,
"grad_norm": 1.132360816001892,
"learning_rate": 4.269252607231422e-06,
"loss": 0.418,
"step": 1890
},
{
"epoch": 0.9106121711912358,
"grad_norm": 1.7842097282409668,
"learning_rate": 4.224277470101445e-06,
"loss": 0.8378,
"step": 1891
},
{
"epoch": 0.9110937217841449,
"grad_norm": 1.7560157775878906,
"learning_rate": 4.179535376024857e-06,
"loss": 0.7296,
"step": 1892
},
{
"epoch": 0.9115752723770542,
"grad_norm": 1.5116153955459595,
"learning_rate": 4.135026433868827e-06,
"loss": 0.7794,
"step": 1893
},
{
"epoch": 0.9120568229699633,
"grad_norm": 2.2078778743743896,
"learning_rate": 4.090750751933248e-06,
"loss": 0.9489,
"step": 1894
},
{
"epoch": 0.9125383735628725,
"grad_norm": 2.9103267192840576,
"learning_rate": 4.046708437950464e-06,
"loss": 0.733,
"step": 1895
},
{
"epoch": 0.9130199241557816,
"grad_norm": 2.259371280670166,
"learning_rate": 4.0028995990849084e-06,
"loss": 0.476,
"step": 1896
},
{
"epoch": 0.9135014747486908,
"grad_norm": 2.4890007972717285,
"learning_rate": 3.95932434193299e-06,
"loss": 0.4656,
"step": 1897
},
{
"epoch": 0.9139830253415999,
"grad_norm": 3.960632562637329,
"learning_rate": 3.915982772522719e-06,
"loss": 0.74,
"step": 1898
},
{
"epoch": 0.9144645759345091,
"grad_norm": 1.7056382894515991,
"learning_rate": 3.872874996313513e-06,
"loss": 0.5293,
"step": 1899
},
{
"epoch": 0.9149461265274182,
"grad_norm": 2.551649332046509,
"learning_rate": 3.830001118195936e-06,
"loss": 0.5079,
"step": 1900
},
{
"epoch": 0.9154276771203275,
"grad_norm": 1.9389688968658447,
"learning_rate": 3.787361242491394e-06,
"loss": 0.3823,
"step": 1901
},
{
"epoch": 0.9159092277132366,
"grad_norm": 1.6648590564727783,
"learning_rate": 3.744955472951928e-06,
"loss": 0.3093,
"step": 1902
},
{
"epoch": 0.9163907783061458,
"grad_norm": 3.3412230014801025,
"learning_rate": 3.702783912759955e-06,
"loss": 0.8416,
"step": 1903
},
{
"epoch": 0.916872328899055,
"grad_norm": 1.401397943496704,
"learning_rate": 3.660846664528006e-06,
"loss": 0.544,
"step": 1904
},
{
"epoch": 0.9173538794919641,
"grad_norm": 3.2457292079925537,
"learning_rate": 3.6191438302984772e-06,
"loss": 0.7385,
"step": 1905
},
{
"epoch": 0.9178354300848733,
"grad_norm": 2.4073991775512695,
"learning_rate": 3.577675511543388e-06,
"loss": 0.5313,
"step": 1906
},
{
"epoch": 0.9183169806777824,
"grad_norm": 1.6431150436401367,
"learning_rate": 3.5364418091641373e-06,
"loss": 0.9428,
"step": 1907
},
{
"epoch": 0.9187985312706917,
"grad_norm": 2.662550926208496,
"learning_rate": 3.495442823491224e-06,
"loss": 0.8064,
"step": 1908
},
{
"epoch": 0.9192800818636008,
"grad_norm": 2.9634780883789062,
"learning_rate": 3.4546786542840605e-06,
"loss": 0.646,
"step": 1909
},
{
"epoch": 0.91976163245651,
"grad_norm": 2.133837938308716,
"learning_rate": 3.4141494007306816e-06,
"loss": 0.419,
"step": 1910
},
{
"epoch": 0.9202431830494191,
"grad_norm": 1.918086051940918,
"learning_rate": 3.373855161447548e-06,
"loss": 0.8063,
"step": 1911
},
{
"epoch": 0.9207247336423283,
"grad_norm": 1.7618401050567627,
"learning_rate": 3.333796034479242e-06,
"loss": 0.5835,
"step": 1912
},
{
"epoch": 0.9212062842352374,
"grad_norm": 3.4800868034362793,
"learning_rate": 3.293972117298294e-06,
"loss": 0.7598,
"step": 1913
},
{
"epoch": 0.9216878348281466,
"grad_norm": 2.787062168121338,
"learning_rate": 3.2543835068049255e-06,
"loss": 0.4116,
"step": 1914
},
{
"epoch": 0.9221693854210558,
"grad_norm": 2.82818341255188,
"learning_rate": 3.21503029932676e-06,
"loss": 0.7228,
"step": 1915
},
{
"epoch": 0.922650936013965,
"grad_norm": 1.5212979316711426,
"learning_rate": 3.1759125906186793e-06,
"loss": 0.4513,
"step": 1916
},
{
"epoch": 0.9231324866068741,
"grad_norm": 3.6214406490325928,
"learning_rate": 3.137030475862535e-06,
"loss": 1.0936,
"step": 1917
},
{
"epoch": 0.9236140371997833,
"grad_norm": 2.9912490844726562,
"learning_rate": 3.098384049666925e-06,
"loss": 0.3953,
"step": 1918
},
{
"epoch": 0.9240955877926925,
"grad_norm": 1.5457743406295776,
"learning_rate": 3.059973406066963e-06,
"loss": 0.4376,
"step": 1919
},
{
"epoch": 0.9245771383856016,
"grad_norm": 3.1872262954711914,
"learning_rate": 3.0217986385240537e-06,
"loss": 0.5669,
"step": 1920
},
{
"epoch": 0.9250586889785108,
"grad_norm": 2.594231605529785,
"learning_rate": 2.983859839925662e-06,
"loss": 0.6722,
"step": 1921
},
{
"epoch": 0.92554023957142,
"grad_norm": 2.3845884799957275,
"learning_rate": 2.94615710258509e-06,
"loss": 0.8501,
"step": 1922
},
{
"epoch": 0.9260217901643292,
"grad_norm": 3.532987117767334,
"learning_rate": 2.908690518241275e-06,
"loss": 0.544,
"step": 1923
},
{
"epoch": 0.9265033407572383,
"grad_norm": 2.7089667320251465,
"learning_rate": 2.8714601780584937e-06,
"loss": 0.2766,
"step": 1924
},
{
"epoch": 0.9269848913501475,
"grad_norm": 2.490257740020752,
"learning_rate": 2.834466172626238e-06,
"loss": 0.2841,
"step": 1925
},
{
"epoch": 0.9274664419430566,
"grad_norm": 1.7176368236541748,
"learning_rate": 2.7977085919589254e-06,
"loss": 0.3851,
"step": 1926
},
{
"epoch": 0.9279479925359658,
"grad_norm": 2.444751024246216,
"learning_rate": 2.76118752549569e-06,
"loss": 0.4477,
"step": 1927
},
{
"epoch": 0.9284295431288749,
"grad_norm": 1.8582123517990112,
"learning_rate": 2.7249030621001924e-06,
"loss": 0.6806,
"step": 1928
},
{
"epoch": 0.9289110937217842,
"grad_norm": 1.904004454612732,
"learning_rate": 2.688855290060399e-06,
"loss": 0.619,
"step": 1929
},
{
"epoch": 0.9293926443146933,
"grad_norm": 2.0747194290161133,
"learning_rate": 2.653044297088314e-06,
"loss": 0.7597,
"step": 1930
},
{
"epoch": 0.9298741949076025,
"grad_norm": 2.5333268642425537,
"learning_rate": 2.6174701703198468e-06,
"loss": 0.792,
"step": 1931
},
{
"epoch": 0.9303557455005117,
"grad_norm": 2.685360908508301,
"learning_rate": 2.5821329963145347e-06,
"loss": 0.7001,
"step": 1932
},
{
"epoch": 0.9308372960934208,
"grad_norm": 3.6770102977752686,
"learning_rate": 2.547032861055376e-06,
"loss": 0.6245,
"step": 1933
},
{
"epoch": 0.93131884668633,
"grad_norm": 1.6112256050109863,
"learning_rate": 2.5121698499485757e-06,
"loss": 0.6619,
"step": 1934
},
{
"epoch": 0.9318003972792391,
"grad_norm": 2.173598289489746,
"learning_rate": 2.4775440478233993e-06,
"loss": 0.7539,
"step": 1935
},
{
"epoch": 0.9322819478721484,
"grad_norm": 2.8748226165771484,
"learning_rate": 2.4431555389319074e-06,
"loss": 0.6625,
"step": 1936
},
{
"epoch": 0.9327634984650575,
"grad_norm": 2.1532397270202637,
"learning_rate": 2.4090044069487784e-06,
"loss": 0.9355,
"step": 1937
},
{
"epoch": 0.9332450490579667,
"grad_norm": 4.027218341827393,
"learning_rate": 2.3750907349711084e-06,
"loss": 0.5421,
"step": 1938
},
{
"epoch": 0.9337265996508758,
"grad_norm": 2.222975015640259,
"learning_rate": 2.3414146055182106e-06,
"loss": 0.8391,
"step": 1939
},
{
"epoch": 0.934208150243785,
"grad_norm": 3.7854230403900146,
"learning_rate": 2.307976100531384e-06,
"loss": 0.4316,
"step": 1940
},
{
"epoch": 0.9346897008366941,
"grad_norm": 1.5810474157333374,
"learning_rate": 2.274775301373744e-06,
"loss": 0.5887,
"step": 1941
},
{
"epoch": 0.9351712514296033,
"grad_norm": 1.0357911586761475,
"learning_rate": 2.241812288830003e-06,
"loss": 0.594,
"step": 1942
},
{
"epoch": 0.9356528020225124,
"grad_norm": 3.268486261367798,
"learning_rate": 2.2090871431063253e-06,
"loss": 0.4582,
"step": 1943
},
{
"epoch": 0.9361343526154217,
"grad_norm": 2.9726314544677734,
"learning_rate": 2.176599943830071e-06,
"loss": 0.8042,
"step": 1944
},
{
"epoch": 0.9366159032083309,
"grad_norm": 1.949102520942688,
"learning_rate": 2.144350770049597e-06,
"loss": 0.4591,
"step": 1945
},
{
"epoch": 0.93709745380124,
"grad_norm": 2.1186540126800537,
"learning_rate": 2.112339700234156e-06,
"loss": 0.8347,
"step": 1946
},
{
"epoch": 0.9375790043941492,
"grad_norm": 4.00067138671875,
"learning_rate": 2.0805668122735767e-06,
"loss": 0.5937,
"step": 1947
},
{
"epoch": 0.9380605549870583,
"grad_norm": 2.5911052227020264,
"learning_rate": 2.0490321834781833e-06,
"loss": 0.5266,
"step": 1948
},
{
"epoch": 0.9385421055799675,
"grad_norm": 1.883213758468628,
"learning_rate": 2.0177358905785537e-06,
"loss": 0.6082,
"step": 1949
},
{
"epoch": 0.9390236561728766,
"grad_norm": 1.9823429584503174,
"learning_rate": 1.986678009725329e-06,
"loss": 0.5017,
"step": 1950
},
{
"epoch": 0.9395052067657859,
"grad_norm": 1.3896251916885376,
"learning_rate": 1.955858616489059e-06,
"loss": 0.8347,
"step": 1951
},
{
"epoch": 0.939986757358695,
"grad_norm": 6.104365348815918,
"learning_rate": 1.9252777858599915e-06,
"loss": 0.7993,
"step": 1952
},
{
"epoch": 0.9404683079516042,
"grad_norm": 2.0376524925231934,
"learning_rate": 1.8949355922479151e-06,
"loss": 0.4812,
"step": 1953
},
{
"epoch": 0.9409498585445133,
"grad_norm": 4.846323013305664,
"learning_rate": 1.8648321094819287e-06,
"loss": 0.5424,
"step": 1954
},
{
"epoch": 0.9414314091374225,
"grad_norm": 1.7472106218338013,
"learning_rate": 1.8349674108103288e-06,
"loss": 0.56,
"step": 1955
},
{
"epoch": 0.9419129597303316,
"grad_norm": 1.900530219078064,
"learning_rate": 1.8053415689003872e-06,
"loss": 0.6446,
"step": 1956
},
{
"epoch": 0.9423945103232408,
"grad_norm": 4.014410972595215,
"learning_rate": 1.7759546558381967e-06,
"loss": 0.4733,
"step": 1957
},
{
"epoch": 0.94287606091615,
"grad_norm": 1.2827335596084595,
"learning_rate": 1.7468067431284707e-06,
"loss": 0.4226,
"step": 1958
},
{
"epoch": 0.9433576115090592,
"grad_norm": 1.8222554922103882,
"learning_rate": 1.7178979016943764e-06,
"loss": 0.3871,
"step": 1959
},
{
"epoch": 0.9438391621019684,
"grad_norm": 2.0032083988189697,
"learning_rate": 1.6892282018773908e-06,
"loss": 0.6295,
"step": 1960
},
{
"epoch": 0.9443207126948775,
"grad_norm": 3.710329055786133,
"learning_rate": 1.6607977134370789e-06,
"loss": 1.0069,
"step": 1961
},
{
"epoch": 0.9448022632877867,
"grad_norm": 1.8269236087799072,
"learning_rate": 1.6326065055510043e-06,
"loss": 0.8347,
"step": 1962
},
{
"epoch": 0.9452838138806958,
"grad_norm": 2.4813623428344727,
"learning_rate": 1.6046546468144407e-06,
"loss": 0.7641,
"step": 1963
},
{
"epoch": 0.945765364473605,
"grad_norm": 3.3731300830841064,
"learning_rate": 1.576942205240317e-06,
"loss": 0.5967,
"step": 1964
},
{
"epoch": 0.9462469150665141,
"grad_norm": 2.570126533508301,
"learning_rate": 1.5494692482590057e-06,
"loss": 0.5784,
"step": 1965
},
{
"epoch": 0.9467284656594234,
"grad_norm": 2.100484609603882,
"learning_rate": 1.522235842718156e-06,
"loss": 0.4698,
"step": 1966
},
{
"epoch": 0.9472100162523325,
"grad_norm": 2.475597620010376,
"learning_rate": 1.4952420548825285e-06,
"loss": 0.4489,
"step": 1967
},
{
"epoch": 0.9476915668452417,
"grad_norm": 2.690720796585083,
"learning_rate": 1.468487950433839e-06,
"loss": 0.7515,
"step": 1968
},
{
"epoch": 0.9481731174381508,
"grad_norm": 1.925948977470398,
"learning_rate": 1.441973594470636e-06,
"loss": 0.5974,
"step": 1969
},
{
"epoch": 0.94865466803106,
"grad_norm": 3.1811184883117676,
"learning_rate": 1.415699051508068e-06,
"loss": 0.4421,
"step": 1970
},
{
"epoch": 0.9491362186239691,
"grad_norm": 1.8990108966827393,
"learning_rate": 1.3896643854777847e-06,
"loss": 0.641,
"step": 1971
},
{
"epoch": 0.9496177692168783,
"grad_norm": 0.7047016024589539,
"learning_rate": 1.3638696597277679e-06,
"loss": 0.273,
"step": 1972
},
{
"epoch": 0.9500993198097876,
"grad_norm": 1.2902841567993164,
"learning_rate": 1.3383149370221449e-06,
"loss": 0.382,
"step": 1973
},
{
"epoch": 0.9505808704026967,
"grad_norm": 2.200690984725952,
"learning_rate": 1.313000279541121e-06,
"loss": 0.677,
"step": 1974
},
{
"epoch": 0.9510624209956059,
"grad_norm": 1.2776658535003662,
"learning_rate": 1.287925748880703e-06,
"loss": 0.4875,
"step": 1975
},
{
"epoch": 0.951543971588515,
"grad_norm": 3.179433822631836,
"learning_rate": 1.2630914060526522e-06,
"loss": 0.7287,
"step": 1976
},
{
"epoch": 0.9520255221814242,
"grad_norm": 1.7882678508758545,
"learning_rate": 1.2384973114843101e-06,
"loss": 0.6366,
"step": 1977
},
{
"epoch": 0.9525070727743333,
"grad_norm": 2.128645420074463,
"learning_rate": 1.2141435250184185e-06,
"loss": 0.3159,
"step": 1978
},
{
"epoch": 0.9529886233672425,
"grad_norm": 2.0305099487304688,
"learning_rate": 1.1900301059130093e-06,
"loss": 0.7256,
"step": 1979
},
{
"epoch": 0.9534701739601517,
"grad_norm": 3.4359750747680664,
"learning_rate": 1.1661571128412596e-06,
"loss": 0.5136,
"step": 1980
},
{
"epoch": 0.9539517245530609,
"grad_norm": 2.7383172512054443,
"learning_rate": 1.142524603891315e-06,
"loss": 1.0318,
"step": 1981
},
{
"epoch": 0.95443327514597,
"grad_norm": 2.489351511001587,
"learning_rate": 1.1191326365661892e-06,
"loss": 0.6141,
"step": 1982
},
{
"epoch": 0.9549148257388792,
"grad_norm": 2.3964600563049316,
"learning_rate": 1.0959812677835968e-06,
"loss": 0.4736,
"step": 1983
},
{
"epoch": 0.9553963763317883,
"grad_norm": 5.277529716491699,
"learning_rate": 1.0730705538758322e-06,
"loss": 0.8058,
"step": 1984
},
{
"epoch": 0.9558779269246975,
"grad_norm": 2.238236904144287,
"learning_rate": 1.0504005505896141e-06,
"loss": 0.5026,
"step": 1985
},
{
"epoch": 0.9563594775176067,
"grad_norm": 1.928312063217163,
"learning_rate": 1.0279713130859514e-06,
"loss": 0.8141,
"step": 1986
},
{
"epoch": 0.9568410281105159,
"grad_norm": 2.1432981491088867,
"learning_rate": 1.005782895940055e-06,
"loss": 1.2314,
"step": 1987
},
{
"epoch": 0.9573225787034251,
"grad_norm": 5.187903881072998,
"learning_rate": 9.838353531411272e-07,
"loss": 0.5629,
"step": 1988
},
{
"epoch": 0.9578041292963342,
"grad_norm": 2.8405728340148926,
"learning_rate": 9.62128738092294e-07,
"loss": 0.5505,
"step": 1989
},
{
"epoch": 0.9582856798892434,
"grad_norm": 2.8619463443756104,
"learning_rate": 9.406631036104508e-07,
"loss": 0.3591,
"step": 1990
},
{
"epoch": 0.9587672304821525,
"grad_norm": 2.144484758377075,
"learning_rate": 9.194385019261287e-07,
"loss": 0.7098,
"step": 1991
},
{
"epoch": 0.9592487810750617,
"grad_norm": 1.5289490222930908,
"learning_rate": 8.984549846833612e-07,
"loss": 0.5621,
"step": 1992
},
{
"epoch": 0.9597303316679708,
"grad_norm": 1.5724802017211914,
"learning_rate": 8.777126029396065e-07,
"loss": 0.3373,
"step": 1993
},
{
"epoch": 0.9602118822608801,
"grad_norm": 3.027939796447754,
"learning_rate": 8.572114071655479e-07,
"loss": 0.4875,
"step": 1994
},
{
"epoch": 0.9606934328537892,
"grad_norm": 2.6502504348754883,
"learning_rate": 8.369514472450379e-07,
"loss": 0.5928,
"step": 1995
},
{
"epoch": 0.9611749834466984,
"grad_norm": 1.704206943511963,
"learning_rate": 8.169327724749543e-07,
"loss": 0.642,
"step": 1996
},
{
"epoch": 0.9616565340396075,
"grad_norm": 2.1913046836853027,
"learning_rate": 7.971554315650442e-07,
"loss": 0.6125,
"step": 1997
},
{
"epoch": 0.9621380846325167,
"grad_norm": 2.5087268352508545,
"learning_rate": 7.776194726378583e-07,
"loss": 0.7641,
"step": 1998
},
{
"epoch": 0.9626196352254258,
"grad_norm": 2.2763671875,
"learning_rate": 7.583249432286277e-07,
"loss": 0.7061,
"step": 1999
},
{
"epoch": 0.963101185818335,
"grad_norm": 3.4482977390289307,
"learning_rate": 7.392718902850981e-07,
"loss": 0.7786,
"step": 2000
},
{
"epoch": 0.9635827364112443,
"grad_norm": 3.177415132522583,
"learning_rate": 7.204603601674853e-07,
"loss": 0.6386,
"step": 2001
},
{
"epoch": 0.9640642870041534,
"grad_norm": 3.915245771408081,
"learning_rate": 7.018903986483083e-07,
"loss": 0.5658,
"step": 2002
},
{
"epoch": 0.9645458375970626,
"grad_norm": 1.8997553586959839,
"learning_rate": 6.835620509122897e-07,
"loss": 0.571,
"step": 2003
},
{
"epoch": 0.9650273881899717,
"grad_norm": 3.1082587242126465,
"learning_rate": 6.65475361556267e-07,
"loss": 0.6074,
"step": 2004
},
{
"epoch": 0.9655089387828809,
"grad_norm": 2.994706153869629,
"learning_rate": 6.47630374589081e-07,
"loss": 0.7855,
"step": 2005
},
{
"epoch": 0.96599048937579,
"grad_norm": 1.8663792610168457,
"learning_rate": 6.300271334314434e-07,
"loss": 0.2545,
"step": 2006
},
{
"epoch": 0.9664720399686992,
"grad_norm": 1.586185097694397,
"learning_rate": 6.126656809158359e-07,
"loss": 0.8023,
"step": 2007
},
{
"epoch": 0.9669535905616083,
"grad_norm": 1.375166416168213,
"learning_rate": 5.955460592864337e-07,
"loss": 0.4675,
"step": 2008
},
{
"epoch": 0.9674351411545176,
"grad_norm": 2.125596761703491,
"learning_rate": 5.78668310198982e-07,
"loss": 0.7644,
"step": 2009
},
{
"epoch": 0.9679166917474267,
"grad_norm": 1.8324317932128906,
"learning_rate": 5.620324747207084e-07,
"loss": 0.4852,
"step": 2010
},
{
"epoch": 0.9683982423403359,
"grad_norm": 1.296980619430542,
"learning_rate": 5.456385933301777e-07,
"loss": 0.1305,
"step": 2011
},
{
"epoch": 0.968879792933245,
"grad_norm": 3.3267123699188232,
"learning_rate": 5.294867059172592e-07,
"loss": 0.744,
"step": 2012
},
{
"epoch": 0.9693613435261542,
"grad_norm": 1.4626914262771606,
"learning_rate": 5.135768517829819e-07,
"loss": 0.6947,
"step": 2013
},
{
"epoch": 0.9698428941190634,
"grad_norm": 2.6075901985168457,
"learning_rate": 4.979090696394795e-07,
"loss": 0.4833,
"step": 2014
},
{
"epoch": 0.9703244447119725,
"grad_norm": 1.679829478263855,
"learning_rate": 4.824833976098453e-07,
"loss": 0.6612,
"step": 2015
},
{
"epoch": 0.9708059953048818,
"grad_norm": 3.0018815994262695,
"learning_rate": 4.6729987322807757e-07,
"loss": 0.8277,
"step": 2016
},
{
"epoch": 0.9712875458977909,
"grad_norm": 1.4263994693756104,
"learning_rate": 4.523585334389679e-07,
"loss": 0.6912,
"step": 2017
},
{
"epoch": 0.9717690964907001,
"grad_norm": 3.329765558242798,
"learning_rate": 4.3765941459804614e-07,
"loss": 0.7018,
"step": 2018
},
{
"epoch": 0.9722506470836092,
"grad_norm": 3.2026407718658447,
"learning_rate": 4.232025524714356e-07,
"loss": 0.7325,
"step": 2019
},
{
"epoch": 0.9727321976765184,
"grad_norm": 2.716898202896118,
"learning_rate": 4.0898798223582e-07,
"loss": 0.6388,
"step": 2020
},
{
"epoch": 0.9732137482694275,
"grad_norm": 1.2587398290634155,
"learning_rate": 3.950157384783104e-07,
"loss": 0.7916,
"step": 2021
},
{
"epoch": 0.9736952988623367,
"grad_norm": 1.3127981424331665,
"learning_rate": 3.8128585519640046e-07,
"loss": 0.4851,
"step": 2022
},
{
"epoch": 0.9741768494552459,
"grad_norm": 2.5416173934936523,
"learning_rate": 3.677983657978779e-07,
"loss": 0.6746,
"step": 2023
},
{
"epoch": 0.9746584000481551,
"grad_norm": 2.9663803577423096,
"learning_rate": 3.545533031007131e-07,
"loss": 0.7857,
"step": 2024
},
{
"epoch": 0.9751399506410642,
"grad_norm": 4.189087867736816,
"learning_rate": 3.415506993330153e-07,
"loss": 0.4633,
"step": 2025
},
{
"epoch": 0.9756215012339734,
"grad_norm": 3.007153034210205,
"learning_rate": 3.2879058613292105e-07,
"loss": 0.64,
"step": 2026
},
{
"epoch": 0.9761030518268826,
"grad_norm": 1.7615995407104492,
"learning_rate": 3.1627299454856095e-07,
"loss": 0.6468,
"step": 2027
},
{
"epoch": 0.9765846024197917,
"grad_norm": 3.9861655235290527,
"learning_rate": 3.0399795503793793e-07,
"loss": 1.0444,
"step": 2028
},
{
"epoch": 0.9770661530127009,
"grad_norm": 1.9470893144607544,
"learning_rate": 2.9196549746888235e-07,
"loss": 0.516,
"step": 2029
},
{
"epoch": 0.97754770360561,
"grad_norm": 1.5506056547164917,
"learning_rate": 2.801756511189524e-07,
"loss": 0.2129,
"step": 2030
},
{
"epoch": 0.9780292541985193,
"grad_norm": 4.4033331871032715,
"learning_rate": 2.686284446754006e-07,
"loss": 0.7624,
"step": 2031
},
{
"epoch": 0.9785108047914284,
"grad_norm": 2.394306182861328,
"learning_rate": 2.573239062350963e-07,
"loss": 0.8467,
"step": 2032
},
{
"epoch": 0.9789923553843376,
"grad_norm": 2.18649959564209,
"learning_rate": 2.4626206330440326e-07,
"loss": 0.9826,
"step": 2033
},
{
"epoch": 0.9794739059772467,
"grad_norm": 2.4696927070617676,
"learning_rate": 2.3544294279918e-07,
"loss": 0.6252,
"step": 2034
},
{
"epoch": 0.9799554565701559,
"grad_norm": 0.9971011877059937,
"learning_rate": 2.2486657104471286e-07,
"loss": 0.6913,
"step": 2035
},
{
"epoch": 0.980437007163065,
"grad_norm": 2.627206563949585,
"learning_rate": 2.1453297377557191e-07,
"loss": 0.3573,
"step": 2036
},
{
"epoch": 0.9809185577559743,
"grad_norm": 2.772587299346924,
"learning_rate": 2.044421761356552e-07,
"loss": 0.8796,
"step": 2037
},
{
"epoch": 0.9814001083488834,
"grad_norm": 1.3121188879013062,
"learning_rate": 1.9459420267804452e-07,
"loss": 0.7171,
"step": 2038
},
{
"epoch": 0.9818816589417926,
"grad_norm": 3.698637008666992,
"learning_rate": 1.8498907736499426e-07,
"loss": 0.6321,
"step": 2039
},
{
"epoch": 0.9823632095347017,
"grad_norm": 2.19526743888855,
"learning_rate": 1.7562682356786487e-07,
"loss": 0.961,
"step": 2040
},
{
"epoch": 0.9828447601276109,
"grad_norm": 3.142582893371582,
"learning_rate": 1.665074640670228e-07,
"loss": 0.9126,
"step": 2041
},
{
"epoch": 0.9833263107205201,
"grad_norm": 2.962191343307495,
"learning_rate": 1.576310210518517e-07,
"loss": 0.566,
"step": 2042
},
{
"epoch": 0.9838078613134292,
"grad_norm": 2.7862355709075928,
"learning_rate": 1.489975161206636e-07,
"loss": 0.5524,
"step": 2043
},
{
"epoch": 0.9842894119063385,
"grad_norm": 2.1677451133728027,
"learning_rate": 1.406069702806323e-07,
"loss": 0.4326,
"step": 2044
},
{
"epoch": 0.9847709624992476,
"grad_norm": 2.6661462783813477,
"learning_rate": 1.324594039477822e-07,
"loss": 1.088,
"step": 2045
},
{
"epoch": 0.9852525130921568,
"grad_norm": 1.7038089036941528,
"learning_rate": 1.2455483694689962e-07,
"loss": 0.8435,
"step": 2046
},
{
"epoch": 0.9857340636850659,
"grad_norm": 2.608567237854004,
"learning_rate": 1.1689328851151038e-07,
"loss": 0.7363,
"step": 2047
},
{
"epoch": 0.9862156142779751,
"grad_norm": 1.127517580986023,
"learning_rate": 1.0947477728381339e-07,
"loss": 0.9246,
"step": 2048
},
{
"epoch": 0.9866971648708842,
"grad_norm": 3.015097141265869,
"learning_rate": 1.0229932131465836e-07,
"loss": 0.8423,
"step": 2049
},
{
"epoch": 0.9871787154637934,
"grad_norm": 2.7446672916412354,
"learning_rate": 9.536693806347919e-08,
"loss": 1.2947,
"step": 2050
},
{
"epoch": 0.9876602660567025,
"grad_norm": 2.5686793327331543,
"learning_rate": 8.867764439826065e-08,
"loss": 0.6687,
"step": 2051
},
{
"epoch": 0.9881418166496118,
"grad_norm": 1.1108747720718384,
"learning_rate": 8.223145659550513e-08,
"loss": 0.7631,
"step": 2052
},
{
"epoch": 0.9886233672425209,
"grad_norm": 2.3981330394744873,
"learning_rate": 7.602839034017706e-08,
"loss": 0.6197,
"step": 2053
},
{
"epoch": 0.9891049178354301,
"grad_norm": 3.0359103679656982,
"learning_rate": 7.006846072568074e-08,
"loss": 0.5404,
"step": 2054
},
{
"epoch": 0.9895864684283393,
"grad_norm": 2.437429189682007,
"learning_rate": 6.435168225381594e-08,
"loss": 0.5185,
"step": 2055
},
{
"epoch": 0.9900680190212484,
"grad_norm": 1.1326676607131958,
"learning_rate": 5.887806883474456e-08,
"loss": 0.2804,
"step": 2056
},
{
"epoch": 0.9905495696141576,
"grad_norm": 3.8812267780303955,
"learning_rate": 5.364763378694626e-08,
"loss": 0.6195,
"step": 2057
},
{
"epoch": 0.9910311202070667,
"grad_norm": 2.3206794261932373,
"learning_rate": 4.8660389837207334e-08,
"loss": 0.9294,
"step": 2058
},
{
"epoch": 0.991512670799976,
"grad_norm": 1.1168718338012695,
"learning_rate": 4.391634912056519e-08,
"loss": 0.8106,
"step": 2059
},
{
"epoch": 0.9919942213928851,
"grad_norm": 1.8295259475708008,
"learning_rate": 3.9415523180297286e-08,
"loss": 0.4507,
"step": 2060
},
{
"epoch": 0.9924757719857943,
"grad_norm": 2.970205068588257,
"learning_rate": 3.515792296789888e-08,
"loss": 0.3523,
"step": 2061
},
{
"epoch": 0.9929573225787034,
"grad_norm": 1.4164469242095947,
"learning_rate": 3.114355884301645e-08,
"loss": 0.7157,
"step": 2062
},
{
"epoch": 0.9934388731716126,
"grad_norm": 2.132336378097534,
"learning_rate": 2.7372440573469883e-08,
"loss": 0.5431,
"step": 2063
},
{
"epoch": 0.9939204237645217,
"grad_norm": 1.5810383558273315,
"learning_rate": 2.384457733520806e-08,
"loss": 0.2308,
"step": 2064
},
{
"epoch": 0.9944019743574309,
"grad_norm": 1.8615809679031372,
"learning_rate": 2.0559977712297785e-08,
"loss": 0.3219,
"step": 2065
},
{
"epoch": 0.99488352495034,
"grad_norm": 3.2156498432159424,
"learning_rate": 1.7518649696857126e-08,
"loss": 0.835,
"step": 2066
},
{
"epoch": 0.9953650755432493,
"grad_norm": 1.9598007202148438,
"learning_rate": 1.4720600689110963e-08,
"loss": 0.536,
"step": 2067
},
{
"epoch": 0.9958466261361584,
"grad_norm": 0.9099422097206116,
"learning_rate": 1.216583749731326e-08,
"loss": 0.3856,
"step": 2068
},
{
"epoch": 0.9963281767290676,
"grad_norm": 2.4464566707611084,
"learning_rate": 9.854366337758159e-09,
"loss": 0.9042,
"step": 2069
},
{
"epoch": 0.9968097273219768,
"grad_norm": 1.8802378177642822,
"learning_rate": 7.786192834746686e-09,
"loss": 0.8939,
"step": 2070
},
{
"epoch": 0.9972912779148859,
"grad_norm": 2.2916386127471924,
"learning_rate": 5.961322020608951e-09,
"loss": 0.7196,
"step": 2071
},
{
"epoch": 0.9977728285077951,
"grad_norm": 0.9568644762039185,
"learning_rate": 4.3797583356264275e-09,
"loss": 0.1672,
"step": 2072
},
{
"epoch": 0.9982543791007042,
"grad_norm": 1.4971035718917847,
"learning_rate": 3.0415056281096755e-09,
"loss": 0.4794,
"step": 2073
},
{
"epoch": 0.9987359296936135,
"grad_norm": 3.3616390228271484,
"learning_rate": 1.9465671543095197e-09,
"loss": 1.0098,
"step": 2074
},
{
"epoch": 0.9992174802865226,
"grad_norm": 3.415937662124634,
"learning_rate": 1.094945578439255e-09,
"loss": 1.0445,
"step": 2075
},
{
"epoch": 0.9996990308794318,
"grad_norm": 3.9460153579711914,
"learning_rate": 4.866429726857469e-10,
"loss": 1.0316,
"step": 2076
},
{
"epoch": 1.0,
"grad_norm": 2.570456027984619,
"learning_rate": 1.2166081717612797e-10,
"loss": 0.9203,
"step": 2077
},
{
"epoch": 1.0,
"step": 2077,
"total_flos": 6.565805779711427e+17,
"train_loss": 0.7756532824952038,
"train_runtime": 4587.6806,
"train_samples_per_second": 7.242,
"train_steps_per_second": 0.453
}
],
"logging_steps": 1,
"max_steps": 2077,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 2400000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.565805779711427e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}