| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9958368026644462, |
| "eval_steps": 500, |
| "global_step": 600, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0033305578684429643, |
| "grad_norm": 2.1135175063937415, |
| "learning_rate": 1.3333333333333334e-07, |
| "loss": 1.6413, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.01, |
| "memory/max_mem_allocated(gib)": 56.7, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.006661115736885929, |
| "grad_norm": 2.0196598114735065, |
| "learning_rate": 2.6666666666666667e-07, |
| "loss": 1.6382, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.009991673605328892, |
| "grad_norm": 2.037892565480129, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.6536, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.013322231473771857, |
| "grad_norm": 1.980939710918612, |
| "learning_rate": 5.333333333333333e-07, |
| "loss": 1.6712, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01665278934221482, |
| "grad_norm": 2.0553629965683196, |
| "learning_rate": 6.666666666666666e-07, |
| "loss": 1.591, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.019983347210657785, |
| "grad_norm": 2.1321442384194493, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.6275, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02331390507910075, |
| "grad_norm": 2.0224554441924147, |
| "learning_rate": 9.333333333333333e-07, |
| "loss": 1.6802, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.026644462947543714, |
| "grad_norm": 2.0657857283218144, |
| "learning_rate": 1.0666666666666667e-06, |
| "loss": 1.5768, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02997502081598668, |
| "grad_norm": 2.0104233987359206, |
| "learning_rate": 1.2e-06, |
| "loss": 1.6026, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.03330557868442964, |
| "grad_norm": 2.098692014200002, |
| "learning_rate": 1.3333333333333332e-06, |
| "loss": 1.682, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03663613655287261, |
| "grad_norm": 2.0879014611232116, |
| "learning_rate": 1.4666666666666667e-06, |
| "loss": 1.6368, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03996669442131557, |
| "grad_norm": 2.0701872996726443, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.629, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.04329725228975854, |
| "grad_norm": 2.105064067100562, |
| "learning_rate": 1.7333333333333332e-06, |
| "loss": 1.6568, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0466278101582015, |
| "grad_norm": 2.1084872575258733, |
| "learning_rate": 1.8666666666666667e-06, |
| "loss": 1.597, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.04995836802664446, |
| "grad_norm": 2.0616161807879965, |
| "learning_rate": 2e-06, |
| "loss": 1.6008, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05328892589508743, |
| "grad_norm": 1.92970469468585, |
| "learning_rate": 2.1333333333333334e-06, |
| "loss": 1.6815, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.05661948376353039, |
| "grad_norm": 2.0527427262697855, |
| "learning_rate": 2.266666666666667e-06, |
| "loss": 1.6873, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.05995004163197336, |
| "grad_norm": 1.9622305052083537, |
| "learning_rate": 2.4e-06, |
| "loss": 1.6334, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.06328059950041633, |
| "grad_norm": 1.9979390122219929, |
| "learning_rate": 2.533333333333333e-06, |
| "loss": 1.6623, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.06661115736885928, |
| "grad_norm": 2.0311968068371367, |
| "learning_rate": 2.6666666666666664e-06, |
| "loss": 1.607, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06994171523730225, |
| "grad_norm": 1.968344786501615, |
| "learning_rate": 2.8e-06, |
| "loss": 1.6087, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.07327227310574522, |
| "grad_norm": 2.1145916019697952, |
| "learning_rate": 2.9333333333333333e-06, |
| "loss": 1.5926, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.07660283097418817, |
| "grad_norm": 2.0129475295050496, |
| "learning_rate": 3.066666666666667e-06, |
| "loss": 1.6171, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.07993338884263114, |
| "grad_norm": 1.8817164699193898, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 1.6552, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.08326394671107411, |
| "grad_norm": 1.9306634203997992, |
| "learning_rate": 3.333333333333333e-06, |
| "loss": 1.6288, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08659450457951708, |
| "grad_norm": 1.8839715974459492, |
| "learning_rate": 3.4666666666666664e-06, |
| "loss": 1.5772, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.08992506244796003, |
| "grad_norm": 1.9004207576591563, |
| "learning_rate": 3.6e-06, |
| "loss": 1.6019, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.093255620316403, |
| "grad_norm": 1.8508009396241183, |
| "learning_rate": 3.7333333333333333e-06, |
| "loss": 1.6347, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.09658617818484597, |
| "grad_norm": 1.6521817439090796, |
| "learning_rate": 3.866666666666666e-06, |
| "loss": 1.6425, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.09991673605328892, |
| "grad_norm": 1.5825237347457706, |
| "learning_rate": 4e-06, |
| "loss": 1.4999, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10324729392173189, |
| "grad_norm": 1.4406934972277887, |
| "learning_rate": 4.133333333333333e-06, |
| "loss": 1.537, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.10657785179017486, |
| "grad_norm": 1.3686252476380623, |
| "learning_rate": 4.266666666666667e-06, |
| "loss": 1.5054, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.10990840965861781, |
| "grad_norm": 1.190989973623068, |
| "learning_rate": 4.399999999999999e-06, |
| "loss": 1.5673, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.11323896752706078, |
| "grad_norm": 1.0921718147815354, |
| "learning_rate": 4.533333333333334e-06, |
| "loss": 1.5383, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.11656952539550375, |
| "grad_norm": 0.9720091603452963, |
| "learning_rate": 4.666666666666666e-06, |
| "loss": 1.5698, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.11990008326394672, |
| "grad_norm": 0.8634677699569875, |
| "learning_rate": 4.8e-06, |
| "loss": 1.5286, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.12323064113238967, |
| "grad_norm": 0.7720350215206407, |
| "learning_rate": 4.933333333333333e-06, |
| "loss": 1.5897, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.12656119900083265, |
| "grad_norm": 0.7351438783567595, |
| "learning_rate": 5.066666666666666e-06, |
| "loss": 1.471, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.1298917568692756, |
| "grad_norm": 0.6436527036047347, |
| "learning_rate": 5.2e-06, |
| "loss": 1.5523, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.13322231473771856, |
| "grad_norm": 0.5914433909472115, |
| "learning_rate": 5.333333333333333e-06, |
| "loss": 1.5169, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.13655287260616153, |
| "grad_norm": 0.5708899134928395, |
| "learning_rate": 5.466666666666667e-06, |
| "loss": 1.4727, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.1398834304746045, |
| "grad_norm": 0.562979308505682, |
| "learning_rate": 5.6e-06, |
| "loss": 1.5101, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.14321398834304747, |
| "grad_norm": 0.5333098859373814, |
| "learning_rate": 5.733333333333332e-06, |
| "loss": 1.5053, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.14654454621149043, |
| "grad_norm": 0.518700589700869, |
| "learning_rate": 5.866666666666667e-06, |
| "loss": 1.5522, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.1498751040799334, |
| "grad_norm": 0.5123134702021855, |
| "learning_rate": 5.999999999999999e-06, |
| "loss": 1.4581, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.15320566194837634, |
| "grad_norm": 0.5233023339387923, |
| "learning_rate": 6.133333333333334e-06, |
| "loss": 1.4503, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1565362198168193, |
| "grad_norm": 0.4984395351799732, |
| "learning_rate": 6.266666666666666e-06, |
| "loss": 1.4698, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.15986677768526228, |
| "grad_norm": 0.48116733820243823, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 1.5399, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.16319733555370525, |
| "grad_norm": 0.4917159508967155, |
| "learning_rate": 6.533333333333333e-06, |
| "loss": 1.4674, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.16652789342214822, |
| "grad_norm": 0.4631697484027289, |
| "learning_rate": 6.666666666666666e-06, |
| "loss": 1.5063, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.16985845129059118, |
| "grad_norm": 0.4506097490342786, |
| "learning_rate": 6.8e-06, |
| "loss": 1.4787, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.17318900915903415, |
| "grad_norm": 0.4808943580292107, |
| "learning_rate": 6.933333333333333e-06, |
| "loss": 1.5355, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1765195670274771, |
| "grad_norm": 0.4353655566788618, |
| "learning_rate": 7.066666666666667e-06, |
| "loss": 1.4545, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.17985012489592006, |
| "grad_norm": 0.42881276266179474, |
| "learning_rate": 7.2e-06, |
| "loss": 1.4726, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.18318068276436303, |
| "grad_norm": 0.4243886425058161, |
| "learning_rate": 7.333333333333332e-06, |
| "loss": 1.5364, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.186511240632806, |
| "grad_norm": 0.4078516232902407, |
| "learning_rate": 7.466666666666667e-06, |
| "loss": 1.5441, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.18984179850124897, |
| "grad_norm": 0.39819776399963164, |
| "learning_rate": 7.599999999999999e-06, |
| "loss": 1.5394, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.19317235636969193, |
| "grad_norm": 0.3993196408685462, |
| "learning_rate": 7.733333333333333e-06, |
| "loss": 1.4883, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1965029142381349, |
| "grad_norm": 0.3992257742362516, |
| "learning_rate": 7.866666666666667e-06, |
| "loss": 1.4933, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.19983347210657784, |
| "grad_norm": 0.39782096872195477, |
| "learning_rate": 8e-06, |
| "loss": 1.4729, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2031640299750208, |
| "grad_norm": 0.38436305350726707, |
| "learning_rate": 7.99851604526062e-06, |
| "loss": 1.4777, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.20649458784346378, |
| "grad_norm": 0.3782583438569582, |
| "learning_rate": 7.99702712746191e-06, |
| "loss": 1.535, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.20982514571190675, |
| "grad_norm": 0.3910775225847348, |
| "learning_rate": 7.995533221663874e-06, |
| "loss": 1.4643, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.21315570358034971, |
| "grad_norm": 0.37376830993433585, |
| "learning_rate": 7.994034302759135e-06, |
| "loss": 1.4265, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.21648626144879268, |
| "grad_norm": 0.37205196740456564, |
| "learning_rate": 7.99253034547152e-06, |
| "loss": 1.484, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.21981681931723562, |
| "grad_norm": 0.37012462931708767, |
| "learning_rate": 7.991021324354658e-06, |
| "loss": 1.4668, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.2231473771856786, |
| "grad_norm": 0.36609254541987934, |
| "learning_rate": 7.989507213790519e-06, |
| "loss": 1.4512, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.22647793505412156, |
| "grad_norm": 0.36389643029866026, |
| "learning_rate": 7.987987987987988e-06, |
| "loss": 1.4666, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.22980849292256453, |
| "grad_norm": 0.3835942907380993, |
| "learning_rate": 7.986463620981386e-06, |
| "loss": 1.5581, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.2331390507910075, |
| "grad_norm": 0.3709505537460329, |
| "learning_rate": 7.984934086629002e-06, |
| "loss": 1.4942, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.23646960865945046, |
| "grad_norm": 0.3734284694232727, |
| "learning_rate": 7.983399358611582e-06, |
| "loss": 1.5449, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.23980016652789343, |
| "grad_norm": 0.38168285139161445, |
| "learning_rate": 7.981859410430838e-06, |
| "loss": 1.4972, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.24313072439633637, |
| "grad_norm": 0.4166060644404285, |
| "learning_rate": 7.98031421540791e-06, |
| "loss": 1.5273, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.24646128226477934, |
| "grad_norm": 0.3721773268353121, |
| "learning_rate": 7.978763746681835e-06, |
| "loss": 1.5459, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.2497918401332223, |
| "grad_norm": 0.3785109036596187, |
| "learning_rate": 7.977207977207977e-06, |
| "loss": 1.5221, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2531223980016653, |
| "grad_norm": 0.3798544993330551, |
| "learning_rate": 7.975646879756469e-06, |
| "loss": 1.447, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.25645295587010825, |
| "grad_norm": 0.3676458399839075, |
| "learning_rate": 7.974080426910615e-06, |
| "loss": 1.552, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.2597835137385512, |
| "grad_norm": 0.3906392619486636, |
| "learning_rate": 7.972508591065292e-06, |
| "loss": 1.5524, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2631140716069942, |
| "grad_norm": 0.4086059406499793, |
| "learning_rate": 7.97093134442532e-06, |
| "loss": 1.5347, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.2664446294754371, |
| "grad_norm": 0.37866878925235237, |
| "learning_rate": 7.969348659003832e-06, |
| "loss": 1.4263, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2697751873438801, |
| "grad_norm": 0.36630829174020924, |
| "learning_rate": 7.96776050662061e-06, |
| "loss": 1.4882, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.27310574521232306, |
| "grad_norm": 0.3572953266857883, |
| "learning_rate": 7.966166858900421e-06, |
| "loss": 1.4996, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.27643630308076605, |
| "grad_norm": 0.37034991529495037, |
| "learning_rate": 7.964567687271327e-06, |
| "loss": 1.4558, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.279766860949209, |
| "grad_norm": 0.39454254411893813, |
| "learning_rate": 7.962962962962963e-06, |
| "loss": 1.481, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.28309741881765194, |
| "grad_norm": 0.36598142148212737, |
| "learning_rate": 7.961352657004831e-06, |
| "loss": 1.4647, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.28642797668609493, |
| "grad_norm": 0.40131025635004997, |
| "learning_rate": 7.959736740224545e-06, |
| "loss": 1.486, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.28975853455453787, |
| "grad_norm": 0.36813808559204136, |
| "learning_rate": 7.958115183246073e-06, |
| "loss": 1.5104, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.29308909242298087, |
| "grad_norm": 0.4399054897841581, |
| "learning_rate": 7.956487956487956e-06, |
| "loss": 1.5511, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.2964196502914238, |
| "grad_norm": 0.4137480663423791, |
| "learning_rate": 7.95485503016151e-06, |
| "loss": 1.5431, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2997502081598668, |
| "grad_norm": 0.39082659570701933, |
| "learning_rate": 7.953216374269006e-06, |
| "loss": 1.5094, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.30308076602830974, |
| "grad_norm": 0.4222547479458042, |
| "learning_rate": 7.951571958601836e-06, |
| "loss": 1.528, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.3064113238967527, |
| "grad_norm": 0.4565848989524497, |
| "learning_rate": 7.949921752738653e-06, |
| "loss": 1.4345, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.3097418817651957, |
| "grad_norm": 0.3909465393349193, |
| "learning_rate": 7.948265726043504e-06, |
| "loss": 1.4885, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.3130724396336386, |
| "grad_norm": 0.40399439020361494, |
| "learning_rate": 7.946603847663918e-06, |
| "loss": 1.4836, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.3164029975020816, |
| "grad_norm": 0.3940685084379771, |
| "learning_rate": 7.944936086529007e-06, |
| "loss": 1.4894, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.31973355537052456, |
| "grad_norm": 0.36455393248573603, |
| "learning_rate": 7.943262411347517e-06, |
| "loss": 1.4765, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.32306411323896755, |
| "grad_norm": 0.42216219555871026, |
| "learning_rate": 7.94158279060588e-06, |
| "loss": 1.505, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.3263946711074105, |
| "grad_norm": 0.3833612688097333, |
| "learning_rate": 7.93989719256623e-06, |
| "loss": 1.4803, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.32972522897585343, |
| "grad_norm": 0.3793312412105176, |
| "learning_rate": 7.938205585264408e-06, |
| "loss": 1.4721, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.33305578684429643, |
| "grad_norm": 0.6231405275420779, |
| "learning_rate": 7.936507936507936e-06, |
| "loss": 1.4941, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.33638634471273937, |
| "grad_norm": 0.39916108511305454, |
| "learning_rate": 7.934804213873981e-06, |
| "loss": 1.5113, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.33971690258118237, |
| "grad_norm": 0.39832888981715536, |
| "learning_rate": 7.933094384707288e-06, |
| "loss": 1.4616, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.3430474604496253, |
| "grad_norm": 0.35554379353616694, |
| "learning_rate": 7.931378416118093e-06, |
| "loss": 1.4754, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.3463780183180683, |
| "grad_norm": 0.3778786204869107, |
| "learning_rate": 7.929656274980016e-06, |
| "loss": 1.5204, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.34970857618651124, |
| "grad_norm": 0.3979509981477904, |
| "learning_rate": 7.927927927927927e-06, |
| "loss": 1.4972, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3530391340549542, |
| "grad_norm": 0.3829152377900939, |
| "learning_rate": 7.926193341355797e-06, |
| "loss": 1.4852, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3563696919233972, |
| "grad_norm": 0.3783230292732417, |
| "learning_rate": 7.924452481414507e-06, |
| "loss": 1.4605, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.3597002497918401, |
| "grad_norm": 0.3702225917786687, |
| "learning_rate": 7.922705314009662e-06, |
| "loss": 1.4751, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3630308076602831, |
| "grad_norm": 0.423076463648796, |
| "learning_rate": 7.920951804799353e-06, |
| "loss": 1.5043, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.36636136552872606, |
| "grad_norm": 0.4015775298544568, |
| "learning_rate": 7.919191919191919e-06, |
| "loss": 1.4993, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.36969192339716905, |
| "grad_norm": 0.395772531232646, |
| "learning_rate": 7.917425622343655e-06, |
| "loss": 1.5074, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.373022481265612, |
| "grad_norm": 0.4063035915678222, |
| "learning_rate": 7.915652879156528e-06, |
| "loss": 1.5005, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.37635303913405493, |
| "grad_norm": 0.3749049780160411, |
| "learning_rate": 7.913873654275848e-06, |
| "loss": 1.5016, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.37968359700249793, |
| "grad_norm": 0.40207184709524446, |
| "learning_rate": 7.912087912087911e-06, |
| "loss": 1.5112, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.38301415487094087, |
| "grad_norm": 0.3761596500147066, |
| "learning_rate": 7.910295616717634e-06, |
| "loss": 1.4226, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.38634471273938387, |
| "grad_norm": 0.34919530357614503, |
| "learning_rate": 7.908496732026144e-06, |
| "loss": 1.454, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.3896752706078268, |
| "grad_norm": 0.3783249892281946, |
| "learning_rate": 7.906691221608348e-06, |
| "loss": 1.3926, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.3930058284762698, |
| "grad_norm": 0.38789047851939196, |
| "learning_rate": 7.904879048790487e-06, |
| "loss": 1.5148, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.39633638634471274, |
| "grad_norm": 0.38028310552438055, |
| "learning_rate": 7.903060176627645e-06, |
| "loss": 1.5512, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3996669442131557, |
| "grad_norm": 0.3557857851285413, |
| "learning_rate": 7.901234567901235e-06, |
| "loss": 1.5145, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4029975020815987, |
| "grad_norm": 0.3648126505851961, |
| "learning_rate": 7.89940218511647e-06, |
| "loss": 1.4616, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.4063280599500416, |
| "grad_norm": 0.3518641114757544, |
| "learning_rate": 7.897562990499793e-06, |
| "loss": 1.4444, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.4096586178184846, |
| "grad_norm": 0.3812409352914946, |
| "learning_rate": 7.895716945996276e-06, |
| "loss": 1.4524, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.41298917568692756, |
| "grad_norm": 0.37136499335096407, |
| "learning_rate": 7.893864013266998e-06, |
| "loss": 1.4495, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.4163197335553705, |
| "grad_norm": 0.36965696298407785, |
| "learning_rate": 7.892004153686396e-06, |
| "loss": 1.454, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.4196502914238135, |
| "grad_norm": 0.37625883797439813, |
| "learning_rate": 7.890137328339575e-06, |
| "loss": 1.4738, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.42298084929225643, |
| "grad_norm": 0.3891305395409707, |
| "learning_rate": 7.888263498019595e-06, |
| "loss": 1.4336, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.42631140716069943, |
| "grad_norm": 0.33836499033668194, |
| "learning_rate": 7.886382623224727e-06, |
| "loss": 1.4435, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.42964196502914237, |
| "grad_norm": 0.39084535016086686, |
| "learning_rate": 7.88449466415568e-06, |
| "loss": 1.4598, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.43297252289758537, |
| "grad_norm": 0.3896756879145717, |
| "learning_rate": 7.882599580712787e-06, |
| "loss": 1.5065, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4363030807660283, |
| "grad_norm": 0.4252827004151611, |
| "learning_rate": 7.880697332493174e-06, |
| "loss": 1.4083, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.43963363863447125, |
| "grad_norm": 0.3608365697753635, |
| "learning_rate": 7.878787878787878e-06, |
| "loss": 1.441, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.44296419650291424, |
| "grad_norm": 0.403123415092978, |
| "learning_rate": 7.876871178578958e-06, |
| "loss": 1.4627, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.4462947543713572, |
| "grad_norm": 0.40013457143727, |
| "learning_rate": 7.874947190536545e-06, |
| "loss": 1.4955, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.4496253122398002, |
| "grad_norm": 0.3883976625001682, |
| "learning_rate": 7.873015873015873e-06, |
| "loss": 1.4298, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4529558701082431, |
| "grad_norm": 0.47893723454397114, |
| "learning_rate": 7.871077184054283e-06, |
| "loss": 1.4706, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.4562864279766861, |
| "grad_norm": 0.3939594731251799, |
| "learning_rate": 7.869131081368174e-06, |
| "loss": 1.4659, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.45961698584512906, |
| "grad_norm": 0.39872483940488357, |
| "learning_rate": 7.867177522349935e-06, |
| "loss": 1.4428, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.462947543713572, |
| "grad_norm": 0.41681968734219343, |
| "learning_rate": 7.865216464064831e-06, |
| "loss": 1.5116, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.466278101582015, |
| "grad_norm": 0.3950334535334994, |
| "learning_rate": 7.863247863247863e-06, |
| "loss": 1.4453, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.46960865945045793, |
| "grad_norm": 0.3569883912128034, |
| "learning_rate": 7.861271676300578e-06, |
| "loss": 1.462, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.47293921731890093, |
| "grad_norm": 0.3784473417547298, |
| "learning_rate": 7.85928785928786e-06, |
| "loss": 1.4961, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.47626977518734387, |
| "grad_norm": 0.35459480974078084, |
| "learning_rate": 7.857296367934665e-06, |
| "loss": 1.5362, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.47960033305578686, |
| "grad_norm": 0.3662426670901604, |
| "learning_rate": 7.85529715762274e-06, |
| "loss": 1.3832, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4829308909242298, |
| "grad_norm": 0.4066610425979986, |
| "learning_rate": 7.85329018338727e-06, |
| "loss": 1.4641, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.48626144879267275, |
| "grad_norm": 0.3545713986492447, |
| "learning_rate": 7.851275399913532e-06, |
| "loss": 1.4675, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.48959200666111574, |
| "grad_norm": 0.3664688735051096, |
| "learning_rate": 7.849252761533463e-06, |
| "loss": 1.4683, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.4929225645295587, |
| "grad_norm": 0.3733605661751341, |
| "learning_rate": 7.847222222222221e-06, |
| "loss": 1.4315, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.4962531223980017, |
| "grad_norm": 0.3380374141462393, |
| "learning_rate": 7.845183735594695e-06, |
| "loss": 1.4401, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.4995836802664446, |
| "grad_norm": 0.407518229964574, |
| "learning_rate": 7.84313725490196e-06, |
| "loss": 1.4437, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5029142381348876, |
| "grad_norm": 0.36142186690399497, |
| "learning_rate": 7.841082733027723e-06, |
| "loss": 1.4444, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.5062447960033306, |
| "grad_norm": 0.35245555484230673, |
| "learning_rate": 7.839020122484688e-06, |
| "loss": 1.4013, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.5095753538717736, |
| "grad_norm": 0.3751518274944043, |
| "learning_rate": 7.836949375410913e-06, |
| "loss": 1.4325, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.5129059117402165, |
| "grad_norm": 0.364299926744196, |
| "learning_rate": 7.834870443566096e-06, |
| "loss": 1.4757, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.5162364696086594, |
| "grad_norm": 0.352709296353453, |
| "learning_rate": 7.832783278327833e-06, |
| "loss": 1.4405, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.5195670274771024, |
| "grad_norm": 0.3595882748979197, |
| "learning_rate": 7.830687830687831e-06, |
| "loss": 1.5005, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.5228975853455454, |
| "grad_norm": 0.3663062717533196, |
| "learning_rate": 7.828584051248068e-06, |
| "loss": 1.4916, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.5262281432139884, |
| "grad_norm": 0.39230898190550817, |
| "learning_rate": 7.82647189021691e-06, |
| "loss": 1.5925, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.5295587010824313, |
| "grad_norm": 0.34764213510621217, |
| "learning_rate": 7.824351297405189e-06, |
| "loss": 1.533, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.5328892589508742, |
| "grad_norm": 0.4356036173061448, |
| "learning_rate": 7.822222222222222e-06, |
| "loss": 1.4768, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5362198168193172, |
| "grad_norm": 0.3650633676087402, |
| "learning_rate": 7.820084613671788e-06, |
| "loss": 1.4834, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.5395503746877602, |
| "grad_norm": 0.36003662026404476, |
| "learning_rate": 7.81793842034806e-06, |
| "loss": 1.4745, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5428809325562032, |
| "grad_norm": 0.45089712637002705, |
| "learning_rate": 7.815783590431477e-06, |
| "loss": 1.4762, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.5462114904246461, |
| "grad_norm": 0.3727325869359898, |
| "learning_rate": 7.813620071684589e-06, |
| "loss": 1.4605, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.5495420482930891, |
| "grad_norm": 0.3396845072209277, |
| "learning_rate": 7.81144781144781e-06, |
| "loss": 1.4793, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5528726061615321, |
| "grad_norm": 0.35005093334327886, |
| "learning_rate": 7.809266756635177e-06, |
| "loss": 1.4699, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.556203164029975, |
| "grad_norm": 0.3836826797224187, |
| "learning_rate": 7.807076853729998e-06, |
| "loss": 1.4727, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.559533721898418, |
| "grad_norm": 0.3866747204941054, |
| "learning_rate": 7.804878048780487e-06, |
| "loss": 1.4656, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5628642797668609, |
| "grad_norm": 0.3754060351606817, |
| "learning_rate": 7.802670287395338e-06, |
| "loss": 1.4427, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.5661948376353039, |
| "grad_norm": 0.3560392764595894, |
| "learning_rate": 7.80045351473923e-06, |
| "loss": 1.469, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5695253955037469, |
| "grad_norm": 0.38308734497417124, |
| "learning_rate": 7.79822767552829e-06, |
| "loss": 1.5086, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5728559533721899, |
| "grad_norm": 0.359072776955667, |
| "learning_rate": 7.7959927140255e-06, |
| "loss": 1.4531, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.5761865112406328, |
| "grad_norm": 0.3922686356985507, |
| "learning_rate": 7.793748574036049e-06, |
| "loss": 1.5004, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.5795170691090757, |
| "grad_norm": 0.38139432931110967, |
| "learning_rate": 7.791495198902606e-06, |
| "loss": 1.4596, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5828476269775187, |
| "grad_norm": 0.386725308323352, |
| "learning_rate": 7.789232531500573e-06, |
| "loss": 1.4107, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5861781848459617, |
| "grad_norm": 0.3590860738790805, |
| "learning_rate": 7.786960514233242e-06, |
| "loss": 1.4748, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5895087427144047, |
| "grad_norm": 0.38618438471328675, |
| "learning_rate": 7.784679089026915e-06, |
| "loss": 1.481, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5928393005828476, |
| "grad_norm": 0.35271293932202913, |
| "learning_rate": 7.782388197325957e-06, |
| "loss": 1.4445, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5961698584512906, |
| "grad_norm": 0.3587339394337467, |
| "learning_rate": 7.78008778008778e-06, |
| "loss": 1.482, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.5995004163197336, |
| "grad_norm": 0.4051854093619042, |
| "learning_rate": 7.777777777777777e-06, |
| "loss": 1.4538, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6028309741881765, |
| "grad_norm": 0.3624830177676393, |
| "learning_rate": 7.775458130364185e-06, |
| "loss": 1.3882, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.6061615320566195, |
| "grad_norm": 0.40327439887058536, |
| "learning_rate": 7.773128777312878e-06, |
| "loss": 1.4439, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.6094920899250624, |
| "grad_norm": 0.3849485884337272, |
| "learning_rate": 7.77078965758211e-06, |
| "loss": 1.4598, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.6128226477935054, |
| "grad_norm": 0.3800546336095655, |
| "learning_rate": 7.76844070961718e-06, |
| "loss": 1.5077, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.6161532056619484, |
| "grad_norm": 0.4058514640829756, |
| "learning_rate": 7.766081871345029e-06, |
| "loss": 1.4557, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.6194837635303914, |
| "grad_norm": 0.3547116281420189, |
| "learning_rate": 7.763713080168775e-06, |
| "loss": 1.4465, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.6228143213988343, |
| "grad_norm": 0.36935786461716674, |
| "learning_rate": 7.76133427296218e-06, |
| "loss": 1.3674, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.6261448792672772, |
| "grad_norm": 0.3465066682351456, |
| "learning_rate": 7.75894538606403e-06, |
| "loss": 1.5018, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.6294754371357202, |
| "grad_norm": 0.37821929789921876, |
| "learning_rate": 7.75654635527247e-06, |
| "loss": 1.46, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.6328059950041632, |
| "grad_norm": 0.42147975033129337, |
| "learning_rate": 7.754137115839244e-06, |
| "loss": 1.4324, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6361365528726062, |
| "grad_norm": 0.395112631651776, |
| "learning_rate": 7.751717602463872e-06, |
| "loss": 1.4682, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.6394671107410491, |
| "grad_norm": 0.3866087697502269, |
| "learning_rate": 7.749287749287749e-06, |
| "loss": 1.4845, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.6427976686094921, |
| "grad_norm": 0.39380953384339784, |
| "learning_rate": 7.746847489888173e-06, |
| "loss": 1.4628, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.6461282264779351, |
| "grad_norm": 0.38499086799547566, |
| "learning_rate": 7.744396757272294e-06, |
| "loss": 1.4485, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.649458784346378, |
| "grad_norm": 0.3628021970554608, |
| "learning_rate": 7.741935483870966e-06, |
| "loss": 1.4306, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.652789342214821, |
| "grad_norm": 0.37886204626432507, |
| "learning_rate": 7.739463601532567e-06, |
| "loss": 1.4178, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.6561199000832639, |
| "grad_norm": 0.36347566586862995, |
| "learning_rate": 7.736981041516678e-06, |
| "loss": 1.3917, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.6594504579517069, |
| "grad_norm": 0.3808525608826558, |
| "learning_rate": 7.734487734487733e-06, |
| "loss": 1.425, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.6627810158201499, |
| "grad_norm": 0.36703672958616185, |
| "learning_rate": 7.731983610508556e-06, |
| "loss": 1.3963, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.6661115736885929, |
| "grad_norm": 0.3449284331155099, |
| "learning_rate": 7.729468599033817e-06, |
| "loss": 1.5389, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6694421315570358, |
| "grad_norm": 0.38098871722055255, |
| "learning_rate": 7.726942628903412e-06, |
| "loss": 1.4354, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.6727726894254787, |
| "grad_norm": 0.37447535098026113, |
| "learning_rate": 7.72440562833576e-06, |
| "loss": 1.4238, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.6761032472939217, |
| "grad_norm": 0.3815996192127943, |
| "learning_rate": 7.721857524920983e-06, |
| "loss": 1.4465, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.6794338051623647, |
| "grad_norm": 0.43830167523580127, |
| "learning_rate": 7.719298245614036e-06, |
| "loss": 1.4464, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.6827643630308077, |
| "grad_norm": 0.49374391843463344, |
| "learning_rate": 7.716727716727717e-06, |
| "loss": 1.4326, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6860949208992506, |
| "grad_norm": 0.40611516537871767, |
| "learning_rate": 7.714145863925599e-06, |
| "loss": 1.4867, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.6894254787676936, |
| "grad_norm": 0.39306412548059455, |
| "learning_rate": 7.711552612214863e-06, |
| "loss": 1.4879, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.6927560366361366, |
| "grad_norm": 0.3732547746311456, |
| "learning_rate": 7.708947885939036e-06, |
| "loss": 1.5305, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.6960865945045795, |
| "grad_norm": 0.3749992070235647, |
| "learning_rate": 7.706331608770632e-06, |
| "loss": 1.4422, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.6994171523730225, |
| "grad_norm": 0.4236632648954227, |
| "learning_rate": 7.703703703703702e-06, |
| "loss": 1.4362, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7027477102414654, |
| "grad_norm": 0.3799687473741569, |
| "learning_rate": 7.701064093046274e-06, |
| "loss": 1.512, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.7060782681099084, |
| "grad_norm": 0.3724271784543797, |
| "learning_rate": 7.698412698412699e-06, |
| "loss": 1.469, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.7094088259783514, |
| "grad_norm": 0.364477503994216, |
| "learning_rate": 7.695749440715883e-06, |
| "loss": 1.4811, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.7127393838467944, |
| "grad_norm": 0.3925520005032744, |
| "learning_rate": 7.693074240159441e-06, |
| "loss": 1.5027, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.7160699417152373, |
| "grad_norm": 0.40921223587397654, |
| "learning_rate": 7.690387016229713e-06, |
| "loss": 1.488, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.7194004995836802, |
| "grad_norm": 0.3981162315328969, |
| "learning_rate": 7.687687687687688e-06, |
| "loss": 1.4343, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.7227310574521232, |
| "grad_norm": 0.35388766488814566, |
| "learning_rate": 7.684976172560823e-06, |
| "loss": 1.4599, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.7260616153205662, |
| "grad_norm": 0.3449802535833205, |
| "learning_rate": 7.682252388134742e-06, |
| "loss": 1.442, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.7293921731890092, |
| "grad_norm": 0.34627676487411824, |
| "learning_rate": 7.679516250944822e-06, |
| "loss": 1.4461, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.7327227310574521, |
| "grad_norm": 0.35799089084524466, |
| "learning_rate": 7.676767676767677e-06, |
| "loss": 1.4731, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.736053288925895, |
| "grad_norm": 0.3820520257947768, |
| "learning_rate": 7.674006580612503e-06, |
| "loss": 1.4566, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.7393838467943381, |
| "grad_norm": 0.3641120307221186, |
| "learning_rate": 7.671232876712327e-06, |
| "loss": 1.4525, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.742714404662781, |
| "grad_norm": 0.37136269720782134, |
| "learning_rate": 7.668446478515128e-06, |
| "loss": 1.4548, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.746044962531224, |
| "grad_norm": 0.4138383130083843, |
| "learning_rate": 7.665647298674822e-06, |
| "loss": 1.5395, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.7493755203996669, |
| "grad_norm": 0.37512729325167443, |
| "learning_rate": 7.662835249042145e-06, |
| "loss": 1.4348, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.7527060782681099, |
| "grad_norm": 0.3574220209010036, |
| "learning_rate": 7.660010240655401e-06, |
| "loss": 1.4205, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.7560366361365529, |
| "grad_norm": 0.3509015504877034, |
| "learning_rate": 7.657172183731076e-06, |
| "loss": 1.4074, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.7593671940049959, |
| "grad_norm": 0.4191818637620366, |
| "learning_rate": 7.654320987654322e-06, |
| "loss": 1.434, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.7626977518734388, |
| "grad_norm": 0.38073125720358314, |
| "learning_rate": 7.651456560969322e-06, |
| "loss": 1.4315, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.7660283097418817, |
| "grad_norm": 0.3489534004367162, |
| "learning_rate": 7.648578811369509e-06, |
| "loss": 1.4292, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7693588676103247, |
| "grad_norm": 0.39880199669766575, |
| "learning_rate": 7.645687645687645e-06, |
| "loss": 1.4797, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.7726894254787677, |
| "grad_norm": 0.3377554646810836, |
| "learning_rate": 7.642782969885774e-06, |
| "loss": 1.3638, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.7760199833472107, |
| "grad_norm": 0.45577113603344144, |
| "learning_rate": 7.639864689045015e-06, |
| "loss": 1.5272, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.7793505412156536, |
| "grad_norm": 0.3872639106321951, |
| "learning_rate": 7.636932707355241e-06, |
| "loss": 1.5223, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.7826810990840966, |
| "grad_norm": 0.41241615465906434, |
| "learning_rate": 7.633986928104575e-06, |
| "loss": 1.4047, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7860116569525396, |
| "grad_norm": 0.350902547985464, |
| "learning_rate": 7.631027253668762e-06, |
| "loss": 1.4599, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.7893422148209825, |
| "grad_norm": 0.36780129033305325, |
| "learning_rate": 7.6280535855003936e-06, |
| "loss": 1.4872, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.7926727726894255, |
| "grad_norm": 0.3504301681190647, |
| "learning_rate": 7.625065824117956e-06, |
| "loss": 1.4508, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.7960033305578684, |
| "grad_norm": 0.425786005279154, |
| "learning_rate": 7.622063869094748e-06, |
| "loss": 1.5359, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.7993338884263114, |
| "grad_norm": 0.3423914333711706, |
| "learning_rate": 7.619047619047619e-06, |
| "loss": 1.5116, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8026644462947544, |
| "grad_norm": 0.39752748882813016, |
| "learning_rate": 7.616016971625564e-06, |
| "loss": 1.3967, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.8059950041631974, |
| "grad_norm": 0.35349720101513005, |
| "learning_rate": 7.61297182349814e-06, |
| "loss": 1.428, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.8093255620316403, |
| "grad_norm": 0.3592529486243108, |
| "learning_rate": 7.609912070343725e-06, |
| "loss": 1.4716, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.8126561199000832, |
| "grad_norm": 0.41007914987868593, |
| "learning_rate": 7.606837606837607e-06, |
| "loss": 1.4601, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.8159866777685262, |
| "grad_norm": 0.4368820717106569, |
| "learning_rate": 7.603748326639893e-06, |
| "loss": 1.4299, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.8193172356369692, |
| "grad_norm": 0.34781376516299506, |
| "learning_rate": 7.600644122383253e-06, |
| "loss": 1.3832, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.8226477935054122, |
| "grad_norm": 0.4378928638690168, |
| "learning_rate": 7.597524885660478e-06, |
| "loss": 1.5006, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.8259783513738551, |
| "grad_norm": 0.38866511125189074, |
| "learning_rate": 7.594390507011865e-06, |
| "loss": 1.3808, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.829308909242298, |
| "grad_norm": 0.3796151796802332, |
| "learning_rate": 7.591240875912408e-06, |
| "loss": 1.4048, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.832639467110741, |
| "grad_norm": 0.47512939093169254, |
| "learning_rate": 7.588075880758807e-06, |
| "loss": 1.4533, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.835970024979184, |
| "grad_norm": 0.4002177494781384, |
| "learning_rate": 7.584895408856289e-06, |
| "loss": 1.4364, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.839300582847627, |
| "grad_norm": 0.41628446885968545, |
| "learning_rate": 7.581699346405228e-06, |
| "loss": 1.5213, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.8426311407160699, |
| "grad_norm": 0.41586597700526384, |
| "learning_rate": 7.578487578487578e-06, |
| "loss": 1.4163, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.8459616985845129, |
| "grad_norm": 0.37080164887555395, |
| "learning_rate": 7.575259989053093e-06, |
| "loss": 1.4262, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.8492922564529559, |
| "grad_norm": 0.44276862899193814, |
| "learning_rate": 7.57201646090535e-06, |
| "loss": 1.4434, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.8526228143213989, |
| "grad_norm": 0.3565514945143501, |
| "learning_rate": 7.568756875687569e-06, |
| "loss": 1.4628, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.8559533721898418, |
| "grad_norm": 0.3424453222650746, |
| "learning_rate": 7.565481113868211e-06, |
| "loss": 1.4397, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.8592839300582847, |
| "grad_norm": 0.36361177745212486, |
| "learning_rate": 7.562189054726368e-06, |
| "loss": 1.434, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.8626144879267277, |
| "grad_norm": 0.39961369778575284, |
| "learning_rate": 7.558880576336936e-06, |
| "loss": 1.3263, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.8659450457951707, |
| "grad_norm": 0.3694683835624918, |
| "learning_rate": 7.555555555555556e-06, |
| "loss": 1.465, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8692756036636137, |
| "grad_norm": 0.38926907075141, |
| "learning_rate": 7.552213868003341e-06, |
| "loss": 1.4639, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.8726061615320566, |
| "grad_norm": 0.41002402289266, |
| "learning_rate": 7.548855388051367e-06, |
| "loss": 1.4583, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.8759367194004996, |
| "grad_norm": 0.39476689396263037, |
| "learning_rate": 7.545479988804925e-06, |
| "loss": 1.5369, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.8792672772689425, |
| "grad_norm": 0.42338165790994337, |
| "learning_rate": 7.542087542087541e-06, |
| "loss": 1.4149, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.8825978351373855, |
| "grad_norm": 0.37580056414171503, |
| "learning_rate": 7.538677918424753e-06, |
| "loss": 1.4767, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.8859283930058285, |
| "grad_norm": 0.35524395112624974, |
| "learning_rate": 7.535250987027637e-06, |
| "loss": 1.4565, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.8892589508742714, |
| "grad_norm": 0.333001020301385, |
| "learning_rate": 7.531806615776081e-06, |
| "loss": 1.4653, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.8925895087427144, |
| "grad_norm": 0.325740334034441, |
| "learning_rate": 7.5283446712018136e-06, |
| "loss": 1.4583, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.8959200666111574, |
| "grad_norm": 0.3579186582787629, |
| "learning_rate": 7.524865018471157e-06, |
| "loss": 1.441, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.8992506244796004, |
| "grad_norm": 0.38100337783570354, |
| "learning_rate": 7.521367521367521e-06, |
| "loss": 1.4725, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9025811823480433, |
| "grad_norm": 0.4439224251441086, |
| "learning_rate": 7.5178520422736365e-06, |
| "loss": 1.4433, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.9059117402164862, |
| "grad_norm": 0.36404138775247186, |
| "learning_rate": 7.514318442153494e-06, |
| "loss": 1.4502, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.9092422980849292, |
| "grad_norm": 0.3739016590981095, |
| "learning_rate": 7.5107665805340226e-06, |
| "loss": 1.5614, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.9125728559533722, |
| "grad_norm": 0.34618213523089303, |
| "learning_rate": 7.5071963154864715e-06, |
| "loss": 1.4818, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.9159034138218152, |
| "grad_norm": 0.3516565079552471, |
| "learning_rate": 7.5036075036075024e-06, |
| "loss": 1.4811, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.9192339716902581, |
| "grad_norm": 0.372847709765313, |
| "learning_rate": 7.499999999999999e-06, |
| "loss": 1.4314, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.922564529558701, |
| "grad_norm": 0.33633722585110437, |
| "learning_rate": 7.496373658253553e-06, |
| "loss": 1.467, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.925895087427144, |
| "grad_norm": 0.3502677047499933, |
| "learning_rate": 7.4927283304246645e-06, |
| "loss": 1.465, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.929225645295587, |
| "grad_norm": 0.3301559549021256, |
| "learning_rate": 7.4890638670166225e-06, |
| "loss": 1.4345, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.93255620316403, |
| "grad_norm": 0.3995138440783666, |
| "learning_rate": 7.485380116959064e-06, |
| "loss": 1.3383, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.9358867610324729, |
| "grad_norm": 0.3871081436839965, |
| "learning_rate": 7.481676927587217e-06, |
| "loss": 1.4796, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.9392173189009159, |
| "grad_norm": 0.4376413636164512, |
| "learning_rate": 7.4779541446208115e-06, |
| "loss": 1.4644, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.9425478767693589, |
| "grad_norm": 0.43206377875237645, |
| "learning_rate": 7.474211612142647e-06, |
| "loss": 1.4107, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.9458784346378019, |
| "grad_norm": 0.4025303715871277, |
| "learning_rate": 7.470449172576832e-06, |
| "loss": 1.4318, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.9492089925062448, |
| "grad_norm": 0.37724819780360036, |
| "learning_rate": 7.466666666666667e-06, |
| "loss": 1.4454, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.9525395503746877, |
| "grad_norm": 0.35328542805788227, |
| "learning_rate": 7.462863933452169e-06, |
| "loss": 1.4175, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.9558701082431307, |
| "grad_norm": 0.3788093515621439, |
| "learning_rate": 7.459040810247245e-06, |
| "loss": 1.4429, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.9592006661115737, |
| "grad_norm": 0.4018461850957888, |
| "learning_rate": 7.455197132616486e-06, |
| "loss": 1.4679, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.9625312239800167, |
| "grad_norm": 0.3792573314031364, |
| "learning_rate": 7.451332734351601e-06, |
| "loss": 1.5191, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.9658617818484596, |
| "grad_norm": 0.4173737668171256, |
| "learning_rate": 7.447447447447447e-06, |
| "loss": 1.4999, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9691923397169026, |
| "grad_norm": 0.34073643176316165, |
| "learning_rate": 7.443541102077687e-06, |
| "loss": 1.3667, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.9725228975853455, |
| "grad_norm": 0.37255255694817807, |
| "learning_rate": 7.439613526570048e-06, |
| "loss": 1.4196, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.9758534554537885, |
| "grad_norm": 0.3751657637349412, |
| "learning_rate": 7.435664547381168e-06, |
| "loss": 1.4545, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.9791840133222315, |
| "grad_norm": 0.45200307278108437, |
| "learning_rate": 7.431693989071039e-06, |
| "loss": 1.4199, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.9825145711906744, |
| "grad_norm": 0.3889800375255201, |
| "learning_rate": 7.427701674277017e-06, |
| "loss": 1.4801, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.9858451290591174, |
| "grad_norm": 0.3473904572951369, |
| "learning_rate": 7.4236874236874235e-06, |
| "loss": 1.393, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.9891756869275604, |
| "grad_norm": 0.3581161377664693, |
| "learning_rate": 7.419651056014692e-06, |
| "loss": 1.4073, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.9925062447960034, |
| "grad_norm": 0.35620326479274533, |
| "learning_rate": 7.415592387968079e-06, |
| "loss": 1.43, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.9958368026644463, |
| "grad_norm": 0.3676482591149261, |
| "learning_rate": 7.4115112342259155e-06, |
| "loss": 1.4148, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.9991673605328892, |
| "grad_norm": 0.43615077194471996, |
| "learning_rate": 7.407407407407408e-06, |
| "loss": 1.4154, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5905255990041776, |
| "learning_rate": 7.403280718043948e-06, |
| "loss": 1.4231, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.003330557868443, |
| "grad_norm": 0.4020383385971024, |
| "learning_rate": 7.399130974549968e-06, |
| "loss": 1.4394, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.0066611157368859, |
| "grad_norm": 0.3865309278317666, |
| "learning_rate": 7.394957983193276e-06, |
| "loss": 1.4413, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.009991673605329, |
| "grad_norm": 0.40327921143010825, |
| "learning_rate": 7.390761548064918e-06, |
| "loss": 1.4576, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.0133222314737718, |
| "grad_norm": 0.4070700680157323, |
| "learning_rate": 7.386541471048513e-06, |
| "loss": 1.4817, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.0166527893422148, |
| "grad_norm": 0.43460108634631706, |
| "learning_rate": 7.382297551789077e-06, |
| "loss": 1.3939, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.0199833472106579, |
| "grad_norm": 0.42277608172713, |
| "learning_rate": 7.378029587661315e-06, |
| "loss": 1.4239, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.0233139050791007, |
| "grad_norm": 0.37583795807106635, |
| "learning_rate": 7.373737373737374e-06, |
| "loss": 1.4878, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.0266444629475437, |
| "grad_norm": 0.4013147771199415, |
| "learning_rate": 7.3694207027540355e-06, |
| "loss": 1.3758, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.0299750208159866, |
| "grad_norm": 0.38150977748656323, |
| "learning_rate": 7.365079365079365e-06, |
| "loss": 1.4122, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.0333055786844296, |
| "grad_norm": 0.36288526161353013, |
| "learning_rate": 7.360713148678764e-06, |
| "loss": 1.4775, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.0366361365528727, |
| "grad_norm": 0.5122154405495047, |
| "learning_rate": 7.35632183908046e-06, |
| "loss": 1.4385, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.0399666944213155, |
| "grad_norm": 0.4907680124574417, |
| "learning_rate": 7.351905219340377e-06, |
| "loss": 1.4321, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.0432972522897586, |
| "grad_norm": 0.3750039319171418, |
| "learning_rate": 7.347463070006422e-06, |
| "loss": 1.4609, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.0466278101582014, |
| "grad_norm": 0.415847010986813, |
| "learning_rate": 7.342995169082125e-06, |
| "loss": 1.399, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.0499583680266444, |
| "grad_norm": 0.40484373034787197, |
| "learning_rate": 7.338501291989663e-06, |
| "loss": 1.4082, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.0532889258950875, |
| "grad_norm": 0.3556695114896482, |
| "learning_rate": 7.333981211532231e-06, |
| "loss": 1.5045, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.0566194837635303, |
| "grad_norm": 0.4217178250762373, |
| "learning_rate": 7.329434697855749e-06, |
| "loss": 1.5051, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.0599500416319734, |
| "grad_norm": 0.44541116731095065, |
| "learning_rate": 7.324861518409905e-06, |
| "loss": 1.454, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.0632805995004164, |
| "grad_norm": 0.3722519430085194, |
| "learning_rate": 7.320261437908496e-06, |
| "loss": 1.4864, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.0666111573688593, |
| "grad_norm": 0.49955941789670055, |
| "learning_rate": 7.315634218289086e-06, |
| "loss": 1.4328, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.0699417152373023, |
| "grad_norm": 0.559077472675475, |
| "learning_rate": 7.310979618671926e-06, |
| "loss": 1.4387, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.0732722731057451, |
| "grad_norm": 0.38492000673298576, |
| "learning_rate": 7.306297395318167e-06, |
| "loss": 1.4173, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.0766028309741882, |
| "grad_norm": 0.46264263086480695, |
| "learning_rate": 7.301587301587301e-06, |
| "loss": 1.458, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.0799333888426312, |
| "grad_norm": 0.48393689092527553, |
| "learning_rate": 7.296849087893865e-06, |
| "loss": 1.4984, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.083263946711074, |
| "grad_norm": 0.3833552546352091, |
| "learning_rate": 7.29208250166334e-06, |
| "loss": 1.4801, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.0865945045795171, |
| "grad_norm": 0.45636346067253053, |
| "learning_rate": 7.287287287287286e-06, |
| "loss": 1.4335, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.08992506244796, |
| "grad_norm": 0.5170759134234261, |
| "learning_rate": 7.282463186077643e-06, |
| "loss": 1.4619, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.093255620316403, |
| "grad_norm": 0.41413948804668765, |
| "learning_rate": 7.277609936220207e-06, |
| "loss": 1.4976, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.096586178184846, |
| "grad_norm": 0.47675818666743885, |
| "learning_rate": 7.272727272727272e-06, |
| "loss": 1.5166, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.0999167360532889, |
| "grad_norm": 0.4446284191251516, |
| "learning_rate": 7.267814927389396e-06, |
| "loss": 1.3756, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.103247293921732, |
| "grad_norm": 0.3712448236233366, |
| "learning_rate": 7.262872628726287e-06, |
| "loss": 1.4177, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.1065778517901748, |
| "grad_norm": 0.354780077610888, |
| "learning_rate": 7.257900101936799e-06, |
| "loss": 1.3888, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.1099084096586178, |
| "grad_norm": 0.4287556354375581, |
| "learning_rate": 7.252897068847988e-06, |
| "loss": 1.463, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.1132389675270609, |
| "grad_norm": 0.43855718184558823, |
| "learning_rate": 7.247863247863247e-06, |
| "loss": 1.4375, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.1165695253955037, |
| "grad_norm": 0.36652449074881177, |
| "learning_rate": 7.242798353909463e-06, |
| "loss": 1.4724, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.1199000832639467, |
| "grad_norm": 0.41471476618444547, |
| "learning_rate": 7.237702098383213e-06, |
| "loss": 1.4368, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.1232306411323896, |
| "grad_norm": 0.3584246686612814, |
| "learning_rate": 7.2325741890959285e-06, |
| "loss": 1.507, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.1265611990008326, |
| "grad_norm": 0.35472951006324893, |
| "learning_rate": 7.227414330218068e-06, |
| "loss": 1.3847, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.1298917568692757, |
| "grad_norm": 0.40770232084467445, |
| "learning_rate": 7.222222222222222e-06, |
| "loss": 1.4722, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.1332223147377185, |
| "grad_norm": 0.3854760192656062, |
| "learning_rate": 7.216997561825147e-06, |
| "loss": 1.4397, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.1365528726061616, |
| "grad_norm": 0.3425435570180868, |
| "learning_rate": 7.211740041928721e-06, |
| "loss": 1.3917, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.1398834304746046, |
| "grad_norm": 0.3629363871231361, |
| "learning_rate": 7.206449351559762e-06, |
| "loss": 1.4329, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.1432139883430474, |
| "grad_norm": 0.3746351865474382, |
| "learning_rate": 7.20112517580872e-06, |
| "loss": 1.4325, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.1465445462114905, |
| "grad_norm": 0.35633065876642767, |
| "learning_rate": 7.195767195767195e-06, |
| "loss": 1.4802, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.1498751040799333, |
| "grad_norm": 0.41086591430313346, |
| "learning_rate": 7.1903750884642605e-06, |
| "loss": 1.386, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.1532056619483764, |
| "grad_norm": 0.4248601636564269, |
| "learning_rate": 7.184948526801562e-06, |
| "loss": 1.3764, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.1565362198168194, |
| "grad_norm": 0.3677689809276377, |
| "learning_rate": 7.179487179487179e-06, |
| "loss": 1.3977, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.1598667776852623, |
| "grad_norm": 0.4562607243713519, |
| "learning_rate": 7.173990710968203e-06, |
| "loss": 1.4743, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.1631973355537053, |
| "grad_norm": 0.36851546433374166, |
| "learning_rate": 7.168458781362006e-06, |
| "loss": 1.3998, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.1665278934221481, |
| "grad_norm": 0.3440172529786023, |
| "learning_rate": 7.1628910463861915e-06, |
| "loss": 1.4388, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.1698584512905912, |
| "grad_norm": 0.34899417175359176, |
| "learning_rate": 7.157287157287158e-06, |
| "loss": 1.4109, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.1731890091590342, |
| "grad_norm": 0.3369095274891404, |
| "learning_rate": 7.151646760767281e-06, |
| "loss": 1.4721, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.176519567027477, |
| "grad_norm": 0.3373083746918916, |
| "learning_rate": 7.145969498910675e-06, |
| "loss": 1.3879, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.1798501248959201, |
| "grad_norm": 0.3127699546260214, |
| "learning_rate": 7.140255009107467e-06, |
| "loss": 1.4067, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.183180682764363, |
| "grad_norm": 0.3455714541263257, |
| "learning_rate": 7.1345029239766076e-06, |
| "loss": 1.4729, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.186511240632806, |
| "grad_norm": 0.35459135197814406, |
| "learning_rate": 7.128712871287129e-06, |
| "loss": 1.4845, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.189841798501249, |
| "grad_norm": 0.3382392537839561, |
| "learning_rate": 7.122884473877851e-06, |
| "loss": 1.4796, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.1931723563696919, |
| "grad_norm": 0.3229617810865785, |
| "learning_rate": 7.117017349575488e-06, |
| "loss": 1.4258, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.196502914238135, |
| "grad_norm": 0.4132878845320615, |
| "learning_rate": 7.11111111111111e-06, |
| "loss": 1.4344, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.1998334721065778, |
| "grad_norm": 0.3909252234186588, |
| "learning_rate": 7.105165366034932e-06, |
| "loss": 1.4136, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.2031640299750208, |
| "grad_norm": 0.4166542946239009, |
| "learning_rate": 7.0991797166293805e-06, |
| "loss": 1.418, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.2064945878434639, |
| "grad_norm": 0.3978265092622875, |
| "learning_rate": 7.093153759820426e-06, |
| "loss": 1.4778, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.2098251457119067, |
| "grad_norm": 0.3362476483926624, |
| "learning_rate": 7.087087087087086e-06, |
| "loss": 1.4045, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.2131557035803497, |
| "grad_norm": 0.34227618124914144, |
| "learning_rate": 7.0809792843691135e-06, |
| "loss": 1.3691, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.2164862614487926, |
| "grad_norm": 0.4154500404546309, |
| "learning_rate": 7.074829931972789e-06, |
| "loss": 1.4296, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.2198168193172356, |
| "grad_norm": 0.3910386371341375, |
| "learning_rate": 7.068638604474782e-06, |
| "loss": 1.4128, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.2231473771856787, |
| "grad_norm": 0.32973016037230485, |
| "learning_rate": 7.062404870624048e-06, |
| "loss": 1.3952, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.2264779350541215, |
| "grad_norm": 0.3476414929125133, |
| "learning_rate": 7.056128293241695e-06, |
| "loss": 1.4132, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.2298084929225646, |
| "grad_norm": 0.3490075847160727, |
| "learning_rate": 7.0498084291187725e-06, |
| "loss": 1.5034, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.2331390507910074, |
| "grad_norm": 0.4213209017684047, |
| "learning_rate": 7.043444828911956e-06, |
| "loss": 1.4407, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.2364696086594504, |
| "grad_norm": 0.41156726116014214, |
| "learning_rate": 7.037037037037037e-06, |
| "loss": 1.4922, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.2398001665278935, |
| "grad_norm": 0.3274736563867899, |
| "learning_rate": 7.0305845915602e-06, |
| "loss": 1.4443, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.2431307243963363, |
| "grad_norm": 0.4016877039684572, |
| "learning_rate": 7.024087024087023e-06, |
| "loss": 1.4765, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.2464612822647794, |
| "grad_norm": 0.37926187648963133, |
| "learning_rate": 7.017543859649123e-06, |
| "loss": 1.4944, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.2497918401332222, |
| "grad_norm": 0.3995775555374175, |
| "learning_rate": 7.0109546165884185e-06, |
| "loss": 1.4737, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.2531223980016652, |
| "grad_norm": 0.4179330927454956, |
| "learning_rate": 7.0043188064389475e-06, |
| "loss": 1.396, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.2564529558701083, |
| "grad_norm": 0.4026676583822718, |
| "learning_rate": 6.997635933806146e-06, |
| "loss": 1.5024, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.2597835137385511, |
| "grad_norm": 0.3729935293489866, |
| "learning_rate": 6.9909054962435735e-06, |
| "loss": 1.5035, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.2631140716069942, |
| "grad_norm": 0.37785861617292904, |
| "learning_rate": 6.984126984126983e-06, |
| "loss": 1.4859, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.266444629475437, |
| "grad_norm": 0.34618072727066834, |
| "learning_rate": 6.977299880525687e-06, |
| "loss": 1.3753, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.26977518734388, |
| "grad_norm": 0.3603657688818211, |
| "learning_rate": 6.970423661071143e-06, |
| "loss": 1.4396, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.2731057452123231, |
| "grad_norm": 0.31695078316874364, |
| "learning_rate": 6.963497793822704e-06, |
| "loss": 1.4512, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.2764363030807662, |
| "grad_norm": 0.3998834526312468, |
| "learning_rate": 6.956521739130433e-06, |
| "loss": 1.4068, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.279766860949209, |
| "grad_norm": 0.40218592316674945, |
| "learning_rate": 6.949494949494949e-06, |
| "loss": 1.4314, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.2830974188176518, |
| "grad_norm": 0.4377216092057675, |
| "learning_rate": 6.942416869424169e-06, |
| "loss": 1.4159, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.2864279766860949, |
| "grad_norm": 0.3806613338175727, |
| "learning_rate": 6.935286935286935e-06, |
| "loss": 1.4383, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.289758534554538, |
| "grad_norm": 0.41315217581288083, |
| "learning_rate": 6.928104575163398e-06, |
| "loss": 1.4639, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.293089092422981, |
| "grad_norm": 0.4242068360276873, |
| "learning_rate": 6.920869208692086e-06, |
| "loss": 1.5043, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.2964196502914238, |
| "grad_norm": 0.40526133848179174, |
| "learning_rate": 6.913580246913578e-06, |
| "loss": 1.4969, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.2997502081598669, |
| "grad_norm": 0.4390648977103527, |
| "learning_rate": 6.9062370921106965e-06, |
| "loss": 1.4634, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.3030807660283097, |
| "grad_norm": 0.3293053257002483, |
| "learning_rate": 6.898839137645108e-06, |
| "loss": 1.4837, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.3064113238967527, |
| "grad_norm": 0.3741205703169676, |
| "learning_rate": 6.891385767790261e-06, |
| "loss": 1.3888, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.3097418817651958, |
| "grad_norm": 0.36736277922290345, |
| "learning_rate": 6.883876357560567e-06, |
| "loss": 1.4422, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.3130724396336386, |
| "grad_norm": 0.34987451065304387, |
| "learning_rate": 6.876310272536688e-06, |
| "loss": 1.4384, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.3164029975020817, |
| "grad_norm": 0.3574591374681954, |
| "learning_rate": 6.868686868686868e-06, |
| "loss": 1.4453, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.3197335553705245, |
| "grad_norm": 0.31108139602911883, |
| "learning_rate": 6.861005492184199e-06, |
| "loss": 1.4302, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.3230641132389676, |
| "grad_norm": 0.3317920901111113, |
| "learning_rate": 6.853265479219677e-06, |
| "loss": 1.4599, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.3263946711074106, |
| "grad_norm": 0.3319586529185681, |
| "learning_rate": 6.8454661558109825e-06, |
| "loss": 1.4349, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.3297252289758534, |
| "grad_norm": 0.35385561486286676, |
| "learning_rate": 6.837606837606837e-06, |
| "loss": 1.4262, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.3330557868442965, |
| "grad_norm": 0.36123629997437273, |
| "learning_rate": 6.82968682968683e-06, |
| "loss": 1.4475, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.3363863447127393, |
| "grad_norm": 0.3678679588945442, |
| "learning_rate": 6.821705426356589e-06, |
| "loss": 1.4662, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.3397169025811824, |
| "grad_norm": 0.35623277676543963, |
| "learning_rate": 6.813661910938175e-06, |
| "loss": 1.4157, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.3430474604496254, |
| "grad_norm": 0.3670378130601921, |
| "learning_rate": 6.805555555555554e-06, |
| "loss": 1.4289, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.3463780183180682, |
| "grad_norm": 0.37365353793241013, |
| "learning_rate": 6.797385620915031e-06, |
| "loss": 1.4758, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.3497085761865113, |
| "grad_norm": 0.4227767618895852, |
| "learning_rate": 6.78915135608049e-06, |
| "loss": 1.4522, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.3530391340549541, |
| "grad_norm": 0.392419028331304, |
| "learning_rate": 6.780851998243303e-06, |
| "loss": 1.4386, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.3563696919233972, |
| "grad_norm": 0.34332622194519336, |
| "learning_rate": 6.772486772486772e-06, |
| "loss": 1.4143, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.3597002497918402, |
| "grad_norm": 0.39554534288670906, |
| "learning_rate": 6.76405489154493e-06, |
| "loss": 1.4289, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.363030807660283, |
| "grad_norm": 0.3680781980427255, |
| "learning_rate": 6.7555555555555545e-06, |
| "loss": 1.4604, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.3663613655287261, |
| "grad_norm": 0.359696703224119, |
| "learning_rate": 6.7469879518072274e-06, |
| "loss": 1.4552, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.369691923397169, |
| "grad_norm": 0.43946142988468057, |
| "learning_rate": 6.738351254480287e-06, |
| "loss": 1.4649, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.373022481265612, |
| "grad_norm": 0.38158471914984216, |
| "learning_rate": 6.729644624381466e-06, |
| "loss": 1.4553, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.376353039134055, |
| "grad_norm": 0.3785264160376015, |
| "learning_rate": 6.720867208672086e-06, |
| "loss": 1.457, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.3796835970024979, |
| "grad_norm": 0.40275915279313634, |
| "learning_rate": 6.712018140589569e-06, |
| "loss": 1.4665, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.383014154870941, |
| "grad_norm": 0.37268382461278277, |
| "learning_rate": 6.703096539162113e-06, |
| "loss": 1.377, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.3863447127393838, |
| "grad_norm": 0.3362832443073036, |
| "learning_rate": 6.694101508916324e-06, |
| "loss": 1.4122, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.3896752706078268, |
| "grad_norm": 0.3428291854645596, |
| "learning_rate": 6.6850321395775945e-06, |
| "loss": 1.3466, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.3930058284762699, |
| "grad_norm": 0.38976496538071015, |
| "learning_rate": 6.675887505763023e-06, |
| "loss": 1.4711, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.3963363863447127, |
| "grad_norm": 0.4034367524201395, |
| "learning_rate": 6.666666666666666e-06, |
| "loss": 1.5079, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.3996669442131557, |
| "grad_norm": 0.38251655422807695, |
| "learning_rate": 6.657368665736867e-06, |
| "loss": 1.4715, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.4029975020815986, |
| "grad_norm": 0.3636615753904805, |
| "learning_rate": 6.647992530345471e-06, |
| "loss": 1.4175, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.4063280599500416, |
| "grad_norm": 0.3543871236347375, |
| "learning_rate": 6.6385372714486634e-06, |
| "loss": 1.4008, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.4096586178184847, |
| "grad_norm": 0.4270397824248548, |
| "learning_rate": 6.6290018832391705e-06, |
| "loss": 1.4082, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.4129891756869275, |
| "grad_norm": 0.41956585580281563, |
| "learning_rate": 6.6193853427895966e-06, |
| "loss": 1.4075, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.4163197335553706, |
| "grad_norm": 0.47761484099497725, |
| "learning_rate": 6.60968660968661e-06, |
| "loss": 1.4104, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.4196502914238134, |
| "grad_norm": 0.4078261265233408, |
| "learning_rate": 6.599904625655699e-06, |
| "loss": 1.4317, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.4229808492922564, |
| "grad_norm": 0.37691692681004796, |
| "learning_rate": 6.590038314176245e-06, |
| "loss": 1.391, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.4263114071606995, |
| "grad_norm": 0.3288508827565593, |
| "learning_rate": 6.580086580086579e-06, |
| "loss": 1.401, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.4296419650291423, |
| "grad_norm": 0.338319616372442, |
| "learning_rate": 6.570048309178745e-06, |
| "loss": 1.4178, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.4329725228975854, |
| "grad_norm": 0.414862604672987, |
| "learning_rate": 6.559922367782628e-06, |
| "loss": 1.4642, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.4363030807660282, |
| "grad_norm": 0.3949017633125201, |
| "learning_rate": 6.54970760233918e-06, |
| "loss": 1.3643, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.4396336386344712, |
| "grad_norm": 0.3872663647349424, |
| "learning_rate": 6.53940283896231e-06, |
| "loss": 1.3998, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.4429641965029143, |
| "grad_norm": 0.3778182716944692, |
| "learning_rate": 6.529006882989183e-06, |
| "loss": 1.421, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.4462947543713571, |
| "grad_norm": 0.3368637084806252, |
| "learning_rate": 6.518518518518519e-06, |
| "loss": 1.4562, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.4496253122398002, |
| "grad_norm": 0.37088068849156625, |
| "learning_rate": 6.507936507936509e-06, |
| "loss": 1.389, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.452955870108243, |
| "grad_norm": 0.4171977510324979, |
| "learning_rate": 6.497259591429994e-06, |
| "loss": 1.4314, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.456286427976686, |
| "grad_norm": 0.36493233792748947, |
| "learning_rate": 6.486486486486486e-06, |
| "loss": 1.4239, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.4596169858451291, |
| "grad_norm": 0.36409025362836434, |
| "learning_rate": 6.475615887380592e-06, |
| "loss": 1.4011, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.462947543713572, |
| "grad_norm": 0.3432633374051585, |
| "learning_rate": 6.464646464646463e-06, |
| "loss": 1.4706, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.466278101582015, |
| "grad_norm": 0.36918146681400343, |
| "learning_rate": 6.453576864535769e-06, |
| "loss": 1.4048, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.4696086594504578, |
| "grad_norm": 0.3558974109435063, |
| "learning_rate": 6.442405708460755e-06, |
| "loss": 1.4233, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.4729392173189009, |
| "grad_norm": 0.3319935101093491, |
| "learning_rate": 6.431131592421914e-06, |
| "loss": 1.4557, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.476269775187344, |
| "grad_norm": 0.3957835276431251, |
| "learning_rate": 6.419753086419752e-06, |
| "loss": 1.4974, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.479600333055787, |
| "grad_norm": 0.46743126820019115, |
| "learning_rate": 6.408268733850127e-06, |
| "loss": 1.3428, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.4829308909242298, |
| "grad_norm": 0.32072751511352704, |
| "learning_rate": 6.396677050882658e-06, |
| "loss": 1.4252, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.4862614487926726, |
| "grad_norm": 0.3691624108782593, |
| "learning_rate": 6.384976525821596e-06, |
| "loss": 1.4288, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.4895920066611157, |
| "grad_norm": 0.41832466518878647, |
| "learning_rate": 6.373165618448636e-06, |
| "loss": 1.4287, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.4929225645295587, |
| "grad_norm": 0.3728200914294547, |
| "learning_rate": 6.361242759347024e-06, |
| "loss": 1.391, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.4962531223980018, |
| "grad_norm": 0.3489172461380398, |
| "learning_rate": 6.349206349206349e-06, |
| "loss": 1.4012, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.4995836802664446, |
| "grad_norm": 0.45831242097179337, |
| "learning_rate": 6.337054758107389e-06, |
| "loss": 1.4062, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.5029142381348874, |
| "grad_norm": 0.4485083988308969, |
| "learning_rate": 6.324786324786324e-06, |
| "loss": 1.4077, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.5062447960033305, |
| "grad_norm": 0.3469124587165823, |
| "learning_rate": 6.312399355877616e-06, |
| "loss": 1.3635, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.5095753538717736, |
| "grad_norm": 0.4359834643536742, |
| "learning_rate": 6.299892125134842e-06, |
| "loss": 1.3951, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.5129059117402166, |
| "grad_norm": 0.43347338145656295, |
| "learning_rate": 6.287262872628726e-06, |
| "loss": 1.438, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.5162364696086594, |
| "grad_norm": 0.3544519721589859, |
| "learning_rate": 6.274509803921569e-06, |
| "loss": 1.4028, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.5195670274771023, |
| "grad_norm": 0.4175623558211923, |
| "learning_rate": 6.261631089217296e-06, |
| "loss": 1.4649, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.5228975853455453, |
| "grad_norm": 0.47794327593006264, |
| "learning_rate": 6.248624862486248e-06, |
| "loss": 1.4552, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.5262281432139884, |
| "grad_norm": 0.5102221497723193, |
| "learning_rate": 6.235489220563847e-06, |
| "loss": 1.5577, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.5295587010824314, |
| "grad_norm": 0.361727454686882, |
| "learning_rate": 6.2222222222222215e-06, |
| "loss": 1.4977, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.5328892589508742, |
| "grad_norm": 0.43568797487755334, |
| "learning_rate": 6.208821887213847e-06, |
| "loss": 1.4417, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.536219816819317, |
| "grad_norm": 0.39795557103291623, |
| "learning_rate": 6.195286195286195e-06, |
| "loss": 1.4479, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.5395503746877601, |
| "grad_norm": 0.3699426752838303, |
| "learning_rate": 6.181613085166384e-06, |
| "loss": 1.4379, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.5428809325562032, |
| "grad_norm": 0.5138765482501748, |
| "learning_rate": 6.167800453514738e-06, |
| "loss": 1.4433, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.5462114904246462, |
| "grad_norm": 0.5597671339637968, |
| "learning_rate": 6.153846153846153e-06, |
| "loss": 1.4255, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.549542048293089, |
| "grad_norm": 0.4443208189107028, |
| "learning_rate": 6.1397479954180976e-06, |
| "loss": 1.4458, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.552872606161532, |
| "grad_norm": 0.41782304334586917, |
| "learning_rate": 6.125503742084053e-06, |
| "loss": 1.4362, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.556203164029975, |
| "grad_norm": 0.511701451750574, |
| "learning_rate": 6.11111111111111e-06, |
| "loss": 1.4378, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.559533721898418, |
| "grad_norm": 0.4272528437058103, |
| "learning_rate": 6.096567771960442e-06, |
| "loss": 1.4315, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.562864279766861, |
| "grad_norm": 0.42099653002903337, |
| "learning_rate": 6.0818713450292395e-06, |
| "loss": 1.4092, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.5661948376353039, |
| "grad_norm": 0.4635591149261861, |
| "learning_rate": 6.067019400352732e-06, |
| "loss": 1.4357, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.569525395503747, |
| "grad_norm": 0.5318262046494987, |
| "learning_rate": 6.052009456264775e-06, |
| "loss": 1.4753, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.5728559533721898, |
| "grad_norm": 0.4098578230232083, |
| "learning_rate": 6.036838978015449e-06, |
| "loss": 1.4192, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.5761865112406328, |
| "grad_norm": 0.4563174114919455, |
| "learning_rate": 6.021505376344085e-06, |
| "loss": 1.4676, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.5795170691090759, |
| "grad_norm": 0.5270544922424331, |
| "learning_rate": 6.006006006006005e-06, |
| "loss": 1.4267, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.5828476269775187, |
| "grad_norm": 0.3910787909582668, |
| "learning_rate": 5.990338164251208e-06, |
| "loss": 1.3766, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.5861781848459617, |
| "grad_norm": 0.4736515430850208, |
| "learning_rate": 5.974499089253187e-06, |
| "loss": 1.4437, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.5895087427144046, |
| "grad_norm": 0.5430796464569592, |
| "learning_rate": 5.958485958485957e-06, |
| "loss": 1.4482, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.5928393005828476, |
| "grad_norm": 0.38226206389298173, |
| "learning_rate": 5.942295887047268e-06, |
| "loss": 1.412, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.5961698584512907, |
| "grad_norm": 0.3721223079028304, |
| "learning_rate": 5.925925925925925e-06, |
| "loss": 1.45, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.5995004163197337, |
| "grad_norm": 0.3827064109331823, |
| "learning_rate": 5.909373060211049e-06, |
| "loss": 1.4217, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.6028309741881765, |
| "grad_norm": 0.33684324932641296, |
| "learning_rate": 5.892634207240949e-06, |
| "loss": 1.3557, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.6061615320566194, |
| "grad_norm": 0.31468847211788964, |
| "learning_rate": 5.875706214689265e-06, |
| "loss": 1.4122, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.6094920899250624, |
| "grad_norm": 0.4442799216781044, |
| "learning_rate": 5.858585858585859e-06, |
| "loss": 1.4285, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.6128226477935055, |
| "grad_norm": 0.4567121702156198, |
| "learning_rate": 5.841269841269841e-06, |
| "loss": 1.4764, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.6161532056619485, |
| "grad_norm": 0.3590206566567271, |
| "learning_rate": 5.82375478927203e-06, |
| "loss": 1.4229, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.6194837635303914, |
| "grad_norm": 0.3652198930331244, |
| "learning_rate": 5.806037251123956e-06, |
| "loss": 1.4151, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.6228143213988342, |
| "grad_norm": 0.35866861963268476, |
| "learning_rate": 5.7881136950904385e-06, |
| "loss": 1.3369, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.6261448792672772, |
| "grad_norm": 0.4750936045573692, |
| "learning_rate": 5.7699805068226105e-06, |
| "loss": 1.4715, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.6294754371357203, |
| "grad_norm": 0.3613198830707804, |
| "learning_rate": 5.7516339869281045e-06, |
| "loss": 1.4291, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.6328059950041633, |
| "grad_norm": 0.43606379412430957, |
| "learning_rate": 5.733070348454964e-06, |
| "loss": 1.4011, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.6361365528726062, |
| "grad_norm": 0.35042984426925494, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 1.4368, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.639467110741049, |
| "grad_norm": 0.31661366243629, |
| "learning_rate": 5.695276114437791e-06, |
| "loss": 1.4541, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.642797668609492, |
| "grad_norm": 0.3561358967067642, |
| "learning_rate": 5.676037483266399e-06, |
| "loss": 1.433, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.646128226477935, |
| "grad_norm": 0.3931637346563919, |
| "learning_rate": 5.656565656565656e-06, |
| "loss": 1.4193, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.6494587843463782, |
| "grad_norm": 0.48631366553960975, |
| "learning_rate": 5.6368563685636855e-06, |
| "loss": 1.4012, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.652789342214821, |
| "grad_norm": 0.41348933242163105, |
| "learning_rate": 5.616905248807089e-06, |
| "loss": 1.3883, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.6561199000832638, |
| "grad_norm": 0.3541766139316355, |
| "learning_rate": 5.59670781893004e-06, |
| "loss": 1.363, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.6594504579517069, |
| "grad_norm": 0.410383164470969, |
| "learning_rate": 5.576259489302967e-06, |
| "loss": 1.3955, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.66278101582015, |
| "grad_norm": 0.4100549908496841, |
| "learning_rate": 5.555555555555555e-06, |
| "loss": 1.3663, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.666111573688593, |
| "grad_norm": 0.4122832272958553, |
| "learning_rate": 5.534591194968553e-06, |
| "loss": 1.5108, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.6694421315570358, |
| "grad_norm": 0.33209617039282874, |
| "learning_rate": 5.513361462728551e-06, |
| "loss": 1.4069, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.6727726894254786, |
| "grad_norm": 0.34650064809899755, |
| "learning_rate": 5.491861288039631e-06, |
| "loss": 1.3953, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.6761032472939217, |
| "grad_norm": 0.3583592015376779, |
| "learning_rate": 5.47008547008547e-06, |
| "loss": 1.4181, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.6794338051623647, |
| "grad_norm": 0.34343414571245584, |
| "learning_rate": 5.448028673835125e-06, |
| "loss": 1.418, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.6827643630308078, |
| "grad_norm": 0.35638669107128673, |
| "learning_rate": 5.425685425685425e-06, |
| "loss": 1.4052, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.6860949208992506, |
| "grad_norm": 0.3467424021658532, |
| "learning_rate": 5.403050108932461e-06, |
| "loss": 1.4581, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.6894254787676934, |
| "grad_norm": 0.32381127071831955, |
| "learning_rate": 5.3801169590643285e-06, |
| "loss": 1.459, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.6927560366361365, |
| "grad_norm": 0.3811936086039866, |
| "learning_rate": 5.356880058866813e-06, |
| "loss": 1.5033, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.6960865945045795, |
| "grad_norm": 0.3612050754686712, |
| "learning_rate": 5.333333333333333e-06, |
| "loss": 1.4137, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.6994171523730226, |
| "grad_norm": 0.35765265665477713, |
| "learning_rate": 5.309470544369873e-06, |
| "loss": 1.4087, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.7027477102414654, |
| "grad_norm": 0.3357163323849947, |
| "learning_rate": 5.285285285285285e-06, |
| "loss": 1.4851, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.7060782681099083, |
| "grad_norm": 0.3449646759899252, |
| "learning_rate": 5.260770975056689e-06, |
| "loss": 1.442, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.7094088259783513, |
| "grad_norm": 0.3335919341097906, |
| "learning_rate": 5.235920852359208e-06, |
| "loss": 1.454, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.7127393838467944, |
| "grad_norm": 0.3414007515866483, |
| "learning_rate": 5.210727969348659e-06, |
| "loss": 1.4762, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.7160699417152374, |
| "grad_norm": 0.37174665041283544, |
| "learning_rate": 5.185185185185185e-06, |
| "loss": 1.4615, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.7194004995836802, |
| "grad_norm": 0.37265087217053033, |
| "learning_rate": 5.159285159285159e-06, |
| "loss": 1.4072, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.722731057452123, |
| "grad_norm": 0.3445160578098801, |
| "learning_rate": 5.1330203442879505e-06, |
| "loss": 1.4337, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.7260616153205661, |
| "grad_norm": 0.3675807887019101, |
| "learning_rate": 5.106382978723403e-06, |
| "loss": 1.4147, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.7293921731890092, |
| "grad_norm": 0.34584285856367675, |
| "learning_rate": 5.079365079365079e-06, |
| "loss": 1.4193, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.7327227310574522, |
| "grad_norm": 0.3685778739128953, |
| "learning_rate": 5.051958433253396e-06, |
| "loss": 1.4466, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.736053288925895, |
| "grad_norm": 0.35632916296360506, |
| "learning_rate": 5.02415458937198e-06, |
| "loss": 1.4299, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.739383846794338, |
| "grad_norm": 0.3941128956001842, |
| "learning_rate": 4.995944849959448e-06, |
| "loss": 1.4264, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.742714404662781, |
| "grad_norm": 0.3481786883352737, |
| "learning_rate": 4.967320261437908e-06, |
| "loss": 1.4279, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.746044962531224, |
| "grad_norm": 0.3627527951339854, |
| "learning_rate": 4.938271604938271e-06, |
| "loss": 1.5152, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.749375520399667, |
| "grad_norm": 0.34527513358988937, |
| "learning_rate": 4.9087893864013265e-06, |
| "loss": 1.4088, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.7527060782681099, |
| "grad_norm": 0.368823414133051, |
| "learning_rate": 4.878863826232247e-06, |
| "loss": 1.3944, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.756036636136553, |
| "grad_norm": 0.3471938836863914, |
| "learning_rate": 4.848484848484849e-06, |
| "loss": 1.3809, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.7593671940049957, |
| "grad_norm": 0.39760839658681035, |
| "learning_rate": 4.817642069550467e-06, |
| "loss": 1.4081, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.7626977518734388, |
| "grad_norm": 0.35630999152948084, |
| "learning_rate": 4.786324786324786e-06, |
| "loss": 1.4049, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.7660283097418819, |
| "grad_norm": 0.3123127862091999, |
| "learning_rate": 4.754521963824289e-06, |
| "loss": 1.4033, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.7693588676103247, |
| "grad_norm": 0.3565716669933871, |
| "learning_rate": 4.722222222222222e-06, |
| "loss": 1.4548, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.7726894254787677, |
| "grad_norm": 0.3415824605451111, |
| "learning_rate": 4.68941382327209e-06, |
| "loss": 1.3379, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.7760199833472106, |
| "grad_norm": 0.37445157627374487, |
| "learning_rate": 4.6560846560846555e-06, |
| "loss": 1.5023, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.7793505412156536, |
| "grad_norm": 0.4140970552339397, |
| "learning_rate": 4.622222222222222e-06, |
| "loss": 1.4982, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.7826810990840967, |
| "grad_norm": 0.3696216853055909, |
| "learning_rate": 4.587813620071684e-06, |
| "loss": 1.3795, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.7860116569525397, |
| "grad_norm": 0.3374796769034963, |
| "learning_rate": 4.552845528455284e-06, |
| "loss": 1.4356, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.7893422148209825, |
| "grad_norm": 0.4227610049072286, |
| "learning_rate": 4.517304189435337e-06, |
| "loss": 1.4625, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.7926727726894254, |
| "grad_norm": 0.36612259553982557, |
| "learning_rate": 4.4811753902663e-06, |
| "loss": 1.4274, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.7960033305578684, |
| "grad_norm": 0.4222638209328834, |
| "learning_rate": 4.444444444444443e-06, |
| "loss": 1.5129, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.7993338884263115, |
| "grad_norm": 0.41009576553628174, |
| "learning_rate": 4.407096171802053e-06, |
| "loss": 1.4873, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.8026644462947545, |
| "grad_norm": 0.35086922544434007, |
| "learning_rate": 4.369114877589454e-06, |
| "loss": 1.3718, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.8059950041631974, |
| "grad_norm": 0.35855015526438827, |
| "learning_rate": 4.33048433048433e-06, |
| "loss": 1.4031, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.8093255620316402, |
| "grad_norm": 0.42477533100459036, |
| "learning_rate": 4.291187739463601e-06, |
| "loss": 1.4473, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.8126561199000832, |
| "grad_norm": 0.39791782472493653, |
| "learning_rate": 4.251207729468599e-06, |
| "loss": 1.4374, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.8159866777685263, |
| "grad_norm": 0.3444343384513091, |
| "learning_rate": 4.210526315789473e-06, |
| "loss": 1.4048, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.8193172356369693, |
| "grad_norm": 0.3453119165966736, |
| "learning_rate": 4.169124877089478e-06, |
| "loss": 1.3581, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.8226477935054122, |
| "grad_norm": 0.38186414289634574, |
| "learning_rate": 4.126984126984126e-06, |
| "loss": 1.4774, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.825978351373855, |
| "grad_norm": 0.3371300332212375, |
| "learning_rate": 4.084084084084084e-06, |
| "loss": 1.3565, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.829308909242298, |
| "grad_norm": 0.32042065002080106, |
| "learning_rate": 4.0404040404040395e-06, |
| "loss": 1.3807, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.832639467110741, |
| "grad_norm": 0.3776475075214216, |
| "learning_rate": 3.995922528032619e-06, |
| "loss": 1.4305, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.8359700249791842, |
| "grad_norm": 0.3351717661136717, |
| "learning_rate": 3.9506172839506175e-06, |
| "loss": 1.4133, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.839300582847627, |
| "grad_norm": 0.37528610178789024, |
| "learning_rate": 3.904465212876428e-06, |
| "loss": 1.4994, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.8426311407160698, |
| "grad_norm": 0.38936785329254486, |
| "learning_rate": 3.857442348008385e-06, |
| "loss": 1.393, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.8459616985845129, |
| "grad_norm": 0.40525496168183883, |
| "learning_rate": 3.8095238095238094e-06, |
| "loss": 1.4019, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.849292256452956, |
| "grad_norm": 0.4169994094961459, |
| "learning_rate": 3.7606837606837604e-06, |
| "loss": 1.4208, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.852622814321399, |
| "grad_norm": 0.4093560262894869, |
| "learning_rate": 3.710895361380798e-06, |
| "loss": 1.44, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.8559533721898418, |
| "grad_norm": 0.35662577286334196, |
| "learning_rate": 3.660130718954248e-06, |
| "loss": 1.4168, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.8592839300582846, |
| "grad_norm": 0.3469062029498766, |
| "learning_rate": 3.6083608360836084e-06, |
| "loss": 1.4109, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.8626144879267277, |
| "grad_norm": 0.35913894186601036, |
| "learning_rate": 3.5555555555555546e-06, |
| "loss": 1.3026, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.8659450457951707, |
| "grad_norm": 0.3601783041537011, |
| "learning_rate": 3.501683501683501e-06, |
| "loss": 1.4429, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.8692756036636138, |
| "grad_norm": 0.4301246312907219, |
| "learning_rate": 3.4467120181405894e-06, |
| "loss": 1.4415, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.8726061615320566, |
| "grad_norm": 0.44543619950365937, |
| "learning_rate": 3.390607101947308e-06, |
| "loss": 1.4354, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.8759367194004994, |
| "grad_norm": 0.4110006980910609, |
| "learning_rate": 3.333333333333333e-06, |
| "loss": 1.5156, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.8792672772689425, |
| "grad_norm": 0.36681035057341954, |
| "learning_rate": 3.27485380116959e-06, |
| "loss": 1.3926, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.8825978351373855, |
| "grad_norm": 0.3639552416710322, |
| "learning_rate": 3.215130023640661e-06, |
| "loss": 1.4537, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.8859283930058286, |
| "grad_norm": 0.3809201109198225, |
| "learning_rate": 3.154121863799283e-06, |
| "loss": 1.4344, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.8892589508742714, |
| "grad_norm": 0.5067748995958425, |
| "learning_rate": 3.0917874396135263e-06, |
| "loss": 1.444, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.8925895087427143, |
| "grad_norm": 0.39339545860925257, |
| "learning_rate": 3.028083028083028e-06, |
| "loss": 1.4368, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.8959200666111573, |
| "grad_norm": 0.3645143242760266, |
| "learning_rate": 2.9629629629629625e-06, |
| "loss": 1.4189, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.8992506244796004, |
| "grad_norm": 0.41301404150023885, |
| "learning_rate": 2.8963795255930087e-06, |
| "loss": 1.4513, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.9025811823480434, |
| "grad_norm": 0.35445322756534786, |
| "learning_rate": 2.828282828282828e-06, |
| "loss": 1.4212, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.9059117402164862, |
| "grad_norm": 0.31609898679838344, |
| "learning_rate": 2.758620689655172e-06, |
| "loss": 1.4282, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.909242298084929, |
| "grad_norm": 0.38641815454972966, |
| "learning_rate": 2.6873385012919895e-06, |
| "loss": 1.5401, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.9125728559533721, |
| "grad_norm": 0.38729985084754753, |
| "learning_rate": 2.6143790849673204e-06, |
| "loss": 1.461, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.9159034138218152, |
| "grad_norm": 0.4550081298663739, |
| "learning_rate": 2.5396825396825395e-06, |
| "loss": 1.4602, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.9192339716902582, |
| "grad_norm": 0.3605173725442084, |
| "learning_rate": 2.4631860776439087e-06, |
| "loss": 1.4104, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.922564529558701, |
| "grad_norm": 0.38548981376382463, |
| "learning_rate": 2.384823848238482e-06, |
| "loss": 1.4465, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.9258950874271439, |
| "grad_norm": 0.39748551935246357, |
| "learning_rate": 2.304526748971193e-06, |
| "loss": 1.443, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.929225645295587, |
| "grad_norm": 0.3638886379639791, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 1.4129, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.93255620316403, |
| "grad_norm": 0.36953734209449074, |
| "learning_rate": 2.1378340365682133e-06, |
| "loss": 1.3176, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.935886761032473, |
| "grad_norm": 0.3267944344034355, |
| "learning_rate": 2.051282051282051e-06, |
| "loss": 1.4588, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.9392173189009159, |
| "grad_norm": 0.3915434082543582, |
| "learning_rate": 1.962481962481962e-06, |
| "loss": 1.4441, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.942547876769359, |
| "grad_norm": 0.3556155258308632, |
| "learning_rate": 1.871345029239766e-06, |
| "loss": 1.3898, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.9458784346378017, |
| "grad_norm": 0.35583427100431714, |
| "learning_rate": 1.7777777777777775e-06, |
| "loss": 1.4117, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.9492089925062448, |
| "grad_norm": 0.3312617219719275, |
| "learning_rate": 1.6816816816816814e-06, |
| "loss": 1.4243, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.9525395503746878, |
| "grad_norm": 0.3171322439070156, |
| "learning_rate": 1.582952815829528e-06, |
| "loss": 1.3974, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.9558701082431307, |
| "grad_norm": 0.2931034713127486, |
| "learning_rate": 1.4814814814814812e-06, |
| "loss": 1.4232, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.9592006661115737, |
| "grad_norm": 0.31803832338980526, |
| "learning_rate": 1.3771517996870107e-06, |
| "loss": 1.4475, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.9625312239800166, |
| "grad_norm": 0.3102745879861819, |
| "learning_rate": 1.2698412698412697e-06, |
| "loss": 1.4991, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.9658617818484596, |
| "grad_norm": 0.35190966791382605, |
| "learning_rate": 1.1594202898550724e-06, |
| "loss": 1.4806, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.9691923397169027, |
| "grad_norm": 0.3133274529689738, |
| "learning_rate": 1.045751633986928e-06, |
| "loss": 1.347, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.9725228975853455, |
| "grad_norm": 0.30605048339614954, |
| "learning_rate": 9.286898839137644e-07, |
| "loss": 1.3999, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.9758534554537885, |
| "grad_norm": 0.3151090112991302, |
| "learning_rate": 8.08080808080808e-07, |
| "loss": 1.4339, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.9791840133222314, |
| "grad_norm": 0.35650599063479166, |
| "learning_rate": 6.837606837606837e-07, |
| "loss": 1.4009, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.9825145711906744, |
| "grad_norm": 0.3384958491564326, |
| "learning_rate": 5.555555555555555e-07, |
| "loss": 1.4611, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.9858451290591175, |
| "grad_norm": 0.3335636198476521, |
| "learning_rate": 4.2328042328042324e-07, |
| "loss": 1.3728, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.9891756869275605, |
| "grad_norm": 0.32277296814250667, |
| "learning_rate": 2.8673835125448024e-07, |
| "loss": 1.387, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.9925062447960034, |
| "grad_norm": 0.3467254801927619, |
| "learning_rate": 1.4571948998178507e-07, |
| "loss": 1.4114, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.9958368026644462, |
| "grad_norm": 0.3207639144479259, |
| "learning_rate": 0, |
| "loss": 1.3956, |
| "memory/device_mem_reserved(gib)": 59.75, |
| "memory/max_mem_active(gib)": 57.09, |
| "memory/max_mem_allocated(gib)": 56.77, |
| "step": 600 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 150, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.439031159441326e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|