| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.3529411764705883, |
| "eval_steps": 10, |
| "global_step": 200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 169.93887329101562, |
| "learning_rate": 6e-06, |
| "loss": 4.8387, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "eval_loss": 4.6919026374816895, |
| "eval_runtime": 15.5369, |
| "eval_samples_per_second": 350.456, |
| "eval_steps_per_second": 5.535, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 103.59686279296875, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 4.3444, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "eval_loss": 3.9225239753723145, |
| "eval_runtime": 16.2767, |
| "eval_samples_per_second": 334.526, |
| "eval_steps_per_second": 5.284, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 96.45588684082031, |
| "learning_rate": 1.9333333333333333e-05, |
| "loss": 3.5537, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "eval_loss": 3.1923811435699463, |
| "eval_runtime": 17.7651, |
| "eval_samples_per_second": 306.5, |
| "eval_steps_per_second": 4.841, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 116.93628692626953, |
| "learning_rate": 1.8222222222222224e-05, |
| "loss": 2.4677, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "eval_loss": 2.3630847930908203, |
| "eval_runtime": 19.3532, |
| "eval_samples_per_second": 281.349, |
| "eval_steps_per_second": 4.444, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 123.0512924194336, |
| "learning_rate": 1.7111111111111112e-05, |
| "loss": 1.5596, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "eval_loss": 1.779540777206421, |
| "eval_runtime": 17.9711, |
| "eval_samples_per_second": 302.987, |
| "eval_steps_per_second": 4.785, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 116.81210327148438, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.9314, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "eval_loss": 1.3096877336502075, |
| "eval_runtime": 17.7299, |
| "eval_samples_per_second": 307.109, |
| "eval_steps_per_second": 4.851, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 78.21733093261719, |
| "learning_rate": 1.488888888888889e-05, |
| "loss": 0.4562, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "eval_loss": 1.0128556489944458, |
| "eval_runtime": 18.4122, |
| "eval_samples_per_second": 295.729, |
| "eval_steps_per_second": 4.671, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 65.50968933105469, |
| "learning_rate": 1.377777777777778e-05, |
| "loss": 0.3587, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "eval_loss": 0.9688291549682617, |
| "eval_runtime": 18.5333, |
| "eval_samples_per_second": 293.795, |
| "eval_steps_per_second": 4.64, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.0588235294117647, |
| "grad_norm": 61.12400817871094, |
| "learning_rate": 1.2666666666666667e-05, |
| "loss": 0.3667, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.0588235294117647, |
| "eval_loss": 0.9096461534500122, |
| "eval_runtime": 18.1026, |
| "eval_samples_per_second": 300.785, |
| "eval_steps_per_second": 4.751, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "grad_norm": 50.41886901855469, |
| "learning_rate": 1.1555555555555556e-05, |
| "loss": 0.3104, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "eval_loss": 0.9075976610183716, |
| "eval_runtime": 18.2417, |
| "eval_samples_per_second": 298.492, |
| "eval_steps_per_second": 4.714, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.2941176470588236, |
| "grad_norm": 49.723411560058594, |
| "learning_rate": 1.0444444444444445e-05, |
| "loss": 0.3057, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.2941176470588236, |
| "eval_loss": 0.8963654637336731, |
| "eval_runtime": 18.3711, |
| "eval_samples_per_second": 296.389, |
| "eval_steps_per_second": 4.681, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.4117647058823528, |
| "grad_norm": 66.96435546875, |
| "learning_rate": 9.333333333333334e-06, |
| "loss": 0.2852, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.4117647058823528, |
| "eval_loss": 0.8938003778457642, |
| "eval_runtime": 18.4942, |
| "eval_samples_per_second": 294.417, |
| "eval_steps_per_second": 4.65, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.5294117647058822, |
| "grad_norm": 67.33085632324219, |
| "learning_rate": 8.222222222222222e-06, |
| "loss": 0.2527, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.5294117647058822, |
| "eval_loss": 0.9134606122970581, |
| "eval_runtime": 18.4294, |
| "eval_samples_per_second": 295.452, |
| "eval_steps_per_second": 4.666, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.6470588235294117, |
| "grad_norm": 60.442684173583984, |
| "learning_rate": 7.111111111111112e-06, |
| "loss": 0.3592, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.6470588235294117, |
| "eval_loss": 0.8834967017173767, |
| "eval_runtime": 18.3629, |
| "eval_samples_per_second": 296.522, |
| "eval_steps_per_second": 4.683, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.7647058823529411, |
| "grad_norm": 59.17316818237305, |
| "learning_rate": 6e-06, |
| "loss": 0.1998, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.7647058823529411, |
| "eval_loss": 0.862636923789978, |
| "eval_runtime": 18.2959, |
| "eval_samples_per_second": 297.608, |
| "eval_steps_per_second": 4.701, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.8823529411764706, |
| "grad_norm": 33.52590560913086, |
| "learning_rate": 4.888888888888889e-06, |
| "loss": 0.2258, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.8823529411764706, |
| "eval_loss": 0.8706350326538086, |
| "eval_runtime": 18.3223, |
| "eval_samples_per_second": 297.179, |
| "eval_steps_per_second": 4.694, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 48.038719177246094, |
| "learning_rate": 3.777777777777778e-06, |
| "loss": 0.1933, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.875869870185852, |
| "eval_runtime": 18.2474, |
| "eval_samples_per_second": 298.399, |
| "eval_steps_per_second": 4.713, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.1176470588235294, |
| "grad_norm": 42.39509201049805, |
| "learning_rate": 2.666666666666667e-06, |
| "loss": 0.1987, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.1176470588235294, |
| "eval_loss": 0.875755250453949, |
| "eval_runtime": 18.2272, |
| "eval_samples_per_second": 298.729, |
| "eval_steps_per_second": 4.718, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.235294117647059, |
| "grad_norm": 24.651744842529297, |
| "learning_rate": 1.5555555555555558e-06, |
| "loss": 0.1684, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.235294117647059, |
| "eval_loss": 0.8625762462615967, |
| "eval_runtime": 18.1576, |
| "eval_samples_per_second": 299.874, |
| "eval_steps_per_second": 4.736, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.3529411764705883, |
| "grad_norm": 27.64773941040039, |
| "learning_rate": 4.444444444444445e-07, |
| "loss": 0.1612, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.3529411764705883, |
| "eval_loss": 0.8577004075050354, |
| "eval_runtime": 18.1098, |
| "eval_samples_per_second": 300.666, |
| "eval_steps_per_second": 4.749, |
| "step": 200 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 200, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 2500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|