| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 324, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.030864197530864196, |
| "grad_norm": 0.2539002488078261, |
| "learning_rate": 1e-05, |
| "loss": 0.4363, |
| "num_input_tokens_seen": 0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06172839506172839, |
| "grad_norm": 0.2024392131106122, |
| "learning_rate": 9.681528662420384e-06, |
| "loss": 0.2931, |
| "num_input_tokens_seen": 0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09259259259259259, |
| "grad_norm": 0.1711436526142984, |
| "learning_rate": 9.363057324840765e-06, |
| "loss": 0.2336, |
| "num_input_tokens_seen": 0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.12345679012345678, |
| "grad_norm": 0.13488565924080936, |
| "learning_rate": 9.044585987261148e-06, |
| "loss": 0.2006, |
| "num_input_tokens_seen": 0, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.15432098765432098, |
| "grad_norm": 0.13503015545972158, |
| "learning_rate": 8.726114649681529e-06, |
| "loss": 0.1855, |
| "num_input_tokens_seen": 0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.18518518518518517, |
| "grad_norm": 0.16484545379786694, |
| "learning_rate": 8.407643312101912e-06, |
| "loss": 0.1708, |
| "num_input_tokens_seen": 0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.21604938271604937, |
| "grad_norm": 0.16373587348563579, |
| "learning_rate": 8.089171974522295e-06, |
| "loss": 0.1622, |
| "num_input_tokens_seen": 0, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.24691358024691357, |
| "grad_norm": 0.1524418787913334, |
| "learning_rate": 7.770700636942676e-06, |
| "loss": 0.1491, |
| "num_input_tokens_seen": 0, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2777777777777778, |
| "grad_norm": 0.13239482123815, |
| "learning_rate": 7.452229299363057e-06, |
| "loss": 0.1615, |
| "num_input_tokens_seen": 0, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.30864197530864196, |
| "grad_norm": 0.21888788342573567, |
| "learning_rate": 7.13375796178344e-06, |
| "loss": 0.1531, |
| "num_input_tokens_seen": 0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3395061728395062, |
| "grad_norm": 0.14773397182859288, |
| "learning_rate": 6.815286624203822e-06, |
| "loss": 0.1549, |
| "num_input_tokens_seen": 0, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.37037037037037035, |
| "grad_norm": 0.14063670969844863, |
| "learning_rate": 6.496815286624204e-06, |
| "loss": 0.1451, |
| "num_input_tokens_seen": 0, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4012345679012346, |
| "grad_norm": 0.1483882380802712, |
| "learning_rate": 6.178343949044586e-06, |
| "loss": 0.1411, |
| "num_input_tokens_seen": 0, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.43209876543209874, |
| "grad_norm": 0.19499270485543743, |
| "learning_rate": 5.859872611464969e-06, |
| "loss": 0.1435, |
| "num_input_tokens_seen": 0, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.46296296296296297, |
| "grad_norm": 0.14731278284216748, |
| "learning_rate": 5.541401273885351e-06, |
| "loss": 0.1399, |
| "num_input_tokens_seen": 0, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.49382716049382713, |
| "grad_norm": 0.14099478047141387, |
| "learning_rate": 5.222929936305733e-06, |
| "loss": 0.1375, |
| "num_input_tokens_seen": 0, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5246913580246914, |
| "grad_norm": 0.13980815390161488, |
| "learning_rate": 4.904458598726115e-06, |
| "loss": 0.1447, |
| "num_input_tokens_seen": 0, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.15870399104525085, |
| "learning_rate": 4.585987261146497e-06, |
| "loss": 0.1349, |
| "num_input_tokens_seen": 0, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5864197530864198, |
| "grad_norm": 0.13469923079260826, |
| "learning_rate": 4.26751592356688e-06, |
| "loss": 0.1353, |
| "num_input_tokens_seen": 0, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6172839506172839, |
| "grad_norm": 0.16157642492469387, |
| "learning_rate": 3.949044585987262e-06, |
| "loss": 0.1436, |
| "num_input_tokens_seen": 0, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6481481481481481, |
| "grad_norm": 0.16708609597150495, |
| "learning_rate": 3.6305732484076435e-06, |
| "loss": 0.1264, |
| "num_input_tokens_seen": 0, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6790123456790124, |
| "grad_norm": 0.14770437980798296, |
| "learning_rate": 3.3121019108280255e-06, |
| "loss": 0.1228, |
| "num_input_tokens_seen": 0, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7098765432098766, |
| "grad_norm": 0.13900965859775397, |
| "learning_rate": 2.993630573248408e-06, |
| "loss": 0.1196, |
| "num_input_tokens_seen": 0, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7407407407407407, |
| "grad_norm": 0.12123168814466377, |
| "learning_rate": 2.67515923566879e-06, |
| "loss": 0.1291, |
| "num_input_tokens_seen": 0, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7716049382716049, |
| "grad_norm": 0.12106750215748638, |
| "learning_rate": 2.356687898089172e-06, |
| "loss": 0.1306, |
| "num_input_tokens_seen": 0, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8024691358024691, |
| "grad_norm": 0.1393388456599626, |
| "learning_rate": 2.0382165605095544e-06, |
| "loss": 0.1228, |
| "num_input_tokens_seen": 0, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 0.18012074978813084, |
| "learning_rate": 1.7197452229299363e-06, |
| "loss": 0.1178, |
| "num_input_tokens_seen": 0, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8641975308641975, |
| "grad_norm": 0.12683446108621385, |
| "learning_rate": 1.4012738853503185e-06, |
| "loss": 0.1176, |
| "num_input_tokens_seen": 0, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8950617283950617, |
| "grad_norm": 0.14596666878755418, |
| "learning_rate": 1.0828025477707007e-06, |
| "loss": 0.1126, |
| "num_input_tokens_seen": 0, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9259259259259259, |
| "grad_norm": 0.1345601273071694, |
| "learning_rate": 7.643312101910829e-07, |
| "loss": 0.1236, |
| "num_input_tokens_seen": 0, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9567901234567902, |
| "grad_norm": 0.13789896926760864, |
| "learning_rate": 4.45859872611465e-07, |
| "loss": 0.1103, |
| "num_input_tokens_seen": 0, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9876543209876543, |
| "grad_norm": 0.13336398116511375, |
| "learning_rate": 1.2738853503184715e-07, |
| "loss": 0.1111, |
| "num_input_tokens_seen": 0, |
| "step": 320 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 324, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 727599714140160.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|