| { |
| "best_global_step": 3498, |
| "best_metric": 0.9396295674217445, |
| "best_model_checkpoint": "./roberta_urdu_multilabel/checkpoint-3498", |
| "epoch": 6.0, |
| "eval_steps": 500, |
| "global_step": 3498, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.17152658662092624, |
| "grad_norm": 0.4993675649166107, |
| "learning_rate": 4.858490566037736e-05, |
| "loss": 0.2695, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.34305317324185247, |
| "grad_norm": 0.32874006032943726, |
| "learning_rate": 4.7155517438536305e-05, |
| "loss": 0.116, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5145797598627787, |
| "grad_norm": 0.22341817617416382, |
| "learning_rate": 4.5726129216695255e-05, |
| "loss": 0.0827, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6861063464837049, |
| "grad_norm": 0.6156010031700134, |
| "learning_rate": 4.4296740994854205e-05, |
| "loss": 0.0596, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8576329331046312, |
| "grad_norm": 0.1617479920387268, |
| "learning_rate": 4.2867352773013155e-05, |
| "loss": 0.0518, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9099485420240138, |
| "eval_f1_macro": 0.8772930595964492, |
| "eval_loss": 0.03939065709710121, |
| "eval_runtime": 4.0802, |
| "eval_samples_per_second": 285.774, |
| "eval_steps_per_second": 35.783, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.0291595197255574, |
| "grad_norm": 0.4286077916622162, |
| "learning_rate": 4.14379645511721e-05, |
| "loss": 0.0414, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.2006861063464838, |
| "grad_norm": 0.45919153094291687, |
| "learning_rate": 4.000857632933105e-05, |
| "loss": 0.0347, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.3722126929674099, |
| "grad_norm": 0.5688769221305847, |
| "learning_rate": 3.857918810748999e-05, |
| "loss": 0.0283, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.5437392795883362, |
| "grad_norm": 0.3409585654735565, |
| "learning_rate": 3.714979988564895e-05, |
| "loss": 0.0259, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.7152658662092626, |
| "grad_norm": 0.303937166929245, |
| "learning_rate": 3.572041166380789e-05, |
| "loss": 0.0244, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.8867924528301887, |
| "grad_norm": 0.04824039712548256, |
| "learning_rate": 3.429102344196684e-05, |
| "loss": 0.0217, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9716981132075472, |
| "eval_f1_macro": 0.9048159496552155, |
| "eval_loss": 0.01790458895266056, |
| "eval_runtime": 4.0828, |
| "eval_samples_per_second": 285.59, |
| "eval_steps_per_second": 35.76, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.058319039451115, |
| "grad_norm": 0.07180823385715485, |
| "learning_rate": 3.2861635220125784e-05, |
| "loss": 0.0179, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.2298456260720414, |
| "grad_norm": 0.3384993374347687, |
| "learning_rate": 3.1432246998284733e-05, |
| "loss": 0.0163, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.4013722126929675, |
| "grad_norm": 0.0720255896449089, |
| "learning_rate": 3.0002858776443683e-05, |
| "loss": 0.0148, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.5728987993138936, |
| "grad_norm": 0.07835888862609863, |
| "learning_rate": 2.8573470554602633e-05, |
| "loss": 0.0137, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.7444253859348198, |
| "grad_norm": 0.03401586785912514, |
| "learning_rate": 2.7144082332761576e-05, |
| "loss": 0.0129, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.915951972555746, |
| "grad_norm": 0.04987897351384163, |
| "learning_rate": 2.571469411092053e-05, |
| "loss": 0.0119, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9785591766723842, |
| "eval_f1_macro": 0.9069474297862891, |
| "eval_loss": 0.011610370129346848, |
| "eval_runtime": 4.0899, |
| "eval_samples_per_second": 285.091, |
| "eval_steps_per_second": 35.698, |
| "step": 1749 |
| }, |
| { |
| "epoch": 3.0874785591766725, |
| "grad_norm": 0.038767341524362564, |
| "learning_rate": 2.4285305889079473e-05, |
| "loss": 0.0111, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.2590051457975986, |
| "grad_norm": 0.048064444214105606, |
| "learning_rate": 2.2855917667238423e-05, |
| "loss": 0.0107, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.4305317324185247, |
| "grad_norm": 0.08297387510538101, |
| "learning_rate": 2.142652944539737e-05, |
| "loss": 0.0103, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.6020583190394513, |
| "grad_norm": 0.027825674042105675, |
| "learning_rate": 1.999714122355632e-05, |
| "loss": 0.0087, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.7735849056603774, |
| "grad_norm": 0.024125738069415092, |
| "learning_rate": 1.8567753001715266e-05, |
| "loss": 0.0077, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.9451114922813035, |
| "grad_norm": 0.04014933481812477, |
| "learning_rate": 1.7138364779874212e-05, |
| "loss": 0.0088, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9854202401372213, |
| "eval_f1_macro": 0.9392697973755549, |
| "eval_loss": 0.009160671383142471, |
| "eval_runtime": 4.1026, |
| "eval_samples_per_second": 284.212, |
| "eval_steps_per_second": 35.587, |
| "step": 2332 |
| }, |
| { |
| "epoch": 4.11663807890223, |
| "grad_norm": 0.09714564681053162, |
| "learning_rate": 1.5708976558033162e-05, |
| "loss": 0.0088, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.288164665523156, |
| "grad_norm": 0.02462666854262352, |
| "learning_rate": 1.427958833619211e-05, |
| "loss": 0.007, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.459691252144083, |
| "grad_norm": 0.03808571770787239, |
| "learning_rate": 1.2850200114351058e-05, |
| "loss": 0.0075, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.631217838765009, |
| "grad_norm": 0.0727507695555687, |
| "learning_rate": 1.1420811892510007e-05, |
| "loss": 0.0069, |
| "step": 2700 |
| }, |
| { |
| "epoch": 4.802744425385935, |
| "grad_norm": 0.02290419489145279, |
| "learning_rate": 9.991423670668955e-06, |
| "loss": 0.0075, |
| "step": 2800 |
| }, |
| { |
| "epoch": 4.974271012006861, |
| "grad_norm": 0.032202959060668945, |
| "learning_rate": 8.562035448827901e-06, |
| "loss": 0.0073, |
| "step": 2900 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.983704974271012, |
| "eval_f1_macro": 0.9105312017109829, |
| "eval_loss": 0.007727212272584438, |
| "eval_runtime": 4.1477, |
| "eval_samples_per_second": 281.123, |
| "eval_steps_per_second": 35.201, |
| "step": 2915 |
| }, |
| { |
| "epoch": 5.145797598627787, |
| "grad_norm": 0.0157458633184433, |
| "learning_rate": 7.132647226986849e-06, |
| "loss": 0.0068, |
| "step": 3000 |
| }, |
| { |
| "epoch": 5.317324185248713, |
| "grad_norm": 0.02146329917013645, |
| "learning_rate": 5.7032590051457984e-06, |
| "loss": 0.0061, |
| "step": 3100 |
| }, |
| { |
| "epoch": 5.4888507718696395, |
| "grad_norm": 0.017605546861886978, |
| "learning_rate": 4.273870783304746e-06, |
| "loss": 0.0066, |
| "step": 3200 |
| }, |
| { |
| "epoch": 5.660377358490566, |
| "grad_norm": 0.017790155485272408, |
| "learning_rate": 2.8444825614636936e-06, |
| "loss": 0.0067, |
| "step": 3300 |
| }, |
| { |
| "epoch": 5.831903945111492, |
| "grad_norm": 0.033828429877758026, |
| "learning_rate": 1.4150943396226415e-06, |
| "loss": 0.0059, |
| "step": 3400 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.9854202401372213, |
| "eval_f1_macro": 0.9396295674217445, |
| "eval_loss": 0.0071120294742286205, |
| "eval_runtime": 4.1615, |
| "eval_samples_per_second": 280.185, |
| "eval_steps_per_second": 35.083, |
| "step": 3498 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 3498, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3680565061920768.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|