AdaVaR-7B / trainer_state.json
ZejunLi's picture
Upload folder using huggingface_hub
dfb93a4 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.18198362147406733,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"TT_Chart/mode_0": 0.08333333333333333,
"TT_Chart/mode_1": 0.16666666666666666,
"TT_Counting/mode_0": 0.4166666666666667,
"TT_Counting/mode_1": 0.08333333333333333,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0625,
"TT_Grounding/mode_1": 0.3125,
"TT_Math/mode_0": 0.4605263157894737,
"TT_Math/mode_1": 0.4605263157894737,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 377.5625,
"completion_length/mode_0": 383.5234375,
"completion_length/mode_1": 371.6015625,
"epoch": 0.0009099181073703367,
"format_confidence": 0.5,
"grad_norm": 1.5429338116205915,
"grounded_proportion": 0.5,
"kl": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"over_lengthy_sequences": 0.00390625,
"reward": 1.3203125,
"reward_std": 0.33722585439682007,
"rewards/format_reward": 0.9765625,
"rewards/general_task_reward": 0.34375,
"step": 1
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.5833333333333334,
"TT_Counting/mode_0": 0.0625,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.40384615384615385,
"TT_Math/mode_1": 0.38461538461538464,
"TT_OCR/mode_0": 0.125,
"TT_OCR/mode_1": 0.375,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.75,
"TT_Science/mode_1": 0.4375,
"completion_length": 304.2890625,
"completion_length/mode_0": 337.1171875,
"completion_length/mode_1": 271.4609375,
"epoch": 0.0018198362147406734,
"format_confidence": 0.5,
"grad_norm": 1.1197813674763613,
"grounded_proportion": 0.5,
"kl": 0.000415802001953125,
"learning_rate": 9.990900818926296e-07,
"loss": 0.0,
"over_lengthy_sequences": 0.0,
"reward": 1.375,
"reward_std": 0.2749903202056885,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.375,
"step": 2
},
{
"TT_Chart/mode_0": 0.9166666666666666,
"TT_Chart/mode_1": 0.9166666666666666,
"TT_Counting/mode_0": 0.0625,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.625,
"TT_Math/mode_1": 0.5340909090909091,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.16666666666666666,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.375,
"completion_length": 367.0625,
"completion_length/mode_0": 395.9765625,
"completion_length/mode_1": 338.1484375,
"epoch": 0.00272975432211101,
"format_confidence": 0.5,
"grad_norm": 2.0139122368418265,
"grounded_proportion": 0.5,
"kl": 0.0003833770751953125,
"learning_rate": 9.981801637852592e-07,
"loss": 0.0,
"over_lengthy_sequences": 0.0,
"reward": 1.5078125,
"reward_std": 0.33350804448127747,
"rewards/format_reward": 0.9921875,
"rewards/general_task_reward": 0.515625,
"step": 3
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.125,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.375,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.703125,
"TT_Math/mode_1": 0.59375,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.375,
"completion_length": 311.7890625,
"completion_length/mode_0": 331.875,
"completion_length/mode_1": 291.703125,
"epoch": 0.003639672429481347,
"format_confidence": 0.5,
"grad_norm": 0.9407069347152167,
"grounded_proportion": 0.5,
"kl": 0.0003643035888671875,
"learning_rate": 9.97270245677889e-07,
"loss": 0.0,
"over_lengthy_sequences": 0.0,
"reward": 1.3828125,
"reward_std": 0.27103859186172485,
"rewards/format_reward": 0.98046875,
"rewards/general_task_reward": 0.40234375,
"step": 4
},
{
"TT_Chart/mode_0": 0.625,
"TT_Chart/mode_1": 0.5625,
"TT_Counting/mode_0": 0.4166666666666667,
"TT_Counting/mode_1": 0.3333333333333333,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.625,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.625,
"TT_Math/mode_0": 0.36764705882352944,
"TT_Math/mode_1": 0.2647058823529412,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.375,
"completion_length": 366.9765625,
"completion_length/mode_0": 407.4140625,
"completion_length/mode_1": 326.5390625,
"epoch": 0.004549590536851683,
"format_confidence": 0.5,
"grad_norm": 0.9635797377965721,
"grounded_proportion": 0.5,
"kl": 0.00054931640625,
"learning_rate": 9.963603275705185e-07,
"loss": 0.0,
"over_lengthy_sequences": 0.00390625,
"reward": 1.32421875,
"reward_std": 0.26956743001937866,
"rewards/format_reward": 0.9921875,
"rewards/general_task_reward": 0.33203125,
"step": 5
},
{
"TT_Chart/mode_0": 0.08333333333333333,
"TT_Chart/mode_1": 0.08333333333333333,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.3125,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.3333333333333333,
"TT_Document/mode_1": 0.08333333333333333,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.4,
"TT_Math/mode_1": 0.48333333333333334,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.75,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.5,
"completion_length": 322.3125,
"completion_length/mode_0": 345.171875,
"completion_length/mode_1": 299.453125,
"epoch": 0.00545950864422202,
"format_confidence": 0.5,
"grad_norm": 1.073086357937483,
"grounded_proportion": 0.5,
"kl": 0.000858306884765625,
"learning_rate": 9.954504094631483e-07,
"loss": 0.0,
"over_lengthy_sequences": 0.0,
"reward": 1.3046875,
"reward_std": 0.3800785541534424,
"rewards/format_reward": 0.98046875,
"rewards/general_task_reward": 0.32421875,
"step": 6
},
{
"TT_Chart/mode_0": 0.75,
"TT_Chart/mode_1": 0.75,
"TT_Counting/mode_0": 0.4166666666666667,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.5588235294117647,
"TT_Math/mode_1": 0.4264705882352941,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.25,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.625,
"TT_Science/mode_1": 0.25,
"completion_length": 292.5859375,
"completion_length/mode_0": 318.9609375,
"completion_length/mode_1": 266.2109375,
"epoch": 0.006369426751592357,
"format_confidence": 0.5,
"grad_norm": 0.8168275363403603,
"grounded_proportion": 0.5,
"kl": 0.0014495849609375,
"learning_rate": 9.94540491355778e-07,
"loss": 0.0001,
"over_lengthy_sequences": 0.0,
"reward": 1.40625,
"reward_std": 0.32825323939323425,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.40625,
"step": 7
},
{
"TT_Chart/mode_0": 0.15,
"TT_Chart/mode_1": 0.15,
"TT_Counting/mode_0": 0.6666666666666666,
"TT_Counting/mode_1": 0.16666666666666666,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.65,
"TT_Math/mode_1": 0.525,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0625,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.375,
"completion_length": 350.88671875,
"completion_length/mode_0": 379.875,
"completion_length/mode_1": 321.8984375,
"epoch": 0.007279344858962694,
"format_confidence": 0.5,
"grad_norm": 0.9037954457959693,
"grounded_proportion": 0.5,
"kl": 0.000934600830078125,
"learning_rate": 9.936305732484076e-07,
"loss": 0.0,
"over_lengthy_sequences": 0.0,
"reward": 1.34375,
"reward_std": 0.3212430477142334,
"rewards/format_reward": 0.98828125,
"rewards/general_task_reward": 0.35546875,
"step": 8
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 1.0,
"TT_Math/mode_0": 0.5454545454545454,
"TT_Math/mode_1": 0.5227272727272727,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0625,
"TT_Science/mode_1": 0.375,
"completion_length": 373.3203125,
"completion_length/mode_0": 413.8046875,
"completion_length/mode_1": 332.8359375,
"epoch": 0.00818926296633303,
"format_confidence": 0.5,
"grad_norm": 1.5911208325130124,
"grounded_proportion": 0.5,
"kl": 0.001251220703125,
"learning_rate": 9.927206551410372e-07,
"loss": 0.0001,
"over_lengthy_sequences": 0.0,
"reward": 1.4296875,
"reward_std": 0.3262837529182434,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.43359375,
"step": 9
},
{
"TT_Chart/mode_0": 0.6666666666666666,
"TT_Chart/mode_1": 0.5833333333333334,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5833333333333334,
"TT_Math/mode_0": 0.5357142857142857,
"TT_Math/mode_1": 0.5,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0625,
"TT_Science/mode_1": 0.375,
"completion_length": 372.32421875,
"completion_length/mode_0": 402.265625,
"completion_length/mode_1": 342.3828125,
"epoch": 0.009099181073703366,
"format_confidence": 0.5,
"grad_norm": 0.9600405491472309,
"grounded_proportion": 0.5,
"kl": 0.0033721923828125,
"learning_rate": 9.918107370336669e-07,
"loss": 0.0001,
"over_lengthy_sequences": 0.0,
"reward": 1.4375,
"reward_std": 0.270779013633728,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4375,
"step": 10
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.375,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.47368421052631576,
"TT_Math/mode_1": 0.39473684210526316,
"TT_OCR/mode_0": 0.125,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.875,
"TT_Science/mode_1": 0.625,
"completion_length": 387.62109375,
"completion_length/mode_0": 418.53125,
"completion_length/mode_1": 356.7109375,
"epoch": 0.010009099181073703,
"format_confidence": 0.5,
"grad_norm": 0.8976670020100668,
"grounded_proportion": 0.5,
"kl": 0.0032501220703125,
"learning_rate": 9.909008189262967e-07,
"loss": 0.0001,
"over_lengthy_sequences": 0.0,
"reward": 1.40625,
"reward_std": 0.24920988082885742,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.40625,
"step": 11
},
{
"TT_Chart/mode_0": 0.15,
"TT_Chart/mode_1": 0.3,
"TT_Counting/mode_0": 0.1875,
"TT_Counting/mode_1": 0.3125,
"TT_Detection/mode_0": 0.375,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.375,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.546875,
"TT_Math/mode_1": 0.484375,
"TT_OCR/mode_0": 0.125,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.3333333333333333,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.5,
"completion_length": 373.30859375,
"completion_length/mode_0": 404.21875,
"completion_length/mode_1": 342.3984375,
"epoch": 0.01091901728844404,
"format_confidence": 0.5,
"grad_norm": 0.7415423083376748,
"grounded_proportion": 0.5,
"kl": 0.006988525390625,
"learning_rate": 9.899909008189261e-07,
"loss": 0.0003,
"over_lengthy_sequences": 0.0,
"reward": 1.3828125,
"reward_std": 0.29287609457969666,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3828125,
"step": 12
},
{
"TT_Chart/mode_0": 0.3125,
"TT_Chart/mode_1": 0.4375,
"TT_Counting/mode_0": 0.4166666666666667,
"TT_Counting/mode_1": 0.4166666666666667,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.4666666666666667,
"TT_Math/mode_1": 0.55,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 1.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.2,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.0,
"completion_length": 272.83984375,
"completion_length/mode_0": 305.1953125,
"completion_length/mode_1": 240.484375,
"epoch": 0.011828935395814377,
"format_confidence": 0.5,
"grad_norm": 1.416444491434725,
"grounded_proportion": 0.5,
"kl": 0.01019287109375,
"learning_rate": 9.89080982711556e-07,
"loss": 0.0004,
"over_lengthy_sequences": 0.0,
"reward": 1.3671875,
"reward_std": 0.2200184315443039,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.37109375,
"step": 13
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.375,
"TT_Detection/mode_0": 0.08333333333333333,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 0.875,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.08333333333333333,
"TT_Math/mode_0": 0.5166666666666667,
"TT_Math/mode_1": 0.43333333333333335,
"TT_OCR/mode_0": 0.75,
"TT_OCR/mode_1": 0.75,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.16666666666666666,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 334.30859375,
"completion_length/mode_0": 376.7578125,
"completion_length/mode_1": 291.859375,
"epoch": 0.012738853503184714,
"format_confidence": 0.5,
"grad_norm": 1.2195596640953166,
"grounded_proportion": 0.5,
"kl": 0.0172119140625,
"learning_rate": 9.881710646041856e-07,
"loss": 0.0007,
"over_lengthy_sequences": 0.0,
"reward": 1.33984375,
"reward_std": 0.25836920738220215,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.34375,
"step": 14
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.125,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 1.0,
"TT_Document/mode_0": 0.0625,
"TT_Document/mode_1": 0.1875,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 1.0,
"TT_Math/mode_0": 0.5454545454545454,
"TT_Math/mode_1": 0.4431818181818182,
"TT_OCR/mode_0": 0.75,
"TT_OCR/mode_1": 0.75,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 350.69921875,
"completion_length/mode_0": 371.78125,
"completion_length/mode_1": 329.6171875,
"epoch": 0.01364877161055505,
"format_confidence": 0.5,
"grad_norm": 0.9960620613889194,
"grounded_proportion": 0.5,
"kl": 0.025146484375,
"learning_rate": 9.872611464968153e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.00390625,
"reward": 1.39453125,
"reward_std": 0.29446732997894287,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.3984375,
"step": 15
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.15,
"TT_Grounding/mode_1": 0.35,
"TT_Math/mode_0": 0.5294117647058824,
"TT_Math/mode_1": 0.5147058823529411,
"TT_OCR/mode_0": 0.75,
"TT_OCR/mode_1": 0.75,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 351.96484375,
"completion_length/mode_0": 384.609375,
"completion_length/mode_1": 319.3203125,
"epoch": 0.014558689717925387,
"format_confidence": 0.5,
"grad_norm": 0.9260258786381415,
"grounded_proportion": 0.5,
"kl": 0.0201416015625,
"learning_rate": 9.863512283894449e-07,
"loss": 0.0008,
"over_lengthy_sequences": 0.0,
"reward": 1.3359375,
"reward_std": 0.19503436982631683,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3359375,
"step": 16
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.45,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.47619047619047616,
"TT_Math/mode_1": 0.42857142857142855,
"TT_OCR/mode_0": 0.75,
"TT_OCR/mode_1": 0.75,
"TT_Others/mode_0": 0.5,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.25,
"completion_length": 368.91796875,
"completion_length/mode_0": 401.9609375,
"completion_length/mode_1": 335.875,
"epoch": 0.015468607825295723,
"format_confidence": 0.5,
"grad_norm": 0.8885940593044405,
"grounded_proportion": 0.5,
"kl": 0.007049560546875,
"learning_rate": 9.854413102820745e-07,
"loss": 0.0003,
"over_lengthy_sequences": 0.0,
"reward": 1.38671875,
"reward_std": 0.2695994973182678,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.390625,
"step": 17
},
{
"TT_Chart/mode_0": 0.375,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.16666666666666666,
"TT_Counting/mode_1": 0.16666666666666666,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 0.16666666666666666,
"TT_Document/mode_1": 0.16666666666666666,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.125,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.5384615384615384,
"TT_OCR/mode_0": 0.75,
"TT_OCR/mode_1": 0.75,
"TT_Others/mode_0": 0.08333333333333333,
"TT_Others/mode_1": 0.3333333333333333,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.75,
"TT_Science/mode_0": 0.625,
"TT_Science/mode_1": 0.75,
"completion_length": 266.00390625,
"completion_length/mode_0": 279.5390625,
"completion_length/mode_1": 252.46875,
"epoch": 0.01637852593266606,
"format_confidence": 0.5,
"grad_norm": 1.4954519102348234,
"grounded_proportion": 0.5,
"kl": 0.00970458984375,
"learning_rate": 9.845313921747044e-07,
"loss": 0.0004,
"over_lengthy_sequences": 0.0,
"reward": 1.3671875,
"reward_std": 0.30023884773254395,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.37109375,
"step": 18
},
{
"TT_Chart/mode_0": 0.3333333333333333,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.4375,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.625,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.375,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5192307692307693,
"TT_Math/mode_1": 0.4423076923076923,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 305.93359375,
"completion_length/mode_0": 336.4140625,
"completion_length/mode_1": 275.453125,
"epoch": 0.017288444040036398,
"format_confidence": 0.5,
"grad_norm": 1.2648060156416405,
"grounded_proportion": 0.5,
"kl": 0.00762939453125,
"learning_rate": 9.836214740673338e-07,
"loss": 0.0003,
"over_lengthy_sequences": 0.0,
"reward": 1.3359375,
"reward_std": 0.22764958441257477,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3359375,
"step": 19
},
{
"TT_Chart/mode_0": 0.3333333333333333,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.125,
"TT_Counting/mode_1": 0.125,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.8333333333333334,
"TT_Document/mode_1": 0.16666666666666666,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.4,
"TT_Math/mode_0": 0.45588235294117646,
"TT_Math/mode_1": 0.4117647058823529,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.375,
"completion_length": 350.97265625,
"completion_length/mode_0": 366.796875,
"completion_length/mode_1": 335.1484375,
"epoch": 0.018198362147406732,
"format_confidence": 0.5,
"grad_norm": 2.2680753597810743,
"grounded_proportion": 0.5,
"kl": 0.01123046875,
"learning_rate": 9.827115559599636e-07,
"loss": 0.0004,
"over_lengthy_sequences": 0.00390625,
"reward": 1.3515625,
"reward_std": 0.35683149099349976,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.35546875,
"step": 20
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.3333333333333333,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.35,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.55,
"TT_Math/mode_1": 0.4666666666666667,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.5,
"completion_length": 300.5703125,
"completion_length/mode_0": 328.75,
"completion_length/mode_1": 272.390625,
"epoch": 0.01910828025477707,
"format_confidence": 0.5,
"grad_norm": 1.1844820805839822,
"grounded_proportion": 0.5,
"kl": 0.0118408203125,
"learning_rate": 9.818016378525933e-07,
"loss": 0.0005,
"over_lengthy_sequences": 0.0,
"reward": 1.3515625,
"reward_std": 0.27275240421295166,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3515625,
"step": 21
},
{
"TT_Chart/mode_0": 0.20833333333333334,
"TT_Chart/mode_1": 0.20833333333333334,
"TT_Counting/mode_0": 0.3333333333333333,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.125,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 1.0,
"TT_Math/mode_0": 0.45,
"TT_Math/mode_1": 0.45,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.4166666666666667,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 331.59765625,
"completion_length/mode_0": 350.2890625,
"completion_length/mode_1": 312.90625,
"epoch": 0.020018198362147407,
"format_confidence": 0.5,
"grad_norm": 0.8541575651698342,
"grounded_proportion": 0.5,
"kl": 0.010986328125,
"learning_rate": 9.80891719745223e-07,
"loss": 0.0004,
"over_lengthy_sequences": 0.0,
"reward": 1.30078125,
"reward_std": 0.2321278154850006,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.30078125,
"step": 22
},
{
"TT_Chart/mode_0": 0.5833333333333334,
"TT_Chart/mode_1": 0.4166666666666667,
"TT_Counting/mode_0": 0.3333333333333333,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.125,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.48863636363636365,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.6666666666666666,
"TT_Science/mode_1": 0.5,
"completion_length": 387.9765625,
"completion_length/mode_0": 414.9453125,
"completion_length/mode_1": 361.0078125,
"epoch": 0.020928116469517744,
"format_confidence": 0.5,
"grad_norm": 1.5808055185099863,
"grounded_proportion": 0.5,
"kl": 0.006439208984375,
"learning_rate": 9.799818016378525e-07,
"loss": 0.0003,
"over_lengthy_sequences": 0.0,
"reward": 1.46484375,
"reward_std": 0.32852408289909363,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.46484375,
"step": 23
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.125,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 1.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.453125,
"TT_Math/mode_1": 0.515625,
"TT_OCR/mode_0": 0.5625,
"TT_OCR/mode_1": 0.6875,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.16666666666666666,
"completion_length": 331.4140625,
"completion_length/mode_0": 352.4453125,
"completion_length/mode_1": 310.3828125,
"epoch": 0.02183803457688808,
"format_confidence": 0.5,
"grad_norm": 0.8593245043140431,
"grounded_proportion": 0.5,
"kl": 0.01055908203125,
"learning_rate": 9.790718835304822e-07,
"loss": 0.0004,
"over_lengthy_sequences": 0.0,
"reward": 1.3671875,
"reward_std": 0.24446570873260498,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3671875,
"step": 24
},
{
"TT_Chart/mode_0": 0.375,
"TT_Chart/mode_1": 0.3125,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.4166666666666667,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.484375,
"TT_OCR/mode_0": 0.5625,
"TT_OCR/mode_1": 0.6875,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.1875,
"completion_length": 346.8046875,
"completion_length/mode_0": 340.640625,
"completion_length/mode_1": 352.96875,
"epoch": 0.022747952684258416,
"format_confidence": 0.5,
"grad_norm": 0.9872564452072786,
"grounded_proportion": 0.5,
"kl": 0.00830078125,
"learning_rate": 9.78161965423112e-07,
"loss": 0.0003,
"over_lengthy_sequences": 0.0078125,
"reward": 1.3359375,
"reward_std": 0.3047879636287689,
"rewards/format_reward": 0.9921875,
"rewards/general_task_reward": 0.34375,
"step": 25
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.6666666666666666,
"TT_Counting/mode_0": 0.75,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.4166666666666667,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.4722222222222222,
"TT_Math/mode_1": 0.3611111111111111,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.3125,
"completion_length": 370.4453125,
"completion_length/mode_0": 427.0,
"completion_length/mode_1": 313.890625,
"epoch": 0.023657870791628753,
"format_confidence": 0.5,
"grad_norm": 1.0075122997992207,
"grounded_proportion": 0.5,
"kl": 0.0067138671875,
"learning_rate": 9.772520473157414e-07,
"loss": 0.0003,
"over_lengthy_sequences": 0.0,
"reward": 1.34375,
"reward_std": 0.301014244556427,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.34765625,
"step": 26
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.125,
"TT_Counting/mode_1": 0.375,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.3125,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.453125,
"TT_Math/mode_1": 0.46875,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.05,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.25,
"completion_length": 331.84375,
"completion_length/mode_0": 353.1015625,
"completion_length/mode_1": 310.5859375,
"epoch": 0.02456778889899909,
"format_confidence": 0.5,
"grad_norm": 0.9025821168695539,
"grounded_proportion": 0.5,
"kl": 0.0146484375,
"learning_rate": 9.763421292083713e-07,
"loss": 0.0006,
"over_lengthy_sequences": 0.0,
"reward": 1.28125,
"reward_std": 0.2187202274799347,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.28125,
"step": 27
},
{
"TT_Chart/mode_0": 0.375,
"TT_Chart/mode_1": 0.3125,
"TT_Counting/mode_0": 0.75,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.6666666666666666,
"TT_Math/mode_0": 0.3333333333333333,
"TT_Math/mode_1": 0.2777777777777778,
"TT_OCR/mode_0": 1.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 390.828125,
"completion_length/mode_0": 404.609375,
"completion_length/mode_1": 377.046875,
"epoch": 0.025477707006369428,
"format_confidence": 0.5,
"grad_norm": 1.1882479091934361,
"grounded_proportion": 0.5,
"kl": 0.021728515625,
"learning_rate": 9.75432211101001e-07,
"loss": 0.0009,
"over_lengthy_sequences": 0.0,
"reward": 1.29296875,
"reward_std": 0.25501734018325806,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.29296875,
"step": 28
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.16666666666666666,
"TT_Counting/mode_1": 0.08333333333333333,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.125,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5138888888888888,
"TT_Math/mode_1": 0.4583333333333333,
"TT_OCR/mode_0": 1.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 455.8359375,
"completion_length/mode_0": 462.328125,
"completion_length/mode_1": 449.34375,
"epoch": 0.026387625113739762,
"format_confidence": 0.5,
"grad_norm": 0.6446278395164787,
"grounded_proportion": 0.5,
"kl": 0.005828857421875,
"learning_rate": 9.745222929936306e-07,
"loss": 0.0002,
"over_lengthy_sequences": 0.0078125,
"reward": 1.3125,
"reward_std": 0.22071683406829834,
"rewards/format_reward": 0.9921875,
"rewards/general_task_reward": 0.3203125,
"step": 29
},
{
"TT_Chart/mode_0": 0.4166666666666667,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.125,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.125,
"TT_Grounding/mode_0": 0.5,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.4090909090909091,
"TT_Math/mode_1": 0.5,
"TT_OCR/mode_0": 1.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.125,
"completion_length": 432.3046875,
"completion_length/mode_0": 476.96875,
"completion_length/mode_1": 387.640625,
"epoch": 0.0272975432211101,
"format_confidence": 0.5,
"grad_norm": 0.7387045634169115,
"grounded_proportion": 0.5,
"kl": 0.01116943359375,
"learning_rate": 9.736123748862602e-07,
"loss": 0.0004,
"over_lengthy_sequences": 0.0,
"reward": 1.38671875,
"reward_std": 0.3345615267753601,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.38671875,
"step": 30
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.375,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.125,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5125,
"TT_Math/mode_1": 0.525,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.08333333333333333,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.75,
"completion_length": 387.2578125,
"completion_length/mode_0": 424.34375,
"completion_length/mode_1": 350.171875,
"epoch": 0.028207461328480437,
"format_confidence": 0.5,
"grad_norm": 0.9075215710759794,
"grounded_proportion": 0.5,
"kl": 0.0091552734375,
"learning_rate": 9.727024567788898e-07,
"loss": 0.0004,
"over_lengthy_sequences": 0.0,
"reward": 1.40234375,
"reward_std": 0.2678895890712738,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.40234375,
"step": 31
},
{
"TT_Chart/mode_0": 0.125,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.16666666666666666,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5681818181818182,
"TT_Math/mode_1": 0.5227272727272727,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.08333333333333333,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.75,
"TT_Science/mode_1": 0.0,
"completion_length": 367.25,
"completion_length/mode_0": 399.828125,
"completion_length/mode_1": 334.671875,
"epoch": 0.029117379435850774,
"format_confidence": 0.5,
"grad_norm": 0.7678387402875528,
"grounded_proportion": 0.5,
"kl": 0.01202392578125,
"learning_rate": 9.717925386715195e-07,
"loss": 0.0005,
"over_lengthy_sequences": 0.0,
"reward": 1.42578125,
"reward_std": 0.284709632396698,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.42578125,
"step": 32
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.625,
"TT_Math/mode_0": 0.42391304347826086,
"TT_Math/mode_1": 0.5434782608695652,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.08333333333333333,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.75,
"TT_Science/mode_1": 0.0,
"completion_length": 412.55859375,
"completion_length/mode_0": 444.4921875,
"completion_length/mode_1": 380.625,
"epoch": 0.03002729754322111,
"format_confidence": 0.5,
"grad_norm": 1.076115993141789,
"grounded_proportion": 0.5,
"kl": 0.01397705078125,
"learning_rate": 9.70882620564149e-07,
"loss": 0.0006,
"over_lengthy_sequences": 0.0,
"reward": 1.421875,
"reward_std": 0.304455041885376,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.421875,
"step": 33
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.75,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.625,
"TT_Math/mode_0": 0.3375,
"TT_Math/mode_1": 0.4,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 358.66015625,
"completion_length/mode_0": 385.1640625,
"completion_length/mode_1": 332.15625,
"epoch": 0.030937215650591446,
"format_confidence": 0.5,
"grad_norm": 1.2114231515737024,
"grounded_proportion": 0.5,
"kl": 0.0238037109375,
"learning_rate": 9.69972702456779e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.0,
"reward": 1.3359375,
"reward_std": 0.21515312790870667,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3359375,
"step": 34
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.4166666666666667,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.08333333333333333,
"TT_Math/mode_0": 0.5694444444444444,
"TT_Math/mode_1": 0.4861111111111111,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 339.53125,
"completion_length/mode_0": 356.71875,
"completion_length/mode_1": 322.34375,
"epoch": 0.03184713375796178,
"format_confidence": 0.5,
"grad_norm": 0.9974240246736068,
"grounded_proportion": 0.5,
"kl": 0.026123046875,
"learning_rate": 9.690627843494086e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.00390625,
"reward": 1.3359375,
"reward_std": 0.28288590908050537,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.33984375,
"step": 35
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.3333333333333333,
"TT_Counting/mode_1": 0.4166666666666667,
"TT_Detection/mode_0": 0.05,
"TT_Detection/mode_1": 0.55,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 1.0,
"TT_Math/mode_0": 0.6333333333333333,
"TT_Math/mode_1": 0.4166666666666667,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.3333333333333333,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.25,
"completion_length": 334.58203125,
"completion_length/mode_0": 380.921875,
"completion_length/mode_1": 288.2421875,
"epoch": 0.03275705186533212,
"format_confidence": 0.5,
"grad_norm": 1.6406753400760954,
"grounded_proportion": 0.5,
"kl": 0.03759765625,
"learning_rate": 9.681528662420382e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.0,
"reward": 1.38671875,
"reward_std": 0.31667181849479675,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.38671875,
"step": 36
},
{
"TT_Chart/mode_0": 0.125,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5543478260869565,
"TT_Math/mode_1": 0.5652173913043478,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.3333333333333333,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.08333333333333333,
"TT_Science/mode_1": 0.08333333333333333,
"completion_length": 348.41015625,
"completion_length/mode_0": 370.5234375,
"completion_length/mode_1": 326.296875,
"epoch": 0.03366696997270246,
"format_confidence": 0.5,
"grad_norm": 0.6026535973402165,
"grounded_proportion": 0.5,
"kl": 0.0167236328125,
"learning_rate": 9.672429481346678e-07,
"loss": 0.0007,
"over_lengthy_sequences": 0.0,
"reward": 1.4453125,
"reward_std": 0.24579495191574097,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4453125,
"step": 37
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.125,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.4875,
"TT_Math/mode_1": 0.6,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 1.0,
"TT_Science/mode_1": 0.0,
"completion_length": 325.0859375,
"completion_length/mode_0": 345.7109375,
"completion_length/mode_1": 304.4609375,
"epoch": 0.034576888080072796,
"format_confidence": 0.5,
"grad_norm": 1.7996536274917565,
"grounded_proportion": 0.5,
"kl": 0.039794921875,
"learning_rate": 9.663330300272975e-07,
"loss": 0.0016,
"over_lengthy_sequences": 0.0,
"reward": 1.41015625,
"reward_std": 0.2690715491771698,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.41015625,
"step": 38
},
{
"TT_Chart/mode_0": 0.375,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 1.0,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.625,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.05,
"TT_Grounding/mode_1": 0.2,
"TT_Math/mode_0": 0.5277777777777778,
"TT_Math/mode_1": 0.4305555555555556,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.0,
"completion_length": 337.09375,
"completion_length/mode_0": 384.3984375,
"completion_length/mode_1": 289.7890625,
"epoch": 0.03548680618744313,
"format_confidence": 0.5,
"grad_norm": 1.8348507814517516,
"grounded_proportion": 0.5,
"kl": 0.051513671875,
"learning_rate": 9.65423111919927e-07,
"loss": 0.0021,
"over_lengthy_sequences": 0.00390625,
"reward": 1.375,
"reward_std": 0.29592859745025635,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.37890625,
"step": 39
},
{
"TT_Chart/mode_0": 0.4166666666666667,
"TT_Chart/mode_1": 0.3333333333333333,
"TT_Counting/mode_0": 0.4,
"TT_Counting/mode_1": 0.2,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.625,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.05,
"TT_Grounding/mode_1": 0.2,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.375,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.0,
"completion_length": 423.27734375,
"completion_length/mode_0": 478.3359375,
"completion_length/mode_1": 368.21875,
"epoch": 0.036396724294813464,
"format_confidence": 0.5,
"grad_norm": 0.8417782233647189,
"grounded_proportion": 0.5,
"kl": 0.0096435546875,
"learning_rate": 9.645131938125567e-07,
"loss": 0.0004,
"over_lengthy_sequences": 0.0,
"reward": 1.4140625,
"reward_std": 0.3811083436012268,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4140625,
"step": 40
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.5833333333333334,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.3333333333333333,
"TT_Math/mode_0": 0.6333333333333333,
"TT_Math/mode_1": 0.6,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.625,
"TT_Others/mode_0": 0.5,
"TT_Others/mode_1": 0.375,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.5,
"completion_length": 346.296875,
"completion_length/mode_0": 368.3125,
"completion_length/mode_1": 324.28125,
"epoch": 0.0373066424021838,
"format_confidence": 0.5,
"grad_norm": 0.8971248354232603,
"grounded_proportion": 0.5,
"kl": 0.01708984375,
"learning_rate": 9.636032757051866e-07,
"loss": 0.0007,
"over_lengthy_sequences": 0.0,
"reward": 1.45703125,
"reward_std": 0.2794685363769531,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.45703125,
"step": 41
},
{
"TT_Chart/mode_0": 0.39285714285714285,
"TT_Chart/mode_1": 0.32142857142857145,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.375,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5192307692307693,
"TT_Math/mode_1": 0.5192307692307693,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.5,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.16666666666666666,
"TT_Science/mode_1": 0.4166666666666667,
"completion_length": 327.87890625,
"completion_length/mode_0": 374.3984375,
"completion_length/mode_1": 281.359375,
"epoch": 0.03821656050955414,
"format_confidence": 0.5,
"grad_norm": 1.969070754091683,
"grounded_proportion": 0.5,
"kl": 0.0157470703125,
"learning_rate": 9.626933575978162e-07,
"loss": 0.0006,
"over_lengthy_sequences": 0.0,
"reward": 1.375,
"reward_std": 0.2452620565891266,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.375,
"step": 42
},
{
"TT_Chart/mode_0": 0.2916666666666667,
"TT_Chart/mode_1": 0.2916666666666667,
"TT_Counting/mode_0": 0.6666666666666666,
"TT_Counting/mode_1": 0.6666666666666666,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.625,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5833333333333334,
"TT_Math/mode_1": 0.5208333333333334,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.25,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.75,
"TT_Science/mode_1": 0.75,
"completion_length": 279.71875,
"completion_length/mode_0": 307.2421875,
"completion_length/mode_1": 252.1953125,
"epoch": 0.039126478616924476,
"format_confidence": 0.5,
"grad_norm": 3.84523994130464,
"grounded_proportion": 0.5,
"kl": 0.0184326171875,
"learning_rate": 9.617834394904458e-07,
"loss": 0.0007,
"over_lengthy_sequences": 0.0,
"reward": 1.390625,
"reward_std": 0.2264637053012848,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.390625,
"step": 43
},
{
"TT_Chart/mode_0": 0.125,
"TT_Chart/mode_1": 0.125,
"TT_Counting/mode_0": 0.3333333333333333,
"TT_Counting/mode_1": 0.3333333333333333,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 0.125,
"TT_Grounding/mode_1": 0.125,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.5588235294117647,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.375,
"completion_length": 324.25390625,
"completion_length/mode_0": 361.8515625,
"completion_length/mode_1": 286.65625,
"epoch": 0.040036396724294813,
"format_confidence": 0.5,
"grad_norm": 0.9447344472383061,
"grounded_proportion": 0.5,
"kl": 0.01422119140625,
"learning_rate": 9.608735213830755e-07,
"loss": 0.0006,
"over_lengthy_sequences": 0.0,
"reward": 1.39453125,
"reward_std": 0.29340648651123047,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.39453125,
"step": 44
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.875,
"TT_Counting/mode_1": 0.125,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.4473684210526316,
"TT_Math/mode_1": 0.39473684210526316,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.5,
"TT_Others/mode_1": 1.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.25,
"completion_length": 337.859375,
"completion_length/mode_0": 363.390625,
"completion_length/mode_1": 312.328125,
"epoch": 0.04094631483166515,
"format_confidence": 0.5,
"grad_norm": 1.4320135714153,
"grounded_proportion": 0.5,
"kl": 0.013671875,
"learning_rate": 9.599636032757051e-07,
"loss": 0.0005,
"over_lengthy_sequences": 0.0,
"reward": 1.375,
"reward_std": 0.31154942512512207,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.37890625,
"step": 45
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.5625,
"TT_Counting/mode_1": 0.4375,
"TT_Detection/mode_0": 1.0,
"TT_Detection/mode_1": 1.0,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.3815789473684211,
"TT_Math/mode_1": 0.2894736842105263,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.5,
"completion_length": 380.5,
"completion_length/mode_0": 418.28125,
"completion_length/mode_1": 342.71875,
"epoch": 0.04185623293903549,
"format_confidence": 0.5,
"grad_norm": 0.7559008917052512,
"grounded_proportion": 0.5,
"kl": 0.012939453125,
"learning_rate": 9.590536851683348e-07,
"loss": 0.0005,
"over_lengthy_sequences": 0.0,
"reward": 1.37890625,
"reward_std": 0.23144766688346863,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.37890625,
"step": 46
},
{
"TT_Chart/mode_0": 0.75,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.4375,
"TT_Counting/mode_1": 0.75,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.125,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.5588235294117647,
"TT_Math/mode_1": 0.45588235294117646,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.2,
"TT_Science/mode_1": 0.2,
"completion_length": 399.77734375,
"completion_length/mode_0": 433.140625,
"completion_length/mode_1": 366.4140625,
"epoch": 0.042766151046405826,
"format_confidence": 0.5,
"grad_norm": 1.0624054559364031,
"grounded_proportion": 0.5,
"kl": 0.01416015625,
"learning_rate": 9.581437670609644e-07,
"loss": 0.0006,
"over_lengthy_sequences": 0.0,
"reward": 1.4140625,
"reward_std": 0.35639268159866333,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4140625,
"step": 47
},
{
"TT_Chart/mode_0": 0.875,
"TT_Chart/mode_1": 0.875,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.125,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.46875,
"TT_OCR/mode_0": 0.3333333333333333,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.4166666666666667,
"completion_length": 349.78125,
"completion_length/mode_0": 375.375,
"completion_length/mode_1": 324.1875,
"epoch": 0.04367606915377616,
"format_confidence": 0.5,
"grad_norm": 0.6132246625343835,
"grounded_proportion": 0.5,
"kl": 0.011474609375,
"learning_rate": 9.572338489535942e-07,
"loss": 0.0005,
"over_lengthy_sequences": 0.0,
"reward": 1.38671875,
"reward_std": 0.2500086724758148,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.38671875,
"step": 48
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.875,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.125,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.39473684210526316,
"TT_Math/mode_1": 0.4868421052631579,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.75,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.4166666666666667,
"completion_length": 357.80078125,
"completion_length/mode_0": 382.140625,
"completion_length/mode_1": 333.4609375,
"epoch": 0.044585987261146494,
"format_confidence": 0.5,
"grad_norm": 1.3534089418671387,
"grounded_proportion": 0.5,
"kl": 0.0108642578125,
"learning_rate": 9.563239308462239e-07,
"loss": 0.0004,
"over_lengthy_sequences": 0.0,
"reward": 1.41015625,
"reward_std": 0.3424571454524994,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.41015625,
"step": 49
},
{
"TT_Chart/mode_0": 0.125,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.125,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.4722222222222222,
"TT_Math/mode_1": 0.4305555555555556,
"TT_OCR/mode_0": 0.08333333333333333,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.375,
"completion_length": 348.2421875,
"completion_length/mode_0": 378.8203125,
"completion_length/mode_1": 317.6640625,
"epoch": 0.04549590536851683,
"format_confidence": 0.5,
"grad_norm": 1.040116212408755,
"grounded_proportion": 0.5,
"kl": 0.0142822265625,
"learning_rate": 9.554140127388535e-07,
"loss": 0.0006,
"over_lengthy_sequences": 0.0,
"reward": 1.34375,
"reward_std": 0.3203405737876892,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.34765625,
"step": 50
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.16666666666666666,
"TT_Counting/mode_1": 0.5833333333333334,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.375,
"TT_Document/mode_0": 0.125,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.40789473684210525,
"TT_Math/mode_1": 0.5,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.75,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.3333333333333333,
"TT_Science/mode_1": 0.75,
"completion_length": 356.33203125,
"completion_length/mode_0": 390.25,
"completion_length/mode_1": 322.4140625,
"epoch": 0.04640582347588717,
"format_confidence": 0.5,
"grad_norm": 2.6529744307875993,
"grounded_proportion": 0.5,
"kl": 0.01220703125,
"learning_rate": 9.545040946314831e-07,
"loss": 0.0005,
"over_lengthy_sequences": 0.0,
"reward": 1.40234375,
"reward_std": 0.3068116307258606,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.40234375,
"step": 51
},
{
"TT_Chart/mode_0": 0.3125,
"TT_Chart/mode_1": 0.4375,
"TT_Counting/mode_0": 0.5833333333333334,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.125,
"TT_Document/mode_1": 0.125,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.671875,
"TT_Math/mode_1": 0.59375,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 1.0,
"TT_Science/mode_1": 0.75,
"completion_length": 329.06640625,
"completion_length/mode_0": 356.7421875,
"completion_length/mode_1": 301.390625,
"epoch": 0.047315741583257506,
"format_confidence": 0.5,
"grad_norm": 0.9611109881006651,
"grounded_proportion": 0.5,
"kl": 0.013916015625,
"learning_rate": 9.535941765241128e-07,
"loss": 0.0006,
"over_lengthy_sequences": 0.0,
"reward": 1.48828125,
"reward_std": 0.38452082872390747,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.48828125,
"step": 52
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 1.0,
"TT_Counting/mode_1": 0.75,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.1875,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.578125,
"TT_Math/mode_1": 0.5,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.125,
"completion_length": 327.03125,
"completion_length/mode_0": 346.4921875,
"completion_length/mode_1": 307.5703125,
"epoch": 0.048225659690627844,
"format_confidence": 0.5,
"grad_norm": 0.797769852763845,
"grounded_proportion": 0.5,
"kl": 0.012939453125,
"learning_rate": 9.526842584167425e-07,
"loss": 0.0005,
"over_lengthy_sequences": 0.0,
"reward": 1.34375,
"reward_std": 0.21713145077228546,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.34375,
"step": 53
},
{
"TT_Chart/mode_0": 0.6,
"TT_Chart/mode_1": 0.45,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.125,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.578125,
"TT_Math/mode_1": 0.578125,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.5,
"TT_Others/mode_1": 0.5,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.4166666666666667,
"TT_Science/mode_1": 0.5833333333333334,
"completion_length": 292.44140625,
"completion_length/mode_0": 315.625,
"completion_length/mode_1": 269.2578125,
"epoch": 0.04913557779799818,
"format_confidence": 0.5,
"grad_norm": 1.549225397699757,
"grounded_proportion": 0.5,
"kl": 0.01287841796875,
"learning_rate": 9.517743403093721e-07,
"loss": 0.0005,
"over_lengthy_sequences": 0.0,
"reward": 1.46875,
"reward_std": 0.20779038965702057,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.46875,
"step": 54
},
{
"TT_Chart/mode_0": 0.08333333333333333,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.08333333333333333,
"TT_Detection/mode_1": 0.4166666666666667,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.125,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.6052631578947368,
"TT_Math/mode_1": 0.631578947368421,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.25,
"completion_length": 336.27734375,
"completion_length/mode_0": 361.40625,
"completion_length/mode_1": 311.1484375,
"epoch": 0.05004549590536852,
"format_confidence": 0.5,
"grad_norm": 1.0145037617177997,
"grounded_proportion": 0.5,
"kl": 0.01055908203125,
"learning_rate": 9.508644222020018e-07,
"loss": 0.0004,
"over_lengthy_sequences": 0.0,
"reward": 1.44921875,
"reward_std": 0.27354732155799866,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.44921875,
"step": 55
},
{
"TT_Chart/mode_0": 0.3888888888888889,
"TT_Chart/mode_1": 0.3888888888888889,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.16666666666666666,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.7115384615384616,
"TT_Math/mode_1": 0.5961538461538461,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.3333333333333333,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.25,
"completion_length": 319.97265625,
"completion_length/mode_0": 339.140625,
"completion_length/mode_1": 300.8046875,
"epoch": 0.050955414012738856,
"format_confidence": 0.5,
"grad_norm": 1.126724757554702,
"grounded_proportion": 0.5,
"kl": 0.01165771484375,
"learning_rate": 9.499545040946314e-07,
"loss": 0.0005,
"over_lengthy_sequences": 0.00390625,
"reward": 1.44140625,
"reward_std": 0.27130943536758423,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.4453125,
"step": 56
},
{
"TT_Chart/mode_0": 0.3333333333333333,
"TT_Chart/mode_1": 0.3333333333333333,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.875,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.4264705882352941,
"TT_Math/mode_1": 0.4852941176470588,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.25,
"completion_length": 377.96875,
"completion_length/mode_0": 419.2265625,
"completion_length/mode_1": 336.7109375,
"epoch": 0.051865332120109194,
"format_confidence": 0.5,
"grad_norm": 0.7761878682566107,
"grounded_proportion": 0.5,
"kl": 0.012939453125,
"learning_rate": 9.490445859872611e-07,
"loss": 0.0005,
"over_lengthy_sequences": 0.0,
"reward": 1.33984375,
"reward_std": 0.19791889190673828,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.33984375,
"step": 57
},
{
"TT_Chart/mode_0": 0.08333333333333333,
"TT_Chart/mode_1": 0.16666666666666666,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.4166666666666667,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.5131578947368421,
"TT_Math/mode_1": 0.5,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 354.40625,
"completion_length/mode_0": 378.8828125,
"completion_length/mode_1": 329.9296875,
"epoch": 0.052775250227479524,
"format_confidence": 0.5,
"grad_norm": 1.0626500338136928,
"grounded_proportion": 0.5,
"kl": 0.03759765625,
"learning_rate": 9.481346678798907e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.0,
"reward": 1.37109375,
"reward_std": 0.25118574500083923,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.37109375,
"step": 58
},
{
"TT_Chart/mode_0": 1.0,
"TT_Chart/mode_1": 1.0,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.5,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.46,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.25,
"completion_length": 438.22265625,
"completion_length/mode_0": 464.8359375,
"completion_length/mode_1": 411.609375,
"epoch": 0.05368516833484986,
"format_confidence": 0.5,
"grad_norm": 1.2595171994008831,
"grounded_proportion": 0.5,
"kl": 0.00750732421875,
"learning_rate": 9.472247497725204e-07,
"loss": 0.0003,
"over_lengthy_sequences": 0.0,
"reward": 1.4453125,
"reward_std": 0.26565414667129517,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4453125,
"step": 59
},
{
"TT_Chart/mode_0": 0.21428571428571427,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.40625,
"TT_Math/mode_1": 0.265625,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.0,
"completion_length": 421.61328125,
"completion_length/mode_0": 453.5625,
"completion_length/mode_1": 389.6640625,
"epoch": 0.0545950864422202,
"format_confidence": 0.5,
"grad_norm": 0.7843446949263764,
"grounded_proportion": 0.5,
"kl": 0.0103759765625,
"learning_rate": 9.463148316651502e-07,
"loss": 0.0004,
"over_lengthy_sequences": 0.0,
"reward": 1.2734375,
"reward_std": 0.24078628420829773,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.2734375,
"step": 60
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.125,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5568181818181818,
"TT_Math/mode_1": 0.5568181818181818,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.0,
"completion_length": 374.40625,
"completion_length/mode_0": 416.109375,
"completion_length/mode_1": 332.703125,
"epoch": 0.055505004549590536,
"format_confidence": 0.5,
"grad_norm": 0.49588424035326556,
"grounded_proportion": 0.5,
"kl": 0.00921630859375,
"learning_rate": 9.454049135577798e-07,
"loss": 0.0004,
"over_lengthy_sequences": 0.0,
"reward": 1.4140625,
"reward_std": 0.20528410375118256,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.41796875,
"step": 61
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.75,
"TT_Counting/mode_1": 0.75,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.3125,
"TT_Math/mode_0": 0.4166666666666667,
"TT_Math/mode_1": 0.5694444444444444,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.0,
"completion_length": 370.55078125,
"completion_length/mode_0": 399.0546875,
"completion_length/mode_1": 342.046875,
"epoch": 0.056414922656960874,
"format_confidence": 0.5,
"grad_norm": 0.9866700336073141,
"grounded_proportion": 0.5,
"kl": 0.01806640625,
"learning_rate": 9.444949954504094e-07,
"loss": 0.0007,
"over_lengthy_sequences": 0.0,
"reward": 1.44921875,
"reward_std": 0.26063913106918335,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.44921875,
"step": 62
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.53125,
"TT_Counting/mode_1": 0.4375,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.6166666666666667,
"TT_Math/mode_1": 0.7,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.0,
"completion_length": 312.875,
"completion_length/mode_0": 329.6640625,
"completion_length/mode_1": 296.0859375,
"epoch": 0.05732484076433121,
"format_confidence": 0.5,
"grad_norm": 0.826234605264805,
"grounded_proportion": 0.5,
"kl": 0.0211181640625,
"learning_rate": 9.435850773430391e-07,
"loss": 0.0008,
"over_lengthy_sequences": 0.0,
"reward": 1.48828125,
"reward_std": 0.2982718050479889,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.4921875,
"step": 63
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.08333333333333333,
"TT_Counting/mode_0": 0.4642857142857143,
"TT_Counting/mode_1": 0.42857142857142855,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 1.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.5588235294117647,
"TT_Math/mode_1": 0.4852941176470588,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.0,
"completion_length": 320.4296875,
"completion_length/mode_0": 344.984375,
"completion_length/mode_1": 295.875,
"epoch": 0.05823475887170155,
"format_confidence": 0.5,
"grad_norm": 1.332396950969122,
"grounded_proportion": 0.5,
"kl": 0.017822265625,
"learning_rate": 9.426751592356688e-07,
"loss": 0.0007,
"over_lengthy_sequences": 0.0,
"reward": 1.4375,
"reward_std": 0.30236050486564636,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4375,
"step": 64
},
{
"TT_Chart/mode_0": 0.5833333333333334,
"TT_Chart/mode_1": 0.4166666666666667,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.125,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 0.375,
"TT_Grounding/mode_1": 1.0,
"TT_Math/mode_0": 0.7708333333333334,
"TT_Math/mode_1": 0.7708333333333334,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.08333333333333333,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.35,
"TT_Science/mode_1": 0.4,
"completion_length": 265.51171875,
"completion_length/mode_0": 294.296875,
"completion_length/mode_1": 236.7265625,
"epoch": 0.059144676979071886,
"format_confidence": 0.5,
"grad_norm": 1.0706428268753283,
"grounded_proportion": 0.5,
"kl": 0.0172119140625,
"learning_rate": 9.417652411282983e-07,
"loss": 0.0007,
"over_lengthy_sequences": 0.0,
"reward": 1.53515625,
"reward_std": 0.2715778052806854,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.53515625,
"step": 65
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.35714285714285715,
"TT_Math/mode_0": 0.625,
"TT_Math/mode_1": 0.703125,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.1875,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.35,
"TT_Science/mode_1": 0.4,
"completion_length": 337.359375,
"completion_length/mode_0": 346.75,
"completion_length/mode_1": 327.96875,
"epoch": 0.06005459508644222,
"format_confidence": 0.5,
"grad_norm": 0.9882812019724813,
"grounded_proportion": 0.5,
"kl": 0.0286865234375,
"learning_rate": 9.408553230209281e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.0,
"reward": 1.4296875,
"reward_std": 0.2574900984764099,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4296875,
"step": 66
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.16666666666666666,
"TT_Counting/mode_1": 0.16666666666666666,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.5227272727272727,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 1.0,
"TT_Science/mode_1": 0.5,
"completion_length": 360.41796875,
"completion_length/mode_0": 384.0,
"completion_length/mode_1": 336.8359375,
"epoch": 0.060964513193812554,
"format_confidence": 0.5,
"grad_norm": 1.081856623327895,
"grounded_proportion": 0.5,
"kl": 0.013671875,
"learning_rate": 9.399454049135578e-07,
"loss": 0.0005,
"over_lengthy_sequences": 0.0,
"reward": 1.3984375,
"reward_std": 0.3071898818016052,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3984375,
"step": 67
},
{
"TT_Chart/mode_0": 0.3333333333333333,
"TT_Chart/mode_1": 0.3333333333333333,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.36904761904761907,
"TT_Math/mode_1": 0.4523809523809524,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.25,
"completion_length": 382.390625,
"completion_length/mode_0": 399.09375,
"completion_length/mode_1": 365.6875,
"epoch": 0.06187443130118289,
"format_confidence": 0.5,
"grad_norm": 1.0129574632580445,
"grounded_proportion": 0.5,
"kl": 0.0257568359375,
"learning_rate": 9.390354868061873e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.00390625,
"reward": 1.36328125,
"reward_std": 0.33184993267059326,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.3671875,
"step": 68
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.5625,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.4166666666666667,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.16666666666666666,
"TT_Math/mode_0": 0.35714285714285715,
"TT_Math/mode_1": 0.42857142857142855,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.5,
"completion_length": 315.05078125,
"completion_length/mode_0": 341.9140625,
"completion_length/mode_1": 288.1875,
"epoch": 0.06278434940855324,
"format_confidence": 0.5,
"grad_norm": 0.6116970446341301,
"grounded_proportion": 0.5,
"kl": 0.031005859375,
"learning_rate": 9.381255686988171e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.31640625,
"reward_std": 0.25513356924057007,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.31640625,
"step": 69
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.75,
"TT_Counting/mode_0": 0.5833333333333334,
"TT_Counting/mode_1": 0.3333333333333333,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.16666666666666666,
"TT_Math/mode_0": 0.6111111111111112,
"TT_Math/mode_1": 0.6388888888888888,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 1.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.3,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 1.0,
"completion_length": 325.84375,
"completion_length/mode_0": 348.8671875,
"completion_length/mode_1": 302.8203125,
"epoch": 0.06369426751592357,
"format_confidence": 0.5,
"grad_norm": 1.393496150546713,
"grounded_proportion": 0.5,
"kl": 0.0267333984375,
"learning_rate": 9.372156505914467e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.0,
"reward": 1.49609375,
"reward_std": 0.3036562502384186,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.49609375,
"step": 70
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.5277777777777778,
"TT_Math/mode_1": 0.5694444444444444,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.375,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.16666666666666666,
"completion_length": 358.953125,
"completion_length/mode_0": 377.46875,
"completion_length/mode_1": 340.4375,
"epoch": 0.0646041856232939,
"format_confidence": 0.5,
"grad_norm": 0.8157106114860397,
"grounded_proportion": 0.5,
"kl": 0.01556396484375,
"learning_rate": 9.363057324840764e-07,
"loss": 0.0006,
"over_lengthy_sequences": 0.0,
"reward": 1.40625,
"reward_std": 0.2968239188194275,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.40625,
"step": 71
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.3888888888888889,
"TT_Math/mode_1": 0.37962962962962965,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.16666666666666666,
"completion_length": 524.28515625,
"completion_length/mode_0": 538.4765625,
"completion_length/mode_1": 510.09375,
"epoch": 0.06551410373066424,
"format_confidence": 0.5,
"grad_norm": 1.4368535247954564,
"grounded_proportion": 0.5,
"kl": 0.038330078125,
"learning_rate": 9.35395814376706e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.0078125,
"reward": 1.328125,
"reward_std": 0.3050992488861084,
"rewards/format_reward": 0.9921875,
"rewards/general_task_reward": 0.3359375,
"step": 72
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.45,
"TT_Counting/mode_1": 0.6,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.125,
"TT_Math/mode_0": 0.6052631578947368,
"TT_Math/mode_1": 0.618421052631579,
"TT_OCR/mode_0": 0.75,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.0,
"completion_length": 345.50390625,
"completion_length/mode_0": 374.7734375,
"completion_length/mode_1": 316.234375,
"epoch": 0.06642402183803457,
"format_confidence": 0.5,
"grad_norm": 1.4581833815927654,
"grounded_proportion": 0.5,
"kl": 0.027587890625,
"learning_rate": 9.344858962693357e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.00390625,
"reward": 1.47265625,
"reward_std": 0.3206025958061218,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.4765625,
"step": 73
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.125,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.375,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.6666666666666666,
"TT_Math/mode_1": 0.5833333333333334,
"TT_OCR/mode_0": 0.75,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5833333333333334,
"TT_Science/mode_1": 0.4166666666666667,
"completion_length": 336.33203125,
"completion_length/mode_0": 374.1171875,
"completion_length/mode_1": 298.546875,
"epoch": 0.06733393994540492,
"format_confidence": 0.5,
"grad_norm": 1.9303402353521169,
"grounded_proportion": 0.5,
"kl": 0.0269775390625,
"learning_rate": 9.335759781619655e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.0,
"reward": 1.51171875,
"reward_std": 0.40742409229278564,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.51171875,
"step": 74
},
{
"TT_Chart/mode_0": 0.3,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.125,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0625,
"TT_Grounding/mode_1": 0.5625,
"TT_Math/mode_0": 0.44642857142857145,
"TT_Math/mode_1": 0.5178571428571429,
"TT_OCR/mode_0": 0.75,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 310.859375,
"completion_length/mode_0": 345.890625,
"completion_length/mode_1": 275.828125,
"epoch": 0.06824385805277525,
"format_confidence": 0.5,
"grad_norm": 1.094986906392167,
"grounded_proportion": 0.5,
"kl": 0.0250244140625,
"learning_rate": 9.32666060054595e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.0,
"reward": 1.32421875,
"reward_std": 0.2394905686378479,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.32421875,
"step": 75
},
{
"TT_Chart/mode_0": 0.35,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.375,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.5125,
"TT_Math/mode_1": 0.45,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 1.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 444.99609375,
"completion_length/mode_0": 487.234375,
"completion_length/mode_1": 402.7578125,
"epoch": 0.06915377616014559,
"format_confidence": 0.5,
"grad_norm": 0.9456640910256001,
"grounded_proportion": 0.5,
"kl": 0.02783203125,
"learning_rate": 9.317561419472247e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.0,
"reward": 1.41015625,
"reward_std": 0.32864031195640564,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.41015625,
"step": 76
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.125,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 1.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.484375,
"TT_Math/mode_1": 0.46875,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.375,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.375,
"completion_length": 357.015625,
"completion_length/mode_0": 384.8125,
"completion_length/mode_1": 329.21875,
"epoch": 0.07006369426751592,
"format_confidence": 0.5,
"grad_norm": 1.126552968002394,
"grounded_proportion": 0.5,
"kl": 0.01446533203125,
"learning_rate": 9.308462238398544e-07,
"loss": 0.0006,
"over_lengthy_sequences": 0.0,
"reward": 1.3359375,
"reward_std": 0.20832324028015137,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3359375,
"step": 77
},
{
"TT_Chart/mode_0": 0.625,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.3333333333333333,
"TT_Counting/mode_1": 0.08333333333333333,
"TT_Detection/mode_0": 0.3125,
"TT_Detection/mode_1": 0.3125,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.39473684210526316,
"TT_Math/mode_1": 0.39473684210526316,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.375,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.75,
"TT_Science/mode_1": 0.75,
"completion_length": 365.06640625,
"completion_length/mode_0": 388.1796875,
"completion_length/mode_1": 341.953125,
"epoch": 0.07097361237488627,
"format_confidence": 0.5,
"grad_norm": 1.8929118579408397,
"grounded_proportion": 0.5,
"kl": 0.021240234375,
"learning_rate": 9.299363057324841e-07,
"loss": 0.0009,
"over_lengthy_sequences": 0.00390625,
"reward": 1.36328125,
"reward_std": 0.32180553674697876,
"rewards/format_reward": 0.98828125,
"rewards/general_task_reward": 0.375,
"step": 78
},
{
"TT_Chart/mode_0": 0.4166666666666667,
"TT_Chart/mode_1": 0.4166666666666667,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.375,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.3333333333333333,
"TT_Math/mode_0": 0.5441176470588235,
"TT_Math/mode_1": 0.4264705882352941,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.25,
"completion_length": 412.8359375,
"completion_length/mode_0": 454.6640625,
"completion_length/mode_1": 371.0078125,
"epoch": 0.0718835304822566,
"format_confidence": 0.5,
"grad_norm": 2.3611466751955206,
"grounded_proportion": 0.5,
"kl": 0.0196533203125,
"learning_rate": 9.290263876251136e-07,
"loss": 0.0008,
"over_lengthy_sequences": 0.00390625,
"reward": 1.35546875,
"reward_std": 0.2786721885204315,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.359375,
"step": 79
},
{
"TT_Chart/mode_0": 0.375,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.375,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.8333333333333334,
"TT_Math/mode_0": 0.703125,
"TT_Math/mode_1": 0.546875,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.3333333333333333,
"TT_Others/mode_1": 0.4166666666666667,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.5,
"completion_length": 324.98828125,
"completion_length/mode_0": 360.6484375,
"completion_length/mode_1": 289.328125,
"epoch": 0.07279344858962693,
"format_confidence": 0.5,
"grad_norm": 1.423595772914213,
"grounded_proportion": 0.5,
"kl": 0.028076171875,
"learning_rate": 9.281164695177434e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.0,
"reward": 1.4765625,
"reward_std": 0.32864275574684143,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4765625,
"step": 80
},
{
"TT_Chart/mode_0": 0.375,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.3333333333333333,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.375,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.4583333333333333,
"TT_Math/mode_1": 0.4861111111111111,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.125,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.5,
"completion_length": 421.86328125,
"completion_length/mode_0": 444.5234375,
"completion_length/mode_1": 399.203125,
"epoch": 0.07370336669699727,
"format_confidence": 0.5,
"grad_norm": 0.9052028066723226,
"grounded_proportion": 0.5,
"kl": 0.0186767578125,
"learning_rate": 9.272065514103731e-07,
"loss": 0.0007,
"over_lengthy_sequences": 0.0,
"reward": 1.37890625,
"reward_std": 0.3306756913661957,
"rewards/format_reward": 0.98828125,
"rewards/general_task_reward": 0.390625,
"step": 81
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.75,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0625,
"TT_Detection/mode_1": 0.6875,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.55,
"TT_Math/mode_1": 0.5125,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.75,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 1.0,
"TT_Science/mode_1": 0.5,
"completion_length": 383.95703125,
"completion_length/mode_0": 410.4375,
"completion_length/mode_1": 357.4765625,
"epoch": 0.0746132848043676,
"format_confidence": 0.5,
"grad_norm": 1.1442493170852235,
"grounded_proportion": 0.5,
"kl": 0.030029296875,
"learning_rate": 9.262966333030026e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.00390625,
"reward": 1.46875,
"reward_std": 0.3109995126724243,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.47265625,
"step": 82
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.125,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.4166666666666667,
"TT_Math/mode_0": 0.525,
"TT_Math/mode_1": 0.475,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.75,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.75,
"TT_Science/mode_1": 0.625,
"completion_length": 355.09765625,
"completion_length/mode_0": 388.8984375,
"completion_length/mode_1": 321.296875,
"epoch": 0.07552320291173795,
"format_confidence": 0.5,
"grad_norm": 1.1396443794919924,
"grounded_proportion": 0.5,
"kl": 0.01708984375,
"learning_rate": 9.253867151956324e-07,
"loss": 0.0007,
"over_lengthy_sequences": 0.0,
"reward": 1.4296875,
"reward_std": 0.3350968360900879,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4296875,
"step": 83
},
{
"TT_Chart/mode_0": 0.08333333333333333,
"TT_Chart/mode_1": 0.08333333333333333,
"TT_Counting/mode_0": 0.3,
"TT_Counting/mode_1": 0.3,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.4375,
"TT_Document/mode_0": 0.3333333333333333,
"TT_Document/mode_1": 0.3333333333333333,
"TT_Grounding/mode_0": 0.375,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.4807692307692308,
"TT_Math/mode_1": 0.5769230769230769,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.75,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.25,
"completion_length": 306.87890625,
"completion_length/mode_0": 345.4296875,
"completion_length/mode_1": 268.328125,
"epoch": 0.07643312101910828,
"format_confidence": 0.5,
"grad_norm": 0.8312318484762555,
"grounded_proportion": 0.5,
"kl": 0.0380859375,
"learning_rate": 9.24476797088262e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.0,
"reward": 1.36328125,
"reward_std": 0.2795896828174591,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.3671875,
"step": 84
},
{
"TT_Chart/mode_0": 0.08333333333333333,
"TT_Chart/mode_1": 0.08333333333333333,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.3333333333333333,
"TT_Math/mode_0": 0.4270833333333333,
"TT_Math/mode_1": 0.4479166666666667,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.75,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.25,
"completion_length": 418.26953125,
"completion_length/mode_0": 462.9609375,
"completion_length/mode_1": 373.578125,
"epoch": 0.07734303912647862,
"format_confidence": 0.5,
"grad_norm": 0.7031346175738115,
"grounded_proportion": 0.5,
"kl": 0.0191650390625,
"learning_rate": 9.235668789808917e-07,
"loss": 0.0008,
"over_lengthy_sequences": 0.0,
"reward": 1.37890625,
"reward_std": 0.2682702839374542,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.37890625,
"step": 85
},
{
"TT_Chart/mode_0": 0.625,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.3125,
"TT_Counting/mode_1": 0.125,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 1.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.625,
"TT_Math/mode_0": 0.6710526315789473,
"TT_Math/mode_1": 0.631578947368421,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.75,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 1.0,
"TT_Science/mode_1": 0.25,
"completion_length": 314.35546875,
"completion_length/mode_0": 347.1015625,
"completion_length/mode_1": 281.609375,
"epoch": 0.07825295723384895,
"format_confidence": 0.5,
"grad_norm": 1.1106438112336248,
"grounded_proportion": 0.5,
"kl": 0.033447265625,
"learning_rate": 9.226569608735213e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.53125,
"reward_std": 0.3708537220954895,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.53125,
"step": 86
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.625,
"TT_Math/mode_0": 0.4659090909090909,
"TT_Math/mode_1": 0.5,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.5,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.25,
"completion_length": 459.609375,
"completion_length/mode_0": 470.390625,
"completion_length/mode_1": 448.828125,
"epoch": 0.0791628753412193,
"format_confidence": 0.5,
"grad_norm": 0.9111319582454244,
"grounded_proportion": 0.5,
"kl": 0.0283203125,
"learning_rate": 9.21747042766151e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.00390625,
"reward": 1.37890625,
"reward_std": 0.32089686393737793,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.3828125,
"step": 87
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.3125,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.16666666666666666,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.125,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.45588235294117646,
"TT_Math/mode_1": 0.35294117647058826,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.5,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.3,
"TT_Science/mode_1": 0.5,
"completion_length": 288.15625,
"completion_length/mode_0": 321.078125,
"completion_length/mode_1": 255.234375,
"epoch": 0.08007279344858963,
"format_confidence": 0.5,
"grad_norm": 0.9337639335763862,
"grounded_proportion": 0.5,
"kl": 0.0252685546875,
"learning_rate": 9.208371246587808e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.0,
"reward": 1.33984375,
"reward_std": 0.32207000255584717,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.33984375,
"step": 88
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.08333333333333333,
"TT_Detection/mode_1": 0.16666666666666666,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.3333333333333333,
"TT_Math/mode_0": 0.4444444444444444,
"TT_Math/mode_1": 0.4305555555555556,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.0,
"completion_length": 379.68359375,
"completion_length/mode_0": 409.75,
"completion_length/mode_1": 349.6171875,
"epoch": 0.08098271155595996,
"format_confidence": 0.5,
"grad_norm": 0.9170168681283755,
"grounded_proportion": 0.5,
"kl": 0.04052734375,
"learning_rate": 9.199272065514103e-07,
"loss": 0.0016,
"over_lengthy_sequences": 0.0,
"reward": 1.27734375,
"reward_std": 0.2290886491537094,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.27734375,
"step": 89
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.1875,
"TT_Grounding/mode_1": 0.3125,
"TT_Math/mode_0": 0.5625,
"TT_Math/mode_1": 0.525,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 1.0,
"completion_length": 332.9296875,
"completion_length/mode_0": 359.171875,
"completion_length/mode_1": 306.6875,
"epoch": 0.0818926296633303,
"format_confidence": 0.5,
"grad_norm": 1.2383174113880757,
"grounded_proportion": 0.5,
"kl": 0.0498046875,
"learning_rate": 9.1901728844404e-07,
"loss": 0.002,
"over_lengthy_sequences": 0.0,
"reward": 1.421875,
"reward_std": 0.29393690824508667,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.421875,
"step": 90
},
{
"TT_Chart/mode_0": 1.0,
"TT_Chart/mode_1": 1.0,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.3125,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.3125,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.4625,
"TT_Math/mode_1": 0.4,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 1.0,
"completion_length": 342.94921875,
"completion_length/mode_0": 382.40625,
"completion_length/mode_1": 303.4921875,
"epoch": 0.08280254777070063,
"format_confidence": 0.5,
"grad_norm": 1.0193811299262545,
"grounded_proportion": 0.5,
"kl": 0.03173828125,
"learning_rate": 9.181073703366697e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.41015625,
"reward_std": 0.3356223702430725,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.41015625,
"step": 91
},
{
"TT_Chart/mode_0": 1.0,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.75,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.5657894736842105,
"TT_Math/mode_1": 0.4473684210526316,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.1875,
"completion_length": 314.7265625,
"completion_length/mode_0": 340.65625,
"completion_length/mode_1": 288.796875,
"epoch": 0.08371246587807098,
"format_confidence": 0.5,
"grad_norm": 0.9300317397982756,
"grounded_proportion": 0.5,
"kl": 0.02294921875,
"learning_rate": 9.171974522292994e-07,
"loss": 0.0009,
"over_lengthy_sequences": 0.0,
"reward": 1.41015625,
"reward_std": 0.3043053150177002,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.41015625,
"step": 92
},
{
"TT_Chart/mode_0": 0.4375,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.625,
"TT_Detection/mode_0": 0.08333333333333333,
"TT_Detection/mode_1": 0.16666666666666666,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.359375,
"TT_Math/mode_1": 0.3125,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 1.0,
"TT_Others/mode_0": 0.125,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.1875,
"completion_length": 352.9375,
"completion_length/mode_0": 383.890625,
"completion_length/mode_1": 321.984375,
"epoch": 0.08462238398544131,
"format_confidence": 0.5,
"grad_norm": 1.3154100577214958,
"grounded_proportion": 0.5,
"kl": 0.0419921875,
"learning_rate": 9.162875341219289e-07,
"loss": 0.0017,
"over_lengthy_sequences": 0.0,
"reward": 1.328125,
"reward_std": 0.29681897163391113,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.328125,
"step": 93
},
{
"TT_Chart/mode_0": 0.4375,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 0.3333333333333333,
"TT_Counting/mode_1": 0.4166666666666667,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.125,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.453125,
"TT_Math/mode_1": 0.328125,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.16666666666666666,
"completion_length": 338.6953125,
"completion_length/mode_0": 377.4609375,
"completion_length/mode_1": 299.9296875,
"epoch": 0.08553230209281165,
"format_confidence": 0.5,
"grad_norm": 0.8794839213028697,
"grounded_proportion": 0.5,
"kl": 0.0306396484375,
"learning_rate": 9.153776160145587e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.00390625,
"reward": 1.27734375,
"reward_std": 0.2864776849746704,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.28125,
"step": 94
},
{
"TT_Chart/mode_0": 0.4375,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.125,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.675,
"TT_Math/mode_1": 0.6125,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.4166666666666667,
"TT_Science/mode_1": 0.16666666666666666,
"completion_length": 403.01171875,
"completion_length/mode_0": 441.59375,
"completion_length/mode_1": 364.4296875,
"epoch": 0.08644222020018198,
"format_confidence": 0.5,
"grad_norm": 0.7977211310971815,
"grounded_proportion": 0.5,
"kl": 0.0137939453125,
"learning_rate": 9.144676979071884e-07,
"loss": 0.0006,
"over_lengthy_sequences": 0.0,
"reward": 1.52734375,
"reward_std": 0.30707117915153503,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.52734375,
"step": 95
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.875,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.625,
"TT_Math/mode_0": 0.35294117647058826,
"TT_Math/mode_1": 0.3088235294117647,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.4166666666666667,
"TT_Science/mode_1": 0.16666666666666666,
"completion_length": 380.80078125,
"completion_length/mode_0": 407.078125,
"completion_length/mode_1": 354.5234375,
"epoch": 0.08735213830755233,
"format_confidence": 0.5,
"grad_norm": 0.893912421847757,
"grounded_proportion": 0.5,
"kl": 0.039306640625,
"learning_rate": 9.135577797998179e-07,
"loss": 0.0016,
"over_lengthy_sequences": 0.0,
"reward": 1.3125,
"reward_std": 0.23303402960300446,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3125,
"step": 96
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.6666666666666666,
"TT_Counting/mode_1": 0.4166666666666667,
"TT_Detection/mode_0": 0.5,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.875,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.6309523809523809,
"TT_Math/mode_1": 0.5476190476190477,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.5,
"completion_length": 349.80859375,
"completion_length/mode_0": 384.3671875,
"completion_length/mode_1": 315.25,
"epoch": 0.08826205641492266,
"format_confidence": 0.5,
"grad_norm": 1.488221637431153,
"grounded_proportion": 0.5,
"kl": 0.047607421875,
"learning_rate": 9.126478616924477e-07,
"loss": 0.0019,
"over_lengthy_sequences": 0.0,
"reward": 1.48046875,
"reward_std": 0.33312100172042847,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.48046875,
"step": 97
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.75,
"TT_Counting/mode_0": 0.125,
"TT_Counting/mode_1": 0.125,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.875,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 0.5,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.475,
"TT_Math/mode_1": 0.5125,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.5,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.5,
"completion_length": 386.19140625,
"completion_length/mode_0": 423.7421875,
"completion_length/mode_1": 348.640625,
"epoch": 0.08917197452229299,
"format_confidence": 0.5,
"grad_norm": 0.7423205892030071,
"grounded_proportion": 0.5,
"kl": 0.0419921875,
"learning_rate": 9.117379435850773e-07,
"loss": 0.0017,
"over_lengthy_sequences": 0.00390625,
"reward": 1.4375,
"reward_std": 0.2985200881958008,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.44140625,
"step": 98
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.4166666666666667,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.4375,
"TT_Detection/mode_0": 0.375,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.08333333333333333,
"TT_Grounding/mode_1": 0.08333333333333333,
"TT_Math/mode_0": 0.578125,
"TT_Math/mode_1": 0.546875,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.5,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.5,
"completion_length": 307.75,
"completion_length/mode_0": 326.390625,
"completion_length/mode_1": 289.109375,
"epoch": 0.09008189262966333,
"format_confidence": 0.5,
"grad_norm": 1.089867159322649,
"grounded_proportion": 0.5,
"kl": 0.05029296875,
"learning_rate": 9.108280254777069e-07,
"loss": 0.002,
"over_lengthy_sequences": 0.0,
"reward": 1.41015625,
"reward_std": 0.2457924783229828,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.41015625,
"step": 99
},
{
"TT_Chart/mode_0": 0.75,
"TT_Chart/mode_1": 1.0,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0625,
"TT_Detection/mode_1": 0.375,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.08333333333333333,
"TT_Grounding/mode_1": 0.08333333333333333,
"TT_Math/mode_0": 0.3333333333333333,
"TT_Math/mode_1": 0.2857142857142857,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.25,
"completion_length": 390.5234375,
"completion_length/mode_0": 416.84375,
"completion_length/mode_1": 364.203125,
"epoch": 0.09099181073703366,
"format_confidence": 0.5,
"grad_norm": 0.7432661418713463,
"grounded_proportion": 0.5,
"kl": 0.033447265625,
"learning_rate": 9.099181073703366e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.296875,
"reward_std": 0.3486403822898865,
"rewards/format_reward": 0.9921875,
"rewards/general_task_reward": 0.3046875,
"step": 100
},
{
"TT_Chart/mode_0": 0.625,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.375,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5138888888888888,
"TT_Math/mode_1": 0.4722222222222222,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.3333333333333333,
"TT_Science/mode_1": 0.25,
"completion_length": 417.96875,
"completion_length/mode_0": 456.1015625,
"completion_length/mode_1": 379.8359375,
"epoch": 0.09190172884440401,
"format_confidence": 0.5,
"grad_norm": 0.8403418247569083,
"grounded_proportion": 0.5,
"kl": 0.017822265625,
"learning_rate": 9.090081892629663e-07,
"loss": 0.0007,
"over_lengthy_sequences": 0.0,
"reward": 1.37890625,
"reward_std": 0.31049102544784546,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.37890625,
"step": 101
},
{
"TT_Chart/mode_0": 0.625,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 0.4375,
"TT_Counting/mode_1": 0.1875,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.125,
"TT_Math/mode_0": 0.4852941176470588,
"TT_Math/mode_1": 0.38235294117647056,
"TT_OCR/mode_0": 1.0,
"TT_OCR/mode_1": 1.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.1875,
"completion_length": 357.59765625,
"completion_length/mode_0": 374.7421875,
"completion_length/mode_1": 340.453125,
"epoch": 0.09281164695177434,
"format_confidence": 0.5,
"grad_norm": 1.0298049787611796,
"grounded_proportion": 0.5,
"kl": 0.04736328125,
"learning_rate": 9.08098271155596e-07,
"loss": 0.0019,
"over_lengthy_sequences": 0.0,
"reward": 1.3671875,
"reward_std": 0.30194875597953796,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3671875,
"step": 102
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.45,
"TT_Detection/mode_0": 0.08333333333333333,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.6333333333333333,
"TT_Math/mode_1": 0.55,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.3333333333333333,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 1.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.5,
"completion_length": 289.25,
"completion_length/mode_0": 316.984375,
"completion_length/mode_1": 261.515625,
"epoch": 0.09372156505914468,
"format_confidence": 0.5,
"grad_norm": 1.322134373732465,
"grounded_proportion": 0.5,
"kl": 0.03857421875,
"learning_rate": 9.071883530482256e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.0,
"reward": 1.4375,
"reward_std": 0.3878220021724701,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4375,
"step": 103
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.42105263157894735,
"TT_Math/mode_1": 0.4605263157894737,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.3333333333333333,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.3333333333333333,
"TT_Science/mode_1": 0.0,
"completion_length": 394.15625,
"completion_length/mode_0": 421.609375,
"completion_length/mode_1": 366.703125,
"epoch": 0.09463148316651501,
"format_confidence": 0.5,
"grad_norm": 1.138479077746897,
"grounded_proportion": 0.5,
"kl": 0.02734375,
"learning_rate": 9.062784349408553e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.0,
"reward": 1.33984375,
"reward_std": 0.28182753920555115,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.33984375,
"step": 104
},
{
"TT_Chart/mode_0": 0.6666666666666666,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.375,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.6470588235294118,
"TT_Math/mode_1": 0.6176470588235294,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.3333333333333333,
"TT_Science/mode_1": 0.0,
"completion_length": 359.4453125,
"completion_length/mode_0": 369.296875,
"completion_length/mode_1": 349.59375,
"epoch": 0.09554140127388536,
"format_confidence": 0.5,
"grad_norm": 0.8292575747476529,
"grounded_proportion": 0.5,
"kl": 0.029052734375,
"learning_rate": 9.05368516833485e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.00390625,
"reward": 1.52734375,
"reward_std": 0.22620166838169098,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.53125,
"step": 105
},
{
"TT_Chart/mode_0": 0.75,
"TT_Chart/mode_1": 0.75,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.5,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.08333333333333333,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.47368421052631576,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.4166666666666667,
"TT_Others/mode_1": 0.6666666666666666,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.3333333333333333,
"TT_Science/mode_1": 0.0,
"completion_length": 311.8515625,
"completion_length/mode_0": 330.546875,
"completion_length/mode_1": 293.15625,
"epoch": 0.09645131938125569,
"format_confidence": 0.5,
"grad_norm": 0.7435578662413549,
"grounded_proportion": 0.5,
"kl": 0.0233154296875,
"learning_rate": 9.044585987261146e-07,
"loss": 0.0009,
"over_lengthy_sequences": 0.0,
"reward": 1.453125,
"reward_std": 0.3033941984176636,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.453125,
"step": 106
},
{
"TT_Chart/mode_0": 0.625,
"TT_Chart/mode_1": 0.625,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0625,
"TT_Math/mode_0": 0.39705882352941174,
"TT_Math/mode_1": 0.4264705882352941,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.4166666666666667,
"TT_Others/mode_1": 0.6666666666666666,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.08333333333333333,
"completion_length": 379.23046875,
"completion_length/mode_0": 410.96875,
"completion_length/mode_1": 347.4921875,
"epoch": 0.09736123748862602,
"format_confidence": 0.5,
"grad_norm": 1.95451747528783,
"grounded_proportion": 0.5,
"kl": 0.031494140625,
"learning_rate": 9.035486806187442e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.3671875,
"reward_std": 0.2845958471298218,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3671875,
"step": 107
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.35,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.75,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5394736842105263,
"TT_Math/mode_1": 0.5394736842105263,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.08333333333333333,
"completion_length": 430.86328125,
"completion_length/mode_0": 444.203125,
"completion_length/mode_1": 417.5234375,
"epoch": 0.09827115559599636,
"format_confidence": 0.5,
"grad_norm": 0.6617312507830653,
"grounded_proportion": 0.5,
"kl": 0.0247802734375,
"learning_rate": 9.02638762511374e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.01171875,
"reward": 1.40234375,
"reward_std": 0.2459551990032196,
"rewards/format_reward": 0.98828125,
"rewards/general_task_reward": 0.4140625,
"step": 108
},
{
"TT_Chart/mode_0": 0.125,
"TT_Chart/mode_1": 0.125,
"TT_Counting/mode_0": 1.0,
"TT_Counting/mode_1": 1.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.5,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.55,
"TT_Math/mode_1": 0.43333333333333335,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.4166666666666667,
"completion_length": 364.859375,
"completion_length/mode_0": 396.6875,
"completion_length/mode_1": 333.03125,
"epoch": 0.09918107370336669,
"format_confidence": 0.5,
"grad_norm": 1.1162695339468396,
"grounded_proportion": 0.5,
"kl": 0.0306396484375,
"learning_rate": 9.017288444040037e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.38671875,
"reward_std": 0.306543231010437,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.38671875,
"step": 109
},
{
"TT_Chart/mode_0": 0.6,
"TT_Chart/mode_1": 0.55,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.45,
"TT_Detection/mode_0": 0.3333333333333333,
"TT_Detection/mode_1": 0.4166666666666667,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.5,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.4117647058823529,
"TT_Math/mode_1": 0.5,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.25,
"completion_length": 350.5859375,
"completion_length/mode_0": 396.2421875,
"completion_length/mode_1": 304.9296875,
"epoch": 0.10009099181073704,
"format_confidence": 0.5,
"grad_norm": 1.4082884314141118,
"grounded_proportion": 0.5,
"kl": 0.0264892578125,
"learning_rate": 9.008189262966332e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.00390625,
"reward": 1.421875,
"reward_std": 0.372009813785553,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.42578125,
"step": 110
},
{
"TT_Chart/mode_0": 1.0,
"TT_Chart/mode_1": 1.0,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.625,
"TT_Detection/mode_0": 0.3333333333333333,
"TT_Detection/mode_1": 0.4166666666666667,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.3333333333333333,
"TT_Math/mode_0": 0.5263157894736842,
"TT_Math/mode_1": 0.5263157894736842,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.25,
"completion_length": 272.01953125,
"completion_length/mode_0": 287.265625,
"completion_length/mode_1": 256.7734375,
"epoch": 0.10100090991810737,
"format_confidence": 0.5,
"grad_norm": 14.658431508796246,
"grounded_proportion": 0.5,
"kl": 0.034423828125,
"learning_rate": 8.99909008189263e-07,
"loss": 0.0014,
"over_lengthy_sequences": 0.0,
"reward": 1.46875,
"reward_std": 0.29970598220825195,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.46875,
"step": 111
},
{
"TT_Chart/mode_0": 0.16666666666666666,
"TT_Chart/mode_1": 0.16666666666666666,
"TT_Counting/mode_0": 0.4166666666666667,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 1.0,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.3333333333333333,
"TT_Math/mode_0": 0.6052631578947368,
"TT_Math/mode_1": 0.5526315789473685,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.3333333333333333,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.5,
"completion_length": 357.01171875,
"completion_length/mode_0": 377.296875,
"completion_length/mode_1": 336.7265625,
"epoch": 0.10191082802547771,
"format_confidence": 0.5,
"grad_norm": 0.8195338040008043,
"grounded_proportion": 0.5,
"kl": 0.030517578125,
"learning_rate": 8.989990900818926e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.45703125,
"reward_std": 0.305894136428833,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.45703125,
"step": 112
},
{
"TT_Chart/mode_0": 0.625,
"TT_Chart/mode_1": 0.625,
"TT_Counting/mode_0": 0.5833333333333334,
"TT_Counting/mode_1": 0.3333333333333333,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.3333333333333333,
"TT_Math/mode_0": 0.4852941176470588,
"TT_Math/mode_1": 0.4852941176470588,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.4375,
"completion_length": 337.96875,
"completion_length/mode_0": 374.4453125,
"completion_length/mode_1": 301.4921875,
"epoch": 0.10282074613284804,
"format_confidence": 0.5,
"grad_norm": 0.8731230125231162,
"grounded_proportion": 0.5,
"kl": 0.026123046875,
"learning_rate": 8.980891719745222e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.0,
"reward": 1.453125,
"reward_std": 0.246971994638443,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.453125,
"step": 113
},
{
"TT_Chart/mode_0": 0.375,
"TT_Chart/mode_1": 0.3125,
"TT_Counting/mode_0": 0.75,
"TT_Counting/mode_1": 0.9166666666666666,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.6071428571428571,
"TT_Math/mode_1": 0.5357142857142857,
"TT_OCR/mode_0": 0.125,
"TT_OCR/mode_1": 0.125,
"TT_Others/mode_0": 0.25,
"TT_Others/mode_1": 0.0625,
"TT_Puzzle/mode_0": 1.0,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.0,
"completion_length": 338.16015625,
"completion_length/mode_0": 361.3359375,
"completion_length/mode_1": 314.984375,
"epoch": 0.10373066424021839,
"format_confidence": 0.5,
"grad_norm": 0.7811590590595154,
"grounded_proportion": 0.5,
"kl": 0.027587890625,
"learning_rate": 8.971792538671519e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.0,
"reward": 1.421875,
"reward_std": 0.23079612851142883,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.421875,
"step": 114
},
{
"TT_Chart/mode_0": 1.0,
"TT_Chart/mode_1": 1.0,
"TT_Counting/mode_0": 0.3333333333333333,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.16666666666666666,
"TT_Math/mode_0": 0.3382352941176471,
"TT_Math/mode_1": 0.39705882352941174,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.08333333333333333,
"TT_Others/mode_1": 0.16666666666666666,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.125,
"completion_length": 328.72265625,
"completion_length/mode_0": 343.1015625,
"completion_length/mode_1": 314.34375,
"epoch": 0.10464058234758872,
"format_confidence": 0.5,
"grad_norm": 0.8326183125105682,
"grounded_proportion": 0.5,
"kl": 0.037353515625,
"learning_rate": 8.962693357597816e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.0,
"reward": 1.3203125,
"reward_std": 0.2396092265844345,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3203125,
"step": 115
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.75,
"TT_Counting/mode_1": 0.8333333333333334,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.16666666666666666,
"TT_Math/mode_0": 0.5384615384615384,
"TT_Math/mode_1": 0.5769230769230769,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.125,
"TT_Others/mode_0": 0.16666666666666666,
"TT_Others/mode_1": 0.16666666666666666,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.40625,
"TT_Science/mode_1": 0.21875,
"completion_length": 325.96484375,
"completion_length/mode_0": 346.40625,
"completion_length/mode_1": 305.5234375,
"epoch": 0.10555050045495905,
"format_confidence": 0.5,
"grad_norm": 1.1036768563812662,
"grounded_proportion": 0.5,
"kl": 0.0242919921875,
"learning_rate": 8.953594176524113e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.0,
"reward": 1.4296875,
"reward_std": 0.3274608254432678,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4296875,
"step": 116
},
{
"TT_Chart/mode_0": 0.6666666666666666,
"TT_Chart/mode_1": 0.5833333333333334,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.375,
"TT_Detection/mode_0": 0.08333333333333333,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.16666666666666666,
"TT_Document/mode_1": 0.08333333333333333,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5166666666666667,
"TT_Math/mode_1": 0.5833333333333334,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.125,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.5833333333333334,
"TT_Science/mode_1": 0.5833333333333334,
"completion_length": 330.74609375,
"completion_length/mode_0": 349.3984375,
"completion_length/mode_1": 312.09375,
"epoch": 0.10646041856232939,
"format_confidence": 0.5,
"grad_norm": 0.760986746071062,
"grounded_proportion": 0.5,
"kl": 0.02978515625,
"learning_rate": 8.944494995450409e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.41015625,
"reward_std": 0.1817479431629181,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.41015625,
"step": 117
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.42857142857142855,
"TT_Counting/mode_1": 0.6071428571428571,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.625,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.45588235294117646,
"TT_Math/mode_1": 0.47058823529411764,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.125,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.5833333333333334,
"TT_Science/mode_1": 0.5833333333333334,
"completion_length": 403.1484375,
"completion_length/mode_0": 429.703125,
"completion_length/mode_1": 376.59375,
"epoch": 0.10737033666969972,
"format_confidence": 0.5,
"grad_norm": 0.8553481604117894,
"grounded_proportion": 0.5,
"kl": 0.0291748046875,
"learning_rate": 8.935395814376706e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.45703125,
"reward_std": 0.308400422334671,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.45703125,
"step": 118
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.3333333333333333,
"TT_Math/mode_0": 0.5394736842105263,
"TT_Math/mode_1": 0.39473684210526316,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.125,
"TT_Others/mode_0": 0.25,
"TT_Others/mode_1": 0.5,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.25,
"completion_length": 328.36328125,
"completion_length/mode_0": 361.7578125,
"completion_length/mode_1": 294.96875,
"epoch": 0.10828025477707007,
"format_confidence": 0.5,
"grad_norm": 1.3318825940512236,
"grounded_proportion": 0.5,
"kl": 0.033203125,
"learning_rate": 8.926296633303002e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.3984375,
"reward_std": 0.3040344715118408,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3984375,
"step": 119
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.4166666666666667,
"TT_Counting/mode_1": 0.3333333333333333,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.4625,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.125,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.4166666666666667,
"TT_Science/mode_1": 0.25,
"completion_length": 424.55859375,
"completion_length/mode_0": 449.5390625,
"completion_length/mode_1": 399.578125,
"epoch": 0.1091901728844404,
"format_confidence": 0.5,
"grad_norm": 1.1258840181711312,
"grounded_proportion": 0.5,
"kl": 0.0235595703125,
"learning_rate": 8.917197452229299e-07,
"loss": 0.0009,
"over_lengthy_sequences": 0.0,
"reward": 1.375,
"reward_std": 0.32681170105934143,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.375,
"step": 120
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 1.0,
"TT_Counting/mode_1": 1.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.75,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.4852941176470588,
"TT_Math/mode_1": 0.5588235294117647,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.125,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.375,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 1.0,
"TT_Science/mode_0": 0.4,
"TT_Science/mode_1": 0.4,
"completion_length": 337.171875,
"completion_length/mode_0": 355.90625,
"completion_length/mode_1": 318.4375,
"epoch": 0.11010009099181074,
"format_confidence": 0.5,
"grad_norm": 1.8863415714384077,
"grounded_proportion": 0.5,
"kl": 0.029541015625,
"learning_rate": 8.908098271155595e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.4375,
"reward_std": 0.337970107793808,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4375,
"step": 121
},
{
"TT_Chart/mode_0": 1.0,
"TT_Chart/mode_1": 1.0,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.375,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.1875,
"TT_Math/mode_0": 0.6,
"TT_Math/mode_1": 0.5333333333333333,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.25,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.25,
"completion_length": 318.25,
"completion_length/mode_0": 324.25,
"completion_length/mode_1": 312.25,
"epoch": 0.11101000909918107,
"format_confidence": 0.5,
"grad_norm": 0.7478850656639994,
"grounded_proportion": 0.5,
"kl": 0.0390625,
"learning_rate": 8.898999090081893e-07,
"loss": 0.0016,
"over_lengthy_sequences": 0.00390625,
"reward": 1.36328125,
"reward_std": 0.24553291499614716,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.3671875,
"step": 122
},
{
"TT_Chart/mode_0": 0.3,
"TT_Chart/mode_1": 0.15,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.16666666666666666,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.39285714285714285,
"TT_Math/mode_1": 0.3392857142857143,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.08333333333333333,
"TT_Others/mode_1": 0.16666666666666666,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.25,
"completion_length": 299.01171875,
"completion_length/mode_0": 332.2109375,
"completion_length/mode_1": 265.8125,
"epoch": 0.11191992720655142,
"format_confidence": 0.5,
"grad_norm": 1.0087357565512716,
"grounded_proportion": 0.5,
"kl": 0.0294189453125,
"learning_rate": 8.889899909008188e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.2734375,
"reward_std": 0.3007756471633911,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.27734375,
"step": 123
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.3333333333333333,
"TT_Math/mode_0": 0.4027777777777778,
"TT_Math/mode_1": 0.4583333333333333,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.5,
"TT_Others/mode_1": 0.375,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.4375,
"TT_Science/mode_1": 0.5,
"completion_length": 372.5859375,
"completion_length/mode_0": 401.2109375,
"completion_length/mode_1": 343.9609375,
"epoch": 0.11282984531392175,
"format_confidence": 0.5,
"grad_norm": 0.8137821391046608,
"grounded_proportion": 0.5,
"kl": 0.041015625,
"learning_rate": 8.880800727934485e-07,
"loss": 0.0016,
"over_lengthy_sequences": 0.0,
"reward": 1.38671875,
"reward_std": 0.2759014368057251,
"rewards/format_reward": 0.9921875,
"rewards/general_task_reward": 0.39453125,
"step": 124
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.16666666666666666,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 1.0,
"TT_Math/mode_0": 0.5166666666666667,
"TT_Math/mode_1": 0.55,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.125,
"completion_length": 355.76171875,
"completion_length/mode_0": 365.4296875,
"completion_length/mode_1": 346.09375,
"epoch": 0.11373976342129208,
"format_confidence": 0.5,
"grad_norm": 0.7654413326806154,
"grounded_proportion": 0.5,
"kl": 0.036865234375,
"learning_rate": 8.871701546860783e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.0078125,
"reward": 1.33984375,
"reward_std": 0.26253271102905273,
"rewards/format_reward": 0.9921875,
"rewards/general_task_reward": 0.34765625,
"step": 125
},
{
"TT_Chart/mode_0": 0.3333333333333333,
"TT_Chart/mode_1": 0.3333333333333333,
"TT_Counting/mode_0": 0.3333333333333333,
"TT_Counting/mode_1": 0.3333333333333333,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.525,
"TT_Math/mode_1": 0.45,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.75,
"completion_length": 371.875,
"completion_length/mode_0": 405.6640625,
"completion_length/mode_1": 338.0859375,
"epoch": 0.11464968152866242,
"format_confidence": 0.5,
"grad_norm": 0.6049711014702803,
"grounded_proportion": 0.5,
"kl": 0.036376953125,
"learning_rate": 8.862602365787079e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.00390625,
"reward": 1.4140625,
"reward_std": 0.28288590908050537,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.41796875,
"step": 126
},
{
"TT_Chart/mode_0": 0.4,
"TT_Chart/mode_1": 0.4,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.625,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.75,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.4444444444444444,
"TT_Math/mode_1": 0.5277777777777778,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.125,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 325.46484375,
"completion_length/mode_0": 355.6796875,
"completion_length/mode_1": 295.25,
"epoch": 0.11555959963603275,
"format_confidence": 0.5,
"grad_norm": 1.8410793961989773,
"grounded_proportion": 0.5,
"kl": 0.033203125,
"learning_rate": 8.853503184713375e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.39453125,
"reward_std": 0.2690715193748474,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.39453125,
"step": 127
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.375,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.75,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.5625,
"TT_Math/mode_1": 0.515625,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.08333333333333333,
"TT_Others/mode_1": 0.3333333333333333,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5416666666666666,
"TT_Science/mode_1": 0.5,
"completion_length": 320.73046875,
"completion_length/mode_0": 337.2265625,
"completion_length/mode_1": 304.234375,
"epoch": 0.1164695177434031,
"format_confidence": 0.5,
"grad_norm": 0.6389432250465099,
"grounded_proportion": 0.5,
"kl": 0.0242919921875,
"learning_rate": 8.844404003639672e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.0,
"reward": 1.45703125,
"reward_std": 0.23922216892242432,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.45703125,
"step": 128
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 1.0,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.4375,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.6666666666666666,
"TT_Math/mode_1": 0.5166666666666667,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.3333333333333333,
"TT_Others/mode_1": 0.3333333333333333,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.75,
"TT_Science/mode_1": 0.75,
"completion_length": 406.078125,
"completion_length/mode_0": 416.359375,
"completion_length/mode_1": 395.796875,
"epoch": 0.11737943585077343,
"format_confidence": 0.5,
"grad_norm": 0.5631128640852374,
"grounded_proportion": 0.5,
"kl": 0.038818359375,
"learning_rate": 8.835304822565969e-07,
"loss": 0.0016,
"over_lengthy_sequences": 0.0,
"reward": 1.48828125,
"reward_std": 0.26944732666015625,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.48828125,
"step": 129
},
{
"TT_Chart/mode_0": 0.75,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.125,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.375,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.4375,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.6,
"TT_Math/mode_1": 0.6,
"TT_OCR/mode_0": 0.75,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.3333333333333333,
"TT_Others/mode_1": 0.3333333333333333,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.08333333333333333,
"completion_length": 353.71484375,
"completion_length/mode_0": 367.8984375,
"completion_length/mode_1": 339.53125,
"epoch": 0.11828935395814377,
"format_confidence": 0.5,
"grad_norm": 1.1261868640139805,
"grounded_proportion": 0.5,
"kl": 0.032470703125,
"learning_rate": 8.826205641492264e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.4453125,
"reward_std": 0.2877512276172638,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4453125,
"step": 130
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 0.4166666666666667,
"TT_Counting/mode_1": 0.3333333333333333,
"TT_Detection/mode_0": 0.375,
"TT_Detection/mode_1": 0.625,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.46875,
"TT_Math/mode_1": 0.40625,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.75,
"TT_Science/mode_1": 1.0,
"completion_length": 301.109375,
"completion_length/mode_0": 332.0390625,
"completion_length/mode_1": 270.1796875,
"epoch": 0.1191992720655141,
"format_confidence": 0.5,
"grad_norm": 1.3174322812929569,
"grounded_proportion": 0.5,
"kl": 0.05322265625,
"learning_rate": 8.817106460418562e-07,
"loss": 0.0021,
"over_lengthy_sequences": 0.0,
"reward": 1.3671875,
"reward_std": 0.23277445137500763,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3671875,
"step": 131
},
{
"TT_Chart/mode_0": 0.39285714285714285,
"TT_Chart/mode_1": 0.35714285714285715,
"TT_Counting/mode_0": 1.0,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.375,
"TT_Detection/mode_1": 0.625,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5555555555555556,
"TT_Math/mode_1": 0.5277777777777778,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.16666666666666666,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 1.0,
"TT_Science/mode_1": 0.75,
"completion_length": 325.77734375,
"completion_length/mode_0": 339.9296875,
"completion_length/mode_1": 311.625,
"epoch": 0.12010919017288443,
"format_confidence": 0.5,
"grad_norm": 0.7069147924688921,
"grounded_proportion": 0.5,
"kl": 0.022705078125,
"learning_rate": 8.808007279344859e-07,
"loss": 0.0009,
"over_lengthy_sequences": 0.0,
"reward": 1.453125,
"reward_std": 0.2563130855560303,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.453125,
"step": 132
},
{
"TT_Chart/mode_0": 1.0,
"TT_Chart/mode_1": 1.0,
"TT_Counting/mode_0": 0.4166666666666667,
"TT_Counting/mode_1": 0.3333333333333333,
"TT_Detection/mode_0": 0.16666666666666666,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.75,
"TT_Grounding/mode_1": 1.0,
"TT_Math/mode_0": 0.484375,
"TT_Math/mode_1": 0.328125,
"TT_OCR/mode_0": 0.125,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.5833333333333334,
"completion_length": 326.15234375,
"completion_length/mode_0": 331.0,
"completion_length/mode_1": 321.3046875,
"epoch": 0.12101910828025478,
"format_confidence": 0.5,
"grad_norm": 1.4281295890879266,
"grounded_proportion": 0.5,
"kl": 0.04443359375,
"learning_rate": 8.798908098271155e-07,
"loss": 0.0018,
"over_lengthy_sequences": 0.0,
"reward": 1.40234375,
"reward_std": 0.2764293849468231,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.40234375,
"step": 133
},
{
"TT_Chart/mode_0": 0.375,
"TT_Chart/mode_1": 0.125,
"TT_Counting/mode_0": 0.35,
"TT_Counting/mode_1": 0.2,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.125,
"TT_Grounding/mode_1": 0.125,
"TT_Math/mode_0": 0.5535714285714286,
"TT_Math/mode_1": 0.5714285714285714,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.25,
"completion_length": 311.26171875,
"completion_length/mode_0": 341.046875,
"completion_length/mode_1": 281.4765625,
"epoch": 0.12192902638762511,
"format_confidence": 0.5,
"grad_norm": 2.805065330097895,
"grounded_proportion": 0.5,
"kl": 0.0291748046875,
"learning_rate": 8.789808917197452e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.37109375,
"reward_std": 0.284593403339386,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.37109375,
"step": 134
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.6666666666666666,
"TT_Counting/mode_1": 0.6666666666666666,
"TT_Detection/mode_0": 0.75,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.125,
"TT_Grounding/mode_1": 0.125,
"TT_Math/mode_0": 0.5833333333333334,
"TT_Math/mode_1": 0.5,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.5,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 375.53125,
"completion_length/mode_0": 389.0234375,
"completion_length/mode_1": 362.0390625,
"epoch": 0.12283894449499545,
"format_confidence": 0.5,
"grad_norm": 0.7313698173703068,
"grounded_proportion": 0.5,
"kl": 0.0250244140625,
"learning_rate": 8.780709736123748e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.0,
"reward": 1.4296875,
"reward_std": 0.2764318287372589,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4296875,
"step": 135
},
{
"TT_Chart/mode_0": 0.75,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.55,
"TT_Counting/mode_1": 0.35,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.5,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.4,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.08333333333333333,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 378.30078125,
"completion_length/mode_0": 388.0859375,
"completion_length/mode_1": 368.515625,
"epoch": 0.12374886260236578,
"format_confidence": 0.5,
"grad_norm": 1.3879851931562597,
"grounded_proportion": 0.5,
"kl": 0.02783203125,
"learning_rate": 8.771610555050046e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.0,
"reward": 1.3515625,
"reward_std": 0.29143065214157104,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3515625,
"step": 136
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.75,
"TT_Counting/mode_1": 0.125,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.08333333333333333,
"TT_Grounding/mode_1": 0.16666666666666666,
"TT_Math/mode_0": 0.5625,
"TT_Math/mode_1": 0.6041666666666666,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.08333333333333333,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 366.40625,
"completion_length/mode_0": 374.453125,
"completion_length/mode_1": 358.359375,
"epoch": 0.12465878070973613,
"format_confidence": 0.5,
"grad_norm": 0.7633680054263763,
"grounded_proportion": 0.5,
"kl": 0.02880859375,
"learning_rate": 8.762511373976341e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.00390625,
"reward": 1.47265625,
"reward_std": 0.3259276747703552,
"rewards/format_reward": 0.98828125,
"rewards/general_task_reward": 0.484375,
"step": 137
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 1.0,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.5238095238095238,
"TT_Math/mode_1": 0.4880952380952381,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.5,
"TT_Others/mode_1": 0.5,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.0,
"completion_length": 412.8359375,
"completion_length/mode_0": 415.609375,
"completion_length/mode_1": 410.0625,
"epoch": 0.12556869881710647,
"format_confidence": 0.5,
"grad_norm": 5.3443264326908135,
"grounded_proportion": 0.5,
"kl": 0.026611328125,
"learning_rate": 8.753412192902638e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.0,
"reward": 1.421875,
"reward_std": 0.3915102481842041,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.421875,
"step": 138
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 1.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.5,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.42045454545454547,
"TT_Math/mode_1": 0.3181818181818182,
"TT_OCR/mode_0": 0.6666666666666666,
"TT_OCR/mode_1": 0.3333333333333333,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.0,
"completion_length": 423.390625,
"completion_length/mode_0": 448.96875,
"completion_length/mode_1": 397.8125,
"epoch": 0.1264786169244768,
"format_confidence": 0.5,
"grad_norm": 6.032568642838766,
"grounded_proportion": 0.5,
"kl": 0.033203125,
"learning_rate": 8.744313011828936e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.36328125,
"reward_std": 0.308400422334671,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.36328125,
"step": 139
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.3333333333333333,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.4,
"TT_Document/mode_1": 0.35,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.6875,
"TT_Math/mode_1": 0.703125,
"TT_OCR/mode_0": 0.6666666666666666,
"TT_OCR/mode_1": 0.3333333333333333,
"TT_Others/mode_0": 0.041666666666666664,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.0,
"completion_length": 343.56640625,
"completion_length/mode_0": 358.5234375,
"completion_length/mode_1": 328.609375,
"epoch": 0.12738853503184713,
"format_confidence": 0.5,
"grad_norm": 0.6882157506953025,
"grounded_proportion": 0.5,
"kl": 0.037353515625,
"learning_rate": 8.735213830755232e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.0,
"reward": 1.4375,
"reward_std": 0.19792133569717407,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4375,
"step": 140
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.125,
"TT_Detection/mode_0": 0.08333333333333333,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5476190476190477,
"TT_Math/mode_1": 0.5476190476190477,
"TT_OCR/mode_0": 0.6666666666666666,
"TT_OCR/mode_1": 0.3333333333333333,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.16666666666666666,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 1.0,
"TT_Science/mode_1": 1.0,
"completion_length": 348.94140625,
"completion_length/mode_0": 355.53125,
"completion_length/mode_1": 342.3515625,
"epoch": 0.12829845313921748,
"format_confidence": 0.5,
"grad_norm": 0.7339055309204625,
"grounded_proportion": 0.5,
"kl": 0.044189453125,
"learning_rate": 8.726114649681528e-07,
"loss": 0.0018,
"over_lengthy_sequences": 0.0,
"reward": 1.421875,
"reward_std": 0.2690690755844116,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.421875,
"step": 141
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.125,
"TT_Detection/mode_0": 0.375,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.4852941176470588,
"TT_Math/mode_1": 0.47058823529411764,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.16666666666666666,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.2,
"TT_Science/mode_1": 0.25,
"completion_length": 386.5390625,
"completion_length/mode_0": 398.9375,
"completion_length/mode_1": 374.140625,
"epoch": 0.1292083712465878,
"format_confidence": 0.5,
"grad_norm": 0.5546463094726118,
"grounded_proportion": 0.5,
"kl": 0.0262451171875,
"learning_rate": 8.717015468607825e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.0,
"reward": 1.33203125,
"reward_std": 0.27130940556526184,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.33203125,
"step": 142
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.35,
"TT_Counting/mode_0": 0.4,
"TT_Counting/mode_1": 0.2,
"TT_Detection/mode_0": 0.75,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.375,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.6041666666666666,
"TT_Math/mode_1": 0.5625,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.08333333333333333,
"TT_Others/mode_1": 0.16666666666666666,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.875,
"TT_Science/mode_1": 1.0,
"completion_length": 287.44921875,
"completion_length/mode_0": 305.828125,
"completion_length/mode_1": 269.0703125,
"epoch": 0.13011828935395814,
"format_confidence": 0.5,
"grad_norm": 0.889656894734258,
"grounded_proportion": 0.5,
"kl": 0.031982421875,
"learning_rate": 8.707916287534122e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.4296875,
"reward_std": 0.30077171325683594,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4296875,
"step": 143
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.2916666666666667,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.375,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.45454545454545453,
"TT_Math/mode_1": 0.45454545454545453,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.041666666666666664,
"TT_Others/mode_1": 0.2916666666666667,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.3333333333333333,
"TT_Science/mode_1": 0.08333333333333333,
"completion_length": 300.1171875,
"completion_length/mode_0": 312.078125,
"completion_length/mode_1": 288.15625,
"epoch": 0.13102820746132848,
"format_confidence": 0.5,
"grad_norm": 0.8824978684287549,
"grounded_proportion": 0.5,
"kl": 0.03515625,
"learning_rate": 8.698817106460417e-07,
"loss": 0.0014,
"over_lengthy_sequences": 0.0,
"reward": 1.31640625,
"reward_std": 0.30931398272514343,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.31640625,
"step": 144
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.125,
"TT_Counting/mode_0": 0.75,
"TT_Counting/mode_1": 0.4166666666666667,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.375,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.5735294117647058,
"TT_Math/mode_1": 0.5147058823529411,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.65,
"TT_Science/mode_1": 0.5,
"completion_length": 360.56640625,
"completion_length/mode_0": 382.359375,
"completion_length/mode_1": 338.7734375,
"epoch": 0.13193812556869883,
"format_confidence": 0.5,
"grad_norm": 0.7721949198133805,
"grounded_proportion": 0.5,
"kl": 0.0274658203125,
"learning_rate": 8.689717925386715e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.0,
"reward": 1.46875,
"reward_std": 0.33547264337539673,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.46875,
"step": 145
},
{
"TT_Chart/mode_0": 0.875,
"TT_Chart/mode_1": 0.75,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.4605263157894737,
"TT_Math/mode_1": 0.4473684210526316,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.125,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 344.33203125,
"completion_length/mode_0": 366.6171875,
"completion_length/mode_1": 322.046875,
"epoch": 0.13284804367606914,
"format_confidence": 0.5,
"grad_norm": 0.9909534863047289,
"grounded_proportion": 0.5,
"kl": 0.0238037109375,
"learning_rate": 8.680618744313012e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.0,
"reward": 1.421875,
"reward_std": 0.324045866727829,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.421875,
"step": 146
},
{
"TT_Chart/mode_0": 0.875,
"TT_Chart/mode_1": 0.75,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.08333333333333333,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.375,
"TT_Grounding/mode_1": 0.625,
"TT_Math/mode_0": 0.3472222222222222,
"TT_Math/mode_1": 0.375,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.3125,
"completion_length": 347.421875,
"completion_length/mode_0": 357.6640625,
"completion_length/mode_1": 337.1796875,
"epoch": 0.1337579617834395,
"format_confidence": 0.5,
"grad_norm": 0.720949141572624,
"grounded_proportion": 0.5,
"kl": 0.034423828125,
"learning_rate": 8.671519563239307e-07,
"loss": 0.0014,
"over_lengthy_sequences": 0.0,
"reward": 1.328125,
"reward_std": 0.2761722803115845,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.328125,
"step": 147
},
{
"TT_Chart/mode_0": 0.125,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 1.0,
"TT_Counting/mode_1": 1.0,
"TT_Detection/mode_0": 0.3333333333333333,
"TT_Detection/mode_1": 0.5833333333333334,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.6666666666666666,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.5681818181818182,
"TT_Math/mode_1": 0.5568181818181818,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.3125,
"completion_length": 340.26171875,
"completion_length/mode_0": 362.578125,
"completion_length/mode_1": 317.9453125,
"epoch": 0.13466787989080983,
"format_confidence": 0.5,
"grad_norm": 0.7763799064119764,
"grounded_proportion": 0.5,
"kl": 0.0400390625,
"learning_rate": 8.662420382165605e-07,
"loss": 0.0016,
"over_lengthy_sequences": 0.0,
"reward": 1.53515625,
"reward_std": 0.288013219833374,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.53515625,
"step": 148
},
{
"TT_Chart/mode_0": 1.0,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.6666666666666666,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.4583333333333333,
"TT_Math/mode_1": 0.4583333333333333,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.25,
"TT_Others/mode_1": 0.5,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 456.44140625,
"completion_length/mode_0": 481.53125,
"completion_length/mode_1": 431.3515625,
"epoch": 0.13557779799818018,
"format_confidence": 0.5,
"grad_norm": 0.6076513635609712,
"grounded_proportion": 0.5,
"kl": 0.03271484375,
"learning_rate": 8.653321201091901e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.40625,
"reward_std": 0.25460314750671387,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.40625,
"step": 149
},
{
"TT_Chart/mode_0": 0.35,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.3333333333333333,
"TT_Counting/mode_1": 0.08333333333333333,
"TT_Detection/mode_0": 0.3125,
"TT_Detection/mode_1": 0.3125,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.48333333333333334,
"TT_Math/mode_1": 0.5333333333333333,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.25,
"TT_Others/mode_1": 0.5,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.25,
"completion_length": 337.10546875,
"completion_length/mode_0": 345.125,
"completion_length/mode_1": 329.0859375,
"epoch": 0.1364877161055505,
"format_confidence": 0.5,
"grad_norm": 0.7513140959836652,
"grounded_proportion": 0.5,
"kl": 0.048583984375,
"learning_rate": 8.644222020018199e-07,
"loss": 0.0019,
"over_lengthy_sequences": 0.0,
"reward": 1.390625,
"reward_std": 0.2961748242378235,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.390625,
"step": 150
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5972222222222222,
"TT_Math/mode_1": 0.6388888888888888,
"TT_OCR/mode_0": 0.4375,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.25,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.25,
"completion_length": 315.36328125,
"completion_length/mode_0": 329.984375,
"completion_length/mode_1": 300.7421875,
"epoch": 0.13739763421292084,
"format_confidence": 0.5,
"grad_norm": 1.0275263126976402,
"grounded_proportion": 0.5,
"kl": 0.037841796875,
"learning_rate": 8.635122838944494e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.0,
"reward": 1.3984375,
"reward_std": 0.24172601103782654,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3984375,
"step": 151
},
{
"TT_Chart/mode_0": 0.08333333333333333,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.375,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 1.0,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.5,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.5131578947368421,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.625,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.5,
"completion_length": 313.54296875,
"completion_length/mode_0": 326.3203125,
"completion_length/mode_1": 300.765625,
"epoch": 0.13830755232029118,
"format_confidence": 0.5,
"grad_norm": 0.9977927113854368,
"grounded_proportion": 0.5,
"kl": 0.0299072265625,
"learning_rate": 8.626023657870791e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.421875,
"reward_std": 0.28182509541511536,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.421875,
"step": 152
},
{
"TT_Chart/mode_0": 0.375,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.6666666666666666,
"TT_Counting/mode_1": 0.5833333333333334,
"TT_Detection/mode_0": 0.16666666666666666,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.125,
"TT_Math/mode_0": 0.5833333333333334,
"TT_Math/mode_1": 0.5555555555555556,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.25,
"TT_Others/mode_1": 0.875,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.5,
"completion_length": 390.1796875,
"completion_length/mode_0": 406.2421875,
"completion_length/mode_1": 374.1171875,
"epoch": 0.1392174704276615,
"format_confidence": 0.5,
"grad_norm": 0.7680531984092493,
"grounded_proportion": 0.5,
"kl": 0.040771484375,
"learning_rate": 8.616924476797089e-07,
"loss": 0.0016,
"over_lengthy_sequences": 0.00390625,
"reward": 1.47265625,
"reward_std": 0.30024129152297974,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.4765625,
"step": 153
},
{
"TT_Chart/mode_0": 0.125,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 1.0,
"TT_Detection/mode_1": 1.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.6666666666666666,
"TT_Math/mode_1": 0.5694444444444444,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.25,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 256.26171875,
"completion_length/mode_0": 282.7109375,
"completion_length/mode_1": 229.8125,
"epoch": 0.14012738853503184,
"format_confidence": 0.5,
"grad_norm": 0.8865413491194238,
"grounded_proportion": 0.5,
"kl": 0.04150390625,
"learning_rate": 8.607825295723384e-07,
"loss": 0.0017,
"over_lengthy_sequences": 0.0,
"reward": 1.43359375,
"reward_std": 0.2194880098104477,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.43359375,
"step": 154
},
{
"TT_Chart/mode_0": 0.5416666666666666,
"TT_Chart/mode_1": 0.5416666666666666,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.625,
"TT_Math/mode_1": 0.575,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.125,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.375,
"completion_length": 367.86328125,
"completion_length/mode_0": 385.09375,
"completion_length/mode_1": 350.6328125,
"epoch": 0.1410373066424022,
"format_confidence": 0.5,
"grad_norm": 0.9428079277390882,
"grounded_proportion": 0.5,
"kl": 0.0299072265625,
"learning_rate": 8.598726114649681e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.00390625,
"reward": 1.49609375,
"reward_std": 0.32734215259552,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.5,
"step": 155
},
{
"TT_Chart/mode_0": 0.4,
"TT_Chart/mode_1": 0.35,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.375,
"TT_Grounding/mode_1": 0.6875,
"TT_Math/mode_0": 0.515625,
"TT_Math/mode_1": 0.484375,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.125,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.875,
"TT_Science/mode_1": 0.625,
"completion_length": 298.578125,
"completion_length/mode_0": 303.390625,
"completion_length/mode_1": 293.765625,
"epoch": 0.14194722474977253,
"format_confidence": 0.5,
"grad_norm": 2.2779976937162885,
"grounded_proportion": 0.5,
"kl": 0.06103515625,
"learning_rate": 8.589626933575978e-07,
"loss": 0.0024,
"over_lengthy_sequences": 0.0,
"reward": 1.453125,
"reward_std": 0.2784101665019989,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.453125,
"step": 156
},
{
"TT_Chart/mode_0": 0.125,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.125,
"TT_Document/mode_0": 0.625,
"TT_Document/mode_1": 0.625,
"TT_Grounding/mode_0": 0.375,
"TT_Grounding/mode_1": 0.6875,
"TT_Math/mode_0": 0.6578947368421053,
"TT_Math/mode_1": 0.631578947368421,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.25,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.4166666666666667,
"TT_Science/mode_1": 0.75,
"completion_length": 419.5,
"completion_length/mode_0": 432.984375,
"completion_length/mode_1": 406.015625,
"epoch": 0.14285714285714285,
"format_confidence": 0.5,
"grad_norm": 0.8454361433490853,
"grounded_proportion": 0.5,
"kl": 0.03466796875,
"learning_rate": 8.580527752502275e-07,
"loss": 0.0014,
"over_lengthy_sequences": 0.0,
"reward": 1.5234375,
"reward_std": 0.2661820948123932,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.5234375,
"step": 157
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.125,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 1.0,
"TT_Grounding/mode_1": 1.0,
"TT_Math/mode_0": 0.44047619047619047,
"TT_Math/mode_1": 0.4523809523809524,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 350.96484375,
"completion_length/mode_0": 365.6484375,
"completion_length/mode_1": 336.28125,
"epoch": 0.1437670609645132,
"format_confidence": 0.5,
"grad_norm": 0.41622087929958396,
"grounded_proportion": 0.5,
"kl": 0.03369140625,
"learning_rate": 8.57142857142857e-07,
"loss": 0.0014,
"over_lengthy_sequences": 0.0,
"reward": 1.32421875,
"reward_std": 0.14992907643318176,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.32421875,
"step": 158
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.1875,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.75,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.3,
"TT_Math/mode_0": 0.4,
"TT_Math/mode_1": 0.3333333333333333,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.16666666666666666,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 299.73828125,
"completion_length/mode_0": 326.0546875,
"completion_length/mode_1": 273.421875,
"epoch": 0.14467697907188354,
"format_confidence": 0.5,
"grad_norm": 1.0658096903762169,
"grounded_proportion": 0.5,
"kl": 0.04296875,
"learning_rate": 8.562329390354868e-07,
"loss": 0.0017,
"over_lengthy_sequences": 0.0,
"reward": 1.28515625,
"reward_std": 0.28972315788269043,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.28515625,
"step": 159
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.3333333333333333,
"TT_Counting/mode_1": 0.3333333333333333,
"TT_Detection/mode_0": 0.75,
"TT_Detection/mode_1": 1.0,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.4868421052631579,
"TT_Math/mode_1": 0.5131578947368421,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.16666666666666666,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.625,
"TT_Science/mode_1": 0.4375,
"completion_length": 399.140625,
"completion_length/mode_0": 402.296875,
"completion_length/mode_1": 395.984375,
"epoch": 0.14558689717925385,
"format_confidence": 0.5,
"grad_norm": 1.3202914229642293,
"grounded_proportion": 0.5,
"kl": 0.0294189453125,
"learning_rate": 8.553230209281165e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.47265625,
"reward_std": 0.2205488383769989,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.47265625,
"step": 160
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.16666666666666666,
"TT_Counting/mode_1": 0.4166666666666667,
"TT_Detection/mode_0": 0.5,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.625,
"TT_Math/mode_1": 0.5892857142857143,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.16666666666666666,
"TT_Others/mode_1": 0.4166666666666667,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.16666666666666666,
"TT_Science/mode_1": 0.08333333333333333,
"completion_length": 324.12109375,
"completion_length/mode_0": 342.9296875,
"completion_length/mode_1": 305.3125,
"epoch": 0.1464968152866242,
"format_confidence": 0.5,
"grad_norm": 0.9445520944835173,
"grounded_proportion": 0.5,
"kl": 0.040771484375,
"learning_rate": 8.54413102820746e-07,
"loss": 0.0016,
"over_lengthy_sequences": 0.00390625,
"reward": 1.390625,
"reward_std": 0.29116618633270264,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.39453125,
"step": 161
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.65,
"TT_Counting/mode_1": 0.65,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.125,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.36666666666666664,
"TT_Math/mode_1": 0.43333333333333335,
"TT_OCR/mode_0": 1.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.1875,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.16666666666666666,
"TT_Science/mode_1": 0.08333333333333333,
"completion_length": 336.70703125,
"completion_length/mode_0": 345.09375,
"completion_length/mode_1": 328.3203125,
"epoch": 0.14740673339399454,
"format_confidence": 0.5,
"grad_norm": 1.053194115355607,
"grounded_proportion": 0.5,
"kl": 0.043701171875,
"learning_rate": 8.535031847133758e-07,
"loss": 0.0017,
"over_lengthy_sequences": 0.0,
"reward": 1.3984375,
"reward_std": 0.18345540761947632,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.40234375,
"step": 162
},
{
"TT_Chart/mode_0": 0.16666666666666666,
"TT_Chart/mode_1": 0.3333333333333333,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.125,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.5714285714285714,
"TT_Math/mode_1": 0.5833333333333334,
"TT_OCR/mode_0": 0.125,
"TT_OCR/mode_1": 0.125,
"TT_Others/mode_0": 0.5833333333333334,
"TT_Others/mode_1": 0.4166666666666667,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.16666666666666666,
"TT_Science/mode_1": 0.08333333333333333,
"completion_length": 394.390625,
"completion_length/mode_0": 414.21875,
"completion_length/mode_1": 374.5625,
"epoch": 0.1483166515013649,
"format_confidence": 0.5,
"grad_norm": 0.949326413337948,
"grounded_proportion": 0.5,
"kl": 0.03515625,
"learning_rate": 8.525932666060054e-07,
"loss": 0.0014,
"over_lengthy_sequences": 0.0,
"reward": 1.4765625,
"reward_std": 0.2801200747489929,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4765625,
"step": 163
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.4166666666666667,
"TT_Counting/mode_1": 0.4166666666666667,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.375,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5375,
"TT_Math/mode_1": 0.575,
"TT_OCR/mode_0": 0.25,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.125,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.16666666666666666,
"TT_Science/mode_1": 0.08333333333333333,
"completion_length": 326.171875,
"completion_length/mode_0": 345.1796875,
"completion_length/mode_1": 307.1640625,
"epoch": 0.1492265696087352,
"format_confidence": 0.5,
"grad_norm": 1.0393763681729113,
"grounded_proportion": 0.5,
"kl": 0.042236328125,
"learning_rate": 8.516833484986351e-07,
"loss": 0.0017,
"over_lengthy_sequences": 0.0,
"reward": 1.4140625,
"reward_std": 0.2548676133155823,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4140625,
"step": 164
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.08333333333333333,
"TT_Detection/mode_0": 0.4166666666666667,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.375,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.6166666666666667,
"TT_Math/mode_1": 0.5333333333333333,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.125,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.25,
"completion_length": 306.5078125,
"completion_length/mode_0": 308.4765625,
"completion_length/mode_1": 304.5390625,
"epoch": 0.15013648771610555,
"format_confidence": 0.5,
"grad_norm": 1.4513886148665085,
"grounded_proportion": 0.5,
"kl": 0.039306640625,
"learning_rate": 8.507734303912647e-07,
"loss": 0.0016,
"over_lengthy_sequences": 0.0,
"reward": 1.45703125,
"reward_std": 0.3153514266014099,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.45703125,
"step": 165
},
{
"TT_Chart/mode_0": 0.625,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.08333333333333333,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.16666666666666666,
"TT_Math/mode_0": 0.36538461538461536,
"TT_Math/mode_1": 0.40384615384615385,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.16666666666666666,
"TT_Others/mode_1": 0.3333333333333333,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.08333333333333333,
"TT_Science/mode_1": 0.16666666666666666,
"completion_length": 286.8515625,
"completion_length/mode_0": 297.140625,
"completion_length/mode_1": 276.5625,
"epoch": 0.1510464058234759,
"format_confidence": 0.5,
"grad_norm": 1.232171973166691,
"grounded_proportion": 0.5,
"kl": 0.0673828125,
"learning_rate": 8.498635122838944e-07,
"loss": 0.0027,
"over_lengthy_sequences": 0.0,
"reward": 1.2578125,
"reward_std": 0.3183930218219757,
"rewards/format_reward": 0.98828125,
"rewards/general_task_reward": 0.26953125,
"step": 166
},
{
"TT_Chart/mode_0": 0.75,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.4375,
"TT_Counting/mode_1": 0.3125,
"TT_Detection/mode_0": 0.4166666666666667,
"TT_Detection/mode_1": 0.4166666666666667,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5833333333333334,
"TT_Math/mode_1": 0.5277777777777778,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.0,
"TT_Puzzle/mode_1": 0.0,
"TT_Science/mode_0": 0.08333333333333333,
"TT_Science/mode_1": 0.16666666666666666,
"completion_length": 284.21484375,
"completion_length/mode_0": 311.2109375,
"completion_length/mode_1": 257.21875,
"epoch": 0.15195632393084624,
"format_confidence": 0.5,
"grad_norm": 0.9435576850130559,
"grounded_proportion": 0.5,
"kl": 0.050048828125,
"learning_rate": 8.489535941765242e-07,
"loss": 0.002,
"over_lengthy_sequences": 0.0,
"reward": 1.4140625,
"reward_std": 0.2546031177043915,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4140625,
"step": 167
},
{
"TT_Chart/mode_0": 0.08333333333333333,
"TT_Chart/mode_1": 0.08333333333333333,
"TT_Counting/mode_0": 0.4375,
"TT_Counting/mode_1": 0.3125,
"TT_Detection/mode_0": 0.75,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.3977272727272727,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.125,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 422.70703125,
"completion_length/mode_0": 433.296875,
"completion_length/mode_1": 412.1171875,
"epoch": 0.15286624203821655,
"format_confidence": 0.5,
"grad_norm": 0.6448597730106973,
"grounded_proportion": 0.5,
"kl": 0.037109375,
"learning_rate": 8.480436760691537e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.0,
"reward": 1.37109375,
"reward_std": 0.27813929319381714,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.37109375,
"step": 168
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.75,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.4375,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.609375,
"TT_Math/mode_1": 0.578125,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.08333333333333333,
"TT_Others/mode_1": 0.08333333333333333,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 337.109375,
"completion_length/mode_0": 346.390625,
"completion_length/mode_1": 327.828125,
"epoch": 0.1537761601455869,
"format_confidence": 0.5,
"grad_norm": 0.7151475312153581,
"grounded_proportion": 0.5,
"kl": 0.044921875,
"learning_rate": 8.471337579617834e-07,
"loss": 0.0018,
"over_lengthy_sequences": 0.0,
"reward": 1.39453125,
"reward_std": 0.229889914393425,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.39453125,
"step": 169
},
{
"TT_Chart/mode_0": 0.375,
"TT_Chart/mode_1": 0.1875,
"TT_Counting/mode_0": 1.0,
"TT_Counting/mode_1": 1.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.28125,
"TT_Math/mode_1": 0.296875,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.3,
"completion_length": 304.55078125,
"completion_length/mode_0": 310.8515625,
"completion_length/mode_1": 298.25,
"epoch": 0.15468607825295724,
"format_confidence": 0.5,
"grad_norm": 0.5636664720948796,
"grounded_proportion": 0.5,
"kl": 0.03173828125,
"learning_rate": 8.462238398544131e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.30078125,
"reward_std": 0.17939773201942444,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.30078125,
"step": 170
},
{
"TT_Chart/mode_0": 1.0,
"TT_Chart/mode_1": 1.0,
"TT_Counting/mode_0": 0.75,
"TT_Counting/mode_1": 0.625,
"TT_Detection/mode_0": 0.5,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 0.125,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.5555555555555556,
"TT_Math/mode_1": 0.5555555555555556,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.5833333333333334,
"TT_Science/mode_1": 0.5,
"completion_length": 359.69140625,
"completion_length/mode_0": 369.3984375,
"completion_length/mode_1": 349.984375,
"epoch": 0.15559599636032756,
"format_confidence": 0.5,
"grad_norm": 1.0708410982269836,
"grounded_proportion": 0.5,
"kl": 0.033935546875,
"learning_rate": 8.453139217470428e-07,
"loss": 0.0014,
"over_lengthy_sequences": 0.0,
"reward": 1.52734375,
"reward_std": 0.23448191583156586,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.52734375,
"step": 171
},
{
"TT_Chart/mode_0": 0.4166666666666667,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.125,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.4852941176470588,
"TT_Math/mode_1": 0.29411764705882354,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.9166666666666666,
"TT_Science/mode_1": 0.8333333333333334,
"completion_length": 343.12109375,
"completion_length/mode_0": 352.09375,
"completion_length/mode_1": 334.1484375,
"epoch": 0.1565059144676979,
"format_confidence": 0.5,
"grad_norm": 0.9973888113787986,
"grounded_proportion": 0.5,
"kl": 0.046142578125,
"learning_rate": 8.444040036396723e-07,
"loss": 0.0018,
"over_lengthy_sequences": 0.0,
"reward": 1.3828125,
"reward_std": 0.2741939425468445,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3828125,
"step": 172
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 1.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.125,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.5096153846153846,
"TT_Math/mode_1": 0.5288461538461539,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 1.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.5,
"completion_length": 492.6171875,
"completion_length/mode_0": 512.953125,
"completion_length/mode_1": 472.28125,
"epoch": 0.15741583257506825,
"format_confidence": 0.5,
"grad_norm": 0.8266362533405296,
"grounded_proportion": 0.5,
"kl": 0.02978515625,
"learning_rate": 8.434940855323021e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.48046875,
"reward_std": 0.3358907699584961,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.484375,
"step": 173
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.6547619047619048,
"TT_Math/mode_1": 0.6190476190476191,
"TT_OCR/mode_0": 0.75,
"TT_OCR/mode_1": 0.25,
"TT_Others/mode_0": 1.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.0,
"completion_length": 374.87890625,
"completion_length/mode_0": 395.5234375,
"completion_length/mode_1": 354.234375,
"epoch": 0.1583257506824386,
"format_confidence": 0.5,
"grad_norm": 0.6723817621947202,
"grounded_proportion": 0.5,
"kl": 0.0294189453125,
"learning_rate": 8.425841674249318e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.50390625,
"reward_std": 0.2964407503604889,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.50390625,
"step": 174
},
{
"TT_Chart/mode_0": 1.0,
"TT_Chart/mode_1": 1.0,
"TT_Counting/mode_0": 0.5833333333333334,
"TT_Counting/mode_1": 0.6666666666666666,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.65,
"TT_Math/mode_1": 0.575,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.125,
"completion_length": 386.2890625,
"completion_length/mode_0": 403.7734375,
"completion_length/mode_1": 368.8046875,
"epoch": 0.1592356687898089,
"format_confidence": 0.5,
"grad_norm": 0.6645566999099587,
"grounded_proportion": 0.5,
"kl": 0.0289306640625,
"learning_rate": 8.416742493175613e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.5546875,
"reward_std": 0.2982693314552307,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.5546875,
"step": 175
},
{
"TT_Chart/mode_0": 0.6666666666666666,
"TT_Chart/mode_1": 0.6666666666666666,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.625,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.5952380952380952,
"TT_Math/mode_1": 0.5952380952380952,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.125,
"completion_length": 376.30859375,
"completion_length/mode_0": 382.4453125,
"completion_length/mode_1": 370.171875,
"epoch": 0.16014558689717925,
"format_confidence": 0.5,
"grad_norm": 0.7597524598153307,
"grounded_proportion": 0.5,
"kl": 0.02978515625,
"learning_rate": 8.407643312101911e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.5078125,
"reward_std": 0.23277443647384644,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.5078125,
"step": 176
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.5,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0625,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.7142857142857143,
"TT_Math/mode_1": 0.6607142857142857,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.25,
"completion_length": 353.76953125,
"completion_length/mode_0": 389.421875,
"completion_length/mode_1": 318.1171875,
"epoch": 0.1610555050045496,
"format_confidence": 0.5,
"grad_norm": 1.1293887912044984,
"grounded_proportion": 0.5,
"kl": 0.05078125,
"learning_rate": 8.398544131028207e-07,
"loss": 0.002,
"over_lengthy_sequences": 0.0,
"reward": 1.3828125,
"reward_std": 0.2761722505092621,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3828125,
"step": 177
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.7,
"TT_Counting/mode_1": 0.7,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.125,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.6022727272727273,
"TT_Math/mode_1": 0.5681818181818182,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.0,
"completion_length": 306.37109375,
"completion_length/mode_0": 315.796875,
"completion_length/mode_1": 296.9453125,
"epoch": 0.16196542311191992,
"format_confidence": 0.5,
"grad_norm": 0.7974369345469358,
"grounded_proportion": 0.5,
"kl": 0.033447265625,
"learning_rate": 8.389444949954503e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.5234375,
"reward_std": 0.2797393798828125,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.5234375,
"step": 178
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.75,
"TT_Counting/mode_1": 0.625,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.35,
"TT_Math/mode_0": 0.5441176470588235,
"TT_Math/mode_1": 0.47058823529411764,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.25,
"TT_Others/mode_1": 0.08333333333333333,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.375,
"completion_length": 322.7109375,
"completion_length/mode_0": 335.171875,
"completion_length/mode_1": 310.25,
"epoch": 0.16287534121929026,
"format_confidence": 0.5,
"grad_norm": 0.8987778065024237,
"grounded_proportion": 0.5,
"kl": 0.034912109375,
"learning_rate": 8.3803457688808e-07,
"loss": 0.0014,
"over_lengthy_sequences": 0.0,
"reward": 1.41796875,
"reward_std": 0.2861511707305908,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.41796875,
"step": 179
},
{
"TT_Chart/mode_0": 0.25,
"TT_Chart/mode_1": 0.16666666666666666,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5263157894736842,
"TT_Math/mode_1": 0.5394736842105263,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.15,
"TT_Others/mode_1": 0.35,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.375,
"completion_length": 348.140625,
"completion_length/mode_0": 359.0703125,
"completion_length/mode_1": 337.2109375,
"epoch": 0.1637852593266606,
"format_confidence": 0.5,
"grad_norm": 0.7343429231062427,
"grounded_proportion": 0.5,
"kl": 0.03515625,
"learning_rate": 8.371246587807097e-07,
"loss": 0.0014,
"over_lengthy_sequences": 0.0,
"reward": 1.41015625,
"reward_std": 0.2685386538505554,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.41015625,
"step": 180
},
{
"TT_Chart/mode_0": 0.75,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.5833333333333334,
"TT_Detection/mode_0": 0.5,
"TT_Detection/mode_1": 0.875,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.5,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5921052631578947,
"TT_Math/mode_1": 0.5131578947368421,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.75,
"TT_Others/mode_1": 0.6875,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.375,
"completion_length": 364.34765625,
"completion_length/mode_0": 393.75,
"completion_length/mode_1": 334.9453125,
"epoch": 0.16469517743403095,
"format_confidence": 0.5,
"grad_norm": 1.3940683409897012,
"grounded_proportion": 0.5,
"kl": 0.024658203125,
"learning_rate": 8.362147406733395e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.0,
"reward": 1.546875,
"reward_std": 0.37559884786605835,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.546875,
"step": 181
},
{
"TT_Chart/mode_0": 0.15,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.75,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.125,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.5,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5125,
"TT_Math/mode_1": 0.475,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.75,
"TT_Science/mode_1": 0.5,
"completion_length": 342.953125,
"completion_length/mode_0": 368.9765625,
"completion_length/mode_1": 316.9296875,
"epoch": 0.16560509554140126,
"format_confidence": 0.5,
"grad_norm": 0.7261784826800016,
"grounded_proportion": 0.5,
"kl": 0.0245361328125,
"learning_rate": 8.35304822565969e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.0,
"reward": 1.41015625,
"reward_std": 0.2590813636779785,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.41015625,
"step": 182
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.08333333333333333,
"TT_Counting/mode_0": 0.5,
"TT_Counting/mode_1": 0.75,
"TT_Detection/mode_0": 0.5833333333333334,
"TT_Detection/mode_1": 0.5833333333333334,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.38235294117647056,
"TT_Math/mode_1": 0.39705882352941174,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.375,
"TT_Others/mode_0": 0.125,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.16666666666666666,
"TT_Science/mode_1": 0.16666666666666666,
"completion_length": 321.29296875,
"completion_length/mode_0": 325.5390625,
"completion_length/mode_1": 317.046875,
"epoch": 0.1665150136487716,
"format_confidence": 0.5,
"grad_norm": 0.690692859788369,
"grounded_proportion": 0.5,
"kl": 0.02587890625,
"learning_rate": 8.343949044585987e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.00390625,
"reward": 1.31640625,
"reward_std": 0.26170387864112854,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.3203125,
"step": 183
},
{
"TT_Chart/mode_0": 0.6666666666666666,
"TT_Chart/mode_1": 0.6666666666666666,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.6666666666666666,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.625,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.5,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.6029411764705882,
"TT_Math/mode_1": 0.5147058823529411,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.375,
"TT_Others/mode_0": 0.125,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.5,
"completion_length": 299.9453125,
"completion_length/mode_0": 306.9921875,
"completion_length/mode_1": 292.8984375,
"epoch": 0.16742493175614195,
"format_confidence": 0.5,
"grad_norm": 0.943115785852982,
"grounded_proportion": 0.5,
"kl": 0.033447265625,
"learning_rate": 8.334849863512284e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.53125,
"reward_std": 0.30076679587364197,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.53125,
"step": 184
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.35,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.125,
"TT_Grounding/mode_1": 0.375,
"TT_Math/mode_0": 0.5694444444444444,
"TT_Math/mode_1": 0.5416666666666666,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.375,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.125,
"completion_length": 309.7265625,
"completion_length/mode_0": 322.5703125,
"completion_length/mode_1": 296.8828125,
"epoch": 0.16833484986351227,
"format_confidence": 0.5,
"grad_norm": 0.8074492550195393,
"grounded_proportion": 0.5,
"kl": 0.03271484375,
"learning_rate": 8.32575068243858e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.38671875,
"reward_std": 0.28630331158638,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.38671875,
"step": 185
},
{
"TT_Chart/mode_0": 0.6666666666666666,
"TT_Chart/mode_1": 0.8333333333333334,
"TT_Counting/mode_0": 0.08333333333333333,
"TT_Counting/mode_1": 0.16666666666666666,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.25,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.515625,
"TT_Math/mode_1": 0.484375,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.375,
"TT_Others/mode_0": 0.25,
"TT_Others/mode_1": 0.08333333333333333,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.125,
"TT_Science/mode_1": 0.25,
"completion_length": 353.81640625,
"completion_length/mode_0": 383.046875,
"completion_length/mode_1": 324.5859375,
"epoch": 0.16924476797088261,
"format_confidence": 0.5,
"grad_norm": 3.2195277238429063,
"grounded_proportion": 0.5,
"kl": 0.035400390625,
"learning_rate": 8.316651501364876e-07,
"loss": 0.0014,
"over_lengthy_sequences": 0.0,
"reward": 1.39453125,
"reward_std": 0.30312827229499817,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.39453125,
"step": 186
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.25,
"TT_Counting/mode_0": 0.3333333333333333,
"TT_Counting/mode_1": 0.75,
"TT_Detection/mode_0": 0.625,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.125,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.35,
"TT_Grounding/mode_1": 0.45,
"TT_Math/mode_0": 0.578125,
"TT_Math/mode_1": 0.5,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.375,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.875,
"completion_length": 340.77734375,
"completion_length/mode_0": 350.6328125,
"completion_length/mode_1": 330.921875,
"epoch": 0.17015468607825296,
"format_confidence": 0.5,
"grad_norm": 1.0584895132267007,
"grounded_proportion": 0.5,
"kl": 0.0311279296875,
"learning_rate": 8.307552320291174e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.00390625,
"reward": 1.4609375,
"reward_std": 0.35466182231903076,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.46484375,
"step": 187
},
{
"TT_Chart/mode_0": 0.1875,
"TT_Chart/mode_1": 0.1875,
"TT_Counting/mode_0": 0.8125,
"TT_Counting/mode_1": 0.625,
"TT_Detection/mode_0": 0.625,
"TT_Detection/mode_1": 0.25,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.35,
"TT_Grounding/mode_1": 0.45,
"TT_Math/mode_0": 0.546875,
"TT_Math/mode_1": 0.453125,
"TT_OCR/mode_0": 1.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.6,
"TT_Science/mode_1": 0.45,
"completion_length": 340.80859375,
"completion_length/mode_0": 366.453125,
"completion_length/mode_1": 315.1640625,
"epoch": 0.1710646041856233,
"format_confidence": 0.5,
"grad_norm": 1.7060635808674018,
"grounded_proportion": 0.5,
"kl": 0.03271484375,
"learning_rate": 8.298453139217471e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.49609375,
"reward_std": 0.2823604345321655,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.49609375,
"step": 188
},
{
"TT_Chart/mode_0": 0.5,
"TT_Chart/mode_1": 0.45,
"TT_Counting/mode_0": 0.125,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.5833333333333334,
"TT_Grounding/mode_1": 0.5833333333333334,
"TT_Math/mode_0": 0.618421052631579,
"TT_Math/mode_1": 0.5789473684210527,
"TT_OCR/mode_0": 1.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 1.0,
"TT_Others/mode_1": 1.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.25,
"completion_length": 442.46875,
"completion_length/mode_0": 476.4296875,
"completion_length/mode_1": 408.5078125,
"epoch": 0.17197452229299362,
"format_confidence": 0.5,
"grad_norm": 1.1713295543987994,
"grounded_proportion": 0.5,
"kl": 0.0224609375,
"learning_rate": 8.289353958143766e-07,
"loss": 0.0009,
"over_lengthy_sequences": 0.0,
"reward": 1.53515625,
"reward_std": 0.30076926946640015,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.53515625,
"step": 189
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.375,
"TT_Counting/mode_1": 0.625,
"TT_Detection/mode_0": 0.3333333333333333,
"TT_Detection/mode_1": 0.3333333333333333,
"TT_Document/mode_0": 0.0,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.375,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.475,
"TT_OCR/mode_0": 1.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.05,
"TT_Others/mode_1": 0.15,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.55,
"TT_Science/mode_1": 0.4,
"completion_length": 260.03125,
"completion_length/mode_0": 268.5,
"completion_length/mode_1": 251.5625,
"epoch": 0.17288444040036396,
"format_confidence": 0.5,
"grad_norm": 1.7958755613959498,
"grounded_proportion": 0.5,
"kl": 0.03857421875,
"learning_rate": 8.280254777070064e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.0,
"reward": 1.32421875,
"reward_std": 0.2951115369796753,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.32421875,
"step": 190
},
{
"TT_Chart/mode_0": 0.375,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.0,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.25,
"TT_Math/mode_0": 0.5125,
"TT_Math/mode_1": 0.475,
"TT_OCR/mode_0": 0.875,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.25,
"TT_Science/mode_1": 0.25,
"completion_length": 342.26171875,
"completion_length/mode_0": 358.6328125,
"completion_length/mode_1": 325.890625,
"epoch": 0.1737943585077343,
"format_confidence": 0.5,
"grad_norm": 1.119909562752138,
"grounded_proportion": 0.5,
"kl": 0.036376953125,
"learning_rate": 8.27115559599636e-07,
"loss": 0.0015,
"over_lengthy_sequences": 0.0,
"reward": 1.421875,
"reward_std": 0.29906177520751953,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.421875,
"step": 191
},
{
"TT_Chart/mode_0": 0.375,
"TT_Chart/mode_1": 0.375,
"TT_Counting/mode_0": 0.4166666666666667,
"TT_Counting/mode_1": 0.25,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.75,
"TT_Math/mode_0": 0.4722222222222222,
"TT_Math/mode_1": 0.4305555555555556,
"TT_OCR/mode_0": 0.625,
"TT_OCR/mode_1": 0.125,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.5,
"completion_length": 397.421875,
"completion_length/mode_0": 413.5703125,
"completion_length/mode_1": 381.2734375,
"epoch": 0.17470427661510465,
"format_confidence": 0.5,
"grad_norm": 1.2286906019231645,
"grounded_proportion": 0.5,
"kl": 0.0311279296875,
"learning_rate": 8.262056414922656e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.00390625,
"reward": 1.41015625,
"reward_std": 0.32666200399398804,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.4140625,
"step": 192
},
{
"TT_Chart/mode_0": 0.35,
"TT_Chart/mode_1": 0.5,
"TT_Counting/mode_0": 0.625,
"TT_Counting/mode_1": 0.375,
"TT_Detection/mode_0": 0.6666666666666666,
"TT_Detection/mode_1": 0.6666666666666666,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.6666666666666666,
"TT_Grounding/mode_1": 0.6666666666666666,
"TT_Math/mode_0": 0.4,
"TT_Math/mode_1": 0.45,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.25,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.5,
"completion_length": 359.20703125,
"completion_length/mode_0": 382.375,
"completion_length/mode_1": 336.0390625,
"epoch": 0.17561419472247497,
"format_confidence": 0.5,
"grad_norm": 0.909737373388406,
"grounded_proportion": 0.5,
"kl": 0.033447265625,
"learning_rate": 8.252957233848953e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.4765625,
"reward_std": 0.3333781063556671,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.4765625,
"step": 193
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.75,
"TT_Counting/mode_1": 0.625,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 0.75,
"TT_Document/mode_1": 0.25,
"TT_Grounding/mode_0": 0.0,
"TT_Grounding/mode_1": 0.0,
"TT_Math/mode_0": 0.5227272727272727,
"TT_Math/mode_1": 0.4431818181818182,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.0,
"completion_length": 405.328125,
"completion_length/mode_0": 413.921875,
"completion_length/mode_1": 396.734375,
"epoch": 0.17652411282984531,
"format_confidence": 0.5,
"grad_norm": 1.000256041324093,
"grounded_proportion": 0.5,
"kl": 0.033935546875,
"learning_rate": 8.24385805277525e-07,
"loss": 0.0014,
"over_lengthy_sequences": 0.00390625,
"reward": 1.390625,
"reward_std": 0.31116873025894165,
"rewards/format_reward": 0.99609375,
"rewards/general_task_reward": 0.39453125,
"step": 194
},
{
"TT_Chart/mode_0": 0.041666666666666664,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.75,
"TT_Detection/mode_1": 0.75,
"TT_Document/mode_0": 0.5,
"TT_Document/mode_1": 0.0,
"TT_Grounding/mode_0": 0.625,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.4264705882352941,
"TT_Math/mode_1": 0.5735294117647058,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.375,
"TT_Science/mode_1": 0.125,
"completion_length": 341.66015625,
"completion_length/mode_0": 353.7109375,
"completion_length/mode_1": 329.609375,
"epoch": 0.17743403093721566,
"format_confidence": 0.5,
"grad_norm": 0.5359895637363522,
"grounded_proportion": 0.5,
"kl": 0.023681640625,
"learning_rate": 8.234758871701548e-07,
"loss": 0.0009,
"over_lengthy_sequences": 0.0,
"reward": 1.3515625,
"reward_std": 0.2563130855560303,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3515625,
"step": 195
},
{
"TT_Chart/mode_0": 0.4166666666666667,
"TT_Chart/mode_1": 0.16666666666666666,
"TT_Counting/mode_0": 0.6666666666666666,
"TT_Counting/mode_1": 0.6666666666666666,
"TT_Detection/mode_0": 0.0,
"TT_Detection/mode_1": 0.125,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.1875,
"TT_Grounding/mode_1": 0.1875,
"TT_Math/mode_0": 0.46875,
"TT_Math/mode_1": 0.453125,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 0.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 1.0,
"TT_Science/mode_1": 0.75,
"completion_length": 283.7734375,
"completion_length/mode_0": 304.7109375,
"completion_length/mode_1": 262.8359375,
"epoch": 0.17834394904458598,
"format_confidence": 0.5,
"grad_norm": 9.681747982191952,
"grounded_proportion": 0.5,
"kl": 0.0322265625,
"learning_rate": 8.225659690627843e-07,
"loss": 0.0013,
"over_lengthy_sequences": 0.0,
"reward": 1.3984375,
"reward_std": 0.28630968928337097,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3984375,
"step": 196
},
{
"TT_Chart/mode_0": 1.0,
"TT_Chart/mode_1": 1.0,
"TT_Counting/mode_0": 0.25,
"TT_Counting/mode_1": 0.5,
"TT_Detection/mode_0": 0.25,
"TT_Detection/mode_1": 0.16666666666666666,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 0.5,
"TT_Grounding/mode_0": 0.5,
"TT_Grounding/mode_1": 0.5,
"TT_Math/mode_0": 0.35714285714285715,
"TT_Math/mode_1": 0.30952380952380953,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.5,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.0,
"TT_Science/mode_1": 0.25,
"completion_length": 424.5859375,
"completion_length/mode_0": 437.5,
"completion_length/mode_1": 411.671875,
"epoch": 0.17925386715195632,
"format_confidence": 0.5,
"grad_norm": 0.5628841267736974,
"grounded_proportion": 0.5,
"kl": 0.0281982421875,
"learning_rate": 8.21656050955414e-07,
"loss": 0.0011,
"over_lengthy_sequences": 0.0,
"reward": 1.3515625,
"reward_std": 0.23250606656074524,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3515625,
"step": 197
},
{
"TT_Chart/mode_0": 0.08333333333333333,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.125,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.5,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.625,
"TT_Grounding/mode_1": 0.875,
"TT_Math/mode_0": 0.4722222222222222,
"TT_Math/mode_1": 0.4861111111111111,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.4166666666666667,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.5,
"TT_Science/mode_1": 0.0,
"completion_length": 304.98046875,
"completion_length/mode_0": 306.78125,
"completion_length/mode_1": 303.1796875,
"epoch": 0.18016378525932666,
"format_confidence": 0.5,
"grad_norm": 1.2573499359794245,
"grounded_proportion": 0.5,
"kl": 0.0247802734375,
"learning_rate": 8.207461328480437e-07,
"loss": 0.001,
"over_lengthy_sequences": 0.0,
"reward": 1.42578125,
"reward_std": 0.24461543560028076,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.42578125,
"step": 198
},
{
"TT_Chart/mode_0": 0.6666666666666666,
"TT_Chart/mode_1": 0.6666666666666666,
"TT_Counting/mode_0": 0.0,
"TT_Counting/mode_1": 0.0,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.625,
"TT_Document/mode_0": 1.0,
"TT_Document/mode_1": 1.0,
"TT_Grounding/mode_0": 0.16666666666666666,
"TT_Grounding/mode_1": 0.16666666666666666,
"TT_Math/mode_0": 0.5,
"TT_Math/mode_1": 0.5333333333333333,
"TT_OCR/mode_0": 0.5,
"TT_OCR/mode_1": 0.375,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.0,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.625,
"TT_Science/mode_1": 0.375,
"completion_length": 366.18359375,
"completion_length/mode_0": 374.296875,
"completion_length/mode_1": 358.0703125,
"epoch": 0.181073703366697,
"format_confidence": 0.5,
"grad_norm": 0.763912879776671,
"grounded_proportion": 0.5,
"kl": 0.0439453125,
"learning_rate": 8.198362147406733e-07,
"loss": 0.0018,
"over_lengthy_sequences": 0.0,
"reward": 1.45703125,
"reward_std": 0.3085215389728546,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.45703125,
"step": 199
},
{
"TT_Chart/mode_0": 0.0,
"TT_Chart/mode_1": 0.0,
"TT_Counting/mode_0": 0.4375,
"TT_Counting/mode_1": 0.4375,
"TT_Detection/mode_0": 0.125,
"TT_Detection/mode_1": 0.625,
"TT_Document/mode_0": 0.25,
"TT_Document/mode_1": 0.75,
"TT_Grounding/mode_0": 1.0,
"TT_Grounding/mode_1": 1.0,
"TT_Math/mode_0": 0.39285714285714285,
"TT_Math/mode_1": 0.40476190476190477,
"TT_OCR/mode_0": 0.0,
"TT_OCR/mode_1": 1.0,
"TT_Others/mode_0": 0.0,
"TT_Others/mode_1": 0.125,
"TT_Puzzle/mode_0": 0.75,
"TT_Puzzle/mode_1": 0.5,
"TT_Science/mode_0": 0.625,
"TT_Science/mode_1": 0.375,
"completion_length": 342.05859375,
"completion_length/mode_0": 350.3359375,
"completion_length/mode_1": 333.78125,
"epoch": 0.18198362147406733,
"format_confidence": 0.5,
"grad_norm": 1.010417481464837,
"grounded_proportion": 0.5,
"kl": 0.0294189453125,
"learning_rate": 8.189262966333029e-07,
"loss": 0.0012,
"over_lengthy_sequences": 0.0,
"reward": 1.3828125,
"reward_std": 0.2797393798828125,
"rewards/format_reward": 1.0,
"rewards/general_task_reward": 0.3828125,
"step": 200
}
],
"logging_steps": 1.0,
"max_steps": 1099,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}