Add files using upload-large-folder tool
Browse files- README.md +13 -3
- id2-10_0.2easy_0.3medium_0.5hard/README.md +12 -2
- id2-10_0.2easy_0.3medium_0.5hard/base/README.md +12 -2
- id2-10_0.2easy_0.3medium_0.5hard/rl/op11-14_uniform/README.md +12 -2
- id2-10_0.2easy_0.3medium_0.5hard/rl/op17-20_uniform/README.md +12 -2
- id2-10_0.2easy_0.3medium_0.5hard/rl/op7-10_uniform/README.md +12 -2
- id2-10_0.2easy_0.3medium_0.5hard/rl/op9-12_uniform/README.md +12 -2
- id2-10_0.475easy_0.475medium_0.05hard/README.md +12 -2
- id2-10_0.475easy_0.475medium_0.05hard/base/README.md +12 -2
- id2-10_0.475easy_0.475medium_0.05hard/rl/op11-14_uniform_process_strict/README.md +12 -2
- id2-10_0.4995easy_0.4995medium_0.001hard/README.md +12 -2
- id2-10_0.4995easy_0.4995medium_0.001hard/base/README.md +12 -2
- id2-10_0.4995easy_0.4995medium_0.001hard/rl/op11-14_uniform/README.md +12 -2
- id2-10_0.4995easy_0.4995medium_0.001hard/rl/op17-20_uniform/README.md +12 -2
- id2-10_0.4995easy_0.4995medium_0.001hard/rl/op7-10_uniform/README.md +12 -2
- id2-10_0.4995easy_0.4995medium_0.001hard/rl/op9-12_uniform/README.md +12 -2
- id2-10_0.5easy_0.3medium_0.2hard/README.md +12 -2
- id2-10_0.5easy_0.3medium_0.2hard/base/README.md +12 -2
- id2-10_0.5easy_0.3medium_0.2hard/rl/op11-14_uniform/README.md +12 -2
- id2-10_0.5easy_0.3medium_0.2hard/rl/op17-20_uniform/README.md +12 -2
- id2-10_0.5easy_0.3medium_0.2hard/rl/op7-10_uniform/README.md +12 -2
- id2-10_0.5easy_0.3medium_0.2hard/rl/op9-12_uniform/README.md +12 -2
README.md
CHANGED
|
@@ -38,6 +38,16 @@ tokenizer = AutoTokenizer.from_pretrained(repo_id, subfolder=subdir)
|
|
| 38 |
model = AutoModelForCausalLM.from_pretrained(repo_id, subfolder=subdir)
|
| 39 |
```
|
| 40 |
|
| 41 |
-
##
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
model = AutoModelForCausalLM.from_pretrained(repo_id, subfolder=subdir)
|
| 39 |
```
|
| 40 |
|
| 41 |
+
## Citation
|
| 42 |
+
|
| 43 |
+
```bibtex
|
| 44 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 45 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 46 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 47 |
+
year={2025},
|
| 48 |
+
eprint={2512.07783},
|
| 49 |
+
archivePrefix={arXiv},
|
| 50 |
+
primaryClass={cs.CL},
|
| 51 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 52 |
+
}
|
| 53 |
+
```
|
id2-10_0.2easy_0.3medium_0.5hard/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
20% easy, 30% medium, 50% hard base mixture. This directory contains the base model and the final RL checkpoints for the associated extrapolation experiments.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
20% easy, 30% medium, 50% hard base mixture. This directory contains the base model and the final RL checkpoints for the associated extrapolation experiments.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.2easy_0.3medium_0.5hard/base/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Base model used to initialize the RL runs under this setting.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Base model used to initialize the RL runs under this setting.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.2easy_0.3medium_0.5hard/rl/op11-14_uniform/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.2easy_0.3medium_0.5hard/rl/op17-20_uniform/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.2easy_0.3medium_0.5hard/rl/op7-10_uniform/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.2easy_0.3medium_0.5hard/rl/op9-12_uniform/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.475easy_0.475medium_0.05hard/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
47.5% easy, 47.5% medium, 5% hard base mixture. This directory contains the base model and the final RL checkpoints for the associated extrapolation experiments.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
47.5% easy, 47.5% medium, 5% hard base mixture. This directory contains the base model and the final RL checkpoints for the associated extrapolation experiments.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.475easy_0.475medium_0.05hard/base/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Base model used to initialize the RL runs under this setting.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Base model used to initialize the RL runs under this setting.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.475easy_0.475medium_0.05hard/rl/op11-14_uniform_process_strict/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.4995easy_0.4995medium_0.001hard/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
49.95% easy, 49.95% medium, 0.1% hard base mixture. This directory contains the base model and the final RL checkpoints for the associated extrapolation experiments.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
49.95% easy, 49.95% medium, 0.1% hard base mixture. This directory contains the base model and the final RL checkpoints for the associated extrapolation experiments.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.4995easy_0.4995medium_0.001hard/base/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Base model used to initialize the RL runs under this setting.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Base model used to initialize the RL runs under this setting.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.4995easy_0.4995medium_0.001hard/rl/op11-14_uniform/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.4995easy_0.4995medium_0.001hard/rl/op17-20_uniform/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.4995easy_0.4995medium_0.001hard/rl/op7-10_uniform/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.4995easy_0.4995medium_0.001hard/rl/op9-12_uniform/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.5easy_0.3medium_0.2hard/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
50% easy, 30% medium, 20% hard base mixture. This directory contains the base model and the final RL checkpoints for the associated extrapolation experiments.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
50% easy, 30% medium, 20% hard base mixture. This directory contains the base model and the final RL checkpoints for the associated extrapolation experiments.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.5easy_0.3medium_0.2hard/base/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Base model used to initialize the RL runs under this setting.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Base model used to initialize the RL runs under this setting.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.5easy_0.3medium_0.2hard/rl/op11-14_uniform/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.5easy_0.3medium_0.2hard/rl/op17-20_uniform/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.5easy_0.3medium_0.2hard/rl/op7-10_uniform/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|
id2-10_0.5easy_0.3medium_0.2hard/rl/op9-12_uniform/README.md
CHANGED
|
@@ -12,6 +12,16 @@ tags:
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
-
##
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Final RL checkpoint for this experiment variant.
|
| 14 |
|
| 15 |
+
## Citation
|
| 16 |
|
| 17 |
+
```bibtex
|
| 18 |
+
@misc{zhang2025interplaypretrainingmidtrainingrl,
|
| 19 |
+
title={On the Interplay of Pre-Training, Mid-Training, and RL on Reasoning Language Models},
|
| 20 |
+
author={Charlie Zhang and Graham Neubig and Xiang Yue},
|
| 21 |
+
year={2025},
|
| 22 |
+
eprint={2512.07783},
|
| 23 |
+
archivePrefix={arXiv},
|
| 24 |
+
primaryClass={cs.CL},
|
| 25 |
+
url={https://arxiv.org/abs/2512.07783},
|
| 26 |
+
}
|
| 27 |
+
```
|