hex-toolcall-base-v2
WIP derivative checkpoint of EssentialAI/rnj-1-instruct.
Baseline for tool-calling experiments.
Training Config
# Run-wide settings. These keys appear before any [table] header, so they
# belong to the root table.
# NOTE(review): presumably the total number of training steps - confirm
# against the consumer's schema.
max_steps = 140
# NOTE(review): presumably the maximum sequence length in tokens - confirm.
seq_len = 8192
# Model identifier for the run. This is the same name the description above
# cites as the checkpoint this one derives from.
[model]
name = "EssentialAI/rnj-1-instruct"
# Experiment-tracking project and run name.
[wandb]
project = "xml-tool-thinking"
# The run name encodes orchestrator.batch_size ("bs512") and
# orchestrator.rollouts_per_example ("rollouts16"); update it when those
# values change so the name stays accurate.
name = "hex-v6-bs512-rollouts16"
# Orchestrator settings. The parent table is declared first and its
# sub-tables follow, so the hierarchy reads top-down.
[orchestrator]
# The run name in [wandb] encodes these two values ("bs512", "rollouts16");
# keep it in sync when changing them.
batch_size = 512
rollouts_per_example = 16

# NOTE(review): presumably enables logging of rollout samples to W&B, with
# `interval` being the logging period in steps - confirm against the consumer.
[orchestrator.wandb.log_extras]
samples = true
interval = 1

# Generation parameters used when sampling rollouts.
# NOTE(review): max_tokens is presumably a per-response cap - confirm.
[orchestrator.sampling]
max_tokens = 512
temperature = 1.0
# One environment entry. `args` is written as a sub-table of this array
# element rather than an inline table, because it holds a long multi-line
# prompt; the parsed data is the same either way.
[[orchestrator.env]]
id = "hex_encode_xml"

[orchestrator.env.args]
max_turns = 5
max_chunk = 128
strict_format = true
# TOML performs no interpolation: {doc}, {tag_name} and {max_chunk_size} are
# literal text here and must be substituted by the consuming environment.
# NOTE(review): the placeholder is {max_chunk_size} while the argument above
# is named max_chunk - confirm the environment maps one to the other.
# NOTE(review): the format rules require every response to begin with
# [think], yet the final answer is to be ONLY the hex string; with
# strict_format = true, confirm the final turn is handled as intended.
# The closing delimiter stays on the last line so the string carries no
# trailing newline.
user_prompt = """Here is a document with semantic XML tags:
{doc}
Your task: Encode the content of the <{tag_name}> tag to hexadecimal.
You must encode in chunks of at most {max_chunk_size} characters at a time.
Available tools:
- get_tag_content: Get the target tag's text and length. No arguments.
- encode_chunk: Encode a character range to hex. Args: start (int), end (int)
<format_rules>
- Every response must begin with [think]
- After [/think], include your tool call
- No text outside of [think]...[/think] and <tool_call>...</tool_call>
</format_rules>
Tool format:
<tool_call>
<name>tool_name</name>
<param name="arg_name">value</param>
</tool_call>
When done, output ONLY the final hex string with no tool calls.
Example:
[think]I need to get the content first.[/think]
<tool_call>
<name>get_tag_content</name>
</tool_call>"""
# Trainer-side model options.
[trainer.model]
# NOTE(review): `ac` presumably configures activation checkpointing, with
# freq = 1 applying it at every layer - confirm against the trainer schema.
ac = { freq = 1 }
# Optimizer settings.
[trainer.optim]
lr = 1e-6
# NOTE(review): presumably the gradient-norm clipping threshold. 0.001 is
# very small, so confirm this value is intentional.
max_norm = 0.001
# Learning-rate schedule.
[trainer.scheduler]
type = "linear"
# warmup_steps + decay_steps = 60, which is less than the root max_steps of
# 140. NOTE(review): presumably the learning rate is held at `lr` for the
# remaining 80 steps - confirm the scheduler's behavior.
warmup_steps = 30
decay_steps = 30
# Written as an integer, while trainer.optim.lr is a float.
min_lr = 0
# Parallelism settings for the inference side.
[inference.parallel]
# NOTE(review): presumably the tensor-parallel degree (number of devices a
# model replica is sharded across) - confirm.
tp = 4
# Empty table: the section is declared but no keys are set.
# NOTE(review): presumably its presence enables checkpointing with default
# settings - confirm against the consumer.
[ckpt]
- Downloads last month: 25
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support