hex-toolcall-base-v2

WIP derivative checkpoint of EssentialAI/rnj-1-instruct.

Baseline for tool-calling experiments.

Training Config

# Root-table keys. They must stay above the first [table] header; placed any
# lower they would be parsed as members of whichever table precedes them.
max_steps = 140
seq_len = 8192

# Starting checkpoint. The model card text at the top of this file describes
# the run as a derivative of this same repository.
[model]
name = "EssentialAI/rnj-1-instruct"

# Weights & Biases project and run name.
# NOTE(review): the run name embeds "bs512" and "rollouts16", which mirror
# batch_size and rollouts_per_example under [orchestrator]; rename the run if
# either of those values changes.
[wandb]
project = "xml-tool-thinking"
name = "hex-v6-bs512-rollouts16"

# Orchestrator settings, declared parent-first so the section reads top-down.
# NOTE(review): presumably 512 rollouts per batch, 16 per prompt — confirm
# how the consumer combines these two values.
[orchestrator]
batch_size = 512
rollouts_per_example = 16

# Extra W&B logging for the orchestrator: sample logging is switched on.
# NOTE(review): interval = 1 presumably means "every step" — confirm the unit.
[orchestrator.wandb.log_extras]
samples = true
interval = 1

# Generation settings used when sampling rollouts.
[orchestrator.sampling]
max_tokens = 512
temperature = 1.0

# Environment entry for the orchestrator. `args` is written as a sub-table of
# this array element instead of an inline table: the prompt spans many lines,
# and an inline table that wraps is hard to read and fragile across parsers.
[[orchestrator.env]]
id = "hex_encode_xml"

[orchestrator.env.args]
max_turns = 5
max_chunk = 128
strict_format = true
# Prompt template. {doc}, {tag_name} and {max_chunk_size} are placeholders,
# presumably substituted by the environment at runtime — confirm.
# NOTE(review): the placeholder is {max_chunk_size} while the argument above is
# named max_chunk; verify the environment maps one onto the other.
# The string must stay byte-identical: it is the text the model is shown.
user_prompt = """Here is a document with semantic XML tags:

{doc}

Your task: Encode the content of the <{tag_name}> tag to hexadecimal.

You must encode in chunks of at most {max_chunk_size} characters at a time.

Available tools:
- get_tag_content: Get the target tag's text and length. No arguments.
- encode_chunk: Encode a character range to hex. Args: start (int), end (int)

<format_rules>
- Every response must begin with [think]
- After [/think], include your tool call
- No text outside of [think]...[/think] and <tool_call>...</tool_call>
</format_rules>

Tool format:
<tool_call>
<name>tool_name</name>
<param name="arg_name">value</param>
</tool_call>

When done, output ONLY the final hex string with no tool calls.

Example:
[think]I need to get the content first.[/think]
<tool_call>
<name>get_tag_content</name>
</tool_call>"""

# Sole trainer-side model setting: trainer.model.ac.freq.
# NOTE(review): "ac" presumably stands for activation checkpointing, with
# freq = 1 applying it to every layer — confirm against the consumer.
[trainer.model.ac]
freq = 1

# Optimizer settings for the trainer.
# NOTE(review): if max_norm is a gradient-norm clipping threshold, 0.001 is a
# very tight clip — confirm it is intentional and not a typo for a larger value.
[trainer.optim]
lr = 1e-6
max_norm = 0.001

# Linear learning-rate schedule with explicit warmup and decay lengths.
# NOTE(review): warmup_steps + decay_steps = 60, while the root max_steps is
# 140; presumably the remaining steps run at the full lr — confirm against the
# scheduler implementation.
[trainer.scheduler]
type = "linear"
warmup_steps = 30
decay_steps = 30
min_lr = 0

# Parallelism for the inference side.
# NOTE(review): "tp" presumably is the tensor-parallel degree (4 devices per
# replica) — confirm against the consumer's config schema.
[inference.parallel]
tp = 4

# Empty table: no keys are set under it; the config ends here and the lines
# that follow are model-card page text, not configuration.
# NOTE(review): presumably its presence turns checkpointing on with the
# consumer's defaults — confirm.
[ckpt]
Downloads last month
25
Safetensors
Model size
9B params
Tensor type
BF16
·
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Model tree for kalomaze/hex-toolcall-base-v2

Base model

EssentialAI/rnj-1
Finetuned
(5)
this model
Quantizations
1 model