rl_bs128_gs16_ruby-30 / bs128-gs16-ruby_rl_config.json
penfever's picture
Add files using upload-large-folder tool
8dfa464 verified
{
"job_name": "bs128-gs16-ruby",
"experiments_dir": "/pscratch/sd/p/penfever/OpenThoughts-Agent/experiments/bs128-gs16-ruby",
"cluster_name": "perlmutter",
"skyrl_entrypoint": "examples.terminal_bench.entrypoints.main_tbench",
"skyrl_hydra_args": [
"+terminal_bench_config=terminal_bench",
"trainer.strategy=fsdp2",
"trainer.algorithm.advantage_estimator=rloo_n",
"trainer.algorithm.use_kl_loss=false",
"trainer.algorithm.kl_loss_coef=0.0",
"trainer.algorithm.eps_clip_low=0.2",
"trainer.algorithm.eps_clip_high=0.2",
"trainer.algorithm.loss_reduction=token_mean",
"trainer.epochs=10",
"trainer.update_epochs_per_batch=1",
"trainer.train_batch_size=128",
"trainer.policy_mini_batch_size=128",
"trainer.eval_batch_size=128",
"trainer.micro_forward_batch_size_per_gpu=4",
"trainer.micro_train_batch_size_per_gpu=1",
"trainer.max_prompt_length=999999",
"trainer.eval_interval=999999",
"trainer.eval_before_train=false",
"trainer.ckpt_interval=999999",
"trainer.resume_mode=latest",
"trainer.hf_save_interval=5",
"++trainer.hf_hub_repo_id=laion/bs128-gs16-ruby",
"++trainer.hf_hub_private=false",
"++trainer.hf_hub_revision=main",
"++trainer.enable_db_registration=true",
"trainer.project_name=OpenThoughts-Agent",
"trainer.log_level=INFO",
"trainer.tracker_commit_each_step=true",
"trainer.run_name=bs128-gs16-ruby",
"trainer.ckpt_path=experiments/bs128-gs16-ruby/bs128-gs16-ruby/checkpoints",
"trainer.export_path=experiments/bs128-gs16-ruby/bs128-gs16-ruby/exports",
"trainer.policy.optimizer_config.lr=1e-5",
"trainer.policy.optimizer_config.weight_decay=0.0",
"trainer.policy.optimizer_config.adam_betas=[0.9,0.999]",
"trainer.policy.optimizer_config.max_grad_norm=10.0",
"trainer.policy.fsdp_config.cpu_offload=false",
"trainer.policy.fsdp_config.reshard_after_forward=false",
"trainer.policy.fsdp_config.fsdp_size=4",
"trainer.policy.model.path=/pscratch/sd/p/penfever/hub/models--laion--r2egym-nl2bash-stack-bugsseq/snapshots/1e2611c5bd7aec4479129ef497c504b77b8f6726",
"trainer.ref.fsdp_config.cpu_offload=false",
"trainer.ref.fsdp_config.reshard_after_forward=false",
"trainer.ref.fsdp_config.fsdp_size=4",
"trainer.placement.colocate_all=false",
"trainer.placement.policy_num_nodes=2",
"trainer.placement.ref_num_nodes=2",
"trainer.placement.policy_num_gpus_per_node=4",
"trainer.placement.ref_num_gpus_per_node=4",
"trainer.fully_async.max_staleness_steps=12",
"trainer.fully_async.num_parallel_generation_workers=768",
"generator.backend=vllm",
"generator.timeout_multiplier=2.0",
"generator.model_dtype=bfloat16",
"generator.inference_engine_tensor_parallel_size=1",
"generator.num_inference_engines=16",
"generator.n_samples_per_prompt=16",
"generator.eval_n_samples_per_prompt=16",
"generator.gpu_memory_utilization=0.9",
"generator.max_num_seqs=32",
"generator.max_num_batched_tokens=16384",
"generator.enable_prefix_caching=true",
"generator.enable_chunked_prefill=true",
"generator.run_engines_locally=true",
"generator.weight_sync_backend=nccl",
"generator.async_engine=true",
"generator.batched=false",
"generator.enable_http_endpoint=true",
"generator.enable_ray_prometheus_stats=false",
"generator.append_eos_token_after_stop_str_in_multi_turn=true",
"generator.max_turns=999999",
"generator.sampling_params.max_generate_length=8192",
"generator.sampling_params.temperature=1.0",
"generator.sampling_params.top_p=0.95",
"generator.sampling_params.top_k=-1",
"++generator.engine_init_kwargs={max_model_len: 32768, served_model_name: 1e2611c5bd7aec4479129ef497c504b77b8f6726}",
"data.train_data=[\"/pscratch/sd/p/penfever/tasks/exp_rpt_stack-ruby\"]",
"data.val_data=[\"/pscratch/sd/p/penfever/tasks/OpenThoughts-TB-dev\"]",
"+terminal_bench_config.trials_dir=experiments/bs128-gs16-ruby/bs128-gs16-ruby/trace_jobs",
"+terminal_bench_config.harbor.name=terminus-2",
"+terminal_bench_config.harbor.max_episodes=999999",
"+terminal_bench_config.harbor.enable_summarize=false",
"+terminal_bench_config.harbor.store_all_messages=true",
"+terminal_bench_config.harbor.enable_episode_logging=false",
"+terminal_bench_config.harbor.record_terminal_session=false",
"+terminal_bench_config.harbor.enable_pane_logging=false",
"+terminal_bench_config.harbor.strict_json_parser=true",
"+terminal_bench_config.harbor.interleaved_thinking=true",
"+terminal_bench_config.harbor.extra_body.chat_template_kwargs={enable_thinking: true}",
"+terminal_bench_config.harbor.extra_body.response_prefix='<think>\\n\\n</think>\\n\\n{\\n \"analysis\": \"'",
"+terminal_bench_config.harbor.override_timeout_sec=1500",
"+terminal_bench_config.harbor.override_cpus=1",
"+terminal_bench_config.harbor.override_memory_mb=2048",
"+terminal_bench_config.harbor.override_storage_mb=2048",
"+terminal_bench_config.harbor.auto_snapshot=true",
"+terminal_bench_config.harbor.verifier_override_timeout_sec=120",
"+terminal_bench_config.harbor.max_retries=3",
"+terminal_bench_config.harbor.min_wait_sec=60.0",
"+terminal_bench_config.harbor.max_wait_sec=600.0",
"+terminal_bench_config.harbor.wait_multiplier=2.0",
"+terminal_bench_config.harbor.exclude_exceptions=[\"AgentTimeoutError\",\"VerifierTimeoutError\",\"RewardFileNotFoundError\",\"RewardFileEmptyError\",\"VerifierOutputParseError\",\"ContextLengthExceededError\"]",
"+terminal_bench_config.harbor.n_concurrent_trials=432",
"+terminal_bench_config.harbor.log_level=INFO",
"+terminal_bench_config.harbor.enable_reward_shaping=false",
"+terminal_bench_config.harbor.enable_error_classification=true",
"+terminal_bench_config.harbor.mask_exceptions=[\"DaytonaError\",\"EnvironmentStartTimeoutError\",\"NetworkError\",\"ConnectionError\",\"RewardFileNotFoundError\",\"RewardFileEmptyError\",\"AgentEnvironmentTimeoutError\"]",
"+terminal_bench_config.harbor.zero_exceptions=[\"AgentTimeoutError\",\"ContextLengthExceededError\"]",
"+terminal_bench_config.harbor.default_error_treatment=zero",
"+terminal_bench_config.model_info.max_input_tokens=32768",
"+terminal_bench_config.model_info.max_output_tokens=8192",
"+terminal_bench_config.archiving.enabled=false"
],
"model_path": "/pscratch/sd/p/penfever/hub/models--laion--r2egym-nl2bash-stack-bugsseq/snapshots/1e2611c5bd7aec4479129ef497c504b77b8f6726",
"train_data": [
"/pscratch/sd/p/penfever/tasks/exp_rpt_stack-ruby"
],
"val_data": [
"/pscratch/sd/p/penfever/tasks/OpenThoughts-TB-dev"
],
"num_nodes": 6,
"gpus_per_node": 4,
"cpus_per_node": 64,
"tensor_parallel_size": 1,
"ray_port": 6379,
"master_port": 12345,
"checkpoints_dir": null,
"export_path": "/pscratch/sd/p/penfever/OpenThoughts-Agent/experiments/bs128-gs16-ruby/bs128-gs16-ruby/exports",
"needs_ssh_tunnel": false,
"needs_cuda_detection": true,
"pinggy_persistent_url": null,
"pinggy_token": null,
"agent_name": "terminus-2",
"harbor_env": "daytona",
"proxychains_binary": null
}