File size: 956 Bytes
5dd1bb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
"""Training utilities for GRPO-based SQLEnv experiments."""

from .config import GRPOConfig, apply_device_overrides, find_project_root
from .data_loading import (
    filter_questions_by_difficulty,
    load_model_and_tokenizer,
    load_question_prompts,
)
from .notebook_pipeline import (
    build_trainer,
    format_oom_guidance,
    run_training_with_metrics,
    sample_random_baseline,
)
from .prompts import format_observation, get_system_prompt
from .rewards import reward_correctness, reward_operational, reward_progress

# Public names re-exported by this module. Every entry is bound by one of the
# relative imports above; this list is what `from <this package> import *`
# exposes, so keep it in sync when an import is added or removed.
__all__ = [
    # Imported from .config
    "GRPOConfig",
    "apply_device_overrides",
    "find_project_root",
    # Imported from .data_loading, .notebook_pipeline and .prompts
    # (listed alphabetically rather than grouped by submodule)
    "build_trainer",
    "filter_questions_by_difficulty",
    "format_observation",
    "format_oom_guidance",
    "get_system_prompt",
    "load_model_and_tokenizer",
    "load_question_prompts",
    "run_training_with_metrics",
    "sample_random_baseline",
    # Imported from .rewards
    "reward_correctness",
    "reward_progress",
    "reward_operational",
]