"""Comprehensive integration tests across tasks, rewards, and determinism."""

from __future__ import annotations

from env.environment import CodeReviewEnv
from env.models import CodeReviewAction


def test_each_task_reset_and_done_path_is_stable() -> None:
    """Reset every task, post one comment, submit, and validate the outcome.

    For each of the task ids "easy", "medium" and "hard" the test checks the
    observation returned by ``reset`` and then the 4-tuple returned by the
    final ``done`` step.
    """
    env = CodeReviewEnv()

    for task_id in ("easy", "medium", "hard"):
        initial = env.reset(task_id)

        # The fresh observation must echo the task and start at step 1.
        assert initial.task_id == task_id
        assert initial.step_number == 1
        assert initial.max_steps >= 1

        # A fresh action object is built per task, immediately before use.
        probe = CodeReviewAction(
            operation="add_comment",
            line_number=1,
            severity="minor",
            category="style",
            message="probe",
        )
        env.step(probe)

        submit = CodeReviewAction(operation="done")
        final_obs, reward, done, info = env.step(submit)

        assert done is True
        assert final_obs.review_status == "submitted"

        # NOTE(review): the upper bound is 1.1, not 1.0 — presumably the
        # reward may carry a small bonus; confirm against the environment.
        score = float(reward)
        assert 0.0 <= score <= 1.1
        assert isinstance(info["current_score"], float)


def test_done_is_deterministic_for_same_comment_set() -> None:
    """Two independent episodes with identical actions give the same reward."""

    def run_once() -> float:
        """Play one "hard" episode on a new environment; return its reward."""
        episode = CodeReviewEnv()
        episode.reset("hard")

        comment = CodeReviewAction(
            operation="add_comment",
            line_number=25,
            severity="major",
            category="performance",
            message="n+1",
        )
        episode.step(comment)

        _obs, final_reward, _done, _info = episode.step(
            CodeReviewAction(operation="done")
        )
        return float(final_reward)

    first = run_once()
    second = run_once()

    # Exact equality is intentional: the check is for determinism.
    assert first == second


def test_step_limit_penalty_applies_when_exceeded_without_done() -> None:
    """Keep commenting past ``max_steps`` and check the resulting score.

    The test never sends a ``done`` action; it expects the environment to
    report ``done`` on its own and ``info["current_score"]`` to be 0.001.
    """
    env = CodeReviewEnv()
    step_budget = env.reset("easy").max_steps + 2  # two steps past the limit

    done = False
    for _attempt in range(step_budget):
        filler = CodeReviewAction(
            operation="add_comment",
            line_number=2,
            severity="minor",
            category="style",
            message="x",
        )
        _obs, _reward, done, info = env.step(filler)
        if done:
            break

    assert done is True
    assert info["current_score"] == 0.001