# tests/test_reward.py
from src.pytorch_debug_env.reward import (
clamp_score,
compute_step_reward,
final_diagnosis_score,
hypothesis_quality,
line_overlap,
)
def test_hypothesis_quality_exact_match():
    """A hypothesis matching the true bug type and file must score above 0.8."""
    truth = dict(
        bug_type="missing_zero_grad",
        primary_bug_file="train.py",
        related_files=[],
    )
    # Same bug type and file as the ground truth, stated with 0.8 confidence.
    guess = dict(
        bug_type="missing_zero_grad",
        affected_file="train.py",
        confidence=0.8,
    )

    quality = hypothesis_quality(guess, truth)

    assert quality > 0.8
def test_line_overlap_handles_no_overlap():
    """Inputs [1, 2] and [5, 6] share nothing, so the overlap must be 0.0."""
    left = [1, 2]
    right = [5, 6]

    overlap = line_overlap(left, right)

    assert overlap == 0.0
def test_final_diagnosis_score_bounds():
    """The final diagnosis score must lie strictly between 0.0 and 1.0."""
    truth = dict(
        bug_type="missing_zero_grad",
        primary_bug_file="train.py",
        related_files=[],
        line_range=[10, 12],
        fix_strategy="Call optimizer.zero_grad() before loss.backward()",
    )
    # The diagnosis agrees with the ground truth on every field it supplies.
    diagnosis = dict(
        bug_type="missing_zero_grad",
        affected_file="train.py",
        line_range=[10, 12],
        fix_strategy="Call optimizer.zero_grad() before loss.backward()",
    )

    result = final_diagnosis_score(diagnosis, truth)

    assert 0.0 < result < 1.0
def test_final_diagnosis_score_perfect_clamped():
    """A diagnosis matching the ground truth everywhere still scores below 1.0.

    The score is also required to be strictly greater than 0.0.
    """
    # NOTE(review): the inputs and assertion here are identical to
    # test_final_diagnosis_score_bounds; confirm whether one of the two was
    # meant to exercise a different case.
    bug = "missing_zero_grad"
    target_file = "train.py"
    fix_text = "Call optimizer.zero_grad() before loss.backward()"

    expected = {
        "bug_type": bug,
        "primary_bug_file": target_file,
        "related_files": [],
        "line_range": [10, 12],
        "fix_strategy": fix_text,
    }
    submitted = {
        "bug_type": bug,
        "affected_file": target_file,
        "line_range": [10, 12],
        "fix_strategy": fix_text,
    }

    clamped = final_diagnosis_score(submitted, expected)

    assert 0.0 < clamped < 1.0
def test_compute_step_reward_clamps_non_negative():
    """A poor step still yields a reward strictly inside (0.0, 1.0).

    The hypothesis names the wrong bug type and file, and the investigation
    target is the ground truth's red-herring file. The overall reward must
    stay strictly between 0.0 and 1.0 while the ``investigation_reward``
    component is not positive.
    """
    red_herring = "model/architecture.py"
    truth = dict(
        bug_type="missing_zero_grad",
        primary_bug_file="train.py",
        related_files=[],
        red_herring_file=red_herring,
        line_range=[10, 12],
        fix_strategy="Call optimizer.zero_grad() before loss.backward()",
    )
    # Wrong bug type, wrong file, low confidence.
    wrong_guess = dict(
        bug_type="data_leakage",
        affected_file="unknown.py",
        confidence=0.1,
    )

    outcome = compute_step_reward(
        previous_quality=0.6,
        current_hypothesis=wrong_guess,
        ground_truth=truth,
        investigation_target=red_herring,
        committed_diagnosis=None,
        step_num=1,
        max_steps=5,
    )
    reward, components = outcome

    assert 0.0 < reward < 1.0
    assert components["investigation_reward"] <= 0.0
def test_clamp_score_open_interval():
    """clamp_score must map both 0.0 and 1.0 strictly inside (0.0, 1.0)."""
    # Check the lower endpoint first, then the upper one.
    for endpoint in (0.0, 1.0):
        clamped = clamp_score(endpoint)
        assert 0.0 < clamped < 1.0
|