File size: 2,693 Bytes
8097081
72a7241
1435892
72a7241
 
 
 
 
8097081
 
 
 
 
 
 
 
 
 
 
 
 
 
72a7241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1700927
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1435892
 
72a7241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1435892
72a7241
1435892
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# tests/test_reward.py
from src.pytorch_debug_env.reward import (
    clamp_score,
    compute_step_reward,
    final_diagnosis_score,
    hypothesis_quality,
    line_overlap,
)


def test_hypothesis_quality_exact_match():
    """A hypothesis matching the true bug type and file must score above 0.8."""
    truth = dict(
        bug_type="missing_zero_grad",
        primary_bug_file="train.py",
        related_files=[],
    )
    guess = dict(
        bug_type="missing_zero_grad",
        affected_file="train.py",
        confidence=0.8,
    )

    quality = hypothesis_quality(guess, truth)

    assert quality > 0.8


def test_line_overlap_handles_no_overlap():
    """[1, 2] and [5, 6] share no lines, so the overlap must be exactly 0.0."""
    first, second = [1, 2], [5, 6]

    overlap = line_overlap(first, second)

    assert overlap == 0.0


def test_final_diagnosis_score_bounds():
    """The final diagnosis score must lie strictly between 0.0 and 1.0."""
    fix = "Call optimizer.zero_grad() before loss.backward()"
    truth = {
        "bug_type": "missing_zero_grad",
        "primary_bug_file": "train.py",
        "related_files": [],
        "line_range": [10, 12],
        "fix_strategy": fix,
    }
    diagnosis = {
        "bug_type": "missing_zero_grad",
        "affected_file": "train.py",
        "line_range": [10, 12],
        "fix_strategy": fix,
    }

    result = final_diagnosis_score(diagnosis, truth)

    # Both ends of the interval are exclusive.
    assert result > 0.0 and result < 1.0


def test_final_diagnosis_score_perfect_clamped():
    """A diagnosis that mirrors the ground truth still scores inside (0, 1).

    NOTE(review): the inputs and the assertion are the same as in
    test_final_diagnosis_score_bounds -- confirm whether this test was meant
    to pin something more specific.
    """
    truth = {
        "bug_type": "missing_zero_grad",
        "primary_bug_file": "train.py",
        "related_files": [],
        "line_range": [10, 12],
        "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
    }
    # Derive the diagnosis from the ground truth so every field agrees with it;
    # the line range is copied so the two dicts do not share a list object.
    diagnosis = {
        "bug_type": truth["bug_type"],
        "affected_file": truth["primary_bug_file"],
        "line_range": list(truth["line_range"]),
        "fix_strategy": truth["fix_strategy"],
    }

    outcome = final_diagnosis_score(diagnosis, truth)

    assert 0.0 < outcome < 1.0


def test_compute_step_reward_clamps_non_negative():
    """A poor hypothesis aimed at the red-herring file keeps the reward in (0, 1).

    The hypothesis disagrees with the ground truth on both bug type and file,
    and the investigation target is the ground truth's red-herring file. The
    step reward must still be strictly between 0.0 and 1.0, while the
    "investigation_reward" component must not be positive.
    """
    truth = {
        "bug_type": "missing_zero_grad",
        "primary_bug_file": "train.py",
        "related_files": [],
        "red_herring_file": "model/architecture.py",
        "line_range": [10, 12],
        "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
    }
    wrong_guess = {
        "bug_type": "data_leakage",
        "affected_file": "unknown.py",
        "confidence": 0.1,
    }

    outcome = compute_step_reward(
        previous_quality=0.6,
        current_hypothesis=wrong_guess,
        ground_truth=truth,
        investigation_target=truth["red_herring_file"],
        committed_diagnosis=None,
        step_num=1,
        max_steps=5,
    )
    step_reward, breakdown = outcome

    assert 0.0 < step_reward < 1.0
    assert breakdown["investigation_reward"] <= 0.0


def test_clamp_score_open_interval():
    """clamp_score must map the endpoints 0.0 and 1.0 strictly inside (0, 1)."""
    for endpoint in (0.0, 1.0):
        clamped = clamp_score(endpoint)
        assert 0.0 < clamped < 1.0