File size: 3,772 Bytes
8097081
72a7241
8097081
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1435892
 
 
72a7241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1435892
 
 
72a7241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1435892
 
 
72a7241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1435892
1700927
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# tests/test_graders.py
from src.pytorch_debug_env.graders import grade_easy, grade_hard, grade_medium

def test_grade_easy():
    """A diagnosis matching the ground truth scores in (0.8, 1.0) on the easy grader."""
    bug = "missing_zero_grad"
    target_file = "train.py"
    remedy = "Call optimizer.zero_grad() before loss.backward()"

    truth = {
        "bug_type": bug,
        "primary_bug_file": target_file,
        "related_files": [],
        "line_range": [10, 15],
        "fix_strategy": remedy,
    }
    # The submission agrees with the ground truth on every graded field.
    submission = {
        "bug_type": bug,
        "affected_file": target_file,
        "line_range": [10, 15],
        "fix_strategy": remedy,
        "confidence": 0.8,
    }

    result = grade_easy(submission, truth)

    # High score expected, but strictly below the 1.0 ceiling.
    assert 0.8 < result < 1.0


def test_grade_medium_related_file_bonus():
    """Pointing at a related file must not score lower on medium than on easy."""
    bug = "data_leakage"
    remedy = "Ensure validation split is strictly separate from training"
    related = "data/preprocessing.py"

    truth = {
        "bug_type": bug,
        "primary_bug_file": "data/dataset.py",
        "related_files": [related],
        "line_range": [4, 6],
        "fix_strategy": remedy,
    }
    # The submission names the related file (not the primary one) and a
    # line range that differs from the ground truth.
    submission = {
        "bug_type": bug,
        "affected_file": related,
        "line_range": [1, 2],
        "fix_strategy": remedy,
        "confidence": 0.6,
    }

    medium_result = grade_medium(submission, truth)
    easy_result = grade_easy(submission, truth)

    assert medium_result >= easy_result
    assert 0.0 < medium_result < 1.0


def test_grade_hard_category_partial_credit():
    """A mismatched diagnosis still earns at least 0.18 on the hard grader.

    NOTE(review): the test name suggests the credit comes from a category
    match between the two bug types -- confirm against the grader.
    """
    truth = {
        "bug_type": "missing_zero_grad",
        "category": "optimization",
        "primary_bug_file": "train.py",
        "related_files": [],
        "red_herring_file": "model/attention.py",
        "line_range": [10, 12],
        "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
    }
    # Bug type, file, line range and fix all differ from the ground truth.
    submission = {
        "bug_type": "wrong_loss_function",
        "affected_file": "data/dataset.py",
        "line_range": [1, 2],
        "fix_strategy": "Use CrossEntropyLoss instead of MSE",
        "confidence": 0.5,
    }

    result = grade_hard(submission, truth)

    # The 0.18 floor already implies the score is positive.
    assert 0.18 <= result < 1.0


def test_grade_hard_penalizes_red_herring():
    """Blaming the red-herring file keeps the hard score in (0.0, 0.9]."""
    bug = "memory_leak"
    decoy = "model/attention.py"
    remedy = "Avoid holding reference to tensors in class cache"

    truth = {
        "bug_type": bug,
        "category": "resource",
        "primary_bug_file": "data/dataset.py",
        "related_files": ["train.py"],
        "red_herring_file": decoy,
        "line_range": [5, 9],
        "fix_strategy": remedy,
    }
    # Everything matches the ground truth except the file, which is the decoy.
    submission = {
        "bug_type": bug,
        "affected_file": decoy,
        "line_range": [5, 9],
        "fix_strategy": remedy,
        "confidence": 0.7,
    }

    result = grade_hard(submission, truth)

    # The 0.9 cap already implies the score is below 1.0.
    assert 0.0 < result <= 0.9


def test_grade_easy_perfect_is_not_one():
    """A fully matching, confidence-1.0 diagnosis still scores strictly inside (0, 1)."""
    bug = "missing_zero_grad"
    target_file = "train.py"
    remedy = "Call optimizer.zero_grad() before loss.backward()"

    truth = {
        "bug_type": bug,
        "primary_bug_file": target_file,
        "related_files": [],
        "line_range": [10, 12],
        "fix_strategy": remedy,
    }
    submission = {
        "bug_type": bug,
        "affected_file": target_file,
        "line_range": [10, 12],
        "fix_strategy": remedy,
        "confidence": 1.0,
    }

    result = grade_easy(submission, truth)

    assert 0.0 < result < 1.0


def test_grader_empty_action_clamped():
    """Every grader returns a score strictly inside (0, 1) for an empty action."""
    truth = {
        "bug_type": "data_leakage",
        "primary_bug_file": "data/dataset.py",
        "related_files": [],
        "line_range": [4, 6],
        "fix_strategy": "Ensure validation split is strictly separate from training",
    }
    empty_submission = {}

    # Same empty dict is handed to each grader, in easy -> medium -> hard order.
    for grader in (grade_easy, grade_medium, grade_hard):
        assert 0.0 < grader(empty_submission, truth) < 1.0