# tests/test_graders.py
from src.pytorch_debug_env.graders import grade_easy, grade_hard, grade_medium
def test_grade_easy():
    """An exact-match diagnosis at confidence 0.8 must score inside (0.8, 1.0)."""
    zero_grad_fix = "Call optimizer.zero_grad() before loss.backward()"
    ground_truth = dict(
        bug_type="missing_zero_grad",
        primary_bug_file="train.py",
        related_files=[],
        line_range=[10, 15],
        fix_strategy=zero_grad_fix,
    )
    submission = dict(
        bug_type="missing_zero_grad",
        affected_file="train.py",
        line_range=[10, 15],
        fix_strategy=zero_grad_fix,
        confidence=0.8,
    )

    result = grade_easy(submission, ground_truth)

    # A high score is expected, but strictly below a full 1.0.
    assert 0.8 < result < 1.0
def test_grade_medium_related_file_bonus():
    """Blaming a related file: the medium score is >= the easy score, within (0, 1)."""
    leakage_fix = "Ensure validation split is strictly separate from training"
    ground_truth = {
        "bug_type": "data_leakage",
        "primary_bug_file": "data/dataset.py",
        "related_files": ["data/preprocessing.py"],
        "line_range": [4, 6],
        "fix_strategy": leakage_fix,
    }
    submission = {
        "bug_type": "data_leakage",
        # Names a file listed under related_files, not the primary bug file.
        "affected_file": "data/preprocessing.py",
        "line_range": [1, 2],
        "fix_strategy": leakage_fix,
        "confidence": 0.6,
    }

    medium_score = grade_medium(submission, ground_truth)
    easy_score = grade_easy(submission, ground_truth)

    assert medium_score >= easy_score
    assert medium_score > 0.0
    assert medium_score < 1.0
def test_grade_hard_category_partial_credit():
    """A wrong bug type, file and line range must still earn at least 0.18.

    NOTE(review): presumably the credit comes from the submitted bug type
    sharing the ground truth's "optimization" category -- confirm against
    the grade_hard implementation.
    """
    ground_truth = dict(
        bug_type="missing_zero_grad",
        category="optimization",
        primary_bug_file="train.py",
        related_files=[],
        red_herring_file="model/attention.py",
        line_range=[10, 12],
        fix_strategy="Call optimizer.zero_grad() before loss.backward()",
    )
    submission = dict(
        bug_type="wrong_loss_function",
        affected_file="data/dataset.py",
        line_range=[1, 2],
        fix_strategy="Use CrossEntropyLoss instead of MSE",
        confidence=0.5,
    )

    result = grade_hard(submission, ground_truth)

    assert result >= 0.18
    assert result > 0.0
    assert result < 1.0
def test_grade_hard_penalizes_red_herring():
    """Blaming the red-herring file caps the hard score at 0.9, within (0, 1)."""
    cache_fix = "Avoid holding reference to tensors in class cache"
    decoy_file = "model/attention.py"
    ground_truth = {
        "bug_type": "memory_leak",
        "category": "resource",
        "primary_bug_file": "data/dataset.py",
        "related_files": ["train.py"],
        "red_herring_file": decoy_file,
        "line_range": [5, 9],
        "fix_strategy": cache_fix,
    }
    # Everything matches the ground truth except the file, which is the decoy.
    submission = {
        "bug_type": "memory_leak",
        "affected_file": decoy_file,
        "line_range": [5, 9],
        "fix_strategy": cache_fix,
        "confidence": 0.7,
    }

    result = grade_hard(submission, ground_truth)

    assert result <= 0.9
    assert result > 0.0
    assert result < 1.0
def test_grade_easy_perfect_is_not_one():
    """Even an exact match at confidence 1.0 must score strictly inside (0, 1)."""
    zero_grad_fix = "Call optimizer.zero_grad() before loss.backward()"
    ground_truth = dict(
        bug_type="missing_zero_grad",
        primary_bug_file="train.py",
        related_files=[],
        line_range=[10, 12],
        fix_strategy=zero_grad_fix,
    )
    submission = dict(
        bug_type="missing_zero_grad",
        affected_file="train.py",
        line_range=[10, 12],
        fix_strategy=zero_grad_fix,
        confidence=1.0,
    )

    result = grade_easy(submission, ground_truth)

    assert result > 0.0
    assert result < 1.0
def test_grader_empty_action_clamped():
    """Every grader must return a score strictly inside (0, 1) for an empty action."""
    ground_truth = dict(
        bug_type="data_leakage",
        primary_bug_file="data/dataset.py",
        related_files=[],
        line_range=[4, 6],
        fix_strategy="Ensure validation split is strictly separate from training",
    )
    empty_submission = {}

    # Same order as the difficulty tiers: easy, medium, hard.
    for grader in (grade_easy, grade_medium, grade_hard):
        assert 0.0 < grader(empty_submission, ground_truth) < 1.0
|