# Openenv / tests/test_graders.py
# Priyansh Saxena
# test: enforce strict score bounds
# 1700927
# tests/test_graders.py
from src.pytorch_debug_env.graders import grade_easy, grade_hard, grade_medium
def test_grade_easy() -> None:
    """Check grade_easy on an action that matches the ground truth.

    The action agrees with the ground truth on bug type, file, line range
    and fix strategy, and carries a confidence of 0.8. The resulting score
    must lie strictly between 0.8 and 1.0.
    """
    fix = "Call optimizer.zero_grad() before loss.backward()"
    ground_truth = {
        "bug_type": "missing_zero_grad",
        "primary_bug_file": "train.py",
        "related_files": [],
        "line_range": [10, 15],
        "fix_strategy": fix,
    }
    submission = {
        "bug_type": "missing_zero_grad",
        "affected_file": "train.py",
        "line_range": [10, 15],
        "fix_strategy": fix,
        "confidence": 0.8,
    }

    result = grade_easy(submission, ground_truth)

    # High score expected, but never the upper bound itself.
    assert 0.8 < result < 1.0
def test_grade_medium_related_file_bonus() -> None:
    """Check grade_medium against grade_easy for a related-file answer.

    The action names a file listed in the ground truth's ``related_files``
    (not the primary bug file) and a different line range. grade_medium
    must score it at least as high as grade_easy does, and the score must
    stay strictly inside (0, 1).
    """
    fix = "Ensure validation split is strictly separate from training"
    ground_truth = {
        "bug_type": "data_leakage",
        "primary_bug_file": "data/dataset.py",
        "related_files": ["data/preprocessing.py"],
        "line_range": [4, 6],
        "fix_strategy": fix,
    }
    submission = {
        "bug_type": "data_leakage",
        "affected_file": "data/preprocessing.py",
        "line_range": [1, 2],
        "fix_strategy": fix,
        "confidence": 0.6,
    }

    medium_score = grade_medium(submission, ground_truth)
    easy_score = grade_easy(submission, ground_truth)

    assert medium_score >= easy_score
    assert 0.0 < medium_score < 1.0
def test_grade_hard_category_partial_credit() -> None:
    """Check that grade_hard keeps a floor score for a mismatched action.

    The action disagrees with the ground truth on bug type, file, line
    range and fix strategy, yet the score must be at least 0.18 and stay
    strictly inside (0, 1).
    """
    # NOTE(review): the 0.18 floor presumably comes from category-level
    # partial credit (ground truth category is "optimization") -- confirm
    # against the grade_hard implementation.
    ground_truth = {
        "bug_type": "missing_zero_grad",
        "category": "optimization",
        "primary_bug_file": "train.py",
        "related_files": [],
        "red_herring_file": "model/attention.py",
        "line_range": [10, 12],
        "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
    }
    submission = {
        "bug_type": "wrong_loss_function",
        "affected_file": "data/dataset.py",
        "line_range": [1, 2],
        "fix_strategy": "Use CrossEntropyLoss instead of MSE",
        "confidence": 0.5,
    }

    result = grade_hard(submission, ground_truth)

    assert result >= 0.18
    assert 0.0 < result < 1.0
def test_grade_hard_penalizes_red_herring() -> None:
    """Check grade_hard when the action blames the red-herring file.

    The action matches the ground truth on bug type, line range and fix
    strategy, but its ``affected_file`` is the ground truth's
    ``red_herring_file``. The score must not exceed 0.9 and must be
    strictly positive.
    """
    fix = "Avoid holding reference to tensors in class cache"
    decoy_file = "model/attention.py"
    ground_truth = {
        "bug_type": "memory_leak",
        "category": "resource",
        "primary_bug_file": "data/dataset.py",
        "related_files": ["train.py"],
        "red_herring_file": decoy_file,
        "line_range": [5, 9],
        "fix_strategy": fix,
    }
    submission = {
        "bug_type": "memory_leak",
        "affected_file": decoy_file,
        "line_range": [5, 9],
        "fix_strategy": fix,
        "confidence": 0.7,
    }

    result = grade_hard(submission, ground_truth)

    # The 0.9 cap already implies the strict < 1.0 upper bound.
    assert 0.0 < result <= 0.9
def test_grade_easy_perfect_is_not_one() -> None:
    """Check that a fully matching, fully confident action scores below 1.0.

    Every graded field of the action equals the ground truth and the
    confidence is 1.0; the score must still be strictly inside (0, 1).
    """
    fix = "Call optimizer.zero_grad() before loss.backward()"
    ground_truth = {
        "bug_type": "missing_zero_grad",
        "primary_bug_file": "train.py",
        "related_files": [],
        "line_range": [10, 12],
        "fix_strategy": fix,
    }
    submission = {
        "bug_type": "missing_zero_grad",
        "affected_file": "train.py",
        "line_range": [10, 12],
        "fix_strategy": fix,
        "confidence": 1.0,
    }

    result = grade_easy(submission, ground_truth)

    assert 0.0 < result < 1.0
def test_grader_empty_action_clamped() -> None:
    """Check that every grader keeps an empty action's score inside (0, 1).

    The same empty dict is passed to grade_easy, grade_medium and
    grade_hard in that order.
    """
    ground_truth = {
        "bug_type": "data_leakage",
        "primary_bug_file": "data/dataset.py",
        "related_files": [],
        "line_range": [4, 6],
        "fix_strategy": "Ensure validation split is strictly separate from training",
    }
    empty_action = {}

    for grader in (grade_easy, grade_medium, grade_hard):
        assert 0.0 < grader(empty_action, ground_truth) < 1.0