Spaces:
Running
Running
File size: 990 Bytes
7eba1aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import evaluate
import datasets
from text2sql_eval.metrics.bleu import bleu_score
# Summary text handed to ``evaluate.MetricInfo`` as the metric's description.
_DESCRIPTION = "SQL token BLEU (0–1). Returns mean score in [0, 1]."
def _to_str(x):
if isinstance(x, (list, tuple)):
return x[0] if x else ""
return "" if x is None else str(x)
class SQLBLEU(evaluate.Metric):
    """``evaluate`` metric that averages ``bleu_score`` over input pairs."""

    def _info(self):
        """Return the metric's description, citation, and input features."""
        # Both inputs are declared as plain string columns.
        input_schema = datasets.Features(
            {
                "predictions": datasets.Value("string"),
                "references": datasets.Value("string"),
            }
        )
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation="Uses sacrebleu via text2sql-eval implementation.",
            features=input_schema,
        )

    def _compute(self, predictions, references):
        """Score each prediction/reference pair and return the mean.

        Pairs are formed with ``zip``; each side is normalised by ``_to_str``
        before being passed to ``bleu_score``. The result is a dict with the
        single key ``"sql_bleu"``; it is ``0.0`` when there are no pairs.
        """
        pair_scores = [
            float(bleu_score(_to_str(pred), _to_str(ref)))
            for pred, ref in zip(predictions, references)
        ]
        if not pair_scores:
            return {"sql_bleu": 0.0}
        return {"sql_bleu": sum(pair_scores) / len(pair_scores)}
|