Spaces:
Sleeping
Sleeping
| """Compare pooled random split vs grouped LOPO for XGBoost.""" | |
| import os | |
| import sys | |
| import numpy as np | |
| from sklearn.metrics import accuracy_score, f1_score, roc_auc_score | |
| _PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) | |
| if _PROJECT_ROOT not in sys.path: | |
| sys.path.insert(0, _PROJECT_ROOT) | |
| from data_preparation.prepare_dataset import get_default_split_config, get_numpy_splits, load_per_person | |
| from models.xgboost.config import build_xgb_classifier | |
| MODEL_NAME = "face_orientation" | |
| OUT_PATH = os.path.join(_PROJECT_ROOT, "evaluation", "GROUPED_SPLIT_BENCHMARK.md") | |
| def run_pooled_split(): | |
| split_ratios, seed = get_default_split_config() | |
| splits, _, _, _ = get_numpy_splits( | |
| model_name=MODEL_NAME, | |
| split_ratios=split_ratios, | |
| seed=seed, | |
| scale=False, | |
| ) | |
| model = build_xgb_classifier(seed, verbosity=0, early_stopping_rounds=30) | |
| model.fit( | |
| splits["X_train"], | |
| splits["y_train"], | |
| eval_set=[(splits["X_val"], splits["y_val"])], | |
| verbose=False, | |
| ) | |
| probs = model.predict_proba(splits["X_test"])[:, 1] | |
| preds = (probs >= 0.5).astype(int) | |
| y = splits["y_test"] | |
| return { | |
| "accuracy": float(accuracy_score(y, preds)), | |
| "f1": float(f1_score(y, preds, average="weighted")), | |
| "auc": float(roc_auc_score(y, probs)), | |
| } | |
| def run_grouped_lopo(): | |
| by_person, _, _ = load_per_person(MODEL_NAME) | |
| persons = sorted(by_person.keys()) | |
| scores = {"accuracy": [], "f1": [], "auc": []} | |
| _, seed = get_default_split_config() | |
| for held_out in persons: | |
| train_x = np.concatenate([by_person[p][0] for p in persons if p != held_out], axis=0) | |
| train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out], axis=0) | |
| test_x, test_y = by_person[held_out] | |
| model = build_xgb_classifier(seed, verbosity=0) | |
| model.fit(train_x, train_y, verbose=False) | |
| probs = model.predict_proba(test_x)[:, 1] | |
| preds = (probs >= 0.5).astype(int) | |
| scores["accuracy"].append(float(accuracy_score(test_y, preds))) | |
| scores["f1"].append(float(f1_score(test_y, preds, average="weighted"))) | |
| scores["auc"].append(float(roc_auc_score(test_y, probs))) | |
| return { | |
| "accuracy": float(np.mean(scores["accuracy"])), | |
| "f1": float(np.mean(scores["f1"])), | |
| "auc": float(np.mean(scores["auc"])), | |
| "folds": len(persons), | |
| } | |
| def write_report(pooled, grouped): | |
| lines = [ | |
| "# Grouped vs pooled split benchmark", | |
| "", | |
| "This compares the same XGBoost config under two evaluation protocols.", | |
| "", | |
| f"Config: `{XGB_BASE_PARAMS}`", | |
| "", | |
| "| Protocol | Accuracy | F1 (weighted) | ROC-AUC |", | |
| "|----------|---------:|--------------:|--------:|", | |
| f"| Pooled random split (70/15/15) | {pooled['accuracy']:.4f} | {pooled['f1']:.4f} | {pooled['auc']:.4f} |", | |
| f"| Grouped LOPO ({grouped['folds']} folds) | {grouped['accuracy']:.4f} | {grouped['f1']:.4f} | {grouped['auc']:.4f} |", | |
| "", | |
| "Use grouped LOPO as the primary generalisation metric when reporting model quality.", | |
| "", | |
| ] | |
| with open(OUT_PATH, "w", encoding="utf-8") as f: | |
| f.write("\n".join(lines)) | |
| print(f"[LOG] Wrote {OUT_PATH}") | |
| def main(): | |
| pooled = run_pooled_split() | |
| grouped = run_grouped_lopo() | |
| write_report(pooled, grouped) | |
| print( | |
| "[DONE] pooled_f1={:.4f} grouped_f1={:.4f}".format( | |
| pooled["f1"], grouped["f1"] | |
| ) | |
| ) | |
| if __name__ == "__main__": | |
| main() | |