import os

import joblib
import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
|
|
| |
# Application object; the route decorators further down register against it.
app = FastAPI()


# The dataset is read once at import time. Every analytics endpoint in this
# module aggregates over this same in-memory frame.
DATASET_PATH = "credit_risk_dataset.csv"
df = pd.read_csv(DATASET_PATH)


# Columns fed to the models, in the order the models are trained on.
FEATURES = ["loan_amnt", "loan_int_rate", "person_age", "person_income", "cb_person_cred_hist_length"]

# LogisticRegression.fit rejects NaN inputs, so rows with a missing feature or
# target value are excluded from training. `df` itself is left untouched so
# the analytics endpoints keep aggregating over the full dataset. When the
# CSV has no missing values this is a no-op and the split is unchanged.
_model_rows = df.dropna(subset=[*FEATURES, "loan_status"])
X = _model_rows[FEATURES]
y = _model_rows["loan_status"]

# Fixed seed keeps the 80/20 split reproducible between restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
| |
# Locations of the persisted models. The path strings are named once so the
# dump and load calls below cannot drift apart.
MODEL_DIR = "models"
CLASSIFIER_PATH = "models/risk_classifier.pkl"
REGRESSOR_PATH = "models/past_due_regressor.pkl"

# joblib.dump opens the target file directly and does not create parent
# directories, so make sure the folder exists before the first write.
os.makedirs(MODEL_DIR, exist_ok=True)


# Random forest used for the hard risk-category prediction.
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)
joblib.dump(classifier, CLASSIFIER_PATH)


# NOTE: despite the variable name this is a logistic-regression *classifier*;
# the prediction endpoint uses its predict_proba output as the default
# probability.
regressor = LogisticRegression()
regressor.fit(X_train, y_train)
joblib.dump(regressor, REGRESSOR_PATH)


# Reload from disk so the objects being served are exactly the artefacts that
# were just written.
# NOTE(review): joblib.load unpickles the file; only load artefacts this
# process (or another trusted source) produced.
classifier = joblib.load(CLASSIFIER_PATH)
regressor = joblib.load(REGRESSOR_PATH)
|
|
| |
class LoanRequest(BaseModel):
    """Request body accepted by the ``/predict_loan_risk`` endpoint.

    Field names mirror the dataset column names used elsewhere in this
    module.
    """

    # Numeric inputs forwarded to the models by the prediction endpoint.
    loan_amnt: float
    loan_int_rate: float
    person_age: int
    person_income: float
    # Accepted in the payload, but the prediction endpoint in this module
    # does not pass it to either model.
    person_home_ownership: str
    cb_person_cred_hist_length: int
|
|
| |
@app.get("/loan_status_distribution")
def loan_status_distribution():
    """Return the share of rows per ``loan_status`` value, as percentages.

    Returns:
        A dict with ``default_percentage`` (rows whose status is 1) and
        ``non_default_percentage`` (rows whose status is 0). A status value
        that never occurs is reported as 0.

    Raises:
        HTTPException: status 400 when the dataset has no ``loan_status``
            column.
    """
    has_status_column = "loan_status" in df.columns
    if not has_status_column:
        raise HTTPException(status_code=400, detail="Missing 'loan_status' column")

    # normalize=True yields fractions of the total; scale them to percent.
    fractions = df["loan_status"].value_counts(normalize=True)
    percentages = fractions * 100

    return {
        "default_percentage": percentages.get(1, 0),
        "non_default_percentage": percentages.get(0, 0),
    }
|
|
@app.get("/payment_timeline_analysis")
def payment_timeline_analysis():
    """Return the mean ``loan_amnt`` for each ``loan_status`` value.

    Returns:
        A dict whose ``average_loan_amount_by_status`` entry maps each
        status value found in the dataset to the mean loan amount of the
        rows carrying that status.
    """
    amounts_per_status = df.groupby("loan_status")["loan_amnt"]
    mean_amounts = amounts_per_status.mean()
    return {"average_loan_amount_by_status": mean_amounts.to_dict()}
|
|
@app.get("/principal_amount_patterns")
def principal_amount_patterns():
    """Return the mean ``loan_status`` per age / income / home-ownership group.

    Grouping by several columns produces a MultiIndex, and calling
    ``.to_dict()`` on that result yields tuple keys. Tuples cannot be JSON
    object keys, so the response could not be serialised. The groups are
    therefore returned as a list of flat records instead.

    Returns:
        A dict whose ``demographic_default_rates`` entry is a list of
        records, each holding ``person_age``, ``person_income``,
        ``person_home_ownership`` and the group's ``default_rate`` (the mean
        of ``loan_status`` within the group).
    """
    group_columns = ["person_age", "person_income", "person_home_ownership"]
    default_rates = (
        df.groupby(group_columns)["loan_status"]
        .mean()
        # Move the group keys back into ordinary columns so every row
        # becomes a self-describing, JSON-friendly record.
        .reset_index(name="default_rate")
    )
    return {"demographic_default_rates": default_rates.to_dict(orient="records")}
|
|
@app.get("/credit_history_impact")
def credit_history_impact():
    """Return the mean ``loan_status`` per ``cb_person_cred_hist_length`` value.

    Returns:
        A dict whose ``credit_history_default_rates`` entry maps each
        credit-history length found in the dataset to the mean
        ``loan_status`` of the rows with that length.
    """
    status_by_history = df.groupby("cb_person_cred_hist_length")["loan_status"]
    default_rates = status_by_history.mean()
    return {"credit_history_default_rates": default_rates.to_dict()}
|
|
@app.get("/customer_profile_analysis")
def customer_profile_analysis():
    """Return the mean ``loan_status`` per customer profile.

    A profile is the combination of ``person_age``, ``person_income`` and
    ``person_home_ownership``. Grouping by several columns produces a
    MultiIndex, and calling ``.to_dict()`` on that result yields tuple keys.
    Tuples cannot be JSON object keys, so the response could not be
    serialised. The groups are therefore returned as a list of flat records
    instead.

    Returns:
        A dict whose ``customer_profile_default_rates`` entry is a list of
        records, each holding the three profile columns and the profile's
        ``default_rate`` (the mean of ``loan_status`` within the group).
    """
    profile_columns = ["person_age", "person_income", "person_home_ownership"]
    default_rates = (
        df.groupby(profile_columns)["loan_status"]
        .mean()
        # Flatten the group keys into columns so each row serialises as a
        # plain JSON object.
        .reset_index(name="default_rate")
    )
    return {"customer_profile_default_rates": default_rates.to_dict(orient="records")}
|
|
@app.get("/loan_intent_analysis")
def loan_intent_analysis():
    """Return the mean ``loan_status`` for each ``loan_intent`` value.

    Returns:
        A dict whose ``loan_intent_default_rates`` entry maps each loan
        intent found in the dataset to the mean ``loan_status`` of the rows
        with that intent.
    """
    status_by_intent = df.groupby("loan_intent")["loan_status"]
    default_rates = status_by_intent.mean()
    return {"loan_intent_default_rates": default_rates.to_dict()}
|
|
@app.get("/collection_effectiveness")
def collection_effectiveness():
    """Return the mean ``loan_status`` per ``cb_person_default_on_file`` value.

    Returns:
        A dict whose ``collection_success_rate`` entry maps each
        ``cb_person_default_on_file`` value found in the dataset to the mean
        ``loan_status`` of the rows with that value.
    """
    status_by_prior_default = df.groupby("cb_person_default_on_file")["loan_status"]
    mean_status = status_by_prior_default.mean()
    return {"collection_success_rate": mean_status.to_dict()}
|
|
@app.get("/risk_score_development")
def risk_score_development():
    """Return the mean ``loan_status`` per combination of the risk columns.

    Grouping by several columns produces a MultiIndex, and calling
    ``.to_dict()`` on that result yields tuple keys. Tuples cannot be JSON
    object keys, so the response could not be serialised. The groups are
    therefore returned as a list of flat records instead.

    Returns:
        A dict whose ``risk_scores`` entry is a list of records, each
        holding ``loan_amnt``, ``loan_int_rate``, ``person_age``,
        ``person_income``, ``cb_person_cred_hist_length`` and the group's
        ``default_rate`` (the mean of ``loan_status`` within the group).
    """
    risk_columns = [
        "loan_amnt",
        "loan_int_rate",
        "person_age",
        "person_income",
        "cb_person_cred_hist_length",
    ]
    default_rates = (
        df.groupby(risk_columns)["loan_status"]
        .mean()
        # Flatten the group keys into columns so each row serialises as a
        # plain JSON object.
        .reset_index(name="default_rate")
    )
    return {"risk_scores": default_rates.to_dict(orient="records")}
|
|
@app.post("/predict_loan_risk")
def predict_loan_risk(request: LoanRequest):
    """Score a single loan application with both fitted models.

    Args:
        request: Loan attributes. Only the fields named in ``FEATURES`` are
            passed to the models.

    Returns:
        A dict with ``predicted_risk_category`` (the random-forest class
        prediction as an int) and ``default_probability`` (the
        logistic-regression probability for the second class as a float).
    """
    # Build a one-row frame from FEATURES so the column names and their order
    # always match what the models were fitted on. A bare nested list drops
    # the feature names (scikit-learn warns about that on every call) and
    # duplicates the column order by hand.
    input_data = pd.DataFrame(
        [[getattr(request, name) for name in FEATURES]],
        columns=FEATURES,
    )

    risk_class = classifier.predict(input_data)[0]
    # Column 1 of predict_proba is the second entry of ``regressor.classes_``.
    # NOTE(review): presumably that is the label 1 (default) - confirm.
    risk_prob = regressor.predict_proba(input_data)[0][1]

    # Cast the numpy scalars to built-in types for the JSON response.
    return {"predicted_risk_category": int(risk_class), "default_probability": float(risk_prob)}
|
|
|
|