| import numpy as np |
| import matplotlib.pyplot as plt |
| from sklearn.datasets import make_blobs |
| from sklearn.linear_model import LogisticRegression |
| from sklearn.inspection import DecisionBoundaryDisplay |
|
|
| import gradio as gr |
|
|
| import matplotlib |
| matplotlib.use('agg') |
|
|
def create_dataset(num_samples):
    """Generate a sheared 2-D, three-class synthetic dataset.

    Three Gaussian blobs are drawn around fixed centers and then passed
    through a linear transformation so the classes are not axis-aligned.

    Parameters
    ----------
    num_samples : int
        Total number of points generated across the three classes.

    Returns
    -------
    X : ndarray of shape (num_samples, 2)
        Transformed feature matrix.
    y : ndarray of shape (num_samples,)
        Integer class labels in {0, 1, 2}.
    """
    blob_centers = [[-5, 0], [0, 1.5], [5, -1]]
    features, labels = make_blobs(
        n_samples=num_samples, centers=blob_centers, random_state=42
    )

    # Shear the blobs so the decision boundaries are non-trivial.
    shear = [[0.4, 0.2], [-0.4, 1.2]]
    features = features @ shear

    return features, labels
|
|
def train_plot(multi_class, num_samples):
    """Fit a LogisticRegression with the given multi-class strategy and plot it.

    Parameters
    ----------
    multi_class : str
        Strategy passed to ``LogisticRegression`` ("multinomial" or "ovr").
        NOTE(review): the ``multi_class`` constructor parameter is deprecated
        in recent scikit-learn releases — confirm the pinned sklearn version
        still accepts it.
    num_samples : int
        Number of points to generate for the training set.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Figure showing the decision surface, the training points, and one
        dashed separating hyperplane per class.
    score : float
        Mean training accuracy, ``clf.score(X, y)``.
    """
    X, y = create_dataset(num_samples)
    clf = LogisticRegression(
        solver="sag", max_iter=100, random_state=42, multi_class=multi_class
    ).fit(X, y)

    fig, ax = plt.subplots()
    # Background: predicted-class regions over the feature plane.
    DecisionBoundaryDisplay.from_estimator(
        clf, X, response_method="predict", cmap=plt.cm.Paired, ax=ax
    )
    ax.set_title("Decision surface of LogisticRegression (%s)" % multi_class)
    ax.axis("tight")

    # Overlay the training points, one fixed color per class.
    # (The original passed cmap= alongside a fixed color string; the colormap
    # is ignored in that case, so it has been dropped.)
    colors = "bry"
    for cls, color in zip(clf.classes_, colors):
        idx = np.where(y == cls)
        ax.scatter(X[idx, 0], X[idx, 1], c=color, edgecolor="black", s=20)

    # Draw each class's separating hyperplane across the current x-range.
    xmin, xmax = ax.get_xlim()
    coef = clf.coef_
    intercept = clf.intercept_

    def plot_hyperplane(c, color):
        # Solve w0*x + w1*y + b = 0 for y at a given x.
        def line(x0):
            return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]

        ax.plot([xmin, xmax], [line(xmin), line(xmax)], ls="--", color=color)

    for cls, color in zip(clf.classes_, colors):
        plot_hyperplane(cls, color)

    ax.set_xlabel("x")
    ax.set_ylabel("y")

    return fig, clf.score(X, y)
|
|
def plot_both(num_samples):
    """Train and plot both multi-class strategies on the same dataset size.

    Parameters
    ----------
    num_samples : int
        Number of synthetic points to generate for each training run.

    Returns
    -------
    tuple
        ``(multinomial_fig, ovr_fig, multinomial_score, ovr_score)``.
    """
    fig_multi, acc_multi = train_plot("multinomial", num_samples)
    fig_ovr, acc_ovr = train_plot("ovr", num_samples)
    return fig_multi, fig_ovr, acc_multi, acc_ovr
|
|
# UI copy shown at the top of the demo page.
# Fix: "regresion" -> "regression" (user-facing typo).
title = "Plot multinomial and One-vs-Rest Logistic Regression"
description = """
The demo shows the difference between multinomial and One-vs-Rest Logistic Regression in a \
two-dimensional synthetic dataset.

The dataset is generated around three cluster centers to simulate three different classes. \
Two different types of logistic regression models are fit to the synthetic data: a multinomial \
and a one-vs-rest logistic regression. The figures show scatter plots of the data, the decision \
boundaries of each logistic regression model and the decision surfaces in different colors per respective class. \
The hyperplanes corresponding to the three One-vs-Rest (OVR) classifiers are represented by the \
dashed lines. \


The mean accuracy of the training data and labels for each classifier is given underneath each respective plot.
"""
# --- Gradio UI ----------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    # Dataset-size control; changing it retrains both models.
    num_samples = gr.Slider(
        minimum=500, maximum=2000, step=500, value=500, label="Number of samples"
    )

    with gr.Row():
        plot = gr.Plot()
        plot2 = gr.Plot()
    with gr.Row():
        score1 = gr.Textbox(label="Multinomial score")
        score2 = gr.Textbox(label="OVR score")

    num_samples.change(
        fn=plot_both, inputs=[num_samples], outputs=[plot, plot2, score1, score2]
    )

# Fix: `launch(enable_queue=True)` was deprecated and later removed in Gradio;
# enabling the request queue via `demo.queue()` is the supported equivalent.
demo.queue()
demo.launch()
|
|
|
|