| from matplotlib import pyplot as plt |
| from sklearn.cluster import KMeans |
| from sklearn.metrics import silhouette_score |
|
|
|
|
def calculate_wcss(data, max_clusters=10):
    """Fit K-Means for k = 1..max_clusters and collect each model's inertia.

    The resulting list is the input for an elbow plot.

    Args:
        data: Feature matrix, passed unchanged to ``KMeans.fit``.
        max_clusters: Largest number of clusters to try (inclusive).
            Defaults to 10, the upper bound that used to be hard-coded.

    Returns:
        A list in which element ``i`` is the ``inertia_`` of the model
        fitted with ``i + 1`` clusters.

    Raises:
        ValueError: If ``max_clusters`` is smaller than 1.
    """
    if max_clusters < 1:
        raise ValueError("max_clusters must be at least 1")

    wcss = []
    for n_clusters in range(1, max_clusters + 1):
        # Fixed random_state keeps the curve reproducible between runs.
        model = KMeans(
            n_clusters=n_clusters,
            init='k-means++',
            max_iter=300,
            n_init=10,
            random_state=0,
        )
        model.fit(data)
        wcss.append(model.inertia_)
    return wcss
|
|
def calculate_silhouette_scores(data, max_clusters=10):
    """Compute the silhouette score of K-Means for k = 2..max_clusters.

    Args:
        data: Feature matrix, passed unchanged to ``KMeans.fit`` and
            ``silhouette_score``.
        max_clusters: Largest number of clusters to try (inclusive).
            Defaults to 10, the upper bound that used to be hard-coded.

    Returns:
        A list in which element ``i`` is the Euclidean silhouette score of
        the model fitted with ``i + 2`` clusters.

    Raises:
        ValueError: If ``max_clusters`` is smaller than 2.
    """
    if max_clusters < 2:
        raise ValueError("max_clusters must be at least 2")

    scores = []
    # The scan starts at 2: silhouette_score needs at least two labels.
    for n_clusters in range(2, max_clusters + 1):
        model = KMeans(
            n_clusters=n_clusters,
            init='k-means++',
            max_iter=300,
            n_init=10,
            random_state=0,
        )
        model.fit(data)
        scores.append(silhouette_score(data, model.labels_, metric='euclidean'))
    return scores
|
|
def plot_elbow(wcss):
    """Plot WCSS against the number of clusters and show the figure.

    Args:
        wcss: Sequence of WCSS values; element ``i`` is plotted at
            ``i + 1`` clusters.
    """
    # Derive the x-axis from the data instead of assuming exactly ten
    # values, so x and y always have matching lengths.
    cluster_counts = range(1, len(wcss) + 1)
    plt.plot(cluster_counts, wcss)
    plt.title('Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('WCSS')
    plt.show()
|
|
def get_optimal_clusters_silhouette(scores, min_clusters=2):
    """Return the cluster count whose silhouette score is highest.

    Also prints the chosen value to stdout.

    Args:
        scores: Iterable of silhouette scores, where the first element
            belongs to ``min_clusters`` clusters, the second to
            ``min_clusters + 1``, and so on.
        min_clusters: Cluster count that the first score corresponds to.
            Defaults to 2, the offset that used to be hard-coded.

    Returns:
        The number of clusters with the maximum score. On ties the smallest
        such cluster count wins.

    Raises:
        ValueError: If ``scores`` is empty.
    """
    # Materialise once so generators, tuples and arrays work like lists.
    scores = list(scores)
    if not scores:
        raise ValueError("scores must contain at least one silhouette score")

    optimal_clusters = scores.index(max(scores)) + min_clusters
    print(f"Optimal number of clusters: {optimal_clusters}")
    return optimal_clusters
|
|
def fit_kmeans(data, n_clusters):
    """Fit K-Means and store each row's cluster label in ``data['cluster']``.

    Args:
        data: Feature table; presumably a pandas DataFrame, since a
            ``'cluster'`` column is assigned to it (confirm with callers).
            It is modified in place.
        n_clusters: Number of clusters to fit.

    Returns:
        A ``(kmeans, data)`` tuple: the fitted estimator and the same
        ``data`` object, now carrying the ``'cluster'`` column.
    """
    # Leave out labels written by an earlier call; otherwise a second run on
    # the same table would cluster on the old 'cluster' column as a feature.
    features = data.drop(columns=['cluster'], errors='ignore')

    # Same hyperparameters as the WCSS / silhouette scans, so the final model
    # matches the one that was evaluated when choosing n_clusters.
    kmeans = KMeans(
        n_clusters=n_clusters,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    )
    data['cluster'] = kmeans.fit_predict(features)
    return kmeans, data