I am currently using the sklearn KMeans algorithm to cluster my data. This works well, and I can plot the centroids and data points. At the same time, I am also plotting a grid.
I would now like to use the grid to simplify the plot by coloring each grid cell with the color of the cluster (centroid) to which most of the data points in that cell belong.
Unfortunately, I have run out of ideas on how to approach this. Iterating through the complete dataset for each grid cell takes far too long with the amount of data I have. Does anyone have an idea for a more efficient approach? Thanks.
import time
import numpy as np
import pandas as pd
from itertools import product
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from collections import Counter
# Column names of the two features that are clustered and plotted.
xAxisDataName = 'testProp'
yAxisDataName = 'testDelay'

# Number of clusters, defined once so the model and any later per-cluster
# loop always agree.
k = 5

# Load the data set and keep only the two feature columns.
df = pd.read_csv("myData.csv")
inputData = df.loc[:, [xAxisDataName, yAxisDataName]]

# The timer starts after the CSV has been read, so the reported time does
# not include the file load.
start_time_calc = time.time()

# Bring both features onto a comparable scale before clustering.
scaler = StandardScaler()
scaled_Input = scaler.fit_transform(inputData)

# Fit the model a single time. A fixed random_state makes the cluster
# assignment (and therefore the plot colors) reproducible between runs.
kmeans = KMeans(n_clusters=k, random_state=0).fit(scaled_Input)
# `cluster` is kept as a second name for the same fitted model.
cluster = kmeans
labels = kmeans.labels_
centroids = kmeans.cluster_centers_
# Enable automatic tight layout *before* the figure is created: the setting
# is read when a figure is constructed, so changing it afterwards would not
# affect this figure.
plt.rcParams["figure.autolayout"] = True
plt.figure(figsize=(15, 15))

for i in range(k):
    # Observations assigned to cluster i (boolean mask over the label array).
    ds = scaled_Input[labels == i]
    plt.plot(ds[:, 0], ds[:, 1], 'o')
    # Mark the centroid of cluster i with a large, bold black cross.
    lines = plt.plot(centroids[i, 0], centroids[i, 1], 'kx')
    plt.setp(lines, ms=15.0, mew=2.0)

print("--- KMeans and Plot Time: %s seconds ---" %
      (time.time() - start_time_calc))
# Show grid corners: one "+" marker at every intersection of the current
# x and y tick positions.
ax = plt.gca()
xTicks = ax.get_xticks()
yTicks = ax.get_yticks()
# Build all (x, y) tick combinations at once and draw them with a single
# scatter call instead of creating one artist per grid corner.
gridX, gridY = np.meshgrid(xTicks, yTicks)
plt.scatter(gridX.ravel(), gridY.ravel(), marker="+", c="k")
# Measure how long the final display step takes.
# NOTE(review): if plt.show() blocks until the window is closed, the reported
# time also includes how long the window stayed open — confirm for the
# backend in use.
start_time_render = time.time()
plt.show()
render_seconds = time.time() - start_time_render
print("--- Render Time: %s seconds ---" % render_seconds)