Steps in the DBSCAN algorithm
1. Classify each point as a core, boundary, or noise point.
2. Discard the noise points.
3. Assign a cluster to each core point.
4. Color all the density-connected points of a core point with that core point's cluster.
5. Color each boundary point according to its nearest core point.
import numpy as np
from sklearn.datasets import make_blobs
from matplotlib import pyplot as plt
from pandas import DataFrame
# Build a reproducible synthetic data set: 500 two-feature samples generated
# around 3 centers. The second return value of make_blobs is discarded.
X, _ = make_blobs(
    n_samples=500,
    centers=3,
    n_features=2,
    random_state=20,
)

# Scatter-plot the raw points before any clustering is applied.
df = DataFrame({"x": X[:, 0], "y": X[:, 1]})
fig, ax = plt.subplots(figsize=(8, 8))
df.plot(kind="scatter", x="x", y="y", ax=ax)
ax.set_xlabel("x_1")
ax.set_ylabel("x_2")
plt.show()
from sklearn.cluster import DBSCAN
# Fit DBSCAN on the sample points: neighbourhood radius eps=1, and at least
# 5 samples in a neighbourhood for a point to count as a core point.
clustering = DBSCAN(min_samples=5, eps=1)
clustering.fit(X)

# One label per row of X.
cluster = clustering.labels_

# Number of distinct labels found. scikit-learn labels noise as -1, so this
# count includes the noise "cluster" whenever any point is marked as noise.
len(np.unique(cluster))
def show_clusters(X, cluster):
    """Scatter-plot 2-D points, drawing each cluster label in its own color.

    Args:
        X: Array indexable as ``X[:, 0]`` and ``X[:, 1]``; those two columns
            are used as the x and y coordinates.
        cluster: One label per row of ``X``. Labels -1 and 0 to 3 keep their
            fixed colors; any other label falls back to matplotlib's default
            color cycle instead of raising ``KeyError``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    df = DataFrame(dict(x=X[:, 0], y=X[:, 1], label=cluster))

    # Fixed palette for the labels the original code supported
    # (-1 is the label scikit-learn's DBSCAN gives to noise points).
    colors = {-1: 'red', 0: 'blue', 1: 'orange', 2: 'green', 3: 'yellow'}

    _, ax = plt.subplots(figsize=(8, 8))
    for position, (key, group) in enumerate(df.groupby('label')):
        # "C<n>" selects the n-th color of matplotlib's default property
        # cycle, so labels outside the fixed palette still get a valid color.
        color = colors.get(key, f"C{position % 10}")
        group.plot(ax=ax, kind='scatter', x='x', y='y', label=key,
                   color=color)

    # Label the axes once, after every group has been drawn on the same axes.
    ax.set_xlabel('x_1')
    ax.set_ylabel('x_2')
    plt.show()
# Plot the sample points colored by the labels DBSCAN assigned to them.
show_clusters(X, cluster)