Anomaly Detection with Machine Learning | by Kagumire Su... https://medium.com/mlearning-ai/anomaly-detection-with-ma...
1 of 12 10/14/2021, 10:16 AM
Anomaly Detection with Machine Learning | by Kagumire Su... https://medium.com/mlearning-ai/anomaly-detection-with-ma...
2 of 12 10/14/2021, 10:16 AM
Anomaly Detection with Machine Learning | by Kagumire Su... https://medium.com/mlearning-ai/anomaly-detection-with-ma...
3 of 12 10/14/2021, 10:16 AM
Anomaly Detection with Machine Learning | by Kagumire Su... https://medium.com/mlearning-ai/anomaly-detection-with-ma...
4 of 12 10/14/2021, 10:16 AM
Anomaly Detection with Machine Learning | by Kagumire Su... https://medium.com/mlearning-ai/anomaly-detection-with-ma...
5 of 12 10/14/2021, 10:16 AM
Anomaly Detection with Machine Learning | by Kagumire Su... https://medium.com/mlearning-ai/anomaly-detection-with-ma...
6 of 12 10/14/2021, 10:16 AM
Anomaly Detection with Machine Learning | by Kagumire Su... https://medium.com/mlearning-ai/anomaly-detection-with-ma...
7 of 12 10/14/2021, 10:16 AM
Anomaly Detection with Machine Learning | by Kagumire Su... https://medium.com/mlearning-ai/anomaly-detection-with-ma...
8 of 12 10/14/2021, 10:16 AM
Anomaly Detection with Machine Learning | by Kagumire Su... https://medium.com/mlearning-ai/anomaly-detection-with-ma...
9 of 12 10/14/2021, 10:16 AM
Anomaly Detection with Machine Learning | by Kagumire Su... https://medium.com/mlearning-ai/anomaly-detection-with-ma...
10 of 12 10/14/2021, 10:16 AM
Anomaly Detection with Machine Learning | by Kagumire Su... https://medium.com/mlearning-ai/anomaly-detection-with-ma...
1 %matplotlib inline
2 import matplotlib
3 import matplotlib.pyplot as plt
4 import numpy as np
5 import pandas as pd
6
7 from sklearn.datasets import load_wine
8
9 from sklearn.preprocessing import StandardScaler
10 from sklearn.decomposition import PCA
11 from sklearn.pipeline import Pipeline
12 # load data set
13 data = load_wine()
14 X = data['data']
15
16 # truncate to two variables
17 pipe = Pipeline([('scaler', StandardScaler()), ('dim_red', PCA(n_components=2))])
18 Xt = pipe.fit_transform(X)
19
20 # generate novel/outlier points
21 np.random.seed(1)
22 theta = 2*np.pi*np.random.random(10)
23 X_test = np.vstack((4*np.cos(theta) + np.random.random(10), 4*np.sin(theta) + np.random.random(10)))
24
25 plt.scatter(*Xt.T)
26 plt.scatter(*X_test, c='red')
27 plt.xlabel('$\\xi_1$')
28 plt.ylabel('$\\xi_2$');
29 plt.legend(["training set", "novel/outliers"]);
30
31 from sklearn.svm import OneClassSVM
32 from ipywidgets import interact, FloatSlider
33
34 def plot_one_class_svm(X, X_test):
35 def plotter(nu=0.95):
36 clf = OneClassSVM(nu=nu, gamma='auto')
37 clf.fit(X)
38 y_pred = clf.predict(X)
39 fp_rate = (y_pred == -1).sum()/len(X)
40
41 X1, X2 = np.meshgrid(np.linspace(-5, 5), np.linspace(-5, 5))
42 y_proba = clf.decision_function(np.hstack((X1.reshape(-1, 1), X2.reshape(-1, 1))))
43 Z = y_proba.reshape(50, 50)
11 of 12 10/14/2021, 10:16 AM
Anomaly Detection with Machine Learning | by Kagumire Su... https://medium.com/mlearning-ai/anomaly-detection-with-ma...
46 plt.contourf(X1, X2, Z, levels=np.linspace(Z.min(), 0, 7), cmap=plt.cm
47 plt.colorbar()
48 a = plt.contour(X1, X2, Z, levels=[0], linewidths=2, colors='black')
49 b1 = plt.scatter(*X.T, c='blue')
50 b2 = plt.scatter(*X_test, c='red')
51 plt.title("false positive rate: {:g}".format(fp_rate))
52 plt.legend([a.collections[0], b1, b2], ["boundary", " true inliers", "true outliers"],
53 loc="lower left")
54 return plotter
55
56 nu_slider = FloatSlider(min=0.01, max=0.99, step=0.01, value=0.5, description
57 interact(plot_one_class_svm(Xt, X_test), nu=nu_slider);
58
59 def plot_isolation_forest(X, X_test):
60 def plotter(contamination=0.2):
61 clf = IsolationForest(n_estimators=100, contamination=contamination)
62 clf.fit(X)
63
64 y_pred = clf.predict(X)
65 outlier_rate = (y_pred == -1).sum()/len(X)
66
67 X1, X2 = np.meshgrid(np.linspace(-5, 5), np.linspace(-5, 5))
68 y_proba = clf.decision_function(np.hstack((X1.reshape(-1, 1), X2.reshape(-1, 1))))
69 Z = y_proba.reshape(50, 50)
70
12 of 12 10/14/2021, 10:16 AM