10/29/23, 11:00 PM lab3.ipynb - Colaboratory
IMPORT LIBRARIES
1 import pandas as pd
2 import numpy as np
3 import seaborn as sns
4 import matplotlib.pyplot as plt
1 df=pd.read_csv("Churn_Modelling.csv")
1 df.head()
RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Bala
0 1 15634602 Hargrave 619 France Female 42 2
1 2 15647311 Hill 608 Spain Female 41 1 8380
2 3 15619304 Onio 502 France Female 42 8 15966
3 4 15701354 Boni 699 France Female 39 1
4 5 15737888 Mitchell 850 Spain Female 43 2 12551
1 df.shape
(10000, 14)
1 df.describe()
RowNumber CustomerId CreditScore Age Tenure Balan
count 10000.00000 1.000000e+04 10000.000000 10000.000000 10000.000000 10000.00000
mean 5000.50000 1.569094e+07 650.528800 38.921800 5.012800 76485.88928
std 2886.89568 7.193619e+04 96.653299 10.487806 2.892174 62397.40520
min 1.00000 1.556570e+07 350.000000 18.000000 0.000000 0.00000
25% 2500.75000 1.562853e+07 584.000000 32.000000 3.000000 0.00000
50% 5000.50000 1.569074e+07 652.000000 37.000000 5.000000 97198.54000
75% 7500.25000 1.575323e+07 718.000000 44.000000 7.000000 127644.24000
max 10000.00000 1.581569e+07 850.000000 92.000000 10.000000 250898.09000
1 df.columns
Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
'IsActiveMember', 'EstimatedSalary', 'Exited'],
dtype='object')
1 df
RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure
0 1 15634602 Hargrave 619 France Female 42 2
1 2 15647311 Hill 608 Spain Female 41 1
2 3 15619304 Onio 502 France Female 42 8 1
3 4 15701354 Boni 699 France Female 39 1
4 5 15737888 Mitchell 850 Spain Female 43 2 1
... ... ... ... ... ... ... ... ...
9995 9996 15606229 Obijiaku 771 France Male 39 5
9996 9997 15569892 Johnstone 516 France Male 35 10
9997 9998 15584532 Liu 709 France Female 36 7
9998 9999 15682355 Sabbatini 772 Germany Male 42 3
9999 10000 15628319 Walker 792 France Female 28 4 1
10000 rows × 14 columns
https://colab.research.google.com/drive/1Zx8TMXeq4zEzr_v6XR1zI1RWcex3zgBh#printMode=true 1/7
10/29/23, 11:00 PM lab3.ipynb - Colaboratory
1 df.head()
RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Bala
0 1 15634602 Hargrave 619 France Female 42 2
1 2 15647311 Hill 608 Spain Female 41 1 8380
2 3 15619304 Onio 502 France Female 42 8 15966
3 4 15701354 Boni 699 France Female 39 1
4 5 15737888 Mitchell 850 Spain Female 43 2 12551
1 df.tail()
RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure
9995 9996 15606229 Obijiaku 771 France Male 39 5
9996 9997 15569892 Johnstone 516 France Male 35 10
9997 9998 15584532 Liu 709 France Female 36 7
9998 9999 15682355 Sabbatini 772 Germany Male 42 3
9999 10000 15628319 Walker 792 France Female 28 4 1
# Call head() to preview the first rows; the bare attribute `df.head` only
# displays the bound-method repr instead of returning a preview frame.
df.head()
<bound method NDFrame.head of RowNumber CustomerId Surname CreditScore Geography Gender Age \
0 1 15634602 Hargrave 619 France Female 42
1 2 15647311 Hill 608 Spain Female 41
2 3 15619304 Onio 502 France Female 42
3 4 15701354 Boni 699 France Female 39
4 5 15737888 Mitchell 850 Spain Female 43
... ... ... ... ... ... ... ...
9995 9996 15606229 Obijiaku 771 France Male 39
9996 9997 15569892 Johnstone 516 France Male 35
9997 9998 15584532 Liu 709 France Female 36
9998 9999 15682355 Sabbatini 772 Germany Male 42
9999 10000 15628319 Walker 792 France Female 28
Tenure Balance NumOfProducts HasCrCard IsActiveMember \
0 2 0.00 1 1 1
1 1 83807.86 1 0 1
2 8 159660.80 3 1 0
3 1 0.00 2 0 0
4 2 125510.82 1 1 1
... ... ... ... ... ...
9995 5 0.00 2 1 0
9996 10 57369.61 1 1 1
9997 7 0.00 1 0 1
9998 3 75075.31 2 1 0
9999 4 130142.79 1 1 0
EstimatedSalary Exited
0 101348.88 1
1 112542.58 0
2 113931.57 1
3 93826.63 0
4 79084.10 0
... ... ...
9995 96270.64 0
9996 101699.77 0
9997 42085.58 1
9998 92888.52 1
9999 38190.78 0
[10000 rows x 14 columns]>
# Call tail() to preview the last rows; the bare attribute `df.tail` only
# displays the bound-method repr instead of returning a preview frame.
df.tail()
<bound method NDFrame.tail of RowNumber CustomerId Surname CreditScore Geography Gender Age \
0 1 15634602 Hargrave 619 France Female 42
1 2 15647311 Hill 608 Spain Female 41
2 3 15619304 Onio 502 France Female 42
3 4 15701354 Boni 699 France Female 39
4 5 15737888 Mitchell 850 Spain Female 43
... ... ... ... ... ... ... ...
9995 9996 15606229 Obijiaku 771 France Male 39
9996 9997 15569892 Johnstone 516 France Male 35
9997 9998 15584532 Liu 709 France Female 36
9998 9999 15682355 Sabbatini 772 Germany Male 42
9999 10000 15628319 Walker 792 France Female 28
Tenure Balance NumOfProducts HasCrCard IsActiveMember \
https://colab.research.google.com/drive/1Zx8TMXeq4zEzr_v6XR1zI1RWcex3zgBh#printMode=true 2/7
10/29/23, 11:00 PM lab3.ipynb - Colaboratory
0 2 0.00 1 1 1
1 1 83807.86 1 0 1
2 8 159660.80 3 1 0
3 1 0.00 2 0 0
4 2 125510.82 1 1 1
... ... ... ... ... ...
9995 5 0.00 2 1 0
9996 10 57369.61 1 1 1
9997 7 0.00 1 0 1
9998 3 75075.31 2 1 0
9999 4 130142.79 1 1 0
EstimatedSalary Exited
0 101348.88 1
1 112542.58 0
2 113931.57 1
3 93826.63 0
4 79084.10 0
... ... ...
9995 96270.64 0
9996 101699.77 0
9997 42085.58 1
9998 92888.52 1
9999 38190.78 0
[10000 rows x 14 columns]>
# Feature matrix: remove the target ('Exited') together with the text and
# row-index columns. Each label is listed exactly once (the earlier version
# repeated 'Gender').
# NOTE(review): 'CustomerId' is still kept as a feature; it looks like an
# identifier rather than a predictor -- confirm whether it should be dropped.
x = df.drop(columns=['Geography', 'Gender', 'RowNumber', 'Exited', 'Surname'])
x
CustomerId CreditScore Age Tenure Balance NumOfProducts HasCrCard IsAct
0 15634602 619 42 2 0.00 1 1
1 15647311 608 41 1 83807.86 1 0
2 15619304 502 42 8 159660.80 3 1
3 15701354 699 39 1 0.00 2 0
4 15737888 850 43 2 125510.82 1 1
... ... ... ... ... ... ... ...
9995 15606229 771 39 5 0.00 2 1
9996 15569892 516 35 10 57369.61 1 1
9997 15584532 709 36 7 0.00 1 0
9998 15682355 772 42 3 75075.31 2 1
9999 15628319 792 28 4 130142.79 1 1
10000 rows × 9 columns
1 df.head()
RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Bala
0 1 15634602 Hargrave 619 France Female 42 2
1 2 15647311 Hill 608 Spain Female 41 1 8380
2 3 15619304 Onio 502 France Female 42 8 15966
3 4 15701354 Boni 699 France Female 39 1
4 5 15737888 Mitchell 850 Spain Female 43 2 12551
1 y=df["Exited"]
2
1 y
0 1
1 0
2 1
3 0
4 0
..
9995 0
9996 0
9997 1
9998 1
9999 0
Name: Exited, Length: 10000, dtype: int64
https://colab.research.google.com/drive/1Zx8TMXeq4zEzr_v6XR1zI1RWcex3zgBh#printMode=true 3/7
10/29/23, 11:00 PM lab3.ipynb - Colaboratory
1 sns.countplot(x=y)
<Axes: xlabel='Exited', ylabel='count'>
Feature Scaling
1 from sklearn.preprocessing import MinMaxScaler
2 o = MinMaxScaler()
1 X_scale=o.fit_transform(x)
2 X_scale
array([[0.27561613, 0.538 , 0.32432432, ..., 1. , 1. ,
0.50673489],
[0.32645436, 0.516 , 0.31081081, ..., 0. , 1. ,
0.56270874],
[0.21442143, 0.304 , 0.32432432, ..., 1. , 0. ,
0.56965435],
...,
[0.07532731, 0.718 , 0.24324324, ..., 0. , 1. ,
0.21039009],
[0.46663653, 0.844 , 0.32432432, ..., 1. , 0. ,
0.46442905],
[0.25048302, 0.884 , 0.13513514, ..., 1. , 0. ,
0.19091423]])
Train/Test Split (hold-out validation)
1 from sklearn.model_selection import train_test_split
2 X_train,X_test,Y_train,Y_test = train_test_split(X_scale,y, random_state=2,test_size=0.25)
1 X_train.shape
(7500, 9)
1 X_test.shape
(2500, 9)
Initialize and build the model
1 from sklearn.neural_network import MLPClassifier
2 ann=MLPClassifier(hidden_layer_sizes=(100,100,100), max_iter=100,activation="relu", random_state=2)
1 ann.fit(X_train,Y_train)
https://colab.research.google.com/drive/1Zx8TMXeq4zEzr_v6XR1zI1RWcex3zgBh#printMode=true 4/7
10/29/23, 11:00 PM lab3.ipynb - Colaboratory
C:\ProgramData\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_percept
warnings.warn(
▾ MLPClassifier
MLPClassifier(hidden_layer_sizes=(100, 100, 100), max_iter=100, random_state=2)
1 y_pred=ann.predict(X_test)
1 y_pred
array([0, 0, 0, ..., 0, 0, 1], dtype=int64)
1 from sklearn.metrics import ConfusionMatrixDisplay,accuracy_score,classification_report
1 accuracy_score(Y_test,y_pred)
0.8424
1 print(classification_report(Y_test,y_pred))
precision recall f1-score support
0 0.87 0.94 0.91 2015
1 0.64 0.43 0.51 485
accuracy 0.84 2500
macro avg 0.76 0.68 0.71 2500
weighted avg 0.83 0.84 0.83 2500
1 ConfusionMatrixDisplay.from_predictions(Y_test,y_pred)
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x2201a10c6d0>
1 !pip install imbalanced-learn
Defaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: imbalanced-learn in c:\programdata\anaconda3\lib\site-packages (0.10.1)
Requirement already satisfied: scikit-learn>=1.0.2 in c:\programdata\anaconda3\lib\site-packages (from imbalanced-learn) (1.2.1)
Requirement already satisfied: numpy>=1.17.3 in c:\programdata\anaconda3\lib\site-packages (from imbalanced-learn) (1.23.5)
Requirement already satisfied: scipy>=1.3.2 in c:\programdata\anaconda3\lib\site-packages (from imbalanced-learn) (1.10.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\programdata\anaconda3\lib\site-packages (from imbalanced-learn) (2.2.0)
Requirement already satisfied: joblib>=1.1.1 in c:\programdata\anaconda3\lib\site-packages (from imbalanced-learn) (1.1.1)
1 from imblearn.over_sampling import RandomOverSampler
2 ros= RandomOverSampler(random_state=0)
3
# Oversample the *scaled* features so the balanced model is trained on the
# same MinMax-scaled inputs as the first model. Resampling the raw frame `x`
# fed unscaled columns to the MLP, and the recorded run then appears to
# predict a single class (accuracy ~0.50).
# NOTE(review): oversampling before train_test_split lets duplicated minority
# rows land in both the train and test sets; consider resampling only the
# training split.
x_bal, y_bal = ros.fit_resample(X_scale, y)
1 sns.countplot(x=y_bal)
https://colab.research.google.com/drive/1Zx8TMXeq4zEzr_v6XR1zI1RWcex3zgBh#printMode=true 5/7
10/29/23, 11:00 PM lab3.ipynb - Colaboratory
<Axes: xlabel='Exited', ylabel='count'>
1 from sklearn.model_selection import train_test_split
2 X_train,X_test,Y_train,Y_test = train_test_split(x_bal,y_bal, random_state=2,test_size=0.25)
1 X_train.shape
2
(11944, 9)
1 X_test.shape
(3982, 9)
1 from sklearn.neural_network import MLPClassifier
2 ann=MLPClassifier(hidden_layer_sizes=(100,100,100), max_iter=100,activation="relu", random_state=2)
1 ann.fit(X_train,Y_train)
▾ MLPClassifier
MLPClassifier(hidden_layer_sizes=(100, 100, 100), max_iter=100, random_state=2)
1 y_pred=ann.predict(X_test)
1 y_pred
2
array([1, 1, 1, ..., 1, 1, 1], dtype=int64)
1 from sklearn.metrics import ConfusionMatrixDisplay,accuracy_score,classification_report
1 accuracy_score(Y_test,y_pred)
0.5012556504269211
https://colab.research.google.com/drive/1Zx8TMXeq4zEzr_v6XR1zI1RWcex3zgBh#printMode=true 6/7
10/29/23, 11:00 PM lab3.ipynb - Colaboratory
https://colab.research.google.com/drive/1Zx8TMXeq4zEzr_v6XR1zI1RWcex3zgBh#printMode=true 7/7