GlassClassification
September 2, 2017
In [1]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
In [2]: data = pd.read_csv("GLASS/glass.csv")  # load the dataset from a path relative to the notebook's working directory
In [3]: data.shape  # (rows, columns) of the freshly loaded frame
Out[3]: (214, 10)
In [5]: data.head(10)  # preview the first 10 rows; the last column, Type, is used as the label later on
Out[5]: RI Na Mg Al Si K Ca Ba Fe Type
0 1.52101 13.64 4.49 1.10 71.78 0.06 8.75 0.0 0.00 1
1 1.51761 13.89 3.60 1.36 72.73 0.48 7.83 0.0 0.00 1
2 1.51618 13.53 3.55 1.54 72.99 0.39 7.78 0.0 0.00 1
3 1.51766 13.21 3.69 1.29 72.61 0.57 8.22 0.0 0.00 1
4 1.51742 13.27 3.62 1.24 73.08 0.55 8.07 0.0 0.00 1
5 1.51596 12.79 3.61 1.62 72.97 0.64 8.07 0.0 0.26 1
6 1.51743 13.30 3.60 1.14 73.09 0.58 8.17 0.0 0.00 1
7 1.51756 13.15 3.61 1.05 73.24 0.57 8.24 0.0 0.00 1
8 1.51918 14.04 3.58 1.37 72.08 0.56 8.30 0.0 0.00 1
9 1.51755 13.00 3.60 1.36 72.99 0.57 8.40 0.0 0.11 1
In [6]: data.dtypes  # check per-column dtypes before modelling
Out[6]: RI float64
Na float64
Mg float64
Al float64
Si float64
K float64
Ca float64
Ba float64
Fe float64
Type int64
dtype: object
1
In [7]: data.shape  # row/column count before dropping rows with missing values
Out[7]: (214, 10)
In [8]: data = data.dropna(axis=0)
# Drop every row that contains at least one missing value. The result is
# rebound to `data` instead of using inplace=True: in-place mutation brings
# no speed benefit, prevents method chaining, and silently changes a frame
# that earlier cells already displayed.
In [9]: data.head()  # sanity-check the frame after the drop
Out[9]: RI Na Mg Al Si K Ca Ba Fe Type
0 1.52101 13.64 4.49 1.10 71.78 0.06 8.75 0.0 0.0 1
1 1.51761 13.89 3.60 1.36 72.73 0.48 7.83 0.0 0.0 1
2 1.51618 13.53 3.55 1.54 72.99 0.39 7.78 0.0 0.0 1
3 1.51766 13.21 3.69 1.29 72.61 0.57 8.22 0.0 0.0 1
4 1.51742 13.27 3.62 1.24 73.08 0.55 8.07 0.0 0.0 1
In [10]: data.shape  # same shape as before the dropna cell, i.e. no rows were removed
Out[10]: (214, 10)
In [11]: from sklearn.metrics import accuracy_score
from sklearn.cross_validation import train_test_split
/home/siddharth/Documents/Python_virtual/anaconda/envs/py35/lib/python3.5/site-pack
"This module will be removed in 0.20.", DeprecationWarning)
In [12]: target = data['Type']  # label column to predict
In [13]: data = data.drop("Type", axis=1)
# Keep only the feature columns. The result is rebound rather than dropped
# with inplace=True, so the frame that `target` was taken from is not mutated.
# NOTE: after this cell `data` no longer has a "Type" column, so cell 12
# cannot be re-run without reloading the CSV first.
In [14]: X_train, X_test,y_train, y_test = train_test_split(data, target,test_size=
In [15]: X_train.head()  # preview the training features; the row labels are the original index values from `data`
Out[15]: RI Na Mg Al Si K Ca Ba Fe
145 1.51839 12.85 3.67 1.24 72.57 0.62 8.68 0.0 0.35
152 1.51779 13.64 3.65 0.65 73.00 0.06 8.93 0.0 0.00
20 1.51750 12.82 3.55 1.49 72.75 0.54 8.52 0.0 0.19
91 1.51605 12.90 3.44 1.45 73.06 0.44 8.27 0.0 0.00
62 1.52172 13.51 3.86 0.88 71.79 0.23 9.54 0.0 0.11
In [16]: from id3 import Id3Estimator
from id3 import export_graphviz
In [17]: estimator = Id3Estimator()  # ID3 decision tree with default hyperparameters (echoed in the repr of the fit cell)
In [18]: estimator.fit(X_train, y_train)  # train on the training split only; the test split is held out for evaluation
Out[18]: Id3Estimator(gain_ratio=False, is_repeating=False, max_depth=None,
min_entropy_decrease=0.0, min_samples_split=2, prune=False)
2
In [19]: export_graphviz(estimator.tree_, 'tree.dot', X_train.columns)  # write the fitted tree to tree.dot; the column names are presumably used as feature labels (confirm against the id3 docs)
Out[19]: <_io.TextIOWrapper name='tree.dot' mode='w' encoding='utf8'>
In [20]: pred = estimator.predict(X_test)  # predicted labels for the held-out test split
In [21]: acc = accuracy_score(y_test, pred)
# accuracy_score expects (y_true, y_pred). Accuracy itself is symmetric, so the
# value is unchanged, but the documented order stops the arguments being
# silently swapped if an asymmetric metric (precision, recall, ...) is used here.
In [24]: print("Accuracy Of The Classifier=",acc )
Accuracy Of The Classifier= 0.662790697674
In [ ]: