1.
Implement and demonstrate the FIND-S algorithm for finding the most specific
hypothesis based on a given set of training data samples. Read the training data
from a .CSV file.
trainingdata.csv(IN EXCELSHEET CSV FORMAT)
Sky         airTemp     humidity    wind        water       forecast    enjoySport
Sunny       Warm        Normal      Strong      Warm        Same        Yes
Sunny       Warm        High        Strong      Warm        Same        Yes
Rainy       Cold        High        Strong      Warm        Change      No
Sunny       Warm        High        Strong      Cool        Change      Yes
INPUT
import csv
hypo = ['%','%','%','%','%','%'];
with open('trainingdata.csv') as csv_file:
    readcsv = csv.reader(csv_file, delimiter=',')
    print(readcsv)
    data = []
    print("\nThe given training examples are:")
    for row in readcsv:
        print(row)
        if row[len(row)-1].upper() == "YES":
            data.append(row)
print("\nThe positive examples are:");
for x in data:
    print(x);
print("\n");
TotalExamples = len(data);
i=0;
j=0;
k=0;
print("The steps of the Find-s algorithm are :\n",hypo);
list = [];
p=0;
d=len(data[p])-1;
for j in range(d):
     list.append(data[i][j]);
hypo=list;
i=1;
for i in range(TotalExamples):
     for k in range(d):
         if hypo[k]!=data[i][k]:
             hypo[k]='?';
             k=k+1;
         else:
             hypo[k];
     print(hypo);
i=i+1;
print("\nThe maximally specific Find-s hypothesis for the given training
examples is :");
list=[];
for i in range(d):
    list.append(hypo[i]);
print(list);
OUTPUT
<_csv.reader object at 0x0000001781862BA8>
The given training examples are:
['sky', 'airTemp', 'humidity', 'wind', 'water', 'forecast', 'enjoySport']
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']
The positive examples are:
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']
The maximally specific Find-s hypothesis for the given training examples is :
['Sunny', 'Warm', '?', 'Strong', '?', '?']
The steps of the Find-s algorithm are :
 ['%', '%', '%', '%', '%', '%']
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
['Sunny', 'Warm', '?', 'Strong', '?', '?']
2. For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of the
set of all hypotheses consistent with the training examples.
trainingdata.csv(IN EXCELSHEET CSV FORMAT)
 Sky         airTemp     humidity    wind        water       forecast     enjoySport
 Sunny       Warm        Normal      Strong      Warm        Same         Yes
 Sunny       Warm        High        Strong      Warm        Same         Yes
 Rainy       Cold        High        Strong      Warm        Change       No
 Sunny       Warm        High        Strong      Cool        Change       Yes
INPUT
import numpy as np
import pandas as pd
# Loading Data from a CSV File
data = pd.DataFrame(data=pd.read_csv('trainingdata.csv'))
print(data)
# Separating concept features from Target
concepts = np.array(data.iloc[:,0:-1])
print(concepts)
# Isolating target into a separate DataFrame
# copying last column to target array
target = np.array(data.iloc[:,-1])
print(target)
def learn(concepts, target):
    '''
    learn() function implements the learning method of the Candidate
elimination algorithm.
    Arguments:
        concepts - a data frame with all the features
        target - a data frame with corresponding output values
    '''
    # Initialise S0 with the first instance from concepts
    # .copy() makes sure a new list is created instead of just pointing to the
same memory location
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print(specific_h)
    #h=["#" for i in range(0,5)]
    #print(h)
    general_h = [["?" for i in range(len(specific_h))] for i in
range(len(specific_h))]
    print(general_h)
    # The learning iterations
    for i, h in enumerate(concepts):
        # Checking if the hypothesis has a positive target
        if target[i] == "Yes":
            for x in range(len(specific_h)):
                # Change values in S & G only if values change
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        # Checking if the hypothesis has a positive target
        if target[i] == "No":
            for x in range(len(specific_h)):
                # For negative hyposthesis change values only    in G
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("\nSteps of Candidate Elimination Algorithm",i+1)
        print(specific_h)
        print(general_h)
    # find indices where we have empty rows, meaning those that are unchanged
    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?',
'?', '?', '?']]
    for i in indices:
        # remove those rows from general_h
        general_h.remove(['?', '?', '?', '?', '?', '?'])
    # Return final values
    return specific_h, general_h
s_final, g_final = learn(concepts, target)
print("\nFinal Specific_h:", s_final, sep="\n")
print("\nFinal General_h:", g_final, sep="\n")
OUTPUT
sky airTemp   humidity    wind water   forecast enjoySport
0 Sunny       Warm   Normal Strong     Warm     Same         Yes
1 Sunny       Warm     High Strong     Warm     Same         Yes
2 Rainy       Cold     High Strong     Warm   Change          No
3 Sunny       Warm     High Strong     Cool   Change         Yes
[['Sunny'   'Warm'   'Normal' 'Strong' 'Warm' 'Same']
 ['Sunny'   'Warm'   'High' 'Strong' 'Warm' 'Same']
 ['Rainy'   'Cold'   'High' 'Strong' 'Warm' 'Change']
 ['Sunny'   'Warm'   'High' 'Strong' 'Cool' 'Change']]
['Yes' 'Yes' 'No' 'Yes']
Initialization of specific_h and general_h
['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?'], ['?', '?', '?', '?', '?', '?']]
Steps of Candidate Elimination Algorithm 1
['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?'], ['?', '?', '?', '?', '?', '?']]
Steps of Candidate Elimination Algorithm 2
['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?'], ['?', '?', '?', '?', '?', '?']]
Steps of Candidate Elimination Algorithm 3
['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?',
'?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?',
'?', '?'], ['?', '?', '?', '?', '?', 'Same']]
Steps of Candidate Elimination Algorithm 4
['Sunny' 'Warm' '?' 'Strong' '?' '?']
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?',
'?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?',
'?', '?'], ['?', '?', '?', '?', '?', '?']]
Final Specific_h:
['Sunny' 'Warm' '?' 'Strong' '?' '?']
Final General_h:
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]
3. Write a program to demonstrate the working of the decision tree
based ID3 Algorithm. Use an appropriate data set for building the
decision tree and apply this knowledge to classify a new sample.
tennisdata.csv (IN EXCELSHEET CSV FORMAT)
 Outlook    Temperatu   Humidity   Windy   PlayTenn
            re                             is
 Sunny      Hot         High       False   No
 Sunny      Hot         High       True    No
 Overcast   Hot         High       False   Yes
 Rainy      Mild        High       False   Yes
 Rainy      Cool        Normal     False   Yes
 Rainy      Cool        Normal     True    No
 Overcast   Cool        Normal     True    Yes
 Sunny      Mild        High       False   No
 Sunny      Cool        Normal     False   Yes
 Rainy      Mild        Normal     False   Yes
 Sunny      Mild        Normal     True    Yes
 Overcast   Mild        High       True    Yes
 Overcast   Hot         Normal     False   Yes
 Rainy      Mild        High       True    No
INPUT
import numpy as np
import math
import csv
def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = []
        traindata = []
        for name in headers:
            metadata.append(name)
        for row in datareader:
            traindata.append(row)
    return (metadata, traindata)
class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""
    def __str__(self):
        return self.attribute
def subtables(data, col, delete):
    dict = {}
    items = np.unique(data[:, col])
    count = np.zeros((items.shape[0], 1), dtype=np.int32)
    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1
    for x in range(items.shape[0]):
        dict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|
S32")
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                dict[items[x]][pos] = data[y]
                pos += 1
        if delete:
            dict[items[x]] = np.delete(dict[items[x]], col, 1)
    return items, dict
def entropy(S):
    items = np.unique(S)
    if items.size == 1:
        return 0
    counts = np.zeros((items.shape[0], 1))
    sums = 0
    for x in range(items.shape[0]):
        counts[x] = sum(S == items[x]) / (S.size * 1.0)
    for count in counts:
        sums += -1 * count * math.log(count, 2)
    return sums
def gain_ratio(data, col):
    items, dict = subtables(data, col, delete=False)
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0], 1))
    intrinsic = np.zeros((items.shape[0], 1))
    for x in range(items.shape[0]):
        ratio = dict[items[x]].shape[0]/(total_size * 1.0)
        entropies[x] = ratio * entropy(dict[items[x]][:, -1])
        intrinsic[x] = ratio * math.log(ratio, 2)
    total_entropy = entropy(data[:, -1])
    iv = -1 * sum(intrinsic)
    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]
    return total_entropy / iv
def create_node(data, metadata):
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node
    gains = np.zeros((data.shape[1] - 1, 1))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)
    split = np.argmax(gains)
    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)
    items, dict = subtables(data, split, delete=True)
    for x in range(items.shape[0]):
        child = create_node(dict[items[x]], metadata)
        node.children.append((items[x], child))
    return node
def empty(size):
    s = ""
    for x in range(size):
        s += "   "
    return s
def print_tree(node, level):
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)
metadata, traindata = read_data("tennisdata.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)
OUTPUT
Outlook
    Overcast
       b'Yes'
    Rainy
       Windy
          b'False'
              b'Yes'
          b'True'
              b'No'
    Sunny
       Humidity
          b'High'
              b'No'
          b'Normal'
              b'Yes'
4. Build an Artificial Neural Network by implementing the Backpropagation
algorithm and test the same using appropriate data sets.
INPUT
import numpy as np
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)         # X = (hours sleeping,
hours studying)
y = np.array(([92], [86], [89]), dtype=float)               # y = score on test
# scale units
X = X/np.amax(X, axis=0)          # maximum of X array
y = y/100                         # max test score is 100
class Neural_Network(object):
    def __init__(self):
                            # Parameters
        self.inputSize = 2
        self.outputSize = 1
        self.hiddenSize = 3
                              # Weights
        self.W1 = np.random.randn(self.inputSize, self.hiddenSize)            #
(3x2) weight matrix from input to hidden layer
        self.W2 = np.random.randn(self.hiddenSize, self.outputSize)           #
(3x1) weight matrix from hidden to output layer
    def forward(self, X):
                             #forward propagation   through our network
        self.z = np.dot(X, self.W1)                 # dot product of X (input)
and first set of 3x2 weights
        self.z2 = self.sigmoid(self.z)              # activation function
        self.z3 = np.dot(self.z2, self.W2)          # dot product of hidden
layer (z2) and second set of 3x1 weights
        o = self.sigmoid(self.z3)                   # final activation function
        return o
    def sigmoid(self, s):
        return 1/(1+np.exp(-s))       # activation function
    def sigmoidPrime(self, s):
        return s * (1 - s)          # derivative of sigmoid
    def backward(self, X, y, o):
                                    # backward propgate through the network
        self.o_error = y - o        # error in output
        self.o_delta = self.o_error*self.sigmoidPrime(o) # applying derivative
of sigmoid to
        self.z2_error = self.o_delta.dot(self.W2.T)    # z2 error: how much
our hidden layer weights contributed to output error
        self.z2_delta = self.z2_error*self.sigmoidPrime(self.z2) # applying
derivative of sigmoid to z2 error
        self.W1 += X.T.dot(self.z2_delta)       # adjusting first set (input
--> hidden) weights
        self.W2 += self.z2.T.dot(self.o_delta) # adjusting second set (hidden
--> output) weights
    def train (self, X, y):
        o = self.forward(X)
        self.backward(X, y, o)
NN = Neural_Network()
for i in range(1000): # trains the NN 1,000 times
    print ("\nInput: \n" + str(X))
    print ("\nActual Output: \n" + str(y))
    print ("\nPredicted Output: \n" + str(NN.forward(X)))
    print ("\nLoss: \n" + str(np.mean(np.square(y - NN.forward(X)))))     #
mean sum squared loss)
    NN.train(X, y)
OUTPUT
Input:
[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]
Actual Output:
[[0.92]
 [0.86]
 [0.89]]
Predicted Output:
[[0.47212874]
 [0.42728946]
 [0.40891365]]
Loss:
0.20642371917499927
Input:
[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]
Actual Output:
[[0.92]
 [0.86]
 [0.89]]
Predicted Output:
[[0.55398066]
 [0.49831918]
 [0.50254468]]
Loss:
0.13830159742519685
333 0.55555556]
 [1.         0.66666667]]
Actual Output:
[[0.92]
 [0.86]
 [0.89]]
Predicted Output:
[[0.90738512]
 [0.85762296]
 [0.90442507]]
Loss:
0.0001242893939220947
Input:
[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]
Actual Output:
[[0.92]
 [0.86]
 [0.89]]
Predicted Output:
[[0.90738861]
 [0.85762488]
 [0.90442029]]
Loss:
0.0001242110058786677
..
..
..
CODE EXCUTES CONTI……
..
Input:
[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]
Actual Output:
[[0.92]
 [0.86]
 [0.89]]
Predicted Output:
[[0.90939036]
 [0.85857836]
 [0.90083978]]
Loss:
7.736212176292079e-05
Input:
[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]
Actual Output:
[[0.92]
 [0.86]
 [0.89]]
Predicted Output:
[[0.90939277]
 [0.85857961]
 [0.90083551]]
Loss:
7.731308968994962e-05
5. Write a program to implement the naïve Bayesian classifier for a sample
training data set stored as a .CSV file. Compute the accuracy of the classifier,
considering few test data sets.
tennisdata.csv (IN EXCELSHEET CSV FORMAT)
 Outlook    Temperatu   Humidity   Windy   PlayTenn
            re                             is
 Sunny      Hot         High       False   No
 Sunny      Hot         High       True    No
 Overcast   Hot         High       False   Yes
 Rainy      Mild        High       False   Yes
 Rainy      Cool        Normal     False   Yes
 Rainy      Cool        Normal     True    No
 Overcast   Cool        Normal     True    Yes
 Sunny      Mild        High       False   No
 Sunny      Cool        Normal     False   Yes
 Rainy      Mild        Normal     False   Yes
 Sunny      Mild        Normal     True    Yes
 Overcast   Mild        High       True    Yes
 Overcast   Hot         Normal     False   Yes
 Rainy      Mild        High       True    No
INPUT
# import necessary libarities
import pandas as pd
from sklearn import tree
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
# load data from CSV
data = pd.read_csv('tennisdata.csv')
print("THe first 5 values of data is :\n",data.head())
# obtain Train data and Train output
X = data.iloc[:,:-1]
print("\nThe First 5 values of train data is\n",X.head())
y = data.iloc[:,-1]
print("\nThe first 5 values of Train output is\n",y.head())
# Convert then in numbers
le_outlook = LabelEncoder()
X.Outlook = le_outlook.fit_transform(X.Outlook)
le_Temperature = LabelEncoder()
X.Temperature = le_Temperature.fit_transform(X.Temperature)
le_Humidity = LabelEncoder()
X.Humidity = le_Humidity.fit_transform(X.Humidity)
le_Windy = LabelEncoder()
X.Windy = le_Windy.fit_transform(X.Windy)
print("\nNow the Train data is :\n",X.head())
le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the Train output is\n",y)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.20)
classifier = GaussianNB()
classifier.fit(X_train,y_train)
from sklearn.metrics import accuracy_score
print("Accuracy is:",accuracy_score(classifier.predict(X_test),y_test))
OUTPUT
THe first 5 values of data is :
     Outlook Temperature Humidity     Windy PlayTennis
0     Sunny         Hot     High     False         No
1     Sunny         Hot     High      True         No
2 Overcast          Hot     High     False        Yes
3     Rainy        Mild     High     False        Yes
4     Rainy        Cool   Normal     False        Yes
The First 5 values of train data is
     Outlook Temperature Humidity Windy
0     Sunny         Hot     High False
1     Sunny         Hot     High    True
2 Overcast          Hot     High False
3     Rainy        Mild     High False
4     Rainy        Cool   Normal False
The first 5 values of Train output is
  0    No
1     No
2    Yes
3    Yes
4    Yes
Name: PlayTennis, dtype: object
Now the Train data is :
    Outlook Temperature     Humidity   Windy
0        2            1           0       0
1        2            1           0       1
2        0            1           0       0
3        1            2           0       0
4        1            0           1       0
Now the Train output is
 [0 0 1 1 1 0 1 0 1 1 1 1 1 0]
Accuracy is: 0.6666666666666666
6. Assuming a set of documents that need to be classified, use the naïve Bayesian
Classifier model to perform this task. Built-in Java classes/API can be used to write
the program. Calculate the accuracy, precision, and recall for your data set.
document.csv(IN EXCELSHEET CSV FORMAT)
 I love this sandwich                       pos
 This is an amazing place                   pos
I feel very good about these beers      pos
This is my best work                    pos
What an awesome view                    pos
I do not like this restaurant           neg
I am tired of this stuff                neg
I can't deal with this                  neg
He is my sworn enemy                    neg
My boss is horrible                     neg
This is an awesome place                pos
I do not like the taste of this juice   neg
I love to dance                         pos
I am sick and tired of this place       neg
What a great holiday                    pos
That is a bad locality to stay          neg
We will have good fun tomorrow          pos
I went to my enemy's house today        neg
INPUT
import pandas as pd
msg = pd.read_csv('document.csv', names=['message', 'label'])
print("Total Instances of Dataset: ", msg.shape[0])
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum
from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)
from sklearn.feature_extraction.text import CountVectorizer
count_v = CountVectorizer()
Xtrain_dm = count_v.fit_transform(Xtrain)
Xtest_dm = count_v.transform(Xtest)
df = pd.DataFrame(Xtrain_dm.toarray(),columns=count_v.get_feature_names())
print(df[0:5])
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB()
clf.fit(Xtrain_dm, ytrain)
pred = clf.predict(Xtest_dm)
for doc, p in zip(Xtrain, pred):
    p = 'pos' if p == 1 else 'neg'
    print("%s -> %s" % (doc, p))
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score,
recall_score
print('Accuracy Metrics: \n')
print('Accuracy: ', accuracy_score(ytest, pred))
print('Recall: ', recall_score(ytest, pred))
print('Precision: ', precision_score(ytest, pred))
print('Confusion Matrix: \n', confusion_matrix(ytest, pred))
OUTPUT
Total Instances of Dataset:           18
about    am    an   and   awesome     bad   beers    best    boss    can   ...   tired   to   \
0        0     1    0     1           0     0        0       0       0     0 ...         1    0
1        0     0    0     0           0     0        0       0       0     0 ...         0    0
2        0     0    0     0           0     0        0       0       0     0 ...         0    0
3        0     0    0     0           0     0        0       0       0     1 ...         0    0
4        0     0    0     0           0     0        0       0       0     0 ...         0    0
     today    tomorrow    very   we   went    will    with    work
0        0           0       0    0      0       0       0       0
1        0           0       0    0      0       0       0       0
2        0           0       0    0      0       0       0       0
3        0           0       0    0      0       0       1       0
4        0           0       0    0      0       0       0       0
[5 rows x 49 columns]
I   am sick and tired of this place -> pos
I   do not like the taste of this juice -> neg
I   love this sandwich -> neg
I   can't deal with this -> pos
I   do not like this restaurant -> neg
Accuracy Metrics:
Accuracy: 0.6
Recall: 0.5
Precision: 1.0
Confusion Matrix:
 [[1 0]
 [2 2]]
7. Write a program to construct a Bayesian network considering medical data. Use
this model to demonstrate the diagnosis of heart patients using standard Heart
Disease Data Set. You can use Java/Python ML library classes/API.
heartdisease.csv (IN EXCELSHEET CSV FORMAT)
 Age       Gender      Family    diet    Lifestyle                     cholestrol   heartdisease
 0         0           1         1       3                             0            1
 0         1           1         1       3                             0            1
 1         0           0         0       2                             1            1
 4         0           1         1       3                             2            0
 3         1           1         0       0                             2            0
 2         0           1         1       1                             0            1
 4         0           1         0       2                             0            1
 0         0           1         1       3                             0            1
 3         1           1         0       0                             2            0
 1         1           0         0       0                             2            1
 4         1           0         1       2                             0            1
 4         0           1         1       3                             2            0
2              1             0           0          0            0             0
2              0             1           1          1            0             1
3              1             1           0          0            1             0
0              0             1           0          0            2             1
1              1             0           1          2            1             1
3              1             1           1          0            1             0
4              0             1           1          3            2             0
import pandas as pd
data=pd.read_csv("heartdisease.csv")
heart_disease=pd.DataFrame(data)
print(heart_disease)
from pgmpy.models import BayesianModel
model=BayesianModel([
('age','Lifestyle'),
('Gender','Lifestyle'),
('Family','heartdisease'),
('diet','cholestrol'),
('Lifestyle','diet'),
('cholestrol','heartdisease'),
('diet','cholestrol')
])
from pgmpy.estimators import MaximumLikelihoodEstimator
model.fit(heart_disease, estimator=MaximumLikelihoodEstimator)
from pgmpy.inference import VariableElimination
HeartDisease_infer = VariableElimination(model)
print('For age Enter { SuperSeniorCitizen:0, SeniorCitizen:1, MiddleAged:2,
Youth:3, Teen:4 }')
print('For Gender Enter { Male:0, Female:1 }')
print('For Family History Enter { yes:1, No:0 }')
print('For diet Enter { High:0, Medium:1 }')
print('For lifeStyle Enter { Athlete:0, Active:1, Moderate:2, Sedentary:3 }')
print('For cholesterol Enter { High:0, BorderLine:1, Normal:2 }')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={
    'age':int(input('Enter age :')),
    'Gender':int(input('Enter Gender :')),
    'Family':int(input('Enter Family history :')),
    'diet':int(input('Enter diet :')),
    'Lifestyle':int(input('Enter Lifestyle :')),
    'cholestrol':int(input('Enter cholestrol :'))
    })
print(q['heartdisease'])
OUTPUT
age   Gender       Family   diet   Lifestyle   cholestrol   heartdisease
0      0            0        1      1           3            0             1
1      0            1        1      1           3            0             1
2      1            0        0      0           2            1             1
3       4     0        1     1           3           2              0
4       3     1        1     0           0           2              0
5       2     0        1     1           1           0              1
6       4     0        1     0           2           0              1
7       0     0        1     1           3           0              1
8       3     1        1     0           0           2              0
9       1     1        0     0           0           2              1
10      4     1        0     1           2           0              1
11      4     0        1     1           3           2              0
12      2     1        0     0           0           0              0
13      2     0        1     1           1           0              1
14      3     1        1     0           0           1              0
15      0     0        1     0           0           2              1
16      1     1        0     1           2           1              1
17      3     1        1     1           0           1              0
18      4     0        1     1           3           2              0
For age Enter { SuperSeniorCitizen:0, SeniorCitizen:1, MiddleAged:2, Youth:3,
Teen:4 }
For Gender Enter { Male:0, Female:1 }
For Family History Enter { yes:1, No:0 }
For diet Enter { High:0, Medium:1 }
For lifeStyle Enter { Athlete:0, Active:1, Moderate:2, Sedentary:3 }
For cholesterol Enter { High:0, BorderLine:1, Normal:2 }
Enter age :1
Enter Gender :1
Enter Family history :0
Enter diet :1
Enter Lifestyle :0
Enter cholestrol :1
+----------------+---------------------+
| heartdisease   |   phi(heartdisease) |
+================+=====================+
| heartdisease_0 |              0.0000 |
+----------------+---------------------+
| heartdisease_1 |              1.0000 |
+----------------+---------------------+
8. Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same
data set for clustering using k-Means algorithm. Compare the results of these two
algorithms and comment on the quality of clustering. You can add Java/Python ML
library classes/API in the program.
INPUT
from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.mixture import GaussianMixture
from sklearn.datasets import load_iris
import sklearn.metrics as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
dataset=load_iris()
# print(dataset)
X=pd.DataFrame(dataset.data)
X.columns=['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width']
y=pd.DataFrame(dataset.target)
y.columns=['Targets']
# print(X)
plt.figure(figsize=(14,7))
colormap=np.array(['red','lime','black'])
# REAL PLOT
plt.subplot(1,3,1)
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[y.Targets],s=40)
plt.title('Real')
# K-PLOT
plt.subplot(1,3,2)
model=KMeans(n_clusters=3)
model.fit(X)
predY=np.choose(model.labels_,[0,1,2]).astype(np.int64)
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[predY],s=40)
plt.title('KMeans')
# GMM PLOT
scaler=preprocessing.StandardScaler()
scaler.fit(X)
xsa=scaler.transform(X)
xs=pd.DataFrame(xsa,columns=X.columns)
gmm=GaussianMixture(n_components=3)
gmm.fit(xs)
y_cluster_gmm=gmm.predict(xs)
plt.subplot(1,3,3)
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[y_cluster_gmm],s=40)
plt.title('GMM Classification')
OUTPUT
Text(0.5, 1.0, 'GMM Classification')
9. Write a program to implement k-Nearest Neighbour algorithm to classify the
iris data set. Print both correct and wrong predictions. Java/Python ML library
classes can be used for this problem.
INPUT
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import numpy as np
dataset=load_iris()
#print(dataset)
X_train,X_test,y_train,y_test=train_test_split(dataset["data"],dataset["target
"],random_state=0)
kn=KNeighborsClassifier(n_neighbors=1)
kn.fit(X_train,y_train)
for i in range(len(X_test)):
    x=X_test[i]
    x_new=np.array([x])
    prediction=kn.predict(x_new)
    print("TARGET=",y_test[i],dataset["target_names"]
[y_test[i]],"PREDICTED=",prediction,dataset["target_names"][prediction])
print(kn.score(X_test,y_test))
OUTPUT
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=1, p=2,
           weights='uniform')
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [2] ['virginica']
0.9736842105263158
10. Implement the non-parametric Locally Weighted Regression Algorithm in
order to fit data points. Select appropriate data set for your experiment and draw
graphs.
INPUT
from math import ceil
import numpy as np
from scipy import linalg
def lowess(x, y, f, iterations):
    n = len(x)
    r = int(ceil(f * n))
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
[np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]
         residuals = y - yest
         s = np.median(np.abs(residuals))
         delta = np.clip(residuals / (6.0 * s), -1, 1)
         delta = (1 - delta ** 2) ** 2
    return yest
import math
n = 100
x = np.linspace(0, 2 * math.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f =0.25
iterations=3
yest = lowess(x, y, f, iterations)
import matplotlib.pyplot as plt
plt.plot(x,y,"r.")
plt.plot(x,yest,"b-")
OUTPUT
[<matplotlib.lines.Line2D at 0x37459696d8>]