1. Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis based on a given set of training data samples. Read the training data from a .CSV file.
import csv
from google.colab import files
uploaded = files.upload()

with open('tennis.csv', 'r') as f:
    reader = csv.reader(f)
    your_list = list(reader)

# Start from the most specific hypothesis: every attribute constrained to '0'
h = [['0', '0', '0', '0', '0', '0']]

for i in your_list:
    print(i)
    if i[-1] == "true":          # consider only positive examples
        j = 0
        for x in i:
            if x != "true":      # skip the target column
                if x != h[0][j] and h[0][j] == '0':
                    h[0][j] = x      # first positive example: copy the value
                elif x != h[0][j] and h[0][j] != '0':
                    h[0][j] = '?'    # conflicting values: generalise to '?'
                else:
                    pass
                j = j + 1
print("Maximally specific hypothesis is:")
print(h)
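Before the full run, the update rule can be traced by hand on just the two positive rows: they agree everywhere except humidity, which generalises to '?'. A minimal in-memory sketch of that rule (attribute values taken from the INPUT below):

positives = [
    ['sunny', 'warm', 'normal', 'strong', 'warm', 'same'],
    ['sunny', 'warm', 'high', 'strong', 'warm', 'same'],
]
hypothesis = positives[0][:]  # seed with the first positive example
for example in positives[1:]:
    # keep attributes that agree; generalise disagreements to '?'
    hypothesis = [a if a == v else '?' for a, v in zip(hypothesis, example)]
    print(hypothesis)  # ['sunny', 'warm', '?', 'strong', 'warm', 'same']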
INPUT:
sunny,warm,normal,strong,warm,same,true
sunny,warm,high,strong,warm,same,true
rainy,cold,high,strong,warm,change,false
sunny,warm,high,strong,cool,change,false
OUTPUT:
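(expected output for the four rows above)
['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'true']
['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'true']
['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'false']
['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'false']
Maximally specific hypothesis is:
[['sunny', 'warm', '?', 'strong', 'warm', 'same']]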
2. For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with the training examples.
import numpy as np
import pandas as pd

# header=None keeps the first row as data (tennis.csv has no header row)
data = pd.DataFrame(data=pd.read_csv('/content/tennis.csv', header=None))
concepts = np.array(data.iloc[:, 0:-1])
target = np.array(data.iloc[:, -1])

def learn(concepts, target):
    specific_h = concepts[0].copy()
    print("initialization of specific_h and general_h")
    print(specific_h)
    # most general boundary: one all-'?' hypothesis per attribute
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print(general_h)
    for i, h in enumerate(concepts):
        if target[i] == "yes":
            # positive example: generalise specific_h where it disagrees
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == "no":
            # negative example: specialise general_h where it disagrees
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("steps in candidate elimination algorithm\n", i + 1)
        print(specific_h)
        print(general_h)
    # drop the untouched all-'?' rows from the general boundary
    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
    for i in indices:
        general_h.remove(['?', '?', '?', '?', '?', '?'])
    return specific_h, general_h

s_final, g_final = learn(concepts, target)
print("final specific_h:", s_final, sep="\n")
print("final general_h:", g_final, sep="\n")
INPUT:
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,no
OUTPUT:
initialization of specific_h and general_h
['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
steps in candidate elimination algorithm
1
['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
steps in candidate elimination algorithm
2
['sunny' 'warm' '?' 'strong' 'warm' 'same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
steps in candidate elimination algorithm
3
['sunny' 'warm' '?' 'strong' 'warm' 'same']
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'same']]
steps in candidate elimination algorithm
4
['sunny' 'warm' '?' 'strong' 'warm' 'same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', 'warm', '?'], ['?', '?', '?', '?', '?', 'same']]
final specific_h:
['sunny' 'warm' '?' 'strong' 'warm' 'same']
final general_h:
[['?', '?', '?', '?', 'warm', '?'], ['?', '?', '?', '?', '?', 'same']]
Program 3: Write a program to demonstrate the working of the decision tree
based ID3 algorithm. Use an appropriate data set for building the decision tree
and apply this knowledge to classify a new sample.
import pandas as pd
import numpy as np

dataset = pd.read_csv('/content/data3.csv',
                      names=['outlook', 'temperature', 'humidity', 'wind', 'class'])

def entropy(target_col):
    # Shannon entropy of a class column
    elements, counts = np.unique(target_col, return_counts=True)
    entropy = np.sum([(-counts[i]/np.sum(counts)) * np.log2(counts[i]/np.sum(counts))
                      for i in range(len(elements))])
    return entropy

def InfoGain(data, split_attribute_name, target_name="class"):
    # information gain = entropy of the whole set minus the weighted
    # entropy of the subsets produced by splitting on the attribute
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    Weighted_Entropy = np.sum(
        [(counts[i]/np.sum(counts)) *
         entropy(data.where(data[split_attribute_name] == vals[i]).dropna()[target_name])
         for i in range(len(vals))])
    Information_Gain = total_entropy - Weighted_Entropy
    return Information_Gain
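# Worked check against the 14 rows under INPUT below (10 yes, 4 no),
# assuming data3.csv holds them without a header line:
#   entropy(class) = -(10/14)*log2(10/14) - (4/14)*log2(4/14) ≈ 0.863
#   InfoGain(dataset, 'outlook') ≈ 0.863 - 0.605 ≈ 0.259, the largest of the
#   four attributes, so 'outlook' becomes the root of the tree.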
def ID3(data, originaldata, features, target_attribute_name="class", parent_node_class=None):
    # all remaining examples share one class: return it as a leaf
    if len(np.unique(data[target_attribute_name])) <= 1:
        return np.unique(data[target_attribute_name])[0]
    # empty subset: fall back to the majority class of the full data
    elif len(data) == 0:
        return np.unique(originaldata[target_attribute_name])[
            np.argmax(np.unique(originaldata[target_attribute_name], return_counts=True)[1])]
    # no features left: return the parent's majority class
    elif len(features) == 0:
        return parent_node_class
    else:
        parent_node_class = np.unique(data[target_attribute_name])[
            np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])]
        # choose the feature with the highest information gain
        item_values = [InfoGain(data, feature, target_attribute_name) for feature in features]
        best_feature_index = np.argmax(item_values)
        best_feature = features[best_feature_index]
        tree = {best_feature: {}}
        features = [i for i in features if i != best_feature]
        # grow one subtree per value of the chosen feature
        for value in np.unique(data[best_feature]):
            sub_data = data.where(data[best_feature] == value).dropna()
            subtree = ID3(sub_data, dataset, features, target_attribute_name, parent_node_class)
            tree[best_feature][value] = subtree
        return tree
import csv

def read_data(filename):
    # CSV helper returning (header names, data rows); not used by the run below
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = []
        traindata = []
        for name in headers:
            metadata.append(name)
        for row in datareader:
            traindata.append(row)
    return metadata, traindata

features = ['outlook', 'temperature', 'humidity', 'wind']
decision_tree = ID3(dataset, dataset, features, "class")
decision_tree
INPUT:
outlook   temperature  humidity  wind    target
sunny     hot          high      weak    no
sunny     hot          high      strong  no
overcast  hot          high      weak    yes
rain      mild         high      weak    yes
rain      cool         normal    weak    yes
rain      cool         normal    strong  no
overcast  cool         normal    strong  yes
sunny     mild         high      weak    no
sunny     cool         normal    weak    yes
rain      mild         normal    weak    yes
sunny     mild         normal    strong  yes
overcast  mild         high      strong  yes
overcast  hot          normal    weak    yes
rain      mild         high      strong  yes
OUTPUT:
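(expected output, assuming data3.csv holds exactly the 14 data rows above with no header line)
{'outlook': {'overcast': 'yes',
             'rain': {'temperature': {'cool': {'wind': {'strong': 'no', 'weak': 'yes'}},
                                      'mild': 'yes'}},
             'sunny': {'humidity': {'high': 'no', 'normal': 'yes'}}}}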
Program 4: Build an Artificial Neural Network by implementing the
Backpropagation algorithm and test the same using appropriate data sets.
import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
Y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)   # normalise inputs column-wise
Y = Y / 100                  # normalise outputs to [0, 1]

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def derivatives_sigmoid(x):
    # x is already a sigmoid activation, so sigmoid'(z) = x * (1 - x)
    return x * (1 - x)

epoch = 7000                 # training iterations
lr = 0.1                     # learning rate
inputlayer_neurons = 2
hiddenlayer_neurons = 3
output_neurons = 1

# random weight and bias initialisation
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # forward pass
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)
    # backward pass
    EO = Y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad
    # weight and bias updates
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("input:\n" + str(X))
print("Actual output:\n" + str(Y))
print("predicted output:\n", output)
OUTPUT:
input:
[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]
Actual output:
[[0.92]
 [0.86]
 [0.89]]
predicted output:
 [[0.89573364]
 [0.88044261]
 [0.89363697]]
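The weight updates above implement the delta rule: each step moves a weight by learning-rate × input × (error × sigmoid'(activation)). A hand-sized sketch of one such step on a single sigmoid unit (the numbers are made up for illustration):

import numpy as np

x, y_true, w, lr = 0.5, 0.9, 0.4, 0.1      # one input, target, weight, learning rate
y = 1 / (1 + np.exp(-(x * w)))             # forward pass through one sigmoid unit
delta = (y_true - y) * y * (1 - y)         # error * sigmoid'(net), as in d_output above
w += lr * x * delta                        # the same gradient step applied to wout and wh
print(w)                                   # the weight moves toward reducing the error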
Program 5: Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering a few test data sets.
import csv
import random
import math

def loadCsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        # convert strings into numbers for processing
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def splitDataset(dataset, splitRatio):
    # e.g. splitRatio = 0.67 gives a 67% training split
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        # pick random indices from the dataset to build the training set
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

def separateByClass(dataset):
    # group rows by their class label (the last attribute)
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated
def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    # per-attribute (mean, stdev) pairs, dropping the class column
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries

def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries

def calculateProbability(x, mean, stdev):
    # handle zero standard deviation to avoid division by zero:
    # a degenerate distribution gives probability 1 if x equals the mean, else 0
    if stdev == 0:
        return 1 if x == mean else 0
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent
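# The density above is the Gaussian (normal) PDF, applied per attribute
# with the per-class (mean, stdev) summaries:
#   P(x | class) = exp(-(x - mean)^2 / (2 * stdev^2)) / (sqrt(2*pi) * stdev)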
def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities

def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

def getAccuracy(testSet, predictions):
    correct = 0
    for i in range(len(testSet)):
        if testSet[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    filename = '/content/5data.csv'
    splitRatio = 0.67
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into train={1} and test={2} rows'.format(
        len(dataset), len(trainingSet), len(testSet)))
    # prepare model
    summaries = summarizeByClass(trainingSet)
    # test model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy of the classifier is : {0}%'.format(accuracy))

main()
INPUT:
1,1,1,1,5
1,1,1,2,5
2,1,1,2,10
3,2,1,1,10
3,3,2,1,10
3,3,2,2,5
2,3,2,2,10
1,2,1,1,5
1,3,2,1,10
3,2,2,2,10
1,2,2,2,10
2,2,1,2,10
2,1,2,1,10
3,2,1,2,5
1,2,1,2,10
1,2,1,2,5
OUTPUT:
Split 16 rows into train=10 and test=6 rows
Accuracy of the classifier is : 16.666666666666664%
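Once summaries have been built, a single new sample can be classified with the same helpers. A short usage sketch (the sample values are made up; here the model is trained on the full file rather than a split):

dataset = loadCsv('/content/5data.csv')
summaries = summarizeByClass(dataset)
print(predict(summaries, [2, 1, 2, 2]))  # prints the more probable class label (5 or 10)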
Anaconda Jupyter
For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with the training examples.
import numpy as np
import pandas as pd

# header=None keeps the first row as data (the sheet has no header row)
data = pd.DataFrame(data=pd.read_excel('D:/pathmanaban/ram.xlsx', header=None))
concepts = np.array(data.iloc[:, 0:-1])
target = np.array(data.iloc[:, -1])

def learn(concepts, target):
    specific_h = concepts[0].copy()
    print("initialization of specific_h and general_h")
    print(specific_h)
    # most general boundary: one all-'?' hypothesis per attribute
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print(general_h)
    for i, h in enumerate(concepts):
        if target[i] == "yes":
            # positive example: generalise specific_h where it disagrees
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == "no":
            # negative example: specialise general_h where it disagrees
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("steps in candidate elimination algorithm\n", i + 1)
        print(specific_h)
        print(general_h)
    # drop the untouched all-'?' rows from the general boundary
    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
    for i in indices:
        general_h.remove(['?', '?', '?', '?', '?', '?'])
    return specific_h, general_h

s_final, g_final = learn(concepts, target)
print("final specific_h:", s_final, sep="\n")
print("final general_h:", g_final, sep="\n")
Output:
initialization of specific_h and general_h
['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
steps in candidate elimination algorithm
1
['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
steps in candidate elimination algorithm
2
['sunny' 'warm' '?' 'strong' 'warm' 'same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
steps in candidate elimination algorithm
3
['sunny' 'warm' '?' 'strong' 'warm' 'same']
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'same']]
steps in candidate elimination algorithm
4
['sunny' 'warm' '?' 'strong' 'warm' 'same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', 'warm', '?'], ['?', '?', '?', '?', '?', 'same']]
final specific_h:
['sunny' 'warm' '?' 'strong' 'warm' 'same']
final general_h:
[['?', '?', '?', '?', 'warm', '?'], ['?', '?', '?', '?', '?', 'same']]