KEMBAR78
Programs Lab Bca | PDF | Applied Mathematics | Statistical Analysis
0% found this document useful (0 votes)
38 views16 pages

Programs Lab Bca

Uploaded by

Gayu Gayu
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
38 views16 pages

Programs Lab Bca

Uploaded by

Gayu Gayu
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 16

Programs

8. Write a program to demonstrate Regression analysis with residual plots on a given data set.

SOURCE CODE:

import numpy as np
import matplotlib.pyplot as plt
def estimate_coef(x, y):
    """Estimate the coefficients of a simple linear regression ``y ~ b0 + b1*x``.

    The slope and intercept are obtained by ordinary least squares from the
    cross-deviation of x and y and the deviation of x about its mean.

    Args:
        x: Predictor observations (array or plain sequence of numbers).
        y: Response observations, same length as ``x``.

    Returns:
        Tuple ``(b0, b1)`` holding the intercept and the slope.

    Raises:
        ValueError: If fewer than two observations are given, or if every x
            value is identical (the slope is undefined in both cases).
    """
    # Accept plain lists/tuples as well as arrays.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # number of observations/points
    n = np.size(x)
    if n < 2 or np.ptp(x) == 0:
        raise ValueError(
            "need at least two observations with distinct x values")

    # mean of x and y vector
    mx = np.mean(x)
    my = np.mean(y)

    # calculating cross-deviation and deviation about x
    sxy = np.sum(y * x) - n * my * mx
    sxx = np.sum(x * x) - n * mx * mx

    # calculating regression coefficients
    b1 = sxy / sxx
    b0 = my - b1 * mx
    return (b0, b1)
def plot_regression_line(x, y, b):
    """Scatter the observations and overlay the fitted regression line.

    Args:
        x: Predictor observations; must support ``b[1] * x`` element-wise.
        y: Response observations.
        b: Pair ``(b0, b1)`` of intercept and slope.
    """
    # plotting the actual points as scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)

    # predicted response vector
    ypred = b[0] + b[1] * x

    # plotting the regression line
    plt.plot(x, ypred, color="g")

    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')

    # function to show plot
    plt.show()
def main():
    """Fit the sample data, print the coefficients and plot the fitted line."""
    # observations or data
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb0 = {} \nb1 = {}".format(b[0], b[1]))

    # plotting regression line
    plot_regression_line(x, y, b)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

OUTPUT:
Estimated coefficients:
b0 = 1.2363636363636363
b1 = 1.1696969696969697
9. Write a program to demonstrate the working of the decision tree-based ID3 algorithm.

SOURCE CODE:

# Importing the required packages
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# sklearn.cross_validation was removed from scikit-learn;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Function importing Dataset


def importdata():
    """Download the UCI balance-scale data set and print a short summary.

    Returns:
        The data as returned by ``pd.read_csv`` (comma separated, no header
        row, so the columns are numbered).
    """
    balance_data = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-' +
        'databases/balance-scale/balance-scale.data',
        sep=',', header=None)

    # Printing the dataset shape
    print("Dataset Length: ", len(balance_data))
    print("Dataset Shape: ", balance_data.shape)

    # Printing the dataset observations
    print("Dataset: ", balance_data.head())
    return balance_data

# Function to split the dataset
def splitdataset(balance_data):
    """Split the data into features/target and into train/test subsets.

    Column 0 is taken as the target and columns 1-4 as the features.

    Args:
        balance_data: Table exposing ``.values`` as a 2-D array.

    Returns:
        Tuple ``(X, Y, X_train, X_test, y_train, y_test)``; the test subset
        is 30% of the rows, drawn with ``random_state=100``.
    """
    # Separating the target variable
    X = balance_data.values[:, 1:5]
    Y = balance_data.values[:, 0]

    # Splitting the dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.3, random_state=100)
    return X, Y, X_train, X_test, y_train, y_test

# Function to perform training with giniIndex.


def train_using_gini(X_train, X_test, y_train):
    """Train a decision tree that splits on the Gini index.

    Args:
        X_train: Training features.
        X_test: Unused; kept so the signature matches the entropy trainer.
        y_train: Training labels.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(
        criterion="gini", random_state=100, max_depth=3, min_samples_leaf=5)

    # Performing training
    clf_gini.fit(X_train, y_train)
    return clf_gini
# Function to perform training with entropy.
def tarin_using_entropy(X_train, X_test, y_train):
    """Train a decision tree that splits on entropy (information gain).

    The misspelled name is kept because existing callers use it.

    Args:
        X_train: Training features.
        X_test: Unused; kept so the signature matches the Gini trainer.
        y_train: Training labels.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(
        criterion="entropy", random_state=100, max_depth=3,
        min_samples_leaf=5)

    # Performing training
    clf_entropy.fit(X_train, y_train)
    return clf_entropy


# Correctly spelled alias; the original name stays available for callers.
train_using_entropy = tarin_using_entropy

# Function to make predictions
def prediction(X_test, clf_object):
    """Predict labels for ``X_test`` and print them.

    Args:
        X_test: Samples to classify.
        clf_object: Any object exposing ``predict(X)``.

    Returns:
        Whatever ``clf_object.predict(X_test)`` returns.
    """
    # Prediction on the test set with the supplied classifier
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    return y_pred

# Function to calculate accuracy


def cal_accuracy(y_test, y_pred):
    """Print the confusion matrix, accuracy (in percent) and class report.

    Args:
        y_test: True labels.
        y_pred: Predicted labels.
    """
    print("Confusion Matrix: ", confusion_matrix(y_test, y_pred))
    print("Accuracy : ", accuracy_score(y_test, y_pred) * 100)
    print("Report : ", classification_report(y_test, y_pred))
def main():
    """Train Gini and entropy decision trees and report their test results."""
    # Building Phase
    data = importdata()
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
    clf_gini = train_using_gini(X_train, X_test, y_train)
    clf_entropy = tarin_using_entropy(X_train, X_test, y_train)

    # Operational Phase
    print("Results Using Gini Index:")

    # Prediction using gini
    y_pred_gini = prediction(X_test, clf_gini)
    cal_accuracy(y_test, y_pred_gini)

    print("Results Using Entropy:")

    # Prediction using entropy
    y_pred_entropy = prediction(X_test, clf_entropy)
    cal_accuracy(y_test, y_pred_entropy)

# Calling main function


if __name__ == "__main__":
    main()

OUTPUT:

Dataset Length: 625


Dataset Shape: (625, 5)
Dataset: 0 1 2 3 4
0 B 1 1 1 1
1 R 1 1 1 2
2 R 1 1 1 3
3 R 1 1 1 4
4 R 1 1 1 5
10. Write a program to implement the Naïve Bayesian classifier for a sample training data set stored as a .CSV file.

SOURCE CODE:

.CSVfile.

class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical features, built from counts.

    Attributes set by the constructor:
        X, y: Training features and labels, as passed in.
        N: Number of training rows.
        dim: Number of features per row.
        attrs: For each feature, the distinct values seen, in first-seen order.
        output_dom: Mapping of each class label to its number of occurrences.
        data: Every training row stored as ``[Xi, yi]``.
    """

    def __init__(self, X, y):
        """Store the training set and collect the value and class counts.

        Args:
            X: Sequence of feature vectors, all of the same length.
            y: Sequence of target labels, one per feature vector.
        """
        self.X, self.y = X, y
        self.N = len(self.X)        # Length of the training set
        self.dim = len(self.X[0])   # Dimension of the vector of features
        # Here we'll store the columns of the training set
        self.attrs = [[] for _ in range(self.dim)]
        # Output classes with the number of occurrences in the training set
        self.output_dom = {}
        # To store every row [Xi, yi]
        self.data = []

        for features, label in zip(self.X, self.y):
            for j in range(self.dim):
                # if we have never seen this value for this attribute before,
                # add it to the attrs list in the corresponding position
                if features[j] not in self.attrs[j]:
                    self.attrs[j].append(features[j])

            # count one more occurrence of this output class
            self.output_dom[label] = self.output_dom.get(label, 0) + 1

            # store the row
            self.data.append([features, label])

    def classify(self, entry):
        """Return the most probable class label for ``entry``.

        Each class is scored as ``P(y) * prod_i P(Xi = entry[i] | y)``, with
        all probabilities estimated from training-set frequencies. On a tie
        the class seen first during training wins.

        Args:
            entry: Feature vector of length ``dim``.

        Returns:
            The label with the highest score.
        """
        solve = None    # Final result
        max_arg = -1    # partial maximum

        for y, class_count in self.output_dom.items():
            prob = class_count / self.N  # P(y)

            for i in range(self.dim):
                # number of rows of class y with Xi = xi
                n = sum(1 for x in self.data
                        if x[0][i] == entry[i] and x[1] == y)
                # P(Xi = xi | y): divide by the size of class y, not by the
                # size of the whole training set, so that the class prior is
                # not multiplied in once per feature.
                prob *= n / class_count

            # if we have a greater prob for this output than the partial
            # maximum, remember it
            if prob > max_arg:
                max_arg = prob
                solve = y

        return solve

OUTPUT:
Array([‘Iris_virgincia’,’Iris_versicolor’,’Iris_setosa’,’ Iris_virgincia’,’ Iris_setosa’,
Iris_virgincia’, ’ Iris_setosa’, Iris_virgincia’, ’Iris_versicolor’, Iris_virgincia’, Iris_virgincia’,
Iris_virgincia’, ([‘Iris_virgincia’,’Iris_versicolor’,’Iris_setosa’,’ Iris_virgincia’,’ Iris_setosa’,
Iris_virgincia’, ’ Iris_setosa’, Iris_virgincia’, ’Iris_versicolor’, Iris_virgincia’, Iris_virgincia’,
Iris_virgincia’],dtype=’<U15’)
11. Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set.

SOURCE CODE:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']

# Data generation
train_num = 200
test_num = 100
# For each plant type: [min, max] ranges of height, leaf length and stem
# diameter, in the same order as the first three dataset columns.
config = {
    'Corn': [[150, 190], [40, 70], [2, 4]],
    'Potato': [[30, 60], [7, 10], [1, 2]],
    'grass': [[10, 40], [10, 40], [0, 1]],
}

plants = list(config.keys())
dataset = pd.DataFrame(columns=['height(cm)', 'Leaf length(cm)',
                                'Stemdiameter(cm)', 'type'])


def _sample_features(plant):
    """Draw one feature row for ``plant`` from its configured ranges.

    A value already present in the corresponding dataset column is redrawn.
    """
    row = []
    for j, (min_val, max_val) in enumerate(config[plant]):
        # ``v in Series`` tests the index labels, not the data, so the
        # membership check is made against the column's values.
        taken = dataset[dataset.columns[j]].values
        v = round(np.random.rand() * (max_val - min_val) + min_val, 2)
        while v in taken:
            v = round(np.random.rand() * (max_val - min_val) + min_val, 2)
        row.append(v)
    return row


index = 0

# Natural data: rows labelled with the plant they were sampled from
for p in config:
    for _ in range(int(train_num / 3 - 3)):
        dataset.loc[index] = _sample_features(p) + [p]
        index += 1

# Wrong data: the remaining rows are deliberately given the label of the
# next plant type, to act as label noise
for _ in range(train_num - index):
    k = np.random.randint(len(plants))
    dataset.loc[index] = (_sample_features(plants[k])
                          + [plants[(k + 1) % len(plants)]])
    index += 1

# dataset = dataset.infer_objects()
# Shuffle the rows, then write the features and the labels to separate files
dataset = dataset.reindex(np.random.permutation(len(dataset)))
dataset.reset_index(drop=True, inplace=True)
dataset.iloc[:int(train_num), :-1].to_csv('potato_train_data.csv', index=False)
dataset.iloc[:int(train_num):, [-1]].to_csv('potato_train_label.csv', index=False)

def visualize(dataset, labels, features, classes, fig_size=(10, 10), layout=None):
    """Draw one scatter subplot per pair of features, coloured by class.

    Args:
        dataset: 2-D array indexed as ``dataset[mask, column]``.
        labels: Array of integer class indices, one per row of ``dataset``.
        features: Feature names, one per column of ``dataset``.
        classes: Class names; ``classes[k]`` is the legend text for label ``k``.
        fig_size: Figure size passed to ``plt.figure``.
        layout: ``[rows, cols]`` of the subplot grid. Defaults to one row per
            feature pair in a single column.
    """
    plt.figure(figsize=fig_size)

    # every unordered pair of feature columns, in the order they are plotted
    pairs = [(i, j)
             for i in range(len(features))
             for j in range(i + 1, len(features))]

    if layout is None:
        # One row per pair. For three features this equals len(features),
        # the previous default; for other counts the grid now has exactly
        # as many cells as there are plots.
        layout = [max(len(pairs), 1), 1]

    for index, (i, j) in enumerate(pairs, start=1):
        p = plt.subplot(layout[0], layout[1], index)
        plt.subplots_adjust(hspace=0.4)
        p.set_title(features[i] + '&' + features[j])
        p.set_xlabel(features[i])
        p.set_ylabel(features[j])
        for k in range(len(classes)):
            p.scatter(dataset[labels == k, i], dataset[labels == k, j],
                      label=classes[k])
        p.legend()

    plt.show()
# Load the generated training features and labels
dataset = pd.read_csv('potato_train_data.csv')
labels = pd.read_csv('potato_train_label.csv')
features = list(dataset.keys())
classes = np.array(['Corn', 'Potato', 'grass'])

# Replace each class name in the 'type' column by its position in `classes`
for i, class_name in enumerate(classes):
    labels.loc[labels['type'] == class_name, 'type'] = i

dataset = dataset.values
labels = labels['type'].values
visualize(dataset, labels, features, classes)
OUTPUT:

12. Write a program to implement k-Means clustering algorithm to cluster the set of
data stored in .CSV file.

SOURCE CODE:

from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
import pickle

# read csv input file (tab separated)
input_data = pd.read_csv("input_data.txt", sep="\t")

# Initialize KMeans object specifying the number of desired clusters
kmeans = KMeans(n_clusters=4)

# learning the clusters from the input data
kmeans.fit(input_data.values)

# output the labels for the input data
print(kmeans.labels_)

# Predict the classification for a given data sample
predicted_class = kmeans.predict([[1, 10, 15]])
print(predicted_class)
OUTPUT:

Unnamed=0 unnamed=1 flow report sorted unnamed U by station

U OBS STATION SHIFT EMPLOYEE 0+ NO.OF


ROWS
0 1 Amberst 2 Hyme 1 4
1 2 Goshen 2 Peth 2 4
2 3 Hadley 2 John 3 3
3 4 Holyorce 1 Woxter 4 0
4 5 Holyorce 1 Barb 5 3
5 6 Orange 2 Card 6 5
6 7 Otis 1 Bey 7 0
7 8 Pledom 2 Mike 8 4
8 9 Standard 1 Sam 9 1
9 10 Suttled 2 Lisa 10 1
11 NAN NAN NAN NAN 11 NAN
13. Design a Python script using the Turtle graphics library to construct a turtle bar chart representing the
grades obtained by N students read from a file categorising them into distinction, first class, second class,
third class and failed.

SOURCE CODE:
import turtle
def drawBar(t, height, width=40):
    """Get turtle t to draw one filled bar of the given height.

    The bar's value is written at its top-left corner. On return the turtle
    has its original heading and sits ``width`` units to the right of where
    it started, ready to draw the next bar.

    Args:
        t: Turtle-like object used for drawing.
        height: Height of the bar; also the text written above it.
        width: Width of the bar (default 40).
    """
    t.begin_fill()          # start filling this shape
    t.left(90)
    t.forward(height)
    t.write(str(height))
    t.right(90)
    t.forward(width)
    t.right(90)
    t.forward(height)
    t.left(90)
    t.end_fill()            # stop filling this shape
xs = [48, 117, 200, 240, 160, 260, 220]     # here is the data
maxheight = max(xs)
numbars = len(xs)
border = 10

wn = turtle.Screen()    # Set up the window and its attributes
# World coordinates: 40 units of width per bar plus a border on every side
wn.setworldcoordinates(0-border, 0-border, 40*numbars+border, maxheight+border)
wn.bgcolor("lightgreen")

tess = turtle.Turtle()  # create tess and set some attributes
tess.color("blue")
tess.fillcolor("red")
tess.pensize(3)

# draw one bar per data value
for a in xs:
    drawBar(tess, a)

wn.exitonclick()

OUTPUT:
14. To implement the program to plot a histogram of the given data.

10-15 15-20 20-25 25-30 30-35

5 6 9 8 2

SOURCE CODE:
import matplotlib.pyplot as plt

# Define the data


# Class intervals and the frequency observed in each of them
labels = ['10-15', '15-20', '20-25', '25-30', '30-35']
data = [5, 6, 9, 8, 2]

# Create the histogram (the data is already binned, so one bar per interval)
plt.bar(labels, data)

# Add labels and title
plt.xlabel('Range')
plt.ylabel('Frequency')
plt.title('Histogram of Data')

# Show the plot
plt.show()

OUTPUT:
15. To implement the program to draw line plot and bar chart of the given data.

Elapsed time (s) 0 1 2 3 4 5 6

Speed(m/s) 0 3 7 12 20 30 45.6

SOURCE CODE:
import matplotlib.pyplot as plt

# Define the data


time = [0, 1, 2, 3, 4, 5, 6]
speed = [0, 3, 7, 12, 20, 30, 45.6]

# Create the line plot
plt.plot(time, speed)

# Add labels and title to the line plot
plt.xlabel('Elapsed time (s)')
plt.ylabel('Speed (m/s)')
plt.title('Line Plot')

# Show the line plot
plt.show()

# Create the bar chart
plt.bar(time, speed)

# Add labels and title to the bar chart
plt.xlabel('Elapsed time (s)')
plt.ylabel('Speed (m/s)')
plt.title('Bar Chart')

# Show the bar chart
plt.show()

OUTPUT:

You might also like