lOMoARcPSD|38377887
Machine Learning - ML practical
Machine Learning (University of Mumbai)
Scan to open on Studocu
Studocu is not sponsored or endorsed by any college or university
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
PRACTICAL - 01
AIM : To study NumPy and Pandas library in Python
PYTHON CODE :
# Install numpy
!pip install numpy
# Import numpy library
import numpy as np

# Create a 1-D array A.
A = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])

# Print elements and type of A.
print(A)
print(type(A))

# Create a 2-D array B. Print its elements and type.
B = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])
print(B)
print(type(B))

# Create a 3-D array C. Print its elements and type.
C = np.array([[[1, 2], [3, 4], [5, 6]],
              [[7, 8], [9, 10], [11, 12]],
              [[13, 14], [15, 16], [17, 18]]])
print(C)
print(type(C))

# Print the element at index 3 of A (the 4th element — Python indexing starts at 0).
print(A[3])

# Print the element in the 3rd row, 3rd column of B.
print(B[2, 2])

# Print C[2, 2, 1]: 3rd block, 3rd row, 2nd value of C.
print(C[2, 2, 1])

# Print the last element of C.
# Fixed: the original printed B[-1, -1] here, but the task asks for C;
# C is 3-D, so three indices are needed.
print(C[-1, -1, -1])

# Slice elements from index 1 up to (but not including) index 5 of A.
print(A[1:5])

# Slice elements from the beginning up to the 5th element of A.
print(A[:5])

# Slice elements from index 3 to the end of A.
print(A[3:])

# From the 3rd row of B, slice columns 2:3 (yields the last element as a 1-element array).
print(B[2, 2:3])

# Create an array fruit that contains names of the fruits.
fruit = np.array(['banana', 'apple', 'cherry', 'orange'])

# Reshape array A into a new 2-D (3x3) array A1.
A1 = A.reshape(3, 3)

# Flatten the array C into a 1-D array C1.
C1 = C.reshape(-1)

# Iterate through all elements of A to print them.
for x in A:
    print(x)

# Iterate through all elements of B to print them.
for i in B:
    for j in i:
        print(j)

# Iterate through all elements of C to print them.
for i in C:
    for j in i:
        for k in j:
            print(k)

# Create an array A3 whose elements are 5 times the elements of A,
# then join A and A3 into A3.
A3 = 5 * A
print(A)
print(A3)
A3 = np.concatenate((A, A3))

# Create two 2-D arrays and perform element-wise multiplication.
P = np.array([[1, 2, 3],
              [4, 5, 6]])
Q = np.array([[2, -7, 5],
              [-6, 2, 0]])
R = np.multiply(P, Q)

# Create two 2-D arrays and perform matrix multiplication (2x3 dot 3x2 -> 2x2).
P = np.array([[1, 2, 3],
              [4, 5, 6]])
Q = np.array([[2, -7],
              [-6, 2],
              [1, 5]])
R = np.dot(P, Q)

# Find the square of all elements of B.
np.square(B)

# Find the addition of all elements of C.
np.sum(C)

# Find 1/sqrt(e^x - 1) for each element x of C.
print(np.reciprocal(np.sqrt(np.exp(C) - 1)))
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
OUTPUT :
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
PYTHON CODE :
# Import pandas library
import pandas as pd

# Read data from the CSV file into a DataFrame df.
df = pd.read_csv('nba.csv')

# Check the size of the DataFrame to confirm it has the expected number of rows.
print(f"Rows: {df.shape[0]}, Columns: {df.shape[1]}")

# Display the first few rows to check the data.
print(df.head())

# Ensure there are at least 10 rows before accessing the 10th entry.
if len(df) > 9:
    print(df.iloc[9])  # This accesses the 10th row (index 9)
else:
    print("The dataset has fewer than 10 rows.")

# Drop rows that are completely empty (if there are any).
df.dropna(how='all', inplace=True)
print(f"Rows after dropping empty ones: {df.shape[0]}")

# Access the 10th entry again, after cleaning the data.
if len(df) > 9:
    print(df.iloc[9])  # Access the 10th row after cleaning
else:
    print("The dataset has fewer than 10 rows.")

# Display the details of the players whose age is greater than 25.
print(df[df['Age'] > 25])

# Display players older than 25 who belong to team "Boston Celtics" or "Utah Jazz".
# Fixed: each comparison must be parenthesised because `&` binds tighter than `>`;
# the original `df['Age'] > 25 & mask` evaluated `25 & mask` first.
# Also corrected the team-name typo "Bolton Celtics" -> "Boston Celtics".
print(df[(df['Age'] > 25) & (df['Team'].isin(['Boston Celtics', 'Utah Jazz']))])
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
OUTPUT :
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
PRACTICAL – 02
AIM : To implement linear regression using Python.
PYTHON CODE :
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Generate random dataset.
np.random.seed(0)
# Generate a 2-D array with 100 rows, each containing 1 random number.
x = np.random.rand(100, 1)
y = 2 + 3 * x + np.random.rand(100, 1)  # y = 2 + 3x + random noise

# Model initialization.
regression_model = LinearRegression()

# Fit the model (train the model).
regression_model.fit(x, y)

# Predict on the training inputs.
y_predicted = regression_model.predict(x)

# Model evaluation.
rmse = np.sqrt(mean_squared_error(y, y_predicted))  # Root Mean Squared Error (RMSE)
r2 = r2_score(y, y_predicted)

# Printing values.
print('Slope:', regression_model.coef_)
print('Intercept:', regression_model.intercept_)
print('Root Mean Squared Error (RMSE):', rmse)
print('R2 Score:', r2)

# Plotting values.
# Data points.
plt.scatter(x, y, s=10, label='Data points')
# Predicted values (regression line).
plt.plot(x, y_predicted, color='r', label='Regression line')
# Labels and title.
plt.xlabel('x-Values from 0-1')
plt.ylabel('y-values from 2-5')
plt.legend()
# Display the plot.
plt.show()
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
OUTPUT :
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
PRACTICAL – 03
AIM : To study logistic regression using Python.
PYTHON CODE :
# Logistic-regression demo: fit a classifier on a minimal binary dataset
# and visualise its decision boundary.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

# Minimal binary-classification dataset: feature 0 maps to label 0, feature 1 to label 1.
x = np.array([[0], [1]])
y = np.array([0, 1])

# Build and train the logistic-regression classifier.
log_reg_model = LogisticRegression()
log_reg_model.fit(x, y)

# Run the trained model back over the training inputs.
y_predicted = log_reg_model.predict(x)

# Fraction of training points classified correctly.
accuracy = np.mean(y_predicted == y)

# Report the learned parameters and the training accuracy.
print('Slope:', log_reg_model.coef_)
print('Intercept:', log_reg_model.intercept_)
print('Accuracy:', accuracy)

# Scatter the raw data, then overlay the predicted class along a fine grid
# of x values so the decision boundary shows up as a step in the red line.
plt.scatter(x, y, color='blue', label='Data points')
x_boundary = np.linspace(-0.1, 1.1, 300).reshape(-1, 1)
y_boundary = log_reg_model.predict(x_boundary)
plt.plot(x_boundary, y_boundary, color='red', label='Decision boundary')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
OUTPUT :
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
PRACTICAL – 04
AIM : To study KNN using Python.
PYTHON CODE :
# K-Nearest-Neighbours demo: classify two unseen points against a tiny
# 1-D training set and plot the resulting decision boundary.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

# Training data: one feature per sample; small values labelled 0, larger ones 1.
x = np.arange(1, 9).reshape(-1, 1)
y = np.array([0] * 3 + [1] * 5)

# Train a classifier that votes among the 3 closest neighbours.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(x, y)

# Classify two previously unseen points and report the result.
x_test = np.array([[2.5], [6.5]])
y_predicted = knn.predict(x_test)
print("Predictions:", y_predicted)

# Plot the training data and the newly classified points.
plt.scatter(x, y, color='blue', label='Data points')
plt.scatter(x_test, y_predicted, color='red', label='Predictions')

# Sweep a dense grid across the feature range to trace the decision boundary.
x_boundary = np.linspace(0, 8, 100).reshape(-1, 1)
y_boundary = knn.predict(x_boundary)
plt.plot(x_boundary, y_boundary, color='green', label='Decision boundary')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
OUTPUT :
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
PRACTICAL – 05
AIM : To study Decision Tree using Python.
PYTHON CODE :
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Generate a simple 2D dataset (binary classification).
x = np.array([[1, 2], [2, 3], [3, 3], [4, 5], [5, 4], [6, 7], [7, 8], [8, 9]])  # Feature values
y = np.array([0, 0, 0, 1, 1, 1, 1, 1])  # Labels: 0 for small values, 1 for large values

# Model initialization.
dt_classifier = DecisionTreeClassifier()

# Fit the model (train the model).
dt_classifier.fit(x, y)

# Predict using the model.
x_test = np.array([[2.5, 3.5], [6.5, 7.5]])  # New data points to predict
y_predicted = dt_classifier.predict(x_test)

# Model evaluation: print predictions.
print("Predictions:", y_predicted)

# Visualize the fitted decision tree.
plt.figure(figsize=(12, 8))
tree.plot_tree(dt_classifier, filled=True, feature_names=["Feature 1", "Feature 2"],
               class_names=["Class 0", "Class 1"], rounded=True)
plt.title("Decision Tree Classifier Visualization")
plt.show()

# Plotting the decision boundary in 2D space.
plt.figure(figsize=(8, 6))

# Create a meshgrid covering the data range, padded by 1 unit on each side.
x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))

# Predict the labels for all points in the meshgrid.
Z = dt_classifier.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plot the decision boundary.
plt.contourf(xx, yy, Z, alpha=0.4, cmap='coolwarm')

# Plot the original data points.
# Fixed: stray PDF-extraction lines that sat between statements here were removed.
plt.scatter(x[:, 0], x[:, 1], c=y, edgecolor='black', s=100, cmap='coolwarm', marker='^',
            label='Data points')

# Plot the predicted test points.
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_predicted, edgecolor='black', s=150, marker='o',
            label='Predictions')

# Labels and title.
plt.xlabel('Feature 1', fontsize=12)
plt.ylabel('Feature 2', fontsize=12)
plt.title('Decision Tree - 2D Classification and Decision Boundary', fontsize=14)
plt.legend()

# Show the plot.
plt.show()
OUTPUT :
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
PRACTICAL – 06
AIM : To study SVM using Python.
PYTHON CODE :
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.datasets import make_classification

# Generate a simple 2D dataset for classification with 2 informative features.
x, y = make_classification(n_samples=100, n_features=2, n_informative=2, n_redundant=0,
                           n_classes=2, random_state=42)

# Initialize the Support Vector Machine (SVM) model with a linear kernel.
svm_model = SVC(kernel='linear', C=1)

# Fit the model (train the model).
svm_model.fit(x, y)

# Predict using the model.
y_predicted = svm_model.predict(x)

# Visualize the decision boundary.
plt.figure(figsize=(8, 6))

# Create a meshgrid covering the data range, padded by 1 unit on each side.
x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))

# Predict the labels for all points in the meshgrid.
Z = svm_model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plot the decision boundary.
plt.contourf(xx, yy, Z, alpha=0.4, cmap='coolwarm')

# Plot the original data points.
plt.scatter(x[:, 0], x[:, 1], c=y, edgecolor='black', s=100, cmap='coolwarm', marker='^',
            label='Data points')

# Highlight the support vectors with large open red circles.
plt.scatter(svm_model.support_vectors_[:, 0], svm_model.support_vectors_[:, 1], s=200,
            facecolors='none', edgecolors='red', marker='o', label='Support Vectors')

# Labels and title.
plt.xlabel('Feature 1', fontsize=12)
plt.ylabel('Feature 2', fontsize=12)
plt.title('SVM with Linear Kernel - Decision Boundary and Support Vectors', fontsize=14)
# Fixed: stray PDF-extraction lines that sat between statements here were removed.
plt.legend()

# Show the plot.
plt.show()
OUTPUT :
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
PRACTICAL – 07
AIM : To study Random-Forest Algorithm using Python.
PYTHON CODE :
# Random-Forest demo: fit a tree ensemble on a synthetic 2-D dataset and
# shade its decision regions.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

# Synthetic two-class dataset with two informative features.
x, y = make_classification(n_samples=100, n_features=2, n_informative=2, n_redundant=0,
                           n_classes=2, random_state=42)

# An ensemble of 100 decision trees, seeded for reproducibility.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(x, y)

# Re-classify the training data with the fitted forest.
y_predicted = rf_model.predict(x)

# Draw the decision regions.
plt.figure(figsize=(8, 6))

# Dense grid spanning the data range, padded by one unit on every side.
x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))

# Classify every grid point, then shape the labels back onto the grid.
Z = rf_model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

# Shaded decision regions with the training points drawn on top.
plt.contourf(xx, yy, Z, alpha=0.4, cmap='coolwarm')
plt.scatter(x[:, 0], x[:, 1], c=y, edgecolor='black', s=100, cmap='coolwarm', marker='^',
            label='Data points')

plt.xlabel('Feature 1', fontsize=12)
plt.ylabel('Feature 2', fontsize=12)
plt.title('Random Forest Classification - Decision Boundary', fontsize=14)
plt.legend()
plt.show()
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
OUTPUT :
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
PRACTICAL – 08
AIM : To study K-Means Clustering using Python.
PYTHON CODE :
# K-Means demo: cluster synthetic blob data into three groups and plot
# the points together with the fitted centroids.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# Synthetic dataset drawn from 3 Gaussian blobs.
x, y = make_blobs(n_samples=300, centers=3, random_state=42)

# Fit a 3-cluster K-Means model, then assign every point to a cluster.
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(x)
y_kmeans = kmeans.predict(x)

# Coordinates of the learned cluster centres.
centers = kmeans.cluster_centers_

# Plot each point coloured by its assigned cluster, with the centroids
# marked as large translucent red crosses.
plt.figure(figsize=(8, 6))
plt.scatter(x[:, 0], x[:, 1], c=y_kmeans, s=50, cmap='viridis')
plt.scatter(centers[:, 0], centers[:, 1], c='red', s=200, alpha=0.5, marker='X', label='Centroids')

plt.xlabel('Feature 1', fontsize=12)
plt.ylabel('Feature 2', fontsize=12)
plt.title('K-Means Clustering - Cluster Centers', fontsize=14)
plt.legend()
plt.show()
Downloaded by Komala Devi (komala.somesula@gmail.com)
lOMoARcPSD|38377887
OUTPUT :
Downloaded by Komala Devi (komala.somesula@gmail.com)