0% found this document useful (0 votes)

60 views30 pages

ML Manual

The document outlines several programming tasks involving data analysis and machine learning using various datasets. It includes creating histograms and box plots, computing correlation matrices, implementing PCA, and applying different algorithms such as k-Nearest Neighbour, decision trees, and Naive Bayes. Additionally, it covers clustering techniques and regression analysis using datasets like California Housing, Iris, and Breast Cancer.

Uploaded by

Shradha J

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

60 views30 pages

ML Manual

Uploaded by

Shradha J

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 30

1.

Develop a program to create histograms for all numerical features and analyze the
distribution of each feature. Generate box plots for all numerical features and identify any
outliers. Use California Housing dataset.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the California Housing dataset.
#
# Two interchangeable sources are supported: a local "housing.csv" file, or the
# copy that scikit-learn downloads and caches. The original listing separated
# the two alternatives with a bare "OR", which is not valid Python and, if both
# were run, made the CSV load silently override the first one. The choice is
# now explicit: use the local file when it exists, otherwise fall back to
# scikit-learn.
from pathlib import Path

from sklearn.datasets import fetch_california_housing

if Path("housing.csv").exists():
    data = pd.read_csv("housing.csv")
else:
    housing_data = fetch_california_housing(as_frame=True)
    data = housing_data.frame

# Fetch the numerical columns; the histogram, box-plot and outlier steps below
# iterate over exactly these names.
numerical_features = data.select_dtypes(include=['float64', 'int64']).columns
print(f"Numerical features: {list(numerical_features)}")

# Histogram: one plot per numerical column to inspect its distribution
for column_name in numerical_features:
    column_values = data[column_name]
    plt.hist(column_values, bins=30, color='skyblue', edgecolor='black')
    plt.title(f"Histogram of {column_name}")
    plt.xlabel(column_name)
    plt.ylabel("Frequency")
    plt.show()
# Box plot: one figure per numerical column; points beyond the whiskers are
# the candidate outliers
for column_name in numerical_features:
    plt.figure(figsize=(8, 5))
    column_values = data[column_name]
    sns.boxplot(x=column_values, color='lightgreen')
    plt.title(f"Box Plot of {column_name}")
    plt.xlabel(column_name)
    plt.show()
# Outliers: report every value lying more than 1.5 * IQR outside the quartiles
for feature in numerical_features:
    values = data[feature]
    first_quartile = values.quantile(0.25)   # 25th percentile
    third_quartile = values.quantile(0.75)   # 75th percentile
    spread = third_quartile - first_quartile  # interquartile range
    lower_bound = first_quartile - 1.5 * spread
    upper_bound = third_quartile + 1.5 * spread

    too_low = values < lower_bound
    too_high = values > upper_bound
    print(f"Outliers in {feature}:")
    print(values[too_low | too_high].sort_values())
2. Develop a program to compute the correlation matrix to understand the relationships
between pairs of features. Visualize the correlation matrix using a heatmap to know which
variables have strong positive/negative correlations. Create a pair plot to visualize pairwise
relationships between features. Use California Housing dataset.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

data = pd.read_csv("housing.csv")

# Restrict the analysis to numerical columns; correlation is undefined for text
numeric_frame = data.select_dtypes(include=['float64', 'int64'])
numerical_features = numeric_frame.columns
print(f"Numerical features: {list(numerical_features)}")

# Pairwise correlation matrix of the numerical columns
correlation = data[numerical_features].corr()
correlation

# Heatmap of the correlation matrix, annotated with two-decimal coefficients
plt.figure(figsize=(10, 8))
sns.heatmap(
    correlation,
    annot=True,
    cmap='coolwarm',
    fmt='.2f',
    linewidths=0.5,
)
plt.title("Correlation Matrix Heatmap")
plt.show()

# Pair plot: scatter plots off the diagonal, KDE curves on the diagonal
sns.pairplot(data, diag_kind='kde', plot_kws={'alpha': 0.7})
plt.show()
3. Develop a program to implement Principal Component Analysis (PCA)
for reducing the dimensionality of the Iris dataset from 4 features to 2.

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

# Load the iris measurements into a DataFrame with named feature columns
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)

# Project the four original features onto the first two principal components
pca = PCA(n_components=2)
data_reduced = pca.fit_transform(iris_df)

# Keep the projected coordinates next to the species label for plotting
reduced_df = pd.DataFrame(data_reduced, columns=['PC 1', 'PC 2'])
reduced_df['target'] = iris.target

# One scatter series per species, each in its own colour
colors = ['r', 'g', 'b']
target_names = iris.target_names

for color, label in zip(colors, np.unique(reduced_df['target'])):
    species_rows = reduced_df[reduced_df['target'] == label]
    plt.scatter(
        species_rows['PC 1'],
        species_rows['PC 2'],
        label=target_names[label],
        color=color,
    )

plt.title('PCA on Iris Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.grid()
plt.show()
4. For a given set of training data examples stored in a .CSV file, implement and demonstrate
the Find-S algorithm to output a description of the set of all hypotheses consistent with the
training examples.
import pandas as pd
def find_s_algorithm(file_path):
    """Run the Find-S algorithm on a header-less CSV of training examples.

    Every column except the last is an attribute; the last column is the
    class label, and rows labelled exactly 'Yes' are the positive examples.

    Args:
        file_path: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        A list with one entry per attribute: the value shared by every
        positive example, or '?' where the positive examples disagree.
        If there are no positive examples, the most specific hypothesis is
        returned, written as '0' for every attribute (previously this case
        crashed because the hypothesis was never initialised).
    """
    # Read the dataset without treating the first row as a header
    data = pd.read_csv(file_path, header=None)
    print("Training data:\n", data)

    attributes = data.iloc[:, :-1]
    class_label = data.iloc[:, -1]

    # Find-S ignores negative examples entirely, so filter them out up front
    positive_rows = list(
        attributes[class_label == 'Yes'].itertuples(index=False, name=None)
    )
    if not positive_rows:
        return ['0'] * attributes.shape[1]

    # Start from the first positive example and generalise just enough to
    # cover each positive example in turn
    hypothesis = list(positive_rows[0])
    for row in positive_rows:
        hypothesis = [
            current if current == value else '?'
            for current, value in zip(hypothesis, row)
        ]
    return hypothesis

# Run Find-S on the lab's training file and report the learned hypothesis
file_path = '4th.csv'
hypothesis = find_s_algorithm(file_path=file_path)
print("\nThe final hypothesis is:", hypothesis)
5. Develop a program to implement k-Nearest Neighbour algorithm to classify the randomly
generated 100 values of x in the range of [0,1]. Perform the following based on dataset generated.
1. Label the first 50 points {x1,……,x50} as follows: if (xi ≤ 0.5), then xi ∊ Class1, else xi ∊ Class2
2. Classify the remaining points, x51,……,x100 using KNN. Perform this for k=1,2,3,4,5,20,30

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Seed the generator so every run draws the same 100 values
np.random.seed(0)

# 100 uniformly distributed values between 0 and 1
data = np.random.rand(100)

# The first 50 values are the labelled training set: Class1 up to and
# including 0.5, Class2 above it
labels = np.where(data[:50] <= 0.5, 'Class1', 'Class2')

# scikit-learn expects 2-D feature arrays, hence the single-column reshape
train_data = data[:50].reshape(-1, 1)
train_labels = labels
test_data = data[50:].reshape(-1, 1)

# Classify the remaining 50 values once for each requested neighbourhood size
k_values = [1, 2, 3, 4, 5, 20, 30]
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(train_data, train_labels)
    predicted_labels = knn.predict(test_data)

    print(f"K = {k}")
    # Test points are x51..x100 in the task's 1-based numbering
    for offset, label in enumerate(predicted_labels):
        point = test_data[offset, 0]
        print(f"x{offset + 51} = {point:.3f} -> Predicted Label: {label}")
    print()
6. Implement the non-parametric Locally Weighted Regression algorithm in order to fit
data points. Select an appropriate data set for your experiment and draw graphs.
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.nonparametric.smoothers_lowess import lowess

# Synthetic data: one period of a sine wave with Gaussian noise added
np.random.seed(42)
X = np.linspace(0, 2 * np.pi, 100)
noise = 0.1 * np.random.randn(100)
y = np.sin(X) + noise

# Locally weighted regression; 'frac' is the share of points used for each
# local fit, i.e. the smoothing parameter
lowess_result = lowess(y, X, frac=0.3)
fitted_x = lowess_result[:, 0]
fitted_y = lowess_result[:, 1]

# Noisy samples as points, smoothed estimate as a line
plt.figure(figsize=(10, 6))
plt.scatter(X, y, color='red', label='Training Data', alpha=0.7)
plt.plot(fitted_x, fitted_y, color='blue', label='LOWESS Fit', linewidth=2)
plt.xlabel('X', fontsize=12)
plt.ylabel('y', fontsize=12)
plt.title('Locally Weighted Regression using statsmodels', fontsize=14)
plt.legend(fontsize=10)
plt.grid(alpha=0.3)
plt.show()
OUTPUT:
7. Develop a program to demonstrate the working of Linear Regression and Polynomial
Regression. Use Boston Housing Dataset for Linear Regression and Auto MPG Dataset
(for vehicle fuel efficiency prediction) for Polynomial Regression.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score

# Linear Regression - Boston Housing Dataset

def linear_regression_boston():
    """Fit, plot and score a one-feature linear regression on housing data.

    The function name is kept for compatibility with the lab task, but the
    data actually loaded is scikit-learn's California Housing dataset (the
    Boston dataset is no longer shipped with recent scikit-learn releases).
    The single predictor is the average number of rooms per household, the
    closest counterpart of Boston's 'RM' feature.

    Side effects: shows a matplotlib figure and prints the test-set MSE and
    R^2 score. Returns nothing.
    """
    housing = fetch_california_housing()

    # Look the column up by name. The original hard-coded index 5, which is
    # 'AveOccup' (average occupancy) in this dataset, not the room count the
    # comment and axis label promised.
    rooms_column = list(housing.feature_names).index("AveRooms")
    X, y = housing.data[:, rooms_column].reshape(-1, 1), housing.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = LinearRegression().fit(X_train, y_train)
    y_pred = model.predict(X_test)

    plt.scatter(X_test, y_test, color="blue", label="Actual")
    plt.plot(X_test, y_pred, color="red", label="Predicted")
    plt.xlabel("Average Rooms per Household (AveRooms)")
    # The California Housing target is expressed in units of $100,000
    plt.ylabel("Median House Value ($100,000s)")
    plt.title("Linear Regression - California Housing")
    plt.legend()
    plt.show()

    print("Linear Regression - California Housing")
    print("MSE:", mean_squared_error(y_test, y_pred))
    print("R^2 Score:", r2_score(y_test, y_pred))

# Polynomial Regression - Auto MPG Dataset

def polynomial_regression_auto_mpg():
    """Fit, plot and score a degree-2 polynomial regression of MPG on displacement.

    Downloads the UCI Auto MPG data file, drops rows with missing values
    (marked '?' in the file), and fits a pipeline of polynomial features,
    standard scaling and linear regression on an 80/20 train/test split.

    Side effects: performs a network download, shows a matplotlib figure and
    prints the test-set MSE and R^2 score. Returns nothing.
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"

    # Each row of the file has nine fields; the last one is the quoted car
    # name. All nine must be named: with only eight names pandas treats the
    # first field (mpg) as the index and shifts every name one column to the
    # right, so "mpg" and "displacement" would silently hold other columns.
    cols = ["mpg", "cylinders", "displacement", "horsepower", "weight", "acceleration", "model_year",
            "origin", "car_name"]
    # Raw string: '\s' is not a valid escape in a normal string literal
    data = pd.read_csv(url, sep=r'\s+', names=cols, na_values="?").dropna()

    X, y = data["displacement"].values.reshape(-1, 1), data["mpg"].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = make_pipeline(
        PolynomialFeatures(degree=2),
        StandardScaler(),
        LinearRegression(),
    ).fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Predictions are drawn as points because X_test is not sorted; a line
    # plot would zig-zag across the curve
    plt.scatter(X_test, y_test, color="blue", label="Actual")
    plt.scatter(X_test, y_pred, color="red", label="Predicted")
    plt.xlabel("Displacement")
    plt.ylabel("MPG")
    plt.title("Polynomial Regression - Auto MPG")
    plt.legend()
    plt.show()

    print("Polynomial Regression - Auto MPG")
    print("MSE:", mean_squared_error(y_test, y_pred))
    print("R^2 Score:", r2_score(y_test, y_pred))
if __name__ == "__main__":
    # Run the two regression demos in order: linear first, then polynomial
    for run_demo in (linear_regression_boston, polynomial_regression_auto_mpg):
        run_demo()
OUTPUT:
8. Develop a program to demonstrate the working of the decision tree algorithm. Use
Breast Cancer Data set for building the decision tree and apply this knowledge to
classify a new sample.
# Importing necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree

# Load the breast cancer dataset: feature matrix and binary target
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the tree and score it on the held-out samples
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Classify a "new" sample: the first held-out row, shaped as a 1-row matrix
new_sample = X_test[0].reshape(1, -1)
prediction = clf.predict(new_sample)

# This script treats a predicted label of 1 as benign, anything else as malignant
if prediction[0] == 1:
    prediction_class = "Benign"
else:
    prediction_class = "Malignant"
print(f"Predicted Class for the new sample: {prediction_class}")

# Draw the fitted tree with feature and class names on the nodes
plt.figure(figsize=(12,8))
tree.plot_tree(
    clf,
    filled=True,
    feature_names=data.feature_names,
    class_names=data.target_names,
)
plt.title("Decision Tree - Breast Cancer Dataset")
plt.show()
OUTPUT:
9. Develop a program to implement the Naive Bayesian classifier considering Olivetti Face
Data set for training. Compute the accuracy of the classifier, considering a few test data
sets.
import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Olivetti faces, shuffled with a fixed seed
data = fetch_olivetti_faces(shuffle=True, random_state=42)
X, y = data.data, data.target

# 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Gaussian Naive Bayes on the flattened image vectors
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=1))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# 5-fold cross-validation on the full dataset as a second accuracy estimate
cross_val_accuracy = cross_val_score(gnb, X, y, cv=5, scoring='accuracy')
print(f'\nCross-validation accuracy: {cross_val_accuracy.mean() * 100:.2f}%')

# Show the first 15 test faces (3 x 5 grid) with true and predicted labels;
# each sample is reshaped back to a 64 x 64 image for display
fig, axes = plt.subplots(3, 5, figsize=(12, 8))
for position, ax in enumerate(axes.ravel()):
    face = X_test[position].reshape(64, 64)
    ax.imshow(face, cmap=plt.cm.gray)
    ax.set_title(f"True: {y_test[position]}, Pred: {y_pred[position]}")
    ax.axis('off')

plt.show()
OUTPUT:
10. Develop a program to implement k-means clustering using Wisconsin Breast Cancer
data set and visualize the clustering result.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report

# Load the Wisconsin breast cancer data; y is used only for evaluation
data = load_breast_cancer()
X = data.data
y = data.target

# K-Means is distance based, so put every feature on the same scale first
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# n_init is set explicitly because its default differs between scikit-learn
# versions; a fixed value keeps results reproducible across installs
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
y_kmeans = kmeans.fit_predict(X_scaled)

# Cluster ids are arbitrary: cluster 0 may correspond to class 1 and vice
# versa. Comparing raw ids with y can therefore report an inverted (and
# misleadingly poor) result, so flip the ids when that agrees better with y.
if np.mean(y_kmeans == y) < 0.5:
    y_aligned = 1 - y_kmeans
else:
    y_aligned = y_kmeans

print("Confusion Matrix:")
print(confusion_matrix(y, y_aligned))
print("\nClassification Report:")
print(classification_report(y, y_aligned))

# Project to two principal components purely for visualisation
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
df['Cluster'] = y_kmeans
df['True Label'] = y

# Plot 1: points coloured by assigned cluster
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1', s=100, edgecolor='black',
                alpha=0.7)
plt.title('K-Means Clustering of Breast Cancer Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="Cluster")
plt.show()

# Plot 2: the same points coloured by the true diagnosis, for comparison
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='True Label', palette='coolwarm', s=100, edgecolor='black',
                alpha=0.7)
plt.title('True Labels of Breast Cancer Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="True Label")
plt.show()

# Plot 3: clusters again, with the centroids projected into the same PCA space
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1', s=100, edgecolor='black',
                alpha=0.7)
centers = pca.transform(kmeans.cluster_centers_)
plt.scatter(centers[:, 0], centers[:, 1], s=200, c='red', marker='X', label='Centroids')
plt.title('K-Means Clustering with Centroids')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="Cluster")
plt.show()

OUTPUT:

Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
9 pages
MLLab Manual
No ratings yet
MLLab Manual
24 pages
Mlalllabprgs
No ratings yet
Mlalllabprgs
17 pages
ML Manual
No ratings yet
ML Manual
9 pages
Machine Learning Programs
No ratings yet
Machine Learning Programs
10 pages
ML Spy Programs
No ratings yet
ML Spy Programs
16 pages
Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
18 pages
ML Journal External
No ratings yet
ML Journal External
14 pages
ML Lab Manual
No ratings yet
ML Lab Manual
24 pages
ML Programs
No ratings yet
ML Programs
14 pages
ML Labmanual
No ratings yet
ML Labmanual
33 pages
Machine Learning Lab Manaul BCSL606
No ratings yet
Machine Learning Lab Manaul BCSL606
27 pages
ML Short Code - Under Updating
No ratings yet
ML Short Code - Under Updating
4 pages
Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
26 pages
ML Experiment WithDataset
No ratings yet
ML Experiment WithDataset
23 pages
Machine Learning Lab
No ratings yet
Machine Learning Lab
33 pages
ML - Datascience Manual
No ratings yet
ML - Datascience Manual
64 pages
ML Minimized Programs
No ratings yet
ML Minimized Programs
9 pages
ML Full For Print New 1
No ratings yet
ML Full For Print New 1
38 pages
Lab Extern L
No ratings yet
Lab Extern L
8 pages
ML Shristi File
No ratings yet
ML Shristi File
49 pages
BCSL606 Machine Learning Lab
No ratings yet
BCSL606 Machine Learning Lab
33 pages
Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
33 pages
Machine Learning Algorithms Guide
No ratings yet
Machine Learning Algorithms Guide
34 pages
V
No ratings yet
V
8 pages
Train
No ratings yet
Train
17 pages
ML Lab Manual
No ratings yet
ML Lab Manual
25 pages
ML Lab Manual
No ratings yet
ML Lab Manual
60 pages
ML Lab Manual for CSE Students
No ratings yet
ML Lab Manual for CSE Students
32 pages
Lab Manual ML
No ratings yet
Lab Manual ML
26 pages
ML 3
No ratings yet
ML 3
24 pages
ML Lab Manual
No ratings yet
ML Lab Manual
43 pages
Argha's ML LAB - 240927 - 121838
No ratings yet
Argha's ML LAB - 240927 - 121838
13 pages
ML Lab Mannual1
No ratings yet
ML Lab Mannual1
37 pages
1
No ratings yet
1
13 pages
Final ML Programs 075005
No ratings yet
Final ML Programs 075005
15 pages
External
No ratings yet
External
11 pages
CP4252 Lab Manual
No ratings yet
CP4252 Lab Manual
13 pages
M PDF
No ratings yet
M PDF
13 pages
Experiment 1111
No ratings yet
Experiment 1111
25 pages
Document From Jahnavi
No ratings yet
Document From Jahnavi
20 pages
Aiml Practicals
No ratings yet
Aiml Practicals
22 pages
Strangers
No ratings yet
Strangers
8 pages
Machine Learning Labnem
No ratings yet
Machine Learning Labnem
5 pages
ML Manual
No ratings yet
ML Manual
24 pages
T2 Summary VHA
No ratings yet
T2 Summary VHA
14 pages
Linear Regression with Boston Housing Data
No ratings yet
Linear Regression with Boston Housing Data
14 pages
Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
22 pages
Experiment 1
No ratings yet
Experiment 1
19 pages
Machine Learning
No ratings yet
Machine Learning
22 pages
Lab Manual ML
No ratings yet
Lab Manual ML
23 pages
Machine Learning
No ratings yet
Machine Learning
10 pages
ML Record
No ratings yet
ML Record
19 pages
Machine Learning Laboratory
No ratings yet
Machine Learning Laboratory
23 pages
IoT Task4 21BEC0384
No ratings yet
IoT Task4 21BEC0384
9 pages
Wa0003
No ratings yet
Wa0003
16 pages
ML Lab
No ratings yet
ML Lab
5 pages
ML Lab Experiment Shivansh
No ratings yet
ML Lab Experiment Shivansh
29 pages
Machine Learning Project: TITLE: Predicting The Sale Price of A House Using Linear Regression
No ratings yet
Machine Learning Project: TITLE: Predicting The Sale Price of A House Using Linear Regression
20 pages
Concepts of EDA, Outliers-Detection and Treatment
No ratings yet
Concepts of EDA, Outliers-Detection and Treatment
99 pages
Statistical Treatment 2-20-18
0% (2)
Statistical Treatment 2-20-18
3 pages
Papers Citation vs H-Index Analysis
No ratings yet
Papers Citation vs H-Index Analysis
22 pages
1 s2.0 S0260691719300668 Main
No ratings yet
1 s2.0 S0260691719300668 Main
8 pages
Master's Dissertation Guide
100% (2)
Master's Dissertation Guide
8 pages
Mid Review Math 2205
No ratings yet
Mid Review Math 2205
7 pages
Test For Stat 25 - 04
No ratings yet
Test For Stat 25 - 04
12 pages
Statistical Inference Course Guide
No ratings yet
Statistical Inference Course Guide
69 pages
Grade 12 Paper 2 Revision Pack
No ratings yet
Grade 12 Paper 2 Revision Pack
108 pages
Business Stats MCQs for Students
75% (4)
Business Stats MCQs for Students
24 pages
Lean Six Sigma Guide Step 5
No ratings yet
Lean Six Sigma Guide Step 5
80 pages
Data Science Training Report 2023
No ratings yet
Data Science Training Report 2023
32 pages
Unit III
No ratings yet
Unit III
17 pages
2019 La Salle F6 Math Exam Solutions
No ratings yet
2019 La Salle F6 Math Exam Solutions
7 pages
DAA - Chapter 03
No ratings yet
DAA - Chapter 03
19 pages
Team 19 Project Report
No ratings yet
Team 19 Project Report
27 pages
Stat 166 Final Paper
No ratings yet
Stat 166 Final Paper
59 pages
KSSM Form 4 Math Topics
No ratings yet
KSSM Form 4 Math Topics
31 pages
TCAP PsychAss Day 1
No ratings yet
TCAP PsychAss Day 1
173 pages
4024 Y12 SP 2
No ratings yet
4024 Y12 SP 2
20 pages
Atmospheric Precursors From Multiple Satellites Associat 2024 Advances in SP
No ratings yet
Atmospheric Precursors From Multiple Satellites Associat 2024 Advances in SP
16 pages
FDS Main
No ratings yet
FDS Main
141 pages
Math GR10 Qtr4-Module-1
100% (1)
Math GR10 Qtr4-Module-1
24 pages
Sa3 - 2
No ratings yet
Sa3 - 2
72 pages
Module13 PDF
No ratings yet
Module13 PDF
44 pages
UNIT-1 (Preparing To Model)
No ratings yet
UNIT-1 (Preparing To Model)
82 pages
RM 10 - Engineering Data Analysis 1 (Part 01)
No ratings yet
RM 10 - Engineering Data Analysis 1 (Part 01)
2 pages
Biostatistics Exam: Frequency & Hypothesis
No ratings yet
Biostatistics Exam: Frequency & Hypothesis
9 pages
Measures of Variation
No ratings yet
Measures of Variation
20 pages
ECE Math Convolution & Probability
No ratings yet
ECE Math Convolution & Probability
9 pages

ML Manual

Uploaded by

ML Manual

Uploaded by

1.

# Load the California Housing dataset

#fetch numerical columns

#fetch numerical columns

# Create a pair plot for pairwise relationships

# Load the iris dataset

# Perform PCA to reduce dimensionality to 2 components

# Create a DataFrame for the reduced data

# Plot the reduced data

for i, label in enumerate(np.unique(reduced_df['target'])):

plt.title('PCA on Iris Dataset')

attributes = data.iloc[:, :-1]

for index, row in attributes.iterrows():

# Set random seed for reproducibility

# Generate 100 random values in the range [0, 1]

# Label the first 50 points

# Separate data into training and testing sets

# Perform KNN classification for different values of k

# Linear Regression - Boston Housing Dataset

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression().fit(X_train, y_train)

plt.scatter(X_test, y_test, color="blue", label="Actual")

# Polynomial Regression - Auto MPG Dataset

X, y = data["displacement"].values.reshape(-1, 1), data["mpg"].values

model = make_pipeline(PolynomialFeatures(degree=2), StandardScaler(),

plt.scatter(X_test, y_test, color="blue", label="Actual")

print("Polynomial Regression - Auto MPG")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

accuracy = accuracy_score(y_test, y_pred)

prediction_class = "Benign" if prediction == 1 else "Malignant"

data = fetch_olivetti_faces(shuffle=True, random_state=42)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

accuracy = accuracy_score(y_test, y_pred)

cross_val_accuracy = cross_val_score(gnb, X, y, cv=5, scoring='accuracy')

kmeans = KMeans(n_clusters=2, random_state=42)

You might also like