-- Schema for the demo: one row per book.
-- book_id identifies the row and title is mandatory; edition and author are
-- left nullable.
CREATE TABLE books (
    book_id INT          PRIMARY KEY,
    title   VARCHAR(255) NOT NULL,
    edition VARCHAR(50),
    author  VARCHAR(255)
);
-- Seed rows: five books, each with its own edition label, all by author 'JOHN'.
-- Written as one multi-row INSERT; the rows are the same as five single-row inserts.
INSERT INTO books (book_id, title, edition, author)
VALUES
    (1, 'RAM',  '1 EDITION', 'JOHN'),
    (2, 'sham', '2 EDITION', 'JOHN'),
    (3, 'pik',  '3 EDITION', 'JOHN'),
    (4, 'ton',  '4 EDITION', 'JOHN'),
    (5, 'mon',  '5 EDITION', 'JOHN');
-- SLICE operation: how many books carry the edition label '1 EDITION'.
-- book_id is the primary key and therefore never NULL, so counting rows with
-- COUNT(*) gives the same number as counting book_id values.
SELECT
    COUNT(*) AS num_books
FROM books
WHERE books.edition = '1 EDITION';
-- DICE operation (as labelled in this exercise): return id, title and edition
-- for every book whose title is exactly 'pik'. The filter is on the single
-- column title; the comparison uses the literal as written.
SELECT
    books.book_id,
    books.title,
    books.edition
FROM books
WHERE books.title = 'pik';
-- ROLL-UP operation: aggregate the book rows up to edition level, producing
-- one output row per distinct edition with the number of books in it.
-- Counting book_id (the non-NULL primary key) is the same as counting rows.
SELECT
    books.edition,
    COUNT(books.book_id) AS num_books
FROM books
GROUP BY books.edition;
-- DRILL-DOWN operation (as labelled in this exercise): list the individual
-- book rows for author 'JOHN'.
-- The columns are named explicitly instead of SELECT * so the result shape does
-- not change silently if the table gains columns; they are listed in the table's
-- declared order, so the output matches what SELECT * returned for this schema.
-- ORDER BY on the primary key makes the row order deterministic.
SELECT
    book_id,
    title,
    edition,
    author
FROM books
WHERE author = 'JOHN'
ORDER BY book_id;
PAGE RANK
import numpy as np
def page_rank(n, links, d=0.85, max_iter=100, tol=1e-6):
    """Compute PageRank scores by power iteration.

    Args:
        n: Number of pages.
        links: n x n array-like of link weights. Columns are normalised to sum
            to 1 and the matrix is applied as ``M @ ranks``, so a non-zero
            ``links[i][j]`` passes rank from page j to page i. A column that
            sums to zero (a page with no outgoing links) is treated as linking
            to every page with weight ``1 / n``.
        d: Damping factor.
        max_iter: Upper bound on the number of iterations.
        tol: Iteration stops once the L1 norm of the change between two
            successive rank vectors is below this value.

    Returns:
        1-D NumPy array of length n holding the rank of each page.

    Raises:
        ValueError: If ``links`` is not an n x n matrix.
    """
    # Accept nested lists as well as arrays, and work in floating point.
    links = np.asarray(links, dtype=float)
    if links.shape != (n, n):
        raise ValueError(f"links must be a {n}x{n} matrix, got shape {links.shape}")

    column_totals = links.sum(axis=0)
    dangling = column_totals == 0
    # Divide dangling columns by 1 instead of 0: their quotient is discarded by
    # np.where below, but dividing by zero would still emit RuntimeWarnings.
    safe_totals = np.where(dangling, 1.0, column_totals)
    transition_matrix = np.where(dangling, 1.0 / n, links / safe_totals)

    ranks = np.ones(n) / n
    for _ in range(max_iter):
        new_ranks = (1 - d) / n + d * transition_matrix @ ranks
        converged = np.linalg.norm(new_ranks - ranks, 1) < tol
        # Keep the newest iterate even when stopping, so the converged vector
        # (not the one before it) is what gets returned.
        ranks = new_ranks
        if converged:
            break
    return ranks
def main():
    """Read a link matrix from standard input and print each page's PageRank.

    Prompts for the page count, then for one whitespace-separated row of
    integers per page, runs page_rank on the resulting matrix and prints the
    scores with six decimal places, numbering pages from 1.
    """
    page_count = int(input("Enter the number of pages: "))

    matrix_rows = []
    for row_no in range(1, page_count + 1):
        raw_row = input(f"Row {row_no}: ")
        matrix_rows.append([int(token) for token in raw_row.split()])

    scores = page_rank(page_count, np.array(matrix_rows))

    print("\nPageRank Values:")
    for position in range(len(scores)):
        print(f"Page {position + 1}: {scores[position]:.6f}")


# Run the interactive prompt only when executed as a script.
if __name__ == "__main__":
    main()
Output:
Enter the number of pages: 3
Row 1: 0 1 1
Row 2: 1 0 1
Row 3: 1 1 0

PageRank Values:
Page 1: 0.333333
Page 2: 0.333333
Page 3: 0.333333
//DECISION TREE----------------------------------------------------------------------------------
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Load the Iris dataset and put it in a DataFrame: one column per feature,
# plus a 'target' column holding the class label.
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target
print("Dataset:")
print(df.head())

# Separate the feature columns from the label column.
X = df.drop(columns=['target'])
y = df['target']

# Hold out 30% of the rows for testing; the seed fixes which rows those are.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Seed the classifier as well: the split above is already seeded, but an
# unseeded tree can break ties between equally good splits differently from
# run to run, which would make the reported metrics non-reproducible.
classifier = DecisionTreeClassifier(random_state=42)
classifier.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
APRIORI ALGO---------------------------------------------------------------------------------
class Transaction:
    """One transaction (basket): a thin holder for the items it contains."""

    def __init__(self, items):
        """Store ``items`` on the instance as given (no copy is made)."""
        self.items = items
class Itemset:
    """A candidate itemset together with its support counter."""

    def __init__(self, itemset):
        """Keep ``itemset`` as given and start the support count at zero."""
        self.count = 0
        self.itemset = itemset
def get_user_input():
    """Read transactions interactively from standard input.

    Asks for the number of transactions, then reads that many lines of
    comma-separated integers and wraps each parsed list in a Transaction.

    Returns:
        List of Transaction objects in the order they were entered.
    """
    expected = int(input("Enter the number of transactions: "))
    baskets = []
    while len(baskets) < expected:
        line = input("Enter items (comma-separated): ")
        parsed = [int(token) for token in line.split(',')]
        baskets.append(Transaction(parsed))
    return baskets
def count_items(transactions, num_items):
    """Tally how often each item number occurs across all transactions.

    Args:
        transactions: Iterable of objects exposing an ``items`` sequence of ints.
        num_items: Largest item number to make room for.

    Returns:
        List of length ``num_items + 1`` where index ``i`` holds the number of
        occurrences of item ``i``.
    """
    tally = [0 for _ in range(num_items + 1)]
    # Walk every item of every transaction as one flat stream.
    for item_no in (entry for txn in transactions for entry in txn.items):
        tally[item_no] = tally[item_no] + 1
    return tally
def generate_candidates(prev_candidates, k):
    """Join itemsets pairwise to build the next round of candidates.

    Two itemsets are joined when their first ``k - 2`` elements are equal; the
    new itemset is the earlier one's list extended with the element at index
    ``k - 2`` of the later one.

    Args:
        prev_candidates: List of objects with an ``itemset`` list.
        k: Join parameter; the element at index ``k - 2`` is the one appended.

    Returns:
        List of new Itemset objects, ordered by the position of the earlier
        itemset of each pair, then the later one.
    """
    shared = k - 2
    joined = []
    for position, left in enumerate(prev_candidates):
        for right in prev_candidates[position + 1:]:
            if left.itemset[:shared] == right.itemset[:shared]:
                joined.append(Itemset(left.itemset + [right.itemset[shared]]))
    return joined
def calculate_support(candidates, transactions):
    """Set each candidate's ``count`` to the number of transactions containing it.

    A transaction contains a candidate when every element of the candidate's
    ``itemset`` appears in the transaction's ``items``. Candidates are updated
    in place; nothing is returned.
    """
    for entry in candidates:
        wanted = set(entry.itemset)
        hits = 0
        for txn in transactions:
            if wanted.issubset(txn.items):
                hits += 1
        entry.count = hits
def prune_candidates(candidates, min_support):
    """Return the candidates whose ``count`` is at least ``min_support``.

    The input order is preserved and the input list is not modified.
    """
    survivors = []
    for entry in candidates:
        if entry.count >= min_support:
            survivors.append(entry)
    return survivors
def apriori(transactions, min_support):
    """Print every frequent itemset, level by level, with its support count.

    Starts from single items, keeps those that occur in at least
    ``min_support`` transactions, and repeatedly joins the survivors into
    itemsets one element larger until no candidate survives.

    Args:
        transactions: List of objects exposing an ``items`` list of ints.
            Item numbers are scanned from 1 upwards, so item 0 is never
            reported.
        min_support: Minimum number of containing transactions for an itemset
            to count as frequent.

    Returns:
        None; results are written to standard output.
    """
    # default=0 lets an empty transaction list (or only empty baskets) fall
    # through to "nothing frequent" instead of raising from max().
    largest_item = max(
        (item for transaction in transactions for item in transaction.items),
        default=0,
    )
    item_counts = count_items(transactions, largest_item)
    candidates = [Itemset([i]) for i in range(1, len(item_counts)) if item_counts[i] >= min_support]

    # Number of elements in each itemset of the current level. The heading
    # reports this value, so single items are announced as size 1.
    size = 1
    while candidates:
        calculate_support(candidates, transactions)
        candidates = prune_candidates(candidates, min_support)
        if candidates:
            print(f"\nFrequent Itemsets of size {size}:")
            for candidate in candidates:
                print(f"{' '.join(map(str, candidate.itemset))} - Support: {candidate.count}")
        # generate_candidates expects the size of the itemsets it is about to
        # build, i.e. one more than the current level.
        candidates = generate_candidates(candidates, size + 1)
        size += 1
# Script entry: read the transactions and the support threshold from standard
# input, then print the frequent itemsets found by apriori().
transactions = get_user_input()
# The threshold is an absolute transaction count, parsed as an integer.
min_support = int(input("Enter the minimum support (e.g., 2): "))
apriori(transactions, min_support)
OUTPUT:-------
Enter the number of transactions: 5
Enter items (comma-separated): 1,2
Enter items (comma-separated): 1
Enter items (comma-separated): 2
Enter items (comma-separated): 1,2,3
Enter items (comma-separated): 2,3
Enter the minimum support (e.g., 2): 2
Frequent Itemsets of size 2:
1 - Support: 3
2 - Support: 4
3 - Support: 2

Frequent Itemsets of size 3:
1 2 - Support: 2
2 3 - Support: 2
AGGLOMERATIVE hierarchical clustering--------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.cluster.hierarchy import linkage, dendrogram as dendogram
def get_user_input():
    """Interactively read a set of 2-D points from standard input.

    Prompts for the number of points, then for each point's "x y" pair. A line
    that does not contain exactly two float-parsable values is rejected with a
    message and the same point is asked for again.

    Returns:
        NumPy array with one ``[x, y]`` row per point, in entry order.

    Raises:
        ValueError: If the number of points is not an integer (that first
            prompt is not retried).
    """
    n = int(input("Enter the number of points in the dataset: "))
    X = []
    print("Enter the co-ordinates (x,y) for each point: ")
    for i in range(n):
        # Keep asking for this point until a valid pair is entered.
        while True:
            try:
                coords = input(f"Point {i+1}: ").split()
                if len(coords) != 2:
                    # No trailing punctuation here: the handler below appends
                    # ". Please try again" to whatever message is raised.
                    raise ValueError("Please enter exactly two values separated by a space")
                x, y = map(float, coords)
                X.append([x, y])
                break
            except ValueError as e:
                print(f"Invalid input: {e}. Please try again")
    return np.array(X)
def hierarchical_clustering_with_dendogram(X, method='single'):
    """Cluster the points agglomeratively, show the dendrogram and print the merges.

    Args:
        X: Sequence of points passed to ``linkage``; its length determines the
            "Point 1" … "Point n" leaf labels.
        method: Linkage method name forwarded to ``linkage``.

    Side effects:
        Opens a matplotlib figure with the dendrogram and prints the merge
        table, with the two cluster-index columns converted to integers and
        shifted by one so they count from 1.
    """
    merges = linkage(X, method=method)
    method_title = method.capitalize()

    leaf_labels = []
    for index in range(len(X)):
        leaf_labels.append(f"Point {index+1}")

    plt.figure(figsize=(8, 5))
    dendogram(merges, labels=leaf_labels)
    plt.title(f'Dendrogram ({method_title} Linkage)')
    plt.xlabel('Point')
    plt.ylabel('Distance')
    plt.show()

    table = pd.DataFrame(merges, columns=["Cluster 1", "Cluster 2", "Distance", "New Cluster Size"])
    # Only the two cluster-index columns are renumbered.
    for column in ("Cluster 1", "Cluster 2"):
        table[column] = table[column].astype(int) + 1
    print(f"\n{method_title} Linkage Clustering Merges in Tabular Format")
    print(table)
# Script entry: read the points once, then run the same clustering report for
# single, complete and average linkage in that order.
X = get_user_input()
for banner, linkage_method in (
    ("Single Linkage Clustering:", 'single'),
    ("Complete Linkage Clustering:", 'complete'),
    ("Average Linkage Clustering:", 'average'),
):
    print(banner)
    hierarchical_clustering_with_dendogram(X, method=linkage_method)
Output------------
Enter the number of points in the dataset: 6
Enter the co-ordinates (x,y) for each point:
Point 1: 0.4 0.53
Point 2: 0.22 0.38
Point 3: 0.35 0.32
Point 4: 0.26 0.19
Point 5: 0.08 0.41
Point 6: 0.45 0.30
Single Linkage Clustering:
Single Linkage Clustering Merges in Tabular Format
Cluster 1 Cluster 2 Distance New Cluster Size
3 5 0.101980 2.0
2 7 0.143178 3.0
6 8 0.143178 4.0
4 9 0.158114 5.0
1 10 0.215870 6.0
Complete Linkage Clustering:
Complete Linkage Clustering Merges in Tabular Format
Cluster 1 Cluster 2 Distance New Cluster Size
3 5 0.101980 2.0
2 6 0.143178 2.0
4 7 0.219545 3.0
1 8 0.341760 3.0
9 10 0.386005 6.0
Average Linkage Clustering:
Average Linkage Clustering Merges in Tabular Format
Cluster 1 Cluster 2 Distance New Cluster Size
3 5 0.101980 2.0
2 6 0.143178 2.0
4 7 0.188829 3.0
8 9 0.255954 5.0
1 10 0.279001 6.0
K-MEANS ALGO---------------------------------------------------------------------------------
from sklearn.cluster import KMeans
import numpy as np
# Read the one-dimensional data points from the user
def get_user_data():
    """Prompt for a point count and then one float per point.

    Returns:
        NumPy array with one single-value row per point, in entry order.
    """
    count = int(input("Enter the number of points: "))
    values = [
        float(input(f"Enter value for point {position}: "))
        for position in range(1, count + 1)
    ]
    # Wrap each value in its own row so the result is a column of samples.
    return np.array([[value] for value in values])
data = get_user_data()
# Create and fit the KMeans model with two clusters.
# n_init is given explicitly because its default differs between scikit-learn
# releases (and some releases warn when it is left unset); 10 restarts is the
# long-standing default. random_state keeps the run repeatable.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0).fit(data)
# Split the points by assigned label and print each group's values.
# Which group receives label 0 is decided by the fitted model.
cluster_1 = data[kmeans.labels_ == 0]
cluster_2 = data[kmeans.labels_ == 1]
print("Cluster 1:", *cluster_1.flatten())
print("Cluster 2:", *cluster_2.flatten())
OUTPUT : -------------
Enter the number of points: 9
Enter value for point 1: 2
Enter value for point 2: 4
Enter value for point 3: 10
Enter value for point 4: 12
Enter value for point 5: 3
Enter value for point 6: 20
Enter value for point 7: 30
Enter value for point 8: 11
Enter value for point 9: 25
Cluster 1: 2.0 4.0 10.0 12.0 3.0 11.0
Cluster 2: 20.0 30.0 25.0