# Q1) Precision, Recall, and F1-score on an imbalanced binary classification
# task (fraud-detection style) using logistic regression, plus a
# confusion-matrix heatmap.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (precision_score, recall_score, f1_score,
                             classification_report, confusion_matrix)

# Generate an imbalanced dataset (95% negative / 5% positive, 1% label noise)
# to mimic fraud detection.
X, y = make_classification(n_samples=5000, n_features=20, n_classes=2,
                           weights=[0.95, 0.05], flip_y=0.01, random_state=42)

# Split into training and testing sets (80/20).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Train logistic regression model.
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions on the held-out test set.
y_pred = model.predict(X_test)

# Compute Precision, Recall, and F1-score for the positive (minority) class.
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Display results.
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Sample output (reference only — kept as a comment so the file parses):
#   Precision: 0.72
#   Recall: 0.32142857142857145
#   F1-score: 0.4444444444444444
#   Classification Report:
#                  precision    recall  f1-score   support
#             0         0.96      0.99      0.98       944
#             1         0.72      0.32      0.44        56
#      accuracy                             0.95      1000
#     macro avg         0.84      0.66      0.71      1000
#  weighted avg         0.95      0.95      0.95      1000

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", cm)

# Plot Confusion Matrix as an annotated heatmap.
import seaborn as sns
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Not Fraud', 'Fraud'],
            yticklabels=['Not Fraud', 'Fraud'])
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()
# Q2) Precision-Recall curve for an SVM on an imbalanced dataset, and the
# effect of moving the decision threshold on precision/recall.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import precision_recall_curve, classification_report

# Generate an imbalanced dataset (90% class 0, 10% class 1).
X, y = make_classification(n_samples=5000, n_features=20, n_classes=2,
                           weights=[0.9, 0.1], flip_y=0.01, random_state=42)

# Split into training and testing sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Train a Support Vector Machine (SVM) with probability estimation enabled
# (probability=True fits an internal calibration so predict_proba works).
svm_model = SVC(kernel='linear', probability=True)
svm_model.fit(X_train, y_train)

# Get predicted probabilities for the positive class.
y_scores = svm_model.predict_proba(X_test)[:, 1]

# Compute the Precision-Recall curve across all candidate thresholds.
precision, recall, thresholds = precision_recall_curve(y_test, y_scores)

# Plot Precision-Recall Curve.
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, marker='.', label="Precision-Recall Curve")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve for SVM")
plt.legend()
plt.grid()
plt.show()

# Adjust decision threshold.
threshold = 0.5  # You can change this value to observe the effect
y_pred = (y_scores >= threshold).astype(int)

# Print classification report at the chosen threshold.
print(f"\nClassification Report at threshold={threshold}:\n")
print(classification_report(y_test, y_pred))

# Plot Precision & Recall vs Threshold. precision_recall_curve returns one
# more precision/recall point than thresholds, so drop the final point to
# align the arrays.
plt.figure(figsize=(8, 6))
plt.plot(thresholds, precision[:-1], label="Precision")
plt.plot(thresholds, recall[:-1], label="Recall")
plt.xlabel("Decision Threshold")
plt.ylabel("Score")
plt.title("Precision and Recall vs Threshold")
plt.legend()
plt.grid()
plt.show()
# Q3) Baseline vs class-weighted logistic regression on an imbalanced
# dataset: train a plain model, evaluate it, then train a model with
# class_weight='balanced' for comparison.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (precision_recall_curve, classification_report,
                             accuracy_score)

# Generate an imbalanced dataset (90% class 0, 10% class 1).
X, y = make_classification(n_samples=5000, n_features=20, n_classes=2,
                           weights=[0.9, 0.1], flip_y=0.01, random_state=42)

# Split data into training and test sets. (The original performed this exact
# split twice; the duplicate was a no-op given the fixed random_state and has
# been removed.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# BASELINE MODEL (Without Class Weighting)
logreg_baseline = LogisticRegression(random_state=42)
logreg_baseline.fit(X_train, y_train)

# Predictions: hard labels for the report, positive-class probabilities for
# the PR curve.
y_pred_baseline = logreg_baseline.predict(X_test)
y_prob_baseline = logreg_baseline.predict_proba(X_test)[:, 1]

# Compute Precision-Recall Curve.
precision, recall, _ = precision_recall_curve(y_test, y_prob_baseline)

# Evaluate Baseline Model.
print("BASELINE MODEL (Logistic Regression):")
print(f"Accuracy: {accuracy_score(y_test, y_pred_baseline):.4f}")
print(classification_report(y_test, y_pred_baseline))

# Plot Precision-Recall Curve.
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, marker='.', label="Baseline Model")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve for Logistic Regression")
plt.legend()
plt.grid()
plt.show()

# CLASS-WEIGHTED MODEL: class_weight='balanced' reweights samples inversely
# proportional to class frequency so the minority class counts more in the fit.
logreg_weighted = LogisticRegression(class_weight='balanced', random_state=42)
logreg_weighted.fit(X_train, y_train)