KEMBAR78
Import As Import As From Import From Import From Import From Import | PDF | Algorithms | Statistical Classification
0% found this document useful (0 votes)
23 views4 pages

Import As Import As From Import From Import From Import From Import

The document contains Python code for training and evaluating machine learning models on imbalanced datasets, specifically using Logistic Regression and Support Vector Machine (SVM). It includes generating datasets, splitting them into training and testing sets, computing performance metrics like precision, recall, and F1-score, and visualizing results with confusion matrices and precision-recall curves. The code demonstrates the impact of class weighting on model performance and decision thresholds in classification tasks.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOC, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
23 views4 pages

Import As Import As From Import From Import From Import From Import

The document contains Python code for training and evaluating machine learning models on imbalanced datasets, specifically using Logistic Regression and Support Vector Machine (SVM). It includes generating datasets, splitting them into training and testing sets, computing performance metrics like precision, recall, and F1-score, and visualizing results with confusion matrices and precision-recall curves. The code demonstrates the impact of class weighting on model performance and decision thresholds in classification tasks.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOC, PDF, TXT or read online on Scribd
You are on page 1/ 4

#Q1)
# Train a logistic-regression classifier on an imbalanced binary dataset
# and report precision, recall, and F1 for the positive (minority) class.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# NOTE: the original source wrapped this import across two lines without
# parentheses, which is a SyntaxError — parenthesized form fixes it.
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix,
)

# Generate an imbalanced dataset (fraud detection example):
# 95% negative / 5% positive samples, with 1% label noise (flip_y).
X, y = make_classification(n_samples=5000, n_features=20, n_classes=2,
                           weights=[0.95, 0.05], flip_y=0.01,
                           random_state=42)

# Hold out 20% of the data for evaluation; random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Train logistic regression model on the (imbalanced) training set.
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict hard class labels for the held-out test set.
y_pred = model.predict(X_test)

# Compute Precision, Recall, and F1-score. With sklearn's defaults these
# score the positive class (label 1), i.e. the minority class.
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Display results, plus the full per-class report.
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Precision: 0.72
Recall: 0.32142857142857145
F1-score: 0.4444444444444444

Classification Report:
precision recall f1-score support

0 0.96 0.99 0.98 944


1 0.72 0.32 0.44 56

accuracy 0.95 1000


macro avg 0.84 0.66 0.71 1000
weighted avg 0.95 0.95 0.95 1000

# Confusion Matrix: rows = true labels, columns = predicted labels.
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", cm)

# Plot the confusion matrix as an annotated heatmap.
# NOTE: the original source split the 'Not Fraud' string literals across
# physical lines, which is a SyntaxError — rejoined here.
import seaborn as sns

plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Not Fraud', 'Fraud'],
            yticklabels=['Not Fraud', 'Fraud'])
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

#Q2)
# Train a linear SVM on an imbalanced dataset, plot its precision-recall
# curve, and show how moving the decision threshold trades precision
# against recall.

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
# NOTE: originally wrapped across two lines without parentheses
# (SyntaxError) — fixed here.
from sklearn.metrics import precision_recall_curve, classification_report

# Generate an imbalanced dataset: 90% / 10% classes, 1% label noise.
X, y = make_classification(n_samples=5000, n_features=20, n_classes=2,
                           weights=[0.9, 0.1], flip_y=0.01,
                           random_state=42)

# Hold out 20% of the data for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Train Support Vector Machine (SVM) with probability estimation enabled
# so predict_proba is available for threshold analysis.
# (In the original, the comment word "enabled" had wrapped onto its own
# line as a bare name, breaking the script.)
svm_model = SVC(kernel='linear', probability=True)
svm_model.fit(X_train, y_train)

# Predicted probability of the positive class for each test sample.
y_scores = svm_model.predict_proba(X_test)[:, 1]

# Precision/recall pairs for every candidate decision threshold.
precision, recall, thresholds = precision_recall_curve(y_test, y_scores)

# Plot Precision-Recall Curve.
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, marker='.', label="Precision-Recall Curve")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve for SVM")
plt.legend()
plt.grid()
plt.show()

# Adjust decision threshold
threshold = 0.5  # You can change this value to observe the effect
y_pred = (y_scores >= threshold).astype(int)

# Print classification report at the chosen threshold.
print(f"\nClassification Report at threshold={threshold}:\n")
print(classification_report(y_test, y_pred))

# Plot Precision & Recall vs Threshold. precision_recall_curve returns
# one more precision/recall value than thresholds, hence the [:-1].
plt.figure(figsize=(8, 6))
plt.plot(thresholds, precision[:-1], label="Precision")
plt.plot(thresholds, recall[:-1], label="Recall")
plt.xlabel("Decision Threshold")
plt.ylabel("Score")
plt.title("Precision and Recall vs Threshold")
plt.legend()
plt.grid()
plt.show()

#Q3)
# Compare a plain logistic-regression baseline against a class-weighted
# model on an imbalanced dataset. (The weighted model's evaluation
# continues beyond this excerpt.)

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# NOTE: originally wrapped across two lines without parentheses
# (SyntaxError) — fixed here.
from sklearn.metrics import (
    precision_recall_curve,
    classification_report,
    accuracy_score,
)

# Generate an imbalanced dataset (90% class 0, 10% class 1).
X, y = make_classification(n_samples=5000, n_features=20, n_classes=2,
                           weights=[0.9, 0.1], flip_y=0.01,
                           random_state=42)

# Split data into training and test sets. The original performed this
# identical split twice back-to-back; once is sufficient (random_state
# makes the result deterministic, so behavior is unchanged).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# BASELINE MODEL (Without Class Weighting)
logreg_baseline = LogisticRegression(random_state=42)
logreg_baseline.fit(X_train, y_train)

# Hard labels and positive-class probabilities on the test set.
y_pred_baseline = logreg_baseline.predict(X_test)
y_prob_baseline = logreg_baseline.predict_proba(X_test)[:, 1]

# Compute Precision-Recall Curve for the baseline scores.
precision, recall, _ = precision_recall_curve(y_test, y_prob_baseline)

# Evaluate Baseline Model
print("BASELINE MODEL (Logistic Regression):")
print(f"Accuracy: {accuracy_score(y_test, y_pred_baseline):.4f}")
print(classification_report(y_test, y_pred_baseline))

# Plot Precision-Recall Curve
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, marker='.', label="Baseline Model")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve for Logistic Regression")
plt.legend()
plt.grid()
plt.show()

# CLASS-WEIGHTED MODEL: class_weight='balanced' reweights samples
# inversely to class frequency, so the minority class counts more
# during training.
logreg_weighted = LogisticRegression(class_weight='balanced',
                                     random_state=42)
logreg_weighted.fit(X_train, y_train)

You might also like