# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report,
accuracy_score
# 1. Load the dataset
# NOTE(review): this is a machine-specific absolute Windows path; adjust it
# when running on another machine.
credit_data = pd.read_csv(
    "C:\\Users\\bharg\\Downloads\\creditcardfraud.csv"
)

# 2. Preprocess the data
# Drop the Time column so it is not used as a feature.
credit_data = credit_data.drop(columns="Time")

# 3. Split the data into training and test sets
# Features are every remaining column except the "Class" label.
X = credit_data.drop(columns="Class")
y = credit_data["Class"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale the Amount column using a standard scaler.
# The scaler is fitted on the training rows only and then applied to the
# test rows, so no test-set statistics leak into the preprocessing step.
# Copies are taken first so the column assignments do not write into
# slices of X.
scaler = StandardScaler()
X_train = X_train.copy()
X_test = X_test.copy()
X_train["Amount"] = scaler.fit_transform(X_train[["Amount"]])
X_test["Amount"] = scaler.transform(X_test[["Amount"]])
# 4. Train a logistic regression model on the training set
# fit() returns the fitted estimator itself, so construction and training
# are chained into a single expression.
logistic_model = LogisticRegression().fit(X_train, y_train)
LogisticRegression()
# 5. Evaluate the logistic regression model's performance on the test set
y_pred_logistic = logistic_model.predict(X_test)

print("Logistic Regression Metrics:")
# Each metric takes (y_true, y_pred); print them in a fixed order with
# their labels.
for metric_label, metric_fn in (
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
    ("Accuracy Score:", accuracy_score),
):
    print(metric_label, metric_fn(y_test, y_pred_logistic))
Logistic Regression Metrics:
Confusion Matrix:
[[59 3]
[ 4 54]]
Classification Report:
precision recall f1-score support
0 0.94 0.95 0.94 62
1 0.95 0.93 0.94 58
accuracy 0.94 120
macro avg 0.94 0.94 0.94 120
weighted avg 0.94 0.94 0.94 120
Accuracy Score: 0.9416666666666667
# 6. Train an SVM model on the training set
# fit() returns the fitted estimator itself, so construction and training
# are chained into a single expression.
svm_model = SVC().fit(X_train, y_train)
SVC()
# 7. Evaluate the SVM model's performance on the test set
y_pred_svm = svm_model.predict(X_test)

print("\nSVM Metrics:")
# Each metric takes (y_true, y_pred); print them in a fixed order with
# their labels.
for metric_label, metric_fn in (
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
    ("Accuracy Score:", accuracy_score),
):
    print(metric_label, metric_fn(y_test, y_pred_svm))
SVM Metrics:
Confusion Matrix:
[[62 0]
[ 6 52]]
Classification Report:
precision recall f1-score support
0 0.91 1.00 0.95 62
1 1.00 0.90 0.95 58
accuracy 0.95 120
macro avg 0.96 0.95 0.95 120
weighted avg 0.95 0.95 0.95 120
Accuracy Score: 0.95
# 8. Tune hyperparameters using grid search cross-validation for both models

# Logistic regression: search over the inverse regularization strength C
# with 5-fold cross-validation, selecting by accuracy.
# max_iter is raised above scikit-learn's default of 100 because the lbfgs
# solver stopped at its iteration limit (ConvergenceWarning) during this
# search; raise it further if the warning still appears.
param_grid_logistic = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}
grid_logistic = GridSearchCV(
    LogisticRegression(max_iter=1000),
    param_grid_logistic,
    cv=5,
    scoring='accuracy',
)
grid_logistic.fit(X_train, y_train)

# SVM: search over the penalty C and the kernel coefficient gamma with the
# same cross-validation setup.
param_grid_svm = {'C': [0.1, 1, 10], 'gamma': [0.1, 1, 10]}
grid_svm = GridSearchCV(SVC(), param_grid_svm, cv=5, scoring='accuracy')
grid_svm.fit(X_train, y_train)
C:\Users\bharg\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as
shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\bharg\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as
shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\bharg\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as
shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\bharg\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as
shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\bharg\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as
shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
GridSearchCV(cv=5, estimator=SVC(),
param_grid={'C': [0.1, 1, 10], 'gamma': [0.1, 1, 10]},
scoring='accuracy')
# 9. Train models with optimal hyperparameters
# GridSearchCV refits the best parameter combination on the whole training
# set by default (refit=True), so best_estimator_ is already trained and no
# further fit() call is needed here.
optimal_logistic_model = grid_logistic.best_estimator_
optimal_svm_model = grid_svm.best_estimator_

# Evaluate performance on the test set
y_pred_optimal_logistic = optimal_logistic_model.predict(X_test)
y_pred_optimal_svm = optimal_svm_model.predict(X_test)

# Print the same three metrics, in the same order, for each tuned model.
for heading, predictions in (
    ("\nOptimal Logistic Regression Metrics:", y_pred_optimal_logistic),
    ("\nOptimal SVM Metrics:", y_pred_optimal_svm),
):
    print(heading)
    print("Confusion Matrix:\n", confusion_matrix(y_test, predictions))
    print(
        "Classification Report:\n",
        classification_report(y_test, predictions),
    )
    print("Accuracy Score:", accuracy_score(y_test, predictions))
Optimal Logistic Regression Metrics:
Confusion Matrix:
[[61 1]
[ 4 54]]
Classification Report:
precision recall f1-score support
0 0.94 0.98 0.96 62
1 0.98 0.93 0.96 58
accuracy 0.96 120
macro avg 0.96 0.96 0.96 120
weighted avg 0.96 0.96 0.96 120
Accuracy Score: 0.9583333333333334
Optimal SVM Metrics:
Confusion Matrix:
[[54 8]
[ 1 57]]
Classification Report:
precision recall f1-score support
0 0.98 0.87 0.92 62
1 0.88 0.98 0.93 58
accuracy 0.93 120
macro avg 0.93 0.93 0.92 120
weighted avg 0.93 0.93 0.92 120
Accuracy Score: 0.925
# 10. Compare performance and provide insights
print("\nComparison of Logistic Regression and SVM Models:")
print("-------------------------------------------------")

# Compute each tuned model's test accuracy once; the values are reused for
# both the per-model summary and the final comparison.
logistic_accuracy = accuracy_score(y_test, y_pred_optimal_logistic)
svm_accuracy = accuracy_score(y_test, y_pred_optimal_svm)

# Logistic Regression Metrics
print("Logistic Regression Metrics:")
print("Accuracy Score:", logistic_accuracy)
print(
    "Confusion Matrix:\n",
    confusion_matrix(y_test, y_pred_optimal_logistic),
)
print(
    "Classification Report:\n",
    classification_report(y_test, y_pred_optimal_logistic),
)

# SVM Metrics
print("\nSVM Metrics:")
print("Accuracy Score:", svm_accuracy)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_optimal_svm))
print(
    "Classification Report:\n",
    classification_report(y_test, y_pred_optimal_svm),
)

# Interpretation and Insights
print("\nInterpretation and Insights:")
print("----------------------------")

# Compare accuracy scores and report which model scored higher.
if logistic_accuracy > svm_accuracy:
    print("Logistic Regression performed better in terms of accuracy.")
elif logistic_accuracy < svm_accuracy:
    print("SVM performed better in terms of accuracy.")
else:
    print("Both models performed equally in terms of accuracy.")
Comparison of Logistic Regression and SVM Models:
-------------------------------------------------
Logistic Regression Metrics:
Accuracy Score: 0.9583333333333334
Confusion Matrix:
[[61 1]
[ 4 54]]
Classification Report:
precision recall f1-score support
0 0.94 0.98 0.96 62
1 0.98 0.93 0.96 58
accuracy 0.96 120
macro avg 0.96 0.96 0.96 120
weighted avg 0.96 0.96 0.96 120
SVM Metrics:
Accuracy Score: 0.925
Confusion Matrix:
[[54 8]
[ 1 57]]
Classification Report:
precision recall f1-score support
0 0.98 0.87 0.92 62
1 0.88 0.98 0.93 58
accuracy 0.93 120
macro avg 0.93 0.93 0.92 120
weighted avg 0.93 0.93 0.92 120
Interpretation and Insights:
----------------------------
Logistic Regression performed better in terms of accuracy.