EX : 1
IMPLEMENTATION OF BASIC IMAGE PROCESSING OPERATIONS
INCLUDING FEATURE REPRESENTATION AND FEATURE EXTRACTION
Program:
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Step 1: Load the image
image = cv2.imread('image.jpg')  # Replace 'image.jpg' with your image path
if image is None:
    raise FileNotFoundError("Could not read 'image.jpg'")
# Step 2: Convert the image to grayscale
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Step 3: Apply edge detection (using Canny)
edges = cv2.Canny(gray_image, threshold1=100, threshold2=200)
# Step 4: Feature extraction (Using Harris Corner Detection)
# Convert image to float32 for Harris detection
gray_float = np.float32(gray_image)
dst = cv2.cornerHarris(gray_float, 2, 3, 0.04)
# Dilate to mark the corners
dst = cv2.dilate(dst, None)
# Step 5: Mark the corners in the original image
image_with_corners = image.copy()
image_with_corners[dst > 0.01 * dst.max()] = [0, 0, 255] # Red color for corners
# Step 6: Display the results
# Original image with corners marked
plt.subplot(1, 2, 1)
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.title("Original Image")
plt.axis('off')
# Image with edges highlighted
plt.subplot(1, 2, 2)
plt.imshow(edges, cmap='gray')
plt.title("Edge Detection (Canny)")
plt.axis('off')
# Show corners on the image
plt.figure()
plt.imshow(cv2.cvtColor(image_with_corners, cv2.COLOR_BGR2RGB))
plt.title("Feature Extraction (Corners)")
plt.axis('off')
plt.show()
# Step 7: Save the results (optional)
cv2.imwrite('edges_output.jpg', edges)
cv2.imwrite('corners_output.jpg', image_with_corners)
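Note: Harris corners locate interest points but do not produce feature descriptors. As an optional sketch of an explicit feature representation (an addition, not part of the original program; the ORB parameters are illustrative), binary descriptors can be computed on the same grayscale image:
# Optional sketch: ORB keypoints and binary descriptors as a feature representation
orb = cv2.ORB_create(nfeatures=500)
keypoints, descriptors = orb.detectAndCompute(gray_image, None)
print("Keypoints detected:", len(keypoints))
if descriptors is not None:
    print("Descriptor matrix shape:", descriptors.shape)  # (num_keypoints, 32)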
OUTPUT:
(Figures: the original image, the Canny edge map, and the image with Harris
corners marked in red; edges_output.jpg and corners_output.jpg are saved.)
EX : 2
IMPLEMENTATION OF SIMPLE NEURAL NETWORK
Program:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
# Step 1: Load the Iris dataset
iris = datasets.load_iris()
X = iris.data # Features
y = iris.target # Labels
# Step 2: Preprocess the data
# Convert labels to one-hot encoding
y_encoded = to_categorical(y, num_classes=3)
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2,
random_state=42)
# Step 3: Build the Neural Network Model
model = Sequential()
# Input layer and first hidden layer with 10 neurons and ReLU activation
model.add(Dense(10, input_dim=4, activation='relu'))
# Second hidden layer with 8 neurons and ReLU activation
model.add(Dense(8, activation='relu'))
# Output layer with 3 neurons (one for each class) and softmax activation
model.add(Dense(3, activation='softmax'))
# Step 4: Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Step 5: Train the model
model.fit(X_train, y_train, epochs=100, batch_size=5, verbose=1)
# Step 6: Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
# Output results
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")
Output:
Epoch 1/100
24/24 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.3858 - loss: 1.2491
Epoch 2/100
24/24 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - accuracy: 0.7343 - loss: 0.8896
Epoch 3/100
24/24 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.6574 - loss: 0.8656
Epoch 4/100
...
24/24 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9697 - loss: 0.0752
Epoch 100/100
24/24 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9831 - loss: 0.0714
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 279ms/step - accuracy: 0.9667 - loss: 0.0997
Test Loss: 0.0997
Test Accuracy: 0.9667
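As a small usage sketch (an addition, not part of the recorded output), the trained model can classify a single flower; the measurements below are illustrative:
# Hypothetical single-sample prediction, run after the program above
sample = np.array([[5.1, 3.5, 1.4, 0.2]])  # illustrative sepal/petal measurements in cm
probs = model.predict(sample)
print("Predicted class:", iris.target_names[np.argmax(probs[0])])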
EX : 3
STUDY OF PRETRAINED DEEP NEURAL NETWORK MODEL FOR IMAGES
Program:
# Import required libraries
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
import numpy as np
import matplotlib.pyplot as plt
# Step 1: Load Pretrained Model (VGG16 in this case)
model = VGG16(weights='imagenet')
# Step 2: Load and Preprocess the Image
img_path = 'your_image_path.jpg' # Replace with the path to your image
img = image.load_img(img_path, target_size=(224, 224)) # Resize image to 224x224
img_array = image.img_to_array(img) # Convert image to numpy array
img_array = np.expand_dims(img_array, axis=0) # Add batch dimension
img_array = preprocess_input(img_array) # Preprocess image for VGG16
# Step 3: Predict the Image Class
predictions = model.predict(img_array)
# Step 4: Decode the Predictions
decoded_predictions = decode_predictions(predictions, top=3)[0]
for i, (imagenet_id, label, score) in enumerate(decoded_predictions):
    print(f"{i + 1}: {label} ({score * 100:.2f}%)")
# Step 5: Display the Image
plt.imshow(img)
plt.title(f"Prediction: {decoded_predictions[0][1]} - {decoded_predictions[0][2]*100:.2f}%")
plt.axis('off')
plt.show()
OUTPUT:
1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 770ms/step
1: banana (99.85%)
2: pineapple (0.05%)
3: zucchini (0.02%)
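Since this exercise is a study of pretrained models, the same pipeline can be repeated with a different backbone. A hedged variation (assuming the same img loaded above; each application module ships its own preprocess_input and decode_predictions, so they are re-imported together and shadow the VGG16 helpers):
# Variation: ResNet50 instead of VGG16
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
resnet = ResNet50(weights='imagenet')
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
print(decode_predictions(resnet.predict(x), top=3)[0])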
EX : 4
CNN FOR IMAGE CLASSIFICATION
Program:
# Import libraries
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import cifar10
# Load and preprocess CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Normalize the image values
x_train, x_test = x_train / 255.0, x_test / 255.0
# Create the CNN model
model = models.Sequential([
# First Convolutional Layer
layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
layers.MaxPooling2D((2, 2)),
# Second Convolutional Layer
layers.Conv2D(64, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
# Third Convolutional Layer
layers.Conv2D(64, (3, 3), activation='relu'),
# Flatten the feature maps and add a fully connected layer
layers.Flatten(),
layers.Dense(64, activation='relu'),
# Output layer with 10 classes
layers.Dense(10, activation='softmax')
])
# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Train the model
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))
# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc}")
# Plot training accuracy
plt.plot(history.history['accuracy'], label='Training accuracy')
plt.plot(history.history['val_accuracy'], label='Validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()
Output:
Epoch 1/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 78s 49ms/step - accuracy: 0.3510 - loss: 1.7526 - val_accuracy: 0.5510 - val_loss: 1.2774
Epoch 2/10
...
Epoch 10/10
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 73s 47ms/step - accuracy: 0.8050 - loss: 0.5588 - val_accuracy: 0.7163 - val_loss: 0.8717
313/313 - 5s - 16ms/step - accuracy: 0.7163 - loss: 0.8717
Test accuracy: 0.7163000106811523
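A brief usage sketch (an addition, assuming it is run after the training above): classifying a single test image with human-readable class names.
import numpy as np
# Standard CIFAR-10 class order
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
probs = model.predict(x_test[:1])
print("Predicted:", class_names[int(np.argmax(probs[0]))],
      "| Actual:", class_names[int(y_test[0][0])])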
EX : 5
CNN FOR IMAGE SEGMENTATION
Program:
# Import libraries
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
# Step 1: Load and Preprocess the Image
img_path = 'your_image_path.jpg' # Replace with the path to your image
img = image.load_img(img_path, target_size=(128, 128)) # Resize image to 128x128
img_array = image.img_to_array(img) # Convert image to numpy array
img_array = np.expand_dims(img_array, axis=0) # Add batch dimension
img_array = img_array / 255.0 # Normalize pixel values
# Step 2: Build CNN Model for Image Segmentation
model = models.Sequential([
# Convolutional layers for feature extraction
layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(128, 128, 3)),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
layers.MaxPooling2D((2, 2)),
# Upsampling back to the original 128x128 size (three stride-2 stages
# to undo the three pooling stages)
layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), activation='relu', padding='same'),
layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), activation='relu', padding='same'),
layers.Conv2DTranspose(32, (3, 3), strides=(2, 2), activation='relu', padding='same'),
# Output layer for pixel-wise classification over 3 classes
# (background, object 1, object 2)
layers.Conv2D(3, (1, 1), activation='softmax', padding='same')
])
# Step 3: Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Step 4: Make predictions on the input image
segmentation_output = model.predict(img_array)
# Step 5: Display the original image and segmented output
plt.figure(figsize=(12, 6))
# Original image
plt.subplot(1, 2, 1)
plt.imshow(img)
plt.title("Original
Image") plt.axis('off')
# Segmented output (showing class predictions)
plt.subplot(1, 2, 2)
plt.imshow(np.argmax(segmentation_output[0], axis=-1))  # Convert to class labels (argmax per pixel)
plt.title("Segmented Image")
plt.axis('off')
plt.show()
Inference:
1. Prepare the Image:
○ Ensure that the image file (e.g., 'your_image_path.jpg') exists at the specified
location on your system.
2. Model Training:
○ The model built here has no pre-trained weights, so its predictions are
essentially random. To get meaningful results, either train it on labeled
segmentation data (a minimal shape-only sketch follows this list) or load a
pre-trained segmentation model.
3. Visualize the Results:
○ Once the model is trained or fine-tuned, running the prediction on the
input image will display the segmented regions.
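A minimal, shape-only training sketch (hypothetical: the random arrays below only demonstrate the expected image and mask shapes; real labeled masks are required for meaningful segmentation). It assumes the program above has been run:
# Hypothetical stand-in data for shape checking only
train_images = np.random.rand(8, 128, 128, 3).astype('float32')
mask_labels = np.random.randint(0, 3, size=(8, 128, 128))  # class index per pixel
train_masks = tf.keras.utils.to_categorical(mask_labels, num_classes=3)  # one-hot (8, 128, 128, 3)
model.fit(train_images, train_masks, epochs=2, batch_size=4)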
OUTPUT:
● Original Image: Displays the original image (e.g., a photo of an object or
scene).
● Segmented Image: The segmented output, with each pixel labeled according to
the predicted class (background, object 1, or object 2), shown with a color
map over the class indices.
EX : 6
RNN FOR VIDEO PROCESSING
Program:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import cv2

# Step 1: Load and preprocess video data
def load_video(video_path, frame_size=(64, 64), max_frames=30):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while len(frames) < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, frame_size)  # Resize frame
        frame = frame / 255.0  # Normalize pixel values
        frames.append(frame)
    cap.release()
    if len(frames) == 0:
        raise FileNotFoundError(f"Could not read any frames from {video_path}")
    frames = np.array(frames)
    if len(frames) < max_frames:
        # Pad with blank frames if the clip is shorter than max_frames
        padding = np.zeros((max_frames - len(frames), *frames[0].shape))
        frames = np.concatenate((frames, padding))
    return frames

# Sample video path (replace with your file)
video_path = "sample_video.mp4"
video_data = load_video(video_path)

# Step 2: Create dataset
video_sequences = np.expand_dims(video_data, axis=0)  # Add batch dimension
labels = np.array([0])  # Example label for one video

# Step 3: Build the CNN + RNN model
cnn_model = models.Sequential([
    layers.TimeDistributed(layers.Conv2D(32, (3, 3), activation='relu'), input_shape=(30, 64, 64, 3)),
    layers.TimeDistributed(layers.MaxPooling2D((2, 2))),
    layers.TimeDistributed(layers.Flatten()),
    layers.LSTM(64, return_sequences=False),  # Temporal modeling across frames
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Example for binary classification
])
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 4: Training
# cnn_model.fit(video_sequences, labels, epochs=10)  # Example training

# Step 5: Prediction
predictions = cnn_model.predict(video_sequences)
print("Predictions:", predictions)
OUTPUT:
1. Predictions:
The output is a classification probability for the video sequence, e.g.
Predictions: [[0.85]]
2. Visual Output:
Not produced directly by this code; a small visualization sketch is given
below.
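A hedged visualization sketch (assumes the program above has been run; the frame indices are illustrative):
# Show a few sampled frames next to the predicted probability
import matplotlib.pyplot as plt
fig, axes = plt.subplots(1, 4, figsize=(12, 3))
for ax, idx in zip(axes, [0, 10, 20, 29]):  # sample frames across the 30-frame clip
    ax.imshow(video_data[idx][..., ::-1])  # OpenCV loads BGR; reverse channels for display
    ax.set_title(f"frame {idx}")
    ax.axis('off')
fig.suptitle(f"Predicted probability: {predictions[0][0]:.2f}")
plt.show()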
EX : 7
IMPLEMENTATION OF DEEP GENERATIVE MODEL FOR IMAGE EDITING
Program:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets, utils
from torchvision.models import vgg19
import os
# Define Generator
class Generator(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Generator, self).__init__()
        self.encoder = nn.Sequential(
            self._conv_block(in_channels, 64, 4, 2, 1),
            self._conv_block(64, 128, 4, 2, 1),
            self._conv_block(128, 256, 4, 2, 1),
            self._conv_block(256, 512, 4, 2, 1)
        )
        self.decoder = nn.Sequential(
            self._deconv_block(512, 256, 4, 2, 1),
            self._deconv_block(256, 128, 4, 2, 1),
            self._deconv_block(128, 64, 4, 2, 1),
            nn.ConvTranspose2d(64, out_channels, kernel_size=4, stride=2, padding=1),
            nn.Tanh()
        )

    def _conv_block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(0.2, inplace=True)
        )

    def _deconv_block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
# Define Discriminator
class Discriminator(nn.Module):
    def __init__(self, in_channels):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            self._conv_block(in_channels, 64, 4, 2, 1),
            self._conv_block(64, 128, 4, 2, 1),
            self._conv_block(128, 256, 4, 2, 1),
            nn.Conv2d(256, 1, kernel_size=4, stride=1, padding=1)  # patch-level real/fake map
        )

    def _conv_block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(0.2, inplace=True)
        )

    def forward(self, x):
        return self.model(x)
# Hyperparameters
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
lr = 2e-4
batch_size = 16
epochs = 100
image_size = 128
in_channels = 3
out_channels = 3
# Data Preparation
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
dataset = datasets.ImageFolder(root='path/to/dataset', transform=transform)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
# Initialize Models
generator = Generator(in_channels, out_channels).to(device)
discriminator = Discriminator(in_channels + out_channels).to(device)
# Optimizers and Losses
optimizer_G = optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))
adversarial_loss = nn.BCEWithLogitsLoss()
pixel_loss = nn.L1Loss()
# Training Loop
# NOTE: this loop assumes the dataloader yields (input_image, target_image)
# pairs. datasets.ImageFolder yields (image, class_label), so in practice a
# custom paired Dataset is needed for image-to-image training.
for epoch in range(epochs):
    for i, (input_image, target_image) in enumerate(dataloader):
        input_image, target_image = input_image.to(device), target_image.to(device)

        # Train Generator
        optimizer_G.zero_grad()
        generated_image = generator(input_image)
        disc_fake = discriminator(torch.cat((input_image, generated_image), dim=1))
        # Labels shaped like the discriminator's patch output
        real_labels = torch.ones_like(disc_fake)
        fake_labels = torch.zeros_like(disc_fake)
        g_loss = adversarial_loss(disc_fake, real_labels) + pixel_loss(generated_image, target_image)
        g_loss.backward()
        optimizer_G.step()

        # Train Discriminator
        optimizer_D.zero_grad()
        disc_real = discriminator(torch.cat((input_image, target_image), dim=1))
        real_loss = adversarial_loss(disc_real, real_labels)
        disc_fake = discriminator(torch.cat((input_image, generated_image.detach()), dim=1))
        fake_loss = adversarial_loss(disc_fake, fake_labels)
        d_loss = (real_loss + fake_loss) / 2
        d_loss.backward()
        optimizer_D.step()

        # Logging
        if i % 50 == 0:
            print(f"Epoch [{epoch}/{epochs}] Batch {i}/{len(dataloader)} - "
                  f"Loss D: {d_loss.item():.4f}, Loss G: {g_loss.item():.4f}")
# Save Models
torch.save(generator.state_dict(), "generator.pth")
torch.save(discriminator.state_dict(), "discriminator.pth")
print("Training Complete!")
Inference:
1. Image Input Transformation:
The input image is resized to 128x128 pixels, normalized to the range [-1, 1], and
converted into a tensor, making it suitable for deep learning models.
2. Generator Functionality:
The generator takes the input image and learns to modify or transform it into the desired
output image using an encoder-decoder architecture.
3. Discriminator Role:
The discriminator evaluates the generator's output by distinguishing between real images
(ground truth) and generated images. It learns to guide the generator to improve its
output.
4. Loss Calculation:
Two types of losses are used:
○ Adversarial Loss: Ensures the generated images are realistic enough to fool the
discriminator.
○ Pixel Loss (L1 Loss): Ensures the generated image matches the target image at
the pixel level.
5. Training Loop:
The training alternates between improving the generator and the discriminator. The
generator aims to produce convincing image edits, while the discriminator learns to
identify imperfections in the generated images.
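A hedged usage sketch for the saved weights (an addition; assumes the classes, hyperparameters, and transform defined above, and 'input.jpg' is a placeholder path):
# Reload the trained generator and edit a single image
from PIL import Image
generator = Generator(in_channels, out_channels).to(device)
generator.load_state_dict(torch.load("generator.pth", map_location=device))
generator.eval()
img = transform(Image.open("input.jpg").convert("RGB")).unsqueeze(0).to(device)
with torch.no_grad():
    edited = generator(img)
utils.save_image(edited * 0.5 + 0.5, "edited.jpg")  # undo the [-1, 1] normalization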
Simple Autoencoder-Based Generator:
import torch
import torch.nn as nn
import torchvision.transforms as T
from PIL import Image
# Generator (Autoencoder)
class Generator(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(nn.Conv2d(3, 64, 4, 2, 1), nn.ReLU())
        self.decoder = nn.Sequential(nn.ConvTranspose2d(64, 3, 4, 2, 1), nn.Tanh())

    def forward(self, x):
        return self.decoder(self.encoder(x))
# Load image and preprocess
transform = T.Compose([T.Resize((64, 64)), T.ToTensor(), T.Normalize((0.5,), (0.5,))])
image = transform(Image.open("image.jpg").convert("RGB")).unsqueeze(0)
# Initialize model, edit image, and save output
generator = Generator()
with torch.no_grad():
    edited_image = generator(image)
output = T.ToPILImage()((edited_image.squeeze(0) * 0.5 + 0.5).clamp(0, 1))
output.save("edited_image.jpg")