DL Programs

The document outlines various implementations of neural network models using Keras and TensorFlow for different tasks, including regression for house price prediction, classification for heart disease prediction, CNN for dog/cat classification, object detection, time series prediction using RNN and LSTM, and a Seq2Seq model for machine translation. Each section provides source code examples demonstrating the model architecture, data preprocessing, training, and evaluation. The document serves as a comprehensive guide for implementing various neural network architectures for specific machine learning problems.

2. Aim: To implement a multi-layer perceptron using Keras with TensorFlow and fine-tune the neural network hyperparameters for a regression problem (House Price Prediction).

Source code:

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

df = pd.read_csv(r"C:\Users\Administrator.L2133\Desktop\Real_estate.csv")
x = df.iloc[:, :-1]
y = df.iloc[:, -1]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.05, random_state=0)

# Standardize the features with the imported StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

model = Sequential([
    Dense(64, activation='relu', input_shape=(x_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.2)
Output:

loss = model.evaluate(x_test, y_test)
print(f'Test loss: {loss}')

Output:

predictions = model.predict(x_test)
print(predictions[:5])

Output:
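Note: The aim calls for fine-tuning the network hyperparameters, which the listing above does not do. The following is a minimal sketch of how that could be added with the KerasTuner library (this assumes keras-tuner is installed; the search space of unit counts and learning rates is illustrative only, not part of the original program).

import keras_tuner as kt

def build_model(hp):
    # Hyperparameters to tune: hidden units of each Dense layer and the learning rate
    model = Sequential([
        Dense(hp.Int('units_1', min_value=32, max_value=128, step=32),
              activation='relu', input_shape=(x_train.shape[1],)),
        Dense(hp.Int('units_2', min_value=32, max_value=128, step=32), activation='relu'),
        Dense(1)
    ])
    lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='mean_squared_error')
    return model

tuner = kt.RandomSearch(build_model, objective='val_loss', max_trials=5, overwrite=True)
tuner.search(x_train, y_train, epochs=10, validation_split=0.2)
best_model = tuner.get_best_models(num_models=1)[0]
print(tuner.get_best_hyperparameters(1)[0].values)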
3. Aim: To implement an MLP using Keras with TensorFlow for a classification problem (Heart Disease Prediction).

Source code:

import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

data = pd.read_csv(r'C:\Users\Administrator.L2133\Downloads\heart.csv')
data.head()

Output:

data.describe()
Output:
data.isnull().any()

Output:

x = data.iloc[:, :13].values
y = data["target"].values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

classifier = Sequential()
classifier.add(Dense(activation='relu', input_dim=13, units=8, kernel_initializer="uniform"))
classifier.add(Dense(activation='relu', units=14, kernel_initializer="uniform"))
classifier.add(Dense(activation='sigmoid', units=1, kernel_initializer="uniform"))
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
classifier.fit(x_train, y_train, batch_size=8, epochs=10)
Output:

y_pred = classifier.predict(x_test)
y_pred = (y_pred > 0.5)

Output:

cm = confusion_matrix(y_test, y_pred)
cm

Output:

accuracy = (cm[0][0] + cm[1][1]) / (cm[0][1] + cm[1][0] + cm[0][0] + cm[1][1])
print(accuracy * 100)

Output:
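As a cross-check, the same accuracy (plus per-class precision and recall) can be obtained with scikit-learn's built-in metrics. A short sketch using the y_test and y_pred arrays from above:

from sklearn.metrics import accuracy_score, classification_report

# accuracy_score matches the manual confusion-matrix calculation above
print("Accuracy:", accuracy_score(y_test, y_pred) * 100)
print(classification_report(y_test, y_pred))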
4. Aim: To implement a CNN for the Dog vs. Cat classification problem using Keras.

Source code:

import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image

image_size = (150, 150)
batch_size = 32

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

test_datagen = ImageDataGenerator(rescale=1./255)

# Paths to the dataset directories
train_dir = r'C:\Users\Administrator.L2133\Desktop\dogs_vs_cats\train'       # Update with the actual path
validation_dir = r'C:\Users\Administrator.L2133\Desktop\dogs_vs_cats\test'   # Update with the actual path

# Load training data
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='binary'
)

# Load validation data
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='binary'
)

model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=5,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // batch_size
)

def predict_image(image_path):
    img = image.load_img(image_path, target_size=image_size)
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array /= 255.0                             # Rescale the image
    prediction = model.predict(img_array)
    if prediction[0] > 0.5:
        print("This is a Dog!")
    else:
        print("This is a Cat!")

# Example of predicting a new image
predict_image(r'C:\Users\Administrator.L2133\Desktop\dog.jpg')

Output:
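Optionally, the trained CNN can be evaluated on the validation generator and saved for later reuse. A brief sketch (the file name dogs_vs_cats_cnn.keras is an arbitrary choice, not part of the original program):

# Evaluate on the held-out validation data
val_loss, val_acc = model.evaluate(validation_generator)
print(f"Validation loss: {val_loss:.4f}, validation accuracy: {val_acc:.4f}")

# Save the full model and reload it later
model.save('dogs_vs_cats_cnn.keras')
restored_model = tf.keras.models.load_model('dogs_vs_cats_cnn.keras')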
5. Aim: To implement a CNN for object detection in a given image.

Source code:

import numpy as np
import cv2
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt

# Define a simple CNN model for object detection (bounding box + classification)
def create_model():
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
        layers.MaxPooling2D(2, 2),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D(2, 2),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D(2, 2),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        # Output layer: 4 values for the bounding box (x, y, w, h) + 1 class probability
        layers.Dense(5, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
    return model

# Helper function to load and preprocess an image
def load_image(image_path):
    img = image.load_img(image_path, target_size=(150, 150))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array /= 255.0                             # Normalize
    return img_array

# Visualize the bounding box and classification result
def visualize_detection(image_path, model, threshold=0.5):
    img_array = load_image(image_path)
    # Make predictions: 4 coordinates (bounding box) and 1 class score
    predictions = model.predict(img_array)
    # Extract bounding box coordinates and class probability
    box = predictions[0][:4]        # x, y, w, h
    class_prob = predictions[0][4]  # probability of object presence
    if class_prob > threshold:  # If the confidence is high enough
        # Load image for visualization
        img = cv2.imread(image_path)
        h, w, _ = img.shape
        # Scale bounding box coordinates back to image size
        x1 = int(box[0] * w)
        y1 = int(box[1] * h)
        x2 = int((box[0] + box[2]) * w)
        y2 = int((box[1] + box[3]) * h)
        # Draw the bounding box
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Label the object
        label = f"Class Probability: {class_prob:.2f}"
        cv2.putText(img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
        # Show the image with the bounding box
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()

# Example usage:
# Create and compile the model
model = create_model()

# Dummy training: in a real-world scenario you would have labeled data (images, boxes and classes).
# For simplicity, training is not done here.

# Test with an image (make sure you have a valid image path)
image_path = r'C:\Users\Administrator.L2133\Downloads\dog.jpg'  # Update with the path to an actual image

# Visualize detection on the image
visualize_detection(image_path, model)
Output:
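The listing deliberately skips training because no labeled bounding boxes are available. For completeness, here is a sketch of how synthetic data could be wired up so that model.fit runs end to end (the random images and labels below are placeholders, not a real dataset; each label is [x, y, w, h, class] with values in [0, 1]):

num_samples = 32
dummy_images = np.random.rand(num_samples, 150, 150, 3).astype('float32')
dummy_boxes = np.random.rand(num_samples, 4).astype('float32')                    # x, y, w, h
dummy_classes = np.random.randint(0, 2, size=(num_samples, 1)).astype('float32')  # object present or not
dummy_labels = np.concatenate([dummy_boxes, dummy_classes], axis=1)               # shape (num_samples, 5)

model.fit(dummy_images, dummy_labels, epochs=2, batch_size=8)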
6. Aim: To implement a Recurrent Neural Network (RNN) for predicting time series data.

Source code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN

ticker = 'AAPL'
data = yf.download(ticker, start="2020-01-01", end="2025-01-01")
data = data['Close'].values.reshape(-1, 1)

Output:

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

def create_dataset(data, time_step=60):
    X, y = [], []
    for i in range(len(data) - time_step - 1):
        X.append(data[i:(i + time_step), 0])
        y.append(data[i + time_step, 0])
    return np.array(X), np.array(y)

X, y = create_dataset(scaled_data)
X = X.reshape(X.shape[0], X.shape[1], 1)

train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

model = Sequential()
model.add(SimpleRNN(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(SimpleRNN(units=50, return_sequences=False))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=64)

predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)

Output:

plt.figure(figsize=(5, 3))
plt.plot(scaler.inverse_transform(y_test.reshape(-1, 1)), color='blue', label='Real Stock Price')
plt.plot(predictions, color='red', label='Predicted Stock Price')
plt.title(f'{ticker} Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()

Output:
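A simple way to quantify the fit is the root-mean-square error between the real and predicted prices. A short sketch using the arrays computed above:

from sklearn.metrics import mean_squared_error

real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))
rmse = np.sqrt(mean_squared_error(real_prices, predictions))
print(f"Test RMSE: {rmse:.2f}")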
7. Aim: To implement a Long Short-Term Memory (LSTM) network for predicting time series data.

Source code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

data = np.sin(np.linspace(0, 100, 1000))
data = data.reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data)

def create_dataset(data, time_step=1):
    X, y = [], []
    for i in range(len(data) - time_step - 1):
        X.append(data[i:(i + time_step), 0])
        y.append(data[i + time_step, 0])
    return np.array(X), np.array(y)

time_step = 60
X, y = create_dataset(data_scaled, time_step)
X = X.reshape(X.shape[0], X.shape[1], 1)

train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

model = Sequential()
model.add(LSTM(units=50, return_sequences=False, input_shape=(X_train.shape[1], 1)))
model.add(Dense(units=1))
model.compile(optimizer=Adam(), loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))
Output:

predicted_y=model.predict(X_test)

Output:

predicted_y = scaler.inverse_transform(predicted_y)
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))

plt.figure(figsize=(10, 6))
plt.plot(y_test, label='Actual Data')
plt.plot(predicted_y, label='Predicted Data')
plt.legend()
plt.show()

Output:
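The fitted LSTM can also be rolled forward to forecast beyond the available data by feeding each prediction back in as the newest input value. A sketch (the 10-step horizon is an arbitrary choice, not part of the original program):

last_window = data_scaled[-time_step:].reshape(1, time_step, 1)
future = []
for _ in range(10):
    next_scaled = model.predict(last_window, verbose=0)   # shape (1, 1)
    future.append(next_scaled[0, 0])
    # Drop the oldest value and append the new prediction
    last_window = np.append(last_window[:, 1:, :], next_scaled.reshape(1, 1, 1), axis=1)

future = scaler.inverse_transform(np.array(future).reshape(-1, 1))
print(future.ravel())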
8. Aim: To implement a Sequence-to-Sequence (Seq2Seq) model for neural machine translation using Keras.

Source code:

import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.optimizers import Adam

# Example data
input_texts = ['hello', 'how are you', 'good morning']
target_texts = ['hola', 'como estas', 'buenos dias']

# Create a character-level vocabulary
input_vocab = set(' '.join(input_texts))
target_vocab = set(' '.join(target_texts))
input_vocab_size = len(input_vocab) + 1    # +1 for padding token
target_vocab_size = len(target_vocab) + 1  # +1 for padding token

input_token_index = {char: i + 1 for i, char in enumerate(input_vocab)}
target_token_index = {char: i + 1 for i, char in enumerate(target_vocab)}
reverse_input_token_index = {i: char for char, i in input_token_index.items()}
reverse_target_token_index = {i: char for char, i in target_token_index.items()}

# Prepare input and output sequences
max_input_len = max([len(txt) for txt in input_texts])
max_target_len = max([len(txt) for txt in target_texts])

encoder_input_data = np.zeros((len(input_texts), max_input_len), dtype='float32')
decoder_input_data = np.zeros((len(target_texts), max_target_len), dtype='float32')
decoder_target_data = np.zeros((len(target_texts), max_target_len, target_vocab_size), dtype='float32')

for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t] = input_token_index[char]
    for t, char in enumerate(target_text):
        decoder_input_data[i, t] = target_token_index[char]
        # One-hot target, shifted by one timestep (teacher forcing)
        if t > 0:
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.0

# Build the Seq2Seq model
# Encoder
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(input_vocab_size, 256)(encoder_inputs)
encoder_lstm = LSTM(256, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder
decoder_inputs = Input(shape=(None,))
decoder_embedding_layer = Embedding(target_vocab_size, 256)
decoder_embedding = decoder_embedding_layer(decoder_inputs)
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
decoder_lstm_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(target_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_lstm_outputs)

# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=16,
          epochs=5,
          validation_split=0.2)

# Define the inference models
# Encoder inference model
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder inference model (reusing the trained decoder layers)
decoder_state_input_h = Input(shape=(256,))
decoder_state_input_c = Input(shape=(256,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# Decoder setup for inference
decoder_inputs_inf = Input(shape=(None,))
decoder_emb_inf = decoder_embedding_layer(decoder_inputs_inf)
decoder_lstm_inf_out, state_h_inf, state_c_inf = decoder_lstm(decoder_emb_inf, initial_state=decoder_states_inputs)
decoder_outputs_inf = decoder_dense(decoder_lstm_inf_out)

# Define the inference model
decoder_model = Model([decoder_inputs_inf] + decoder_states_inputs,
                      [decoder_outputs_inf, state_h_inf, state_c_inf])

# Decode a sequence
def decode_sequence(input_seq):
    states_value = encoder_model.predict(input_seq)
    # Start the decoding process with the start token (e.g., '<start>')
    target_seq = np.zeros((1, 1))  # Assuming the '<start>' token is 0
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        # Get the most probable token (index 0 is the padding token)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_token_index.get(sampled_token_index, '')
        decoded_sentence += sampled_char
        # Exit condition: either hit max length or find the stop/padding token
        if sampled_char in ('<end>', '') or len(decoded_sentence) > max_target_len:
            stop_condition = True
        # Update the target sequence and states
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]
    return decoded_sentence

# Example usage for decoding a sequence
input_seq = encoder_input_data[0:1]  # Take the first example for inference
decoded_sentence = decode_sequence(input_seq)
print(f"Decoded sentence: {decoded_sentence}")

Output:
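The decoding loop above treats index 0 as an implicit '<start>' token and looks for an '<end>' marker that the character-level vocabulary never actually contains. One common remedy, sketched below under the convention of the classic Keras character-level seq2seq example (an assumption, not part of the original program), is to wrap each target text in explicit start ('\t') and end ('\n') characters before building the vocabulary:

# Mark sequence boundaries before building target_vocab and target_token_index
target_texts = ['\t' + txt + '\n' for txt in ['hola', 'como estas', 'buenos dias']]
target_vocab = set(''.join(target_texts))
target_token_index = {char: i + 1 for i, char in enumerate(sorted(target_vocab))}

# Decoding would then start from target_token_index['\t'] and stop when '\n' is sampled
print(target_token_index['\t'], target_token_index['\n'])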
9. Aim: To implement an encoder-decoder RNN model for neural machine translation.

Source code:

import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Example toy dataset: English to Spanish translation
input_texts = ['hello', 'how are you', 'good morning']
target_texts = ['hola', 'como estas', 'buenos dias']

# Create character-level vocabularies for both input and output languages
input_vocab = set(' '.join(input_texts))
target_vocab = set(' '.join(target_texts))
input_vocab_size = len(input_vocab) + 1    # +1 for padding token
target_vocab_size = len(target_vocab) + 1  # +1 for padding token

# Tokenize data: create dictionaries for converting characters to indices
input_token_index = {char: i + 1 for i, char in enumerate(input_vocab)}
target_token_index = {char: i + 1 for i, char in enumerate(target_vocab)}
reverse_input_token_index = {i: char for char, i in input_token_index.items()}
reverse_target_token_index = {i: char for char, i in target_token_index.items()}

# Prepare input and output sequences (convert text to integer sequences)
max_input_len = max([len(txt) for txt in input_texts])
max_target_len = max([len(txt) for txt in target_texts])

encoder_input_data = np.zeros((len(input_texts), max_input_len), dtype='float32')
decoder_input_data = np.zeros((len(target_texts), max_target_len), dtype='float32')
decoder_target_data = np.zeros((len(target_texts), max_target_len, target_vocab_size), dtype='float32')

for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t] = input_token_index[char]
    for t, char in enumerate(target_text):
        decoder_input_data[i, t] = target_token_index[char]
        # One-hot target, shifted by one timestep (teacher forcing)
        if t > 0:
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.0

# Build the Encoder-Decoder Model
# Encoder
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(input_vocab_size, 256)(encoder_inputs)
encoder_lstm = LSTM(256, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder
decoder_inputs = Input(shape=(None,))
decoder_embedding_layer = Embedding(target_vocab_size, 256)
decoder_embedding = decoder_embedding_layer(decoder_inputs)
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
decoder_lstm_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(target_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_lstm_outputs)

# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=16,
          epochs=5,
          validation_split=0.2)

# Define the Inference Models
# Encoder inference model
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder inference model (reusing the trained decoder layers)
decoder_state_input_h = Input(shape=(256,))
decoder_state_input_c = Input(shape=(256,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# Decoder setup for inference
decoder_inputs_inf = Input(shape=(None,))
decoder_emb_inf = decoder_embedding_layer(decoder_inputs_inf)
decoder_lstm_inf_out, state_h_inf, state_c_inf = decoder_lstm(decoder_emb_inf, initial_state=decoder_states_inputs)
decoder_outputs_inf = decoder_dense(decoder_lstm_inf_out)

# Define the inference model
decoder_model = Model([decoder_inputs_inf] + decoder_states_inputs,
                      [decoder_outputs_inf, state_h_inf, state_c_inf])

# Define a function to decode sequences (generate translations)
def decode_sequence(input_seq):
    states_value = encoder_model.predict(input_seq)
    # Start the decoding process with the start token (e.g., '<start>')
    target_seq = np.zeros((1, 1))  # Assuming the '<start>' token is 0
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        # Get the most probable token (index 0 is the padding token)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_token_index.get(sampled_token_index, '')
        decoded_sentence += sampled_char
        # Exit condition: either hit max length or find the stop/padding token
        if sampled_char in ('<end>', '') or len(decoded_sentence) > max_target_len:
            stop_condition = True
        # Update the target sequence and states
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]
    return decoded_sentence

# Example usage for decoding a sequence
input_seq = encoder_input_data[0:1]  # Take the first example for inference
decoded_sentence = decode_sequence(input_seq)
print(f"Decoded sentence: {decoded_sentence}")

Output:
10. Aim: To implement multimodal emotion recognition using Transformers.

Source code:

# Import necessary libraries
import numpy as np
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model

# Create the Transformer encoder block
def transformer_encoder(inputs, num_heads, key_dim, ff_dim):
    """
    A transformer encoder block that includes multi-head attention,
    feed-forward layers, and residual connections with layer normalization.
    """
    # Multi-head self-attention layer
    attention_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(inputs, inputs)
    attention_output = layers.LayerNormalization(epsilon=1e-6)(inputs + attention_output)  # Residual connection

    # Feed-forward network
    ff_output = layers.Dense(ff_dim, activation='relu')(attention_output)
    ff_output = layers.Dense(inputs.shape[-1])(ff_output)
    output = layers.LayerNormalization(epsilon=1e-6)(attention_output + ff_output)  # Residual connection
    return output

# Create the model architecture
def build_multi_modal_motion_recognition_model(video_input_shape, sensor_input_shape, num_classes,
                                               num_heads=4, key_dim=64, ff_dim=256):
    # Video input processing (e.g., CNN feature extractor, assuming pre-processed features)
    video_inputs = layers.Input(shape=video_input_shape)
    video_transformer_input = layers.Reshape((-1, video_input_shape[1]))(video_inputs)  # Reshape for transformer input
    video_transformer_output = transformer_encoder(video_transformer_input, num_heads, key_dim, ff_dim)
    video_transformer_output = layers.GlobalAveragePooling1D()(video_transformer_output)

    # Sensor input processing (e.g., accelerometer or gyroscope data)
    sensor_inputs = layers.Input(shape=sensor_input_shape)
    sensor_transformer_input = layers.Reshape((-1, sensor_input_shape[1]))(sensor_inputs)  # Reshape for transformer input
    sensor_transformer_output = transformer_encoder(sensor_transformer_input, num_heads, key_dim, ff_dim)
    sensor_transformer_output = layers.GlobalAveragePooling1D()(sensor_transformer_output)

    # Concatenate both modalities (video and sensor)
    merged = layers.Concatenate()([video_transformer_output, sensor_transformer_output])

    # Fully connected layers for classification
    dense = layers.Dense(128, activation='relu')(merged)
    output = layers.Dense(num_classes, activation='softmax')(dense)

    # Define the model
    model = Model(inputs=[video_inputs, sensor_inputs], outputs=output)
    return model

# Data preparation
# Simulated data for demonstration purposes
num_samples = 1000
video_input_shape = (10, 512)   # 10 frames, 512 features per frame (CNN-based features)
sensor_input_shape = (30, 6)    # 30 timesteps, 6 sensor features (e.g., accelerometer/gyroscope)
num_classes = 5                 # 5 classes for action recognition

# Create random sample data (replace with an actual dataset)
video_data = np.random.rand(num_samples, *video_input_shape)
sensor_data = np.random.rand(num_samples, *sensor_input_shape)
labels = np.random.randint(0, num_classes, num_samples)

# Split into train and test sets
train_size = int(0.8 * num_samples)
X_video_train, X_video_test = video_data[:train_size], video_data[train_size:]
X_sensor_train, X_sensor_test = sensor_data[:train_size], sensor_data[train_size:]
y_train, y_test = labels[:train_size], labels[train_size:]

# Compile and train the model
# Build the model
model = build_multi_modal_motion_recognition_model(video_input_shape, sensor_input_shape, num_classes)

# Compile the model
model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Summary of the model
model.summary()

# Train the model
history = model.fit(
    [X_video_train, X_sensor_train], y_train,
    validation_data=([X_video_test, X_sensor_test], y_test),
    batch_size=32,
    epochs=10
)

# Evaluate the model
loss, accuracy = model.evaluate([X_video_test, X_sensor_test], y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Make predictions
# Example: predict the action class for a new sample
new_video_data = np.random.rand(1, *video_input_shape)    # Replace with actual video data
new_sensor_data = np.random.rand(1, *sensor_input_shape)  # Replace with actual sensor data

# Predict the action class
predicted_class = model.predict([new_video_data, new_sensor_data])
print(f"Predicted action class: {np.argmax(predicted_class)}")

Output:
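Note that the transformer encoder block above carries no positional information, so the model cannot distinguish the order of frames or sensor timesteps. Below is a sketch of a standard sinusoidal positional encoding that could be added to the inputs before calling transformer_encoder (the helper and its use are an illustration, not part of the original program):

def positional_encoding(length, depth):
    positions = np.arange(length)[:, np.newaxis]      # (length, 1)
    dims = np.arange(depth)[np.newaxis, :]            # (1, depth)
    angle_rates = 1.0 / np.power(10000.0, (2 * (dims // 2)) / np.float32(depth))
    angles = positions * angle_rates
    pe = np.zeros((length, depth), dtype='float32')
    pe[:, 0::2] = np.sin(angles[:, 0::2])
    pe[:, 1::2] = np.cos(angles[:, 1::2])
    return pe

# Example: add positional information to the simulated video features (broadcasts over samples)
video_pe = positional_encoding(video_input_shape[0], video_input_shape[1])  # shape (10, 512)
video_data_with_pe = video_data + video_pe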
