# Mount Google Drive inside the Colab runtime so the dataset stored under
# /content/drive/MyDrive is reachable through the regular file system.
# (The import was previously split across two lines, which is a syntax error.)
from google.colab import drive

drive.mount('/content/drive')
Mounted at /content/drive
import os
# Folder on the mounted Google Drive that holds the dataset.
Root = "/content/drive/MyDrive/Dataset"
# Make that folder the working directory, so relative paths used later
# (e.g. the saved model files) resolve inside it.
os.chdir(Root)
import tensorflow as tf
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow.keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.metrics import categorical_crossentropy
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D,GlobalAveragePooling2D
from keras.layers import Activation, Dropout, BatchNormalization, Flatten, Dense, AvgPool2D,MaxPool2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
import cv2
# Dataset root and the two training folders; folder "0" is paired with the
# label 'No' and folder "1" with the label 'Yes' via the zip() below.
data = '/content/drive/MyDrive/Dataset'
No_breast_cancer = '/content/drive/MyDrive/Dataset/train/0'
Yes_breast_cancer = '/content/drive/MyDrive/Dataset/train/1'

dirlist = [No_breast_cancer, Yes_breast_cancer]
classes = ['No', 'Yes']

# Build two parallel lists: the full path of every entry found in each class
# folder, and the class label that belongs to it.
filepaths = []
labels = []
for class_dir, class_name in zip(dirlist, classes):
    entries = os.listdir(class_dir)
    filepaths.extend(os.path.join(class_dir, entry) for entry in entries)
    labels.extend([class_name] * len(entries))

print ('filepaths: ', len(filepaths), ' labels: ', len(labels))
filepaths: 975 labels: 975
# Combine the parallel path/label lists into one two-column DataFrame.
# pd.concat already yields the ('filepaths', 'labels') layout, so the former
# hard-coded reshape(975, 2) is dropped: it was redundant and would raise as
# soon as the number of files on disk differed from 975.
Files = pd.Series(filepaths, name='filepaths')
Label = pd.Series(labels, name='labels')
df = pd.concat([Files, Label], axis=1)
df.head()
filepaths labels
0 /content/drive/MyDrive/Dataset/train/0/18536_1... No
1 /content/drive/MyDrive/Dataset/train/0/21827_9... No
2 /content/drive/MyDrive/Dataset/train/0/22528_1... No
3 /content/drive/MyDrive/Dataset/train/0/20657_1... No
4 /content/drive/MyDrive/Dataset/train/0/23419_1... No
Next steps: toggle_off View recommended plots
# Show how many samples each class label has.
label_counts = df['labels'].value_counts()
print(label_counts)
labels
Yes 710
No 265
Name: count, dtype: int64
# Visualize 15 randomly chosen images in a 3x5 grid, titled with their label.
plt.figure(figsize=(12, 8))
for i in range(15):
    # randint's upper bound is exclusive, so start at 0 to make the first row
    # selectable as well (the previous lower bound of 1 always skipped it).
    row = np.random.randint(0, len(df))
    plt.subplot(3, 5, i + 1)
    # cv2.imread returns channels in BGR order while imshow expects RGB.
    picture = cv2.cvtColor(cv2.imread(df.loc[row, "filepaths"]), cv2.COLOR_BGR2RGB)
    plt.imshow(picture)
    plt.title(df.loc[row, "labels"], size = 15, color = "white")
    plt.xticks([])
    plt.yticks([])
plt.show()
from sklearn.model_selection import train_test_split

# Hold out 5% of the data for testing, then 10% of the remainder for
# validation. The classes are imbalanced and the held-out sets are small, so
# both splits are stratified on the label to keep the class ratio identical
# in every subset.
train, test = train_test_split(
    df, train_size=0.95, random_state=0, stratify=df['labels']
)
train_new, valid = train_test_split(
    train, train_size=0.90, random_state=0, stratify=train['labels']
)

print(f"train set shape: {train_new.shape}")
print(f"test set shape: {test.shape}")
print(f"validation set shape: {valid.shape}")
train set shape: (833, 2)
test set shape: (49, 2)
validation set shape: (93, 2)
# Spatial size fed to the network. It must match the model's
# keras.Input(shape=(424, 424, 3)) and the 424x424 resize used at inference;
# the previous 224x224 target size did not.
IMG_SIZE = (424, 424)

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
# Validation and test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

train_gen = train_datagen.flow_from_dataframe(
    dataframe=train_new,
    x_col='filepaths',
    y_col='labels',
    target_size=IMG_SIZE,
    batch_size=32,
    class_mode='binary',
    shuffle=True,
)
# Validation now uses the non-augmenting generator without shuffling, so
# val_loss / val_accuracy are measured on the same unmodified images each epoch.
val_gen = test_datagen.flow_from_dataframe(
    valid,
    x_col='filepaths',
    y_col='labels',
    target_size=IMG_SIZE,
    batch_size=16,
    class_mode='binary',
    shuffle=False,
)
test_gen = test_datagen.flow_from_dataframe(
    test,
    x_col='filepaths',
    y_col='labels',
    target_size=IMG_SIZE,
    batch_size=16,
    class_mode='binary',
    shuffle=False,
)
Found 833 validated image filenames belonging to 2 classes.
Found 93 validated image filenames belonging to 2 classes.
Found 49 validated image filenames belonging to 2 classes.
# Display the class-name -> integer-index mapping used by the training generator.
train_gen.class_indices
{'No': 0, 'Yes': 1}
from tensorflow import keras

# Pre-trained convolutional base: ResNet50V2 with ImageNet weights and without
# the ImageNet classifier head (include_top=False).
base_model = keras.applications.ResNet50V2(
    weights="imagenet",
    input_shape=(424, 424, 3),
    include_top=False,
)

# Freeze the base_model so only the new classification head is trained.
base_model.trainable = False

# Create new model on top.
inputs = keras.Input(shape=(424, 424, 3))

# The data pipeline delivers pixels scaled to [0, 1], whereas the ResNet50V2
# ImageNet weights expect inputs in [-1, 1]. Map [0, 1] -> [-1, 1] inside the
# model so training and inference share the same preprocessing.
x = keras.layers.Rescaling(scale=2.0, offset=-1.0)(inputs)

# The base model contains batchnorm layers. We want to keep them in inference
# mode when we unfreeze the base model for fine-tuning, so we make sure that
# the base_model is running in inference mode here.
x = base_model(x, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dropout(0.2)(x)  # Regularize with dropout
# Single sigmoid unit: output is the probability of class index 1.
outputs = keras.layers.Dense(1, activation="sigmoid")(x)

model = keras.Model(inputs, outputs)
model.summary()
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) [(None, 424, 424, 3)] 0
resnet50v2 (Functional) (None, 14, 14, 2048) 23564800
global_average_pooling2d_1 (None, 2048) 0
(GlobalAveragePooling2D)
dropout_1 (Dropout) (None, 2048) 0
dense_1 (Dense) (None, 1) 2049
=================================================================
Total params: 23566849 (89.90 MB)
Trainable params: 2049 (8.00 KB)
Non-trainable params: 23564800 (89.89 MB)
_________________________________________________________________
# Keep the best model seen during training on disk (ModelCheckpoint with
# save_best_only=True).
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        "Tumor_classifier_model.h5", save_best_only=True, verbose=0
    )
]

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

# `callbacks` is already a list, so pass it directly instead of wrapping it in
# a second list as before.
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=5,
    callbacks=callbacks,
    verbose=1,
)
Epoch 1/5
27/27 [==============================] - ETA: 0s - loss: 0.6680 - accuracy: 0.6651/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py
saving_api.save_model(
27/27 [==============================] - 204s 7s/step - loss: 0.6680 - accuracy: 0.6651 - val_loss: 0.6124 - val_accuracy: 0.7097
Epoch 2/5
27/27 [==============================] - 175s 6s/step - loss: 0.6402 - accuracy: 0.6951 - val_loss: 0.5612 - val_accuracy: 0.7527
Epoch 3/5
27/27 [==============================] - 171s 6s/step - loss: 0.6121 - accuracy: 0.7119 - val_loss: 0.5783 - val_accuracy: 0.7204
Epoch 4/5
27/27 [==============================] - 182s 7s/step - loss: 0.5796 - accuracy: 0.7167 - val_loss: 0.5777 - val_accuracy: 0.7527
Epoch 5/5
27/27 [==============================] - 178s 7s/step - loss: 0.5873 - accuracy: 0.7131 - val_loss: 0.5583 - val_accuracy: 0.7527
# Persist the trained model in the working directory (HDF5 file).
model.save(filepath="model.h5")
/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py:3103: UserWarning: You are saving your model as an HDF5 file via `model.save()`. T
saving_api.save_model(
WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or e
# Plot every metric recorded in the training history on one set of axes,
# with a grid and the y-axis limited to the 0..1 range.
history_frame = pd.DataFrame(history.history)
axes = history_frame.plot(figsize=(8, 5))
axes.grid(True)
axes.set_ylim(0, 1)
plt.show()
from PIL import Image
import numpy as np

# Reload the saved model and classify a single image from disk.
model_path = "model.h5"
loaded_model = tf.keras.models.load_model(model_path)

sample_path = "/content/drive/MyDrive/Dataset/train/0/18984_1193779604_ce0f4e42f05f418d84c7c86a827bbf1f.png"

# cv2.imread signals an unreadable or missing file by returning None.
# Fail with a clear message instead of an obscure error further down.
bgr_pixels = cv2.imread(sample_path)
if bgr_pixels is None:
    raise FileNotFoundError(f"Could not read image: {sample_path}")

# OpenCV returns channels in BGR order; convert to RGB before handing the
# array to PIL. The array is deliberately not named `image`, which would
# shadow the keras `image` module imported at the top of the file.
rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
image_fromarray = Image.fromarray(rgb_pixels)

# Match the model input size (424x424) and add the batch dimension.
resize_image = image_fromarray.resize((424, 424))
expand_input = np.expand_dims(resize_image, axis=0)
input_data = np.array(expand_input)
input_data = input_data / 255  # same [0, 1] scaling as the data generators

pred = loaded_model.predict(input_data)

# predict() returns an array of shape (1, 1); compare the scalar probability
# explicitly instead of relying on the truth value of an array.
if pred[0][0] >= 0.5:
    print("Yes")
else:
    print("No")
WARNING:tensorflow:No training configuration found in the save file, so the model was *not* compiled. Compile it manually.
1/1 [==============================] - 2s 2s/step
Yes
# Display the class-name -> integer-index mapping again, to interpret the
# "Yes"/"No" decision made on the sigmoid output above.
train_gen.class_indices
{'No': 0, 'Yes': 1}