In this notebook we will build and train a Convolutional Neural Network that classifies images as food, nature, or people. The same approach works with any custom image dataset.
To accomplish this, we will use Transfer Learning by reusing the feature extraction layers of an existing Convolutional Neural Network pretrained on ImageNet.
We will use Keras to build our model, and TensorFlow's tf.data Datasets together with TFRecord files to construct a scalable input pipeline.
import tensorflow as tf
import numpy as np
from tensorflow import keras
import matplotlib .pyplot as plt
import glob
import skimage
import skimage .io
# Root data directory; the folder names below are presumably resolved
# relative to it (load_images concatenates root + path).
DATA_PATH = './data/'

# One image folder per class.
FOOD_PATH = 'food/'
NATURE_PATH = 'nature/'
PEOPLE_PATH = 'people/'
Preprocess data and write it to several TFRecord files
# Short aliases for the tf.train protobuf message types, ordered from the
# outermost message (Example) down to the value containers.
Example = tf.train.Example
Features = tf.train.Features
Feature = tf.train.Feature
BytesList = tf.train.BytesList
Int64List = tf.train.Int64List
FloatList = tf.train.FloatList
def load_images(root, path):
    """Read every file matching ``root + path + '*'`` as an image.

    Args:
        root: Leading part of the glob pattern (e.g. a data directory).
        path: Second part of the glob pattern (e.g. a class sub-folder).

    Returns:
        A list with one array per matched file, sliced to its first three
        channels. The list is empty when nothing matches.
    """
    pattern = root + path + '*'
    rgb_images = []
    for image_file in glob.glob(pattern):
        pixels = skimage.io.imread(image_file)
        # Keep only the first three channels (drops e.g. an alpha channel).
        # NOTE(review): this indexing needs a 3-D array; a 2-D grayscale
        # image would fail here.
        rgb_images.append(pixels[:, :, :3])
    return rgb_images
def preprocess_image(image):
    """Turn a raw image array into a MobileNetV2-ready tensor.

    The image is converted to a float32 tensor, resized to 224x224 and
    passed through MobileNetV2's ``preprocess_input``.

    Args:
        image: Image data accepted by ``tf.constant``.

    Returns:
        The resized, preprocessed tensor.
    """
    as_float = tf.constant(image, dtype=tf.float32)
    resized = tf.image.resize(as_float, (224, 224))
    return tf.keras.applications.mobilenet_v2.preprocess_input(resized)
def to_protobuf(image, label):
    """Wrap one image tensor and its class label in a ``tf.train.Example``.

    Args:
        image: Tensor to store; it is serialized with
            ``tf.io.serialize_tensor`` and written as a single bytes value.
        label: Integer class label, stored as a single int64 value.

    Returns:
        An ``Example`` with two features: ``'image'`` (bytes) and
        ``'label'`` (int64).
    """
    image_data = tf.io.serialize_tensor(image)
    return Example(
        features=Features(
            feature={
                # .numpy() extracts the serialized bytes from the string tensor.
                'image': Feature(bytes_list=BytesList(value=[image_data.numpy()])),
                'label': Feature(int64_list=Int64List(value=[label])),
            }
        )
    )
def to_tfrecord(data, root, filename):
    """Write protobuf messages to the TFRecord file ``root + filename``.

    Args:
        data: Iterable of messages exposing ``SerializeToString()``.
        root: Leading part of the output path.
        filename: Trailing part of the output path.
    """
    output_path = root + filename
    with tf.io.TFRecordWriter(output_path) as record_writer:
        for example in data:
            serialized = example.SerializeToString()
            record_writer.write(serialized)
def preprocess_data(root, path, class_label, output_filename):
    """Convert one folder of images into a single TFRecord file.

    Every image found by ``load_images(root, path)`` is preprocessed,
    paired with ``class_label`` and written to ``root + output_filename``.

    Args:
        root: Data directory prefix, used for both reading and writing.
        path: Image folder, appended to ``root``.
        class_label: Integer label given to every image in the folder.
        output_filename: Name of the TFRecord file to create under ``root``.
    """
    examples = [
        to_protobuf(preprocess_image(raw_image), class_label)
        for raw_image in load_images(root, path)
    ]
    to_tfrecord(examples, root, output_filename)
# TFRecord output file names, one per image class.
FOOD_FILENAME = 'food_instances.tfrecord'
NATURE_FILENAME = 'nature_instances.tfrecord'
PEOPLE_FILENAME = 'people_instances.tfrecord'
Create Input Pipeline / Load Data into TensorFlow Datasets
def parse_tfrecord(tfrecord):
    """Decode one serialized ``Example`` into an ``(image, label)`` pair.

    Args:
        tfrecord: Scalar string tensor holding a serialized Example with an
            ``"image"`` bytes feature and a ``"label"`` int64 feature.

    Returns:
        A tuple ``(image, label)``: the image deserialized as a float32
        tensor, and the int64 label (``-1`` when the feature is missing).
    """
    feature_descriptions = {
        "image": tf.io.FixedLenFeature([], tf.string, default_value=""),
        "label": tf.io.FixedLenFeature([], tf.int64, default_value=-1),
    }
    protobuf = tf.io.parse_single_example(tfrecord, feature_descriptions)
    # The image was stored as a serialized tensor, so it must be parsed back
    # with the matching dtype.
    image = tf.io.parse_tensor(protobuf['image'], out_type=tf.float32)
    return image, protobuf['label']
# Build the input pipeline: list the TFRecord files, read them interleaved,
# and decode every record into an (image, label) pair.
# NOTE(review): `filepaths` is not defined in this part of the notebook; it is
# expected to come from an earlier cell.
dataset_files = tf.data.Dataset.list_files(filepaths, seed=42)
n_readers = 3
dataset = dataset_files.interleave(
    lambda filepath: tf.data.TFRecordDataset(filepath),
    cycle_length=n_readers,
    num_parallel_calls=n_readers)
dataset = dataset.map(parse_tfrecord, num_parallel_calls=6)

# Hold out the first 60 instances for evaluation and train on the rest.
test_dataset = dataset.take(60).batch(32)
# Shuffle BEFORE batching so that individual instances are shuffled. With the
# previous order (batch, then shuffle) only the handful of whole batches was
# reordered and every batch kept the same members in every epoch.
# NOTE(review): the buffer size of 390 looks sized to the number of training
# instances; confirm against the dataset. The buffer holds that many decoded
# images in memory.
train_dataset = dataset.skip(60).shuffle(390).batch(32).prefetch(1)
Quickly plot a few images to verify that the input pipeline works as expected
# Sanity check: print the label and draw the image for the first 16 held-out
# instances in a 4x4 grid.
plt.figure(figsize=(20, 20))
for index, (x, y) in enumerate(test_dataset.unbatch().take(16), start=1):
    print(y)
    # Undo the preprocessing scale for display; assumes pixel values are in
    # [-1, 1] after MobileNetV2's preprocess_input (TODO confirm).
    x = (x + 1.) / 2.
    plt.subplot(4, 4, index)
    plt.imshow(x)
test_dataset = dataset.take(60).batch(32)  # Reset the Dataset after iterating over it
tf.Tensor(2, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
Create the model using Transfer Learning (Keras Functional API)
# Transfer learning: reuse the ImageNet-pretrained MobileNetV2 feature
# extractor (without its classification head) and put a new softmax
# classifier on top.
# NOTE(review): `N_CLASSES` is not defined in this part of the notebook; it is
# expected to come from an earlier cell.
base_model = keras.applications.mobilenet_v2.MobileNetV2(weights="imagenet",
                                                         include_top=False)
avg = keras.layers.GlobalAveragePooling2D()(base_model.output)
output = keras.layers.Dense(N_CLASSES, activation="softmax")(avg)
model = keras.Model(inputs=base_model.input, outputs=output)

# Freeze the pretrained layers so only the new Dense head is trained.
for layer in base_model.layers:
    layer.trainable = False

# Labels are integer class ids, hence the sparse categorical loss.
# `metrics` is documented as a list; a bare string is not accepted by every
# Keras version, so the metric name is wrapped in one.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=keras.optimizers.Nadam(0.002),
              metrics=['accuracy'])
model.summary()
WARNING:tensorflow:`input_shape` is undefined or non-square, or `rows` is not in [96, 128, 160, 192, 224]. Weights for input shape (224, 224) will be loaded as the default.
Downloading data from
9412608/9406464 [==============================] - 1s 0us/step
Model: "model_4"
Layer (type) Output Shape Param # Connected to
input_6 (InputLayer) [(None, None, None, 0
Conv1_pad (ZeroPadding2D) (None, None, None, 3 0 input_6[0][0]
Conv1 (Conv2D) (None, None, None, 3 864 Conv1_pad[0][0]
global_average_pooling2d_3 (Glo (None, 1280) 0 out_relu[0][0]
dense_5 (Dense) (None, 3) 3843 global_average_pooling2d_3[0][0]
Total params: 2,261,827
Trainable params: 3,843
Non-trainable params: 2,257,984
import datetime

# Each run logs to its own timestamped TensorBoard directory.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, profile_batch=(10, 10))
# Stop after 2 epochs without improvement and roll back to the best weights.
earlystopping = tf.keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)
# Callbacks must be passed through `callbacks=`; `fit` has no `metrics`
# parameter, so the previous keyword raised a TypeError and the callbacks
# never ran.
history = model.fit(train_dataset, epochs=10, validation_data=test_dataset,
                    callbacks=[tensorboard, earlystopping])
Epoch 1/10
13/13 [==============================] - 44s 3s/step - loss: 0.6851 - accuracy: 0.6872 - val_loss: 0.2462 - val_accuracy: 0.9500
Epoch 2/10
13/13 [==============================] - 47s 4s/step - loss: 0.2002 - accuracy: 0.9667 - val_loss: 0.1005 - val_accuracy: 0.9500
Epoch 3/10
13/13 [==============================] - 47s 4s/step - loss: 0.0960 - accuracy: 0.9897 - val_loss: 0.0566 - val_accuracy: 1.0000
Epoch 4/10
13/13 [==============================] - 41s 3s/step - loss: 0.0644 - accuracy: 0.9923 - val_loss: 0.0386 - val_accuracy: 1.0000
Epoch 5/10
13/13 [==============================] - 43s 3s/step - loss: 0.0459 - accuracy: 0.9974 - val_loss: 0.0339 - val_accuracy: 1.0000
Epoch 6/10
13/13 [==============================] - 43s 3s/step - loss: 0.0338 - accuracy: 1.0000 - val_loss: 0.0290 - val_accuracy: 1.0000
Epoch 7/10
13/13 [==============================] - 43s 3s/step - loss: 0.0283 - accuracy: 1.0000 - val_loss: 0.0242 - val_accuracy: 1.0000
Epoch 8/10
13/13 [==============================] - 45s 3s/step - loss: 0.0234 - accuracy: 1.0000 - val_loss: 0.0246 - val_accuracy: 1.0000
Epoch 9/10
13/13 [==============================] - 42s 3s/step - loss: 0.0197 - accuracy: 1.0000 - val_loss: 0.0201 - val_accuracy: 1.0000
Epoch 10/10
13/13 [==============================] - 44s 3s/step - loss: 0.0166 - accuracy: 1.0000 - val_loss: 0.0185 - val_accuracy: 1.0000
# Reload the model from its HDF5 file and score it on the held-out data to
# check that it survived the save/load round trip.
# NOTE(review): the cell that writes "model.h5" is not visible here.
loaded_model = keras.models.load_model("model.h5")
loaded_model.evaluate(test_dataset)  # Verify
WARNING:tensorflow:Error in loading the saved optimizer state. As a result, your model is starting with a freshly initialized optimizer.
2/2 [==============================] - 1s 312ms/step - loss: 0.0185 - accuracy: 1.0000
[0.0185115784406662, 1.0]
# Classify the images found under DATA_PATH + TEST_PATH.
DATA_PATH = './data/'
TEST_PATH = 'test/'
images = np.array([
    preprocess_image(image)
    for image in load_images(DATA_PATH, TEST_PATH)
])
preds = model.predict(images)
# Index of the highest-scoring class for each image.
label_preds = np.argmax(preds, axis=1)
# Human-readable class names, indexed by predicted label.
# NOTE(review): the label-to-class mapping is set where the TFRecords are
# written, which is not visible here; confirm the order.
labels = ['nature', 'person', 'food']
plt.figure(figsize=(20, 20))
# range(1, 10) caps the plot at the nine cells of the 3x3 grid.
for index, image, label, probabilities in zip(range(1, 10), images, label_preds, preds):
    # Undo the preprocessing scale for display; assumes values in [-1, 1]
    # (TODO confirm).
    image = (image + 1.) / 2.
    plt.subplot(3, 3, index)
    plt.imshow(image)
    plt.title(f"{labels[label]} {probabilities}")