Transfer Learning

"How to reuse knowledge infused in a model to perform a similar yet slightly different task"

# importing required libraries

from keras.models import Sequential
import cv2
# Render matplotlib figures inline in the notebook output.
# `run_line_magic` replaces the deprecated `magic()` entry point.
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import numpy as np
import keras
from keras.layers import Dense
import pandas as pd

from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np
from keras.applications.vgg16 import decode_predictions

from pathlib import Path
# Root folder for the files this notebook reads and writes, and the
# sub-folder that receives the generated digit images.
mnist_path = Path("mnist")
img_path = mnist_path / "images"

Generate the image dataset from the scikit-learn digits dataset

from sklearn import datasets

# Load the digits dataset bundled with scikit-learn.
digits = datasets.load_digits()

# cv2.imwrite does not create missing folders and only signals failure
# through its return value, so create the target folder up front.
img_path.mkdir(parents=True, exist_ok=True)

# Write every digit as "<index>.png" so the file name doubles as the row
# index into the label table saved below.
for i, im in enumerate(digits['images']):
    out_file = img_path / f"{i}.png"
    # Pixel values are scaled by 16 before writing (presumably to spread
    # them over the 8-bit range -- TODO confirm against the dataset docs).
    if not cv2.imwrite(out_file.as_posix(), im * 16):
        raise IOError(f"Could not write image {out_file}")

# Store the targets next to the images; the DataFrame index matches the
# image file numbering.
pd.DataFrame({"labels": digits["target"]}).to_csv(
    (img_path / "labels.csv").as_posix()
)

# Count the generated PNG files and split their indices 75% / 25%:
# indices below `limit` form the training range, the rest the test range.
nb_img = sum(1 for _ in img_path.glob("*.png"))
limit = int(0.75 * nb_img)
train_range, test_range = range(limit), range(limit, nb_img)
print(nb_img, limit, train_range, test_range)

1797 1347 range(0, 1347) range(1347, 1797)

def _load_vgg_inputs(indices):
    """Load the images numbered by *indices* as one preprocessed batch.

    For every index ``i`` the file ``<img_path>/<i>.png`` is loaded at
    224x224, converted to an array, and the arrays are stacked in the
    order given by *indices*. The stacked batch is then passed through
    VGG16's ``preprocess_input`` (the original notebook describes this
    step as mean subtraction).

    Returns the preprocessed batch, one entry per index.
    """
    batch = np.array([
        image.img_to_array(
            image.load_img(
                (img_path / f"{i}.png").as_posix(),
                target_size=(224, 224),
            )
        )
        for i in indices
    ])
    return preprocess_input(batch)


# Same procedure for both splits, so train and test inputs cannot drift apart.
train_img = _load_vgg_inputs(train_range)
test_img = _load_vgg_inputs(test_range)

(224, 224, 3)

# loading VGG16 model weights
transferred_model = VGG16(weights='imagenet', include_top=False)
transferred_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_8 (InputLayer)         [(None, None, None, 3)]   0
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0
_________________________________________________________________
block3_conv1 (Conv2D)        (None, None, None, 256)   295168
_________________________________________________________________
block3_conv2 (Conv2D)        (None, None, None, 256)   590080
_________________________________________________________________
block3_conv3 (Conv2D)        (None, None, None, 256)   590080
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, None, None, 256)   0
_________________________________________________________________
block4_conv1 (Conv2D)        (None, None, None, 512)   1180160
_________________________________________________________________
block4_conv2 (Conv2D)        (None, None, None, 512)   2359808
_________________________________________________________________
block4_conv3 (Conv2D)        (None, None, None, 512)   2359808
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, None, None, 512)   0
_________________________________________________________________
block5_conv1 (Conv2D)        (None, None, None, 512)   2359808
_________________________________________________________________
block5_conv2 (Conv2D)        (None, None, None, 512)   2359808
_________________________________________________________________
block5_conv3 (Conv2D)        (None, None, None, 512)   2359808
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, None, None, 512)   0
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________

import h5py

# Set to True to force recomputing the features with VGG16 instead of
# reusing the cached HDF5 file.
re_extract_features = False
features_file = "mnist/mnist_features.hdf5"

# The cache is only written further down in the notebook, so on a first run
# it does not exist yet: fall back to extraction instead of failing to open it.
if re_extract_features or not Path(features_file).exists():
    # Extracting features from the train dataset using the VGG16 pre-trained model
    features_train = transferred_model.predict(train_img)
    # Extracting features from the test dataset using the VGG16 pre-trained model
    features_test = transferred_model.predict(test_img)
else:
    # Reload the previously stored feature arrays.
    with h5py.File(features_file, "r") as f:
        features_train = np.array(f["features/train"])
        features_test = np.array(f["features/test"])

# Report the shapes whichever path produced the features.
print(features_train.shape, features_test.shape)

(1347, 7, 7, 512) (450, 7, 7, 512)

Store the extracted features (the trained model itself is saved later in the notebook)

# Display the shapes of the train and test feature arrays before storing them.
features_train.shape, features_test.shape

((1347, 7, 7, 512), (450, 7, 7, 512))

import h5py

# Persist both feature arrays in one HDF5 file, under the "features" group,
# so later runs can reload them. The file is opened in "w" mode.
with h5py.File("mnist/mnist_features.hdf5", "w") as f:
    features_group = f.create_group("features")
    dset_train = features_group.create_dataset("train", data=features_train)
    dset_test = features_group.create_dataset("test", data=features_test)

Note: add a step that highlights the extracted features (#explainability)

Adapt features to new MLP

# Reload the label table stored next to the images; the first CSV column
# is used as the index.
labels_csv = img_path / "labels.csv"
labels = pd.read_csv(labels_csv.as_posix(), index_col=0)
labels

	labels
0	0
1	1
2	2
3	3
4	4
...	...
1792	9
1793	0
1794	8
1795	9
1796	8

# One-hot lookup table sized from the FULL label column, so the train and
# test targets always have the same number of columns even if some class is
# absent from one split (per-split pd.get_dummies would drop that column).
# Assumes the labels are integers 0..K-1 -- TODO confirm for other datasets.
all_labels = labels["labels"].values
num_classes = int(all_labels.max()) + 1
one_hot = np.eye(num_classes, dtype="float32")

# Flatten each feature map into a single vector for the MLP input,
# e.g. (N, 7, 7, 512) --> (N, 25088); -1 lets numpy derive the width.
train_x = features_train.reshape(features_train.shape[0], -1)
# One-hot encoded targets for the training indices.
train_y = one_hot[all_labels[train_range]]
print("Training set", train_x.shape, train_y.shape)

# Same treatment for the test split.
test_x = features_test.reshape(features_test.shape[0], -1)
test_y = one_hot[all_labels[test_range]]
print("Testing set", test_x.shape, test_y.shape)

Training set (1347, 25088) (1347, 10)
Testing set (450, 25088) (450, 10)

Create the MLP model

from keras.layers import Activation, Dense, Dropout

# MLP head trained on the flattened VGG16 features (25088 values per image).
model = Sequential()

# The Dropout layers must be added with model.add(); merely constructing
# them (as the previous version did) leaves them out of the network.
model.add(Dense(1000, input_dim=25088, activation='relu', kernel_initializer='uniform'))
model.add(Dropout(0.3))

model.add(Dense(500, activation='sigmoid'))
model.add(Dropout(0.4))

model.add(Dense(150, activation='sigmoid'))
model.add(Dropout(0.2))

# Ten output units followed by a softmax, matching the one-hot targets.
model.add(Dense(units=10))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])
model.summary()

Train the new model

# Train the MLP head on the flattened features: 20 epochs, batches of 128,
# with the test split supplied as validation data.
model.fit(
    x=train_x,
    y=train_y,
    batch_size=128,
    epochs=20,
    validation_data=(test_x, test_y),
)

Epoch 1/20
11/11 [==============================] - 2s 117ms/step - loss: 2.3253 - accuracy: 0.1903 - val_loss: 1.8503 - val_accuracy: 0.7089
Epoch 2/20
11/11 [==============================] - 1s 101ms/step - loss: 1.7412 - accuracy: 0.7974 - val_loss: 1.3985 - val_accuracy: 0.8378
Epoch 3/20
11/11 [==============================] - 1s 103ms/step - loss: 1.2646 - accuracy: 0.9024 - val_loss: 0.9943 - val_accuracy: 0.9289
Epoch 4/20
11/11 [==============================] - 1s 103ms/step - loss: 0.8349 - accuracy: 0.9636 - val_loss: 0.6862 - val_accuracy: 0.9378
Epoch 5/20
11/11 [==============================] - 1s 105ms/step - loss: 0.5149 - accuracy: 0.9812 - val_loss: 0.4834 - val_accuracy: 0.9422
Epoch 6/20
11/11 [==============================] - 1s 104ms/step - loss: 0.3083 - accuracy: 0.9904 - val_loss: 0.3589 - val_accuracy: 0.9356
Epoch 7/20
11/11 [==============================] - 1s 101ms/step - loss: 0.1978 - accuracy: 0.9961 - val_loss: 0.2829 - val_accuracy: 0.9489
Epoch 8/20
11/11 [==============================] - 1s 100ms/step - loss: 0.1226 - accuracy: 0.9994 - val_loss: 0.2266 - val_accuracy: 0.9533
Epoch 9/20
11/11 [==============================] - 1s 101ms/step - loss: 0.0802 - accuracy: 1.0000 - val_loss: 0.2048 - val_accuracy: 0.9556
Epoch 10/20
11/11 [==============================] - 1s 102ms/step - loss: 0.0559 - accuracy: 1.0000 - val_loss: 0.1785 - val_accuracy: 0.9578
Epoch 11/20
11/11 [==============================] - 1s 103ms/step - loss: 0.0417 - accuracy: 1.0000 - val_loss: 0.1694 - val_accuracy: 0.9578
Epoch 12/20
11/11 [==============================] - 1s 105ms/step - loss: 0.0323 - accuracy: 1.0000 - val_loss: 0.1577 - val_accuracy: 0.9622
Epoch 13/20
11/11 [==============================] - 1s 103ms/step - loss: 0.0264 - accuracy: 1.0000 - val_loss: 0.1556 - val_accuracy: 0.9578
Epoch 14/20
11/11 [==============================] - 1s 105ms/step - loss: 0.0216 - accuracy: 1.0000 - val_loss: 0.1488 - val_accuracy: 0.9622
Epoch 15/20
11/11 [==============================] - 1s 105ms/step - loss: 0.0184 - accuracy: 1.0000 - val_loss: 0.1487 - val_accuracy: 0.9600
Epoch 16/20
11/11 [==============================] - 1s 100ms/step - loss: 0.0156 - accuracy: 1.0000 - val_loss: 0.1480 - val_accuracy: 0.9556
Epoch 17/20
11/11 [==============================] - 1s 100ms/step - loss: 0.0135 - accuracy: 1.0000 - val_loss: 0.1480 - val_accuracy: 0.9578
Epoch 18/20
11/11 [==============================] - 1s 102ms/step - loss: 0.0119 - accuracy: 1.0000 - val_loss: 0.1465 - val_accuracy: 0.9578
Epoch 19/20
11/11 [==============================] - 1s 102ms/step - loss: 0.0104 - accuracy: 1.0000 - val_loss: 0.1461 - val_accuracy: 0.9578
Epoch 20/20
11/11 [==============================] - 1s 101ms/step - loss: 0.0092 - accuracy: 1.0000 - val_loss: 0.1449 - val_accuracy: 0.9556

predict_y = model.predict(test_x)

# Compare 1-D predictions with 1-D labels. Indexing the whole DataFrame
# (labels.values) yields an (N, 1) column that broadcasts against the (N,)
# predictions into an N x N matrix instead of an element-wise comparison.
predicted_digits = np.argmax(predict_y, axis=1)
expected_digits = labels["labels"].values[test_range]
predicted_digits == expected_digits

array([[ True, False,  True, ..., False, False, False],
       [False,  True, False, ..., False, False, False],
       [ True, False,  True, ..., False, False, False],
       ...,
       [False, False, False, ...,  True, False,  True],
       [False, False, False, ..., False,  True, False],
       [False, False, False, ...,  True, False,  True]])

Approach 2: append new layers and freeze bottom layers of VGG for training

# Flatten was never imported in this notebook; import every layer used
# below explicitly so this cell runs on its own.
from keras.layers import Activation, Dense, Dropout, Flatten
from keras.models import Model

vgg_model = VGG16(weights='imagenet', include_top=False, input_shape=(224,224,3))

# Creating dictionary that maps layer names to the layers
layer_dict = {layer.name: layer for layer in vgg_model.layers}

# Getting output tensor of the last VGG layer that we want to include
x = layer_dict['block5_pool'].output

# Adding new layers: the same head as the stand-alone MLP, attached
# directly to the VGG16 output.
x = Flatten()(x)
x = Dense(1000, activation='relu', kernel_initializer='uniform')(x)
x = Dropout(0.3)(x)
x = Dense(500, activation='sigmoid')(x)
x = Dropout(0.4)(x)
x = Dense(150, activation='sigmoid')(x)
x = Dropout(0.2)(x)
x = Dense(units=10)(x)
x = Activation('softmax')(x)

# Creating new model. Please note that this is NOT a Sequential() model.
custom_model = Model(inputs=vgg_model.input, outputs=x)

# Make sure that the pre-trained bottom layers are not trainable.
# custom_model shares these layer objects with vgg_model, so freezing them
# here freezes them in custom_model as well.
for layer in vgg_model.layers:
    layer.trainable = False

# Do not forget to compile it (after freezing, so the trainable flags apply)
custom_model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

custom_model.summary()

Model: "model_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_23 (InputLayer)        [(None, 224, 224, 3)]     0
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0
_________________________________________________________________
flatten_7 (Flatten)          (None, 25088)             0
_________________________________________________________________
dense_37 (Dense)             (None, 1000)              25089000
_________________________________________________________________
dropout_24 (Dropout)         (None, 1000)              0
_________________________________________________________________
dense_38 (Dense)             (None, 500)               500500
_________________________________________________________________
dropout_25 (Dropout)         (None, 500)               0
_________________________________________________________________
dense_39 (Dense)             (None, 150)               75150
_________________________________________________________________
dropout_26 (Dropout)         (None, 150)               0
_________________________________________________________________
dense_40 (Dense)             (None, 10)                1510
_________________________________________________________________
activation_8 (Activation)    (None, 10)                0
=================================================================
Total params: 40,380,848
Trainable params: 25,666,160
Non-trainable params: 14,714,688
_________________________________________________________________

Train the new model

# Train the combined model directly on the preprocessed images: 20 epochs,
# batches of 128, with the test split supplied as validation data.
custom_model.fit(
    x=train_img,
    y=train_y,
    batch_size=128,
    epochs=20,
    validation_data=(test_img, test_y),
)

Epoch 1/20
11/11 [==============================] - 122s 11s/step - loss: 2.4470 - accuracy: 0.1130 - val_loss: 2.1486 - val_accuracy: 0.2711
Epoch 2/20
11/11 [==============================] - 136s 13s/step - loss: 2.2192 - accuracy: 0.1839 - val_loss: 1.9199 - val_accuracy: 0.6311
Epoch 3/20
11/11 [==============================] - 129s 12s/step - loss: 1.9635 - accuracy: 0.3612 - val_loss: 1.4907 - val_accuracy: 0.7600
Epoch 4/20
11/11 [==============================] - 128s 12s/step - loss: 1.5639 - accuracy: 0.6354 - val_loss: 1.0121 - val_accuracy: 0.8244
Epoch 5/20
11/11 [==============================] - 128s 12s/step - loss: 1.0947 - accuracy: 0.7856 - val_loss: 0.6528 - val_accuracy: 0.8978
Epoch 6/20
11/11 [==============================] - 618s 61s/step - loss: 0.7248 - accuracy: 0.8800 - val_loss: 0.4142 - val_accuracy: 0.9244
Epoch 7/20
11/11 [==============================] - 124s 12s/step - loss: 0.4343 - accuracy: 0.9431 - val_loss: 0.3164 - val_accuracy: 0.9244
Epoch 8/20
11/11 [==============================] - 127s 12s/step - loss: 0.2650 - accuracy: 0.9688 - val_loss: 0.2276 - val_accuracy: 0.9467
Epoch 9/20
11/11 [==============================] - 132s 12s/step - loss: 0.1566 - accuracy: 0.9865 - val_loss: 0.1752 - val_accuracy: 0.9600
Epoch 10/20
11/11 [==============================] - 146s 13s/step - loss: 0.1018 - accuracy: 0.9874 - val_loss: 0.2028 - val_accuracy: 0.9511
Epoch 11/20
11/11 [==============================] - 138s 13s/step - loss: 0.0771 - accuracy: 0.9946 - val_loss: 0.1748 - val_accuracy: 0.9511
Epoch 12/20
11/11 [==============================] - 137s 13s/step - loss: 0.0557 - accuracy: 0.9978 - val_loss: 0.1440 - val_accuracy: 0.9600
Epoch 13/20
11/11 [==============================] - 137s 13s/step - loss: 0.0375 - accuracy: 1.0000 - val_loss: 0.1549 - val_accuracy: 0.9533
Epoch 14/20
11/11 [==============================] - 132s 12s/step - loss: 0.0300 - accuracy: 0.9993 - val_loss: 0.1574 - val_accuracy: 0.9578
Epoch 15/20
11/11 [==============================] - 132s 12s/step - loss: 0.0254 - accuracy: 1.0000 - val_loss: 0.1468 - val_accuracy: 0.9511
Epoch 16/20
11/11 [==============================] - 132s 12s/step - loss: 0.0222 - accuracy: 1.0000 - val_loss: 0.1399 - val_accuracy: 0.9578
Epoch 17/20
11/11 [==============================] - 133s 12s/step - loss: 0.0167 - accuracy: 1.0000 - val_loss: 0.1383 - val_accuracy: 0.9600
Epoch 18/20
11/11 [==============================] - 133s 12s/step - loss: 0.0151 - accuracy: 1.0000 - val_loss: 0.1463 - val_accuracy: 0.9489
Epoch 19/20
11/11 [==============================] - 131s 12s/step - loss: 0.0146 - accuracy: 1.0000 - val_loss: 0.1487 - val_accuracy: 0.9578
Epoch 20/20
11/11 [==============================] - 126s 12s/step - loss: 0.0132 - accuracy: 1.0000 - val_loss: 0.1502 - val_accuracy: 0.9556





<tensorflow.python.keras.callbacks.History at 0x1741d5160>

# Write the trained model to an HDF5 file, then load it back from that file.
model_file = "mnist/custom_model.h5"
custom_model.save(model_file, save_format="h5")

custom_model = keras.models.load_model(model_file)

import matplotlib.pyplot as plt

# Show a few sample images together with the digit predicted for each.
examples = [4, 150, 1500, 1689]
for ex in examples:
    im = image.load_img(
        (img_path / f"{ex}.png").as_posix(),
        target_size=(224,224)
    )
    plt.figure()
    plt.imshow(im)
    plt.show()
    # The model was trained on batches passed through preprocess_input, so
    # the same preprocessing has to be applied before predicting; feeding
    # raw pixel values gives the network inputs unlike its training data.
    batch = preprocess_input(np.array([image.img_to_array(im)]))
    y = custom_model.predict(batch)
    print(np.argmax(y))
    print("---")

png

4
---

png

0
---

png

1
---

png

2
---