# Introduction to Convolutional Neural Networks

### The MNIST Dataset - Revisited

In [None]:
import random
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (4,3)  # default figure width, height in inches

In [None]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

In [None]:
# return images of type float32 in the range 0-1
def load_normalized_mnist_data():
    """Load MNIST and rescale the pixel values of both splits to [0, 1].

    The images returned by ``mnist.load_data()`` are converted to float32
    and divided by 255; the labels are passed through unchanged.

    Returns:
        ((train_images, train_labels), (test_images, test_labels))
    """
    train_split, test_split = mnist.load_data()
    x_train, y_train = train_split
    x_test, y_test = test_split
    # Same conversion for both splits: cast to float32, then scale by 1/255.
    x_train, x_test = (x.astype('float32') / 255 for x in (x_train, x_test))
    return (x_train, y_train), (x_test, y_test)

In [None]:
# Load both splits; the images come back as float32 scaled to the range 0-1, the labels untouched.
(train_images,train_labels), (test_images,test_labels) = load_normalized_mnist_data()

In [None]:
# Should be (60000, 28, 28): one 28x28 array per training image.
train_images.shape

In [None]:
# Should be (10000, 28, 28): one 28x28 array per test image.
test_images.shape

In [None]:
# Sanity check on the normalization: expect float32 with values between 0.0 and 1.0.
train_images.dtype, train_images.min(), train_images.max()

In [None]:
# The raw class labels as loaded (not yet one-hot encoded).
train_labels

In [None]:
# One-hot encode the labels so they can be compared against the softmax
# output with the categorical_crossentropy loss.
train_targets = to_categorical(train_labels)
test_targets = to_categorical(test_labels)

In [None]:
# Should be (60000, 10): one 10-element one-hot vector per training image.
train_targets.shape

In [None]:
# One-hot target vector for training example 7.
print(train_targets[7])

In [None]:
# Raw label of the same example; it is the index of the 1 in its one-hot vector.
train_labels[7]

In [None]:
# Show training example 7 in grayscale; the trailing semicolon keeps the notebook
# from echoing the returned image object as text.
plt.imshow(train_images[7], cmap='gray');

### A Basic Feedforward Network for MNIST

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

In [None]:
def build_network():
    """Build and compile a small fully connected classifier for 28x28 inputs.

    Architecture: Flatten -> Dense(30, relu) -> Dense(10, softmax).
    The model is compiled with categorical cross-entropy loss, the RMSprop
    optimizer, and accuracy as the reported metric.

    Returns:
        The compiled, untrained Sequential model.
    """
    model = Sequential([
        Flatten(input_shape=(28,28)),
        Dense(30, activation='relu', name='hidden'),
        Dense(10, activation='softmax', name='output'),
    ])
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [None]:
# Instantiate a fresh, compiled (still untrained) feedforward model.
network = build_network()

In [None]:
# Print the layers with their output shapes and parameter counts.
network.summary()

In [None]:
# Train for one epoch in mini-batches of 64.
# NOTE(review): the trailing "epochs=5" presumably records the setting for a fuller run - confirm.
history = network.fit(train_images, train_targets, epochs=1, batch_size=64)  # epochs=5

In [None]:
# Loss and accuracy (the metric set in build_network) on the training data.
network.evaluate(train_images, train_targets)

In [None]:
# Loss and accuracy on the held-out test data, for comparison with the training figures.
network.evaluate(test_images, test_targets)

In [None]:
def show_wrong_images(network):
    """Plot a random selection of test images that `network` misclassifies.

    Reads the module-level `test_images` and `test_labels`. Prints how many
    test images were misclassified, then draws up to 30 distinct
    misclassified images in a 5 x 6 grid, each titled with the predicted
    label and the correct label. If nothing is misclassified, only the
    count is printed and no figure is created.

    Args:
        network: a model whose ``predict(test_images)`` returns one score
            vector per image; the index of the largest score is taken as
            the predicted label.
    """
    outputs = network.predict(test_images)
    # One vectorized argmax over the class axis instead of a per-row loop.
    predictions = np.argmax(outputs, axis=1)
    # Indices where prediction and label disagree. No hard-coded test-set
    # size; converted to a list because random.sample needs a sequence.
    wrong = np.flatnonzero(predictions != test_labels).tolist()
    print(f"Misclassified {len(wrong)} test images out of {len(test_images)}")
    if not wrong:
        # Nothing to show; random sampling from an empty list would fail.
        return

    rows, columns = 5, 6
    # Sample without replacement so no image appears twice, and never ask
    # for more images than were actually misclassified.
    shown = random.sample(wrong, min(len(wrong), rows * columns))

    plt.figure(figsize=(12,12))  # (width, height) in inches
    for position, w in enumerate(shown, start=1):
        plt.subplot(rows, columns, position)
        plt.title(f'"{predictions[w]}"  (correct: {test_labels[w]})')
        plt.axis('off')
        # Drop a trailing single-channel axis, if present, so the image is
        # 2-D for imshow (some Matplotlib versions reject (H, W, 1) arrays).
        plt.imshow(np.squeeze(test_images[w]), cmap='gray')

A random sampling of misclassified images:

In [None]:
# Visualize test digits that the feedforward network gets wrong.
show_wrong_images(network)

### A Convolutional Neural Network for MNIST

To feed images to a ConvNet, each image must have shape (height, width, channels), even if the images are grayscale (in which case channels = 1).

In [None]:
# Append a channels axis of size 1 (grayscale). Using -1 lets NumPy infer the
# number of images, so this also works on a subset of the data, and re-running
# the cell leaves the shape unchanged.
train_images = train_images.reshape((-1, 28, 28, 1))
test_images = test_images.reshape((-1, 28, 28, 1))

In [None]:
# Should now be (60000, 28, 28, 1): the trailing 1 is the channel axis.
train_images.shape

In [None]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D

In [None]:
def build_convnet():
    """Build and compile the convolutional classifier for 28x28x1 inputs.

    Architecture: three 3x3 convolution layers with 64 filters each (ReLU),
    with 2x2 max pooling after the first two, followed by Flatten,
    Dense(64, relu) and a 10-way softmax output.
    The model is compiled with categorical cross-entropy loss, the RMSprop
    optimizer, and accuracy as the reported metric.

    Returns:
        The compiled, untrained Sequential model.
    """
    model = Sequential([
        Conv2D(64, (3,3), activation='relu', name='conv1', input_shape=(28,28,1)),
        MaxPooling2D((2,2), name='pool1'),
        Conv2D(64, (3,3), activation='relu', name='conv2'),
        MaxPooling2D((2,2), name='pool2'),
        Conv2D(64, (3,3), activation='relu', name='conv3'),
        Flatten(),
        Dense(64, activation='relu', name='hidden'),
        Dense(10, activation='softmax', name='output'),
    ])
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [None]:
# Instantiate a fresh, compiled (still untrained) ConvNet.
convnet = build_convnet()

In [None]:
# Print the layers with their output shapes and parameter counts.
convnet.summary()

In [None]:
# Train the ConvNet for 5 epochs in mini-batches of 64. This rebinds `history`,
# replacing the History object from the feedforward run.
history = convnet.fit(train_images, train_targets, epochs=5, batch_size=64)

In [None]:
# Loss and accuracy (the metric set in build_convnet) on the training data.
convnet.evaluate(train_images, train_targets)

In [None]:
# Loss and accuracy on the held-out test data, for comparison with the training figures.
convnet.evaluate(test_images, test_targets)

In [None]:
def plot_history(history):
    """Plot per-epoch training loss and accuracy side by side.

    Args:
        history: object whose ``history`` attribute is a mapping with
            'loss' and 'accuracy' entries, one value per epoch (as returned
            by the ``fit`` calls in this notebook).
    """
    metrics = history.history
    losses = metrics['loss']
    accuracies = metrics['accuracy']
    epochs = range(1, len(losses) + 1)  # epochs are numbered from 1

    # (values, line style, title, y-axis label) for the left and right panels
    panels = (
        (losses, 'r', "Training loss", "Loss"),
        (accuracies, 'b', "Training accuracy", "Accuracy"),
    )

    plt.figure(figsize=(12,4))  # width, height in inches
    for position, (values, style, title, ylabel) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epochs, values, style)
        plt.title(title)
        plt.xlabel("Epochs")
        plt.ylabel(ylabel)
    # The accuracy panel is still the current axes here: pin it to 0..1.
    plt.ylim(0, 1)
    plt.show()

In [None]:
# Plot the ConvNet's training curves (`history` was last assigned by convnet.fit).
plot_history(history)

A random sampling of misclassified images:

In [None]:
# Visualize test digits that the ConvNet gets wrong.
show_wrong_images(convnet)

### The Structure of ConvNets in More Detail

A very simple case:
* input is a 28 &times; 28 grayscale image
* receptive field size is 3 &times; 3
* convolution layer has only 1 filter (it learns only 1 feature)

The 3 &times; 3 receptive field implies that:
* the feature map is 2 pixels smaller than the input image in each dimension
* the filter consists of 9 weights + 1 bias

In [None]:
# One 3x3 filter over a 28x28 grayscale input.
# The summary should report 10 parameters (9 weights + 1 bias) and a 26x26 feature map.
cnn = Sequential([
    Conv2D(1, (3,3), activation='relu', input_shape=(28,28,1)),
])
cnn.summary()

The size of the input image doesn't affect the number of parameters.  All units in the convolution layer share the same weights and bias (the "filter") for learning a feature, no matter where the feature occurs in the input image.

In [None]:
# Same single 3x3 filter, now over a 1000x1000 grayscale input.
# The parameter count should still be 10; only the feature map grows (998x998).
cnn = Sequential([
    Conv2D(1, (3,3), activation='relu', input_shape=(1000,1000,1)),
])
cnn.summary()

If the receptive field size is 5 &times; 5, the "kernel" consists of 25 weights + 1 bias, and the feature map is 4 pixels smaller than the input image in each dimension.

In [None]:
# One 5x5 filter over a 28x28 grayscale input.
# The summary should report 26 parameters (25 weights + 1 bias) and a 24x24 feature map.
cnn = Sequential([
    Conv2D(1, (5,5), activation='relu', input_shape=(28,28,1)),
])
cnn.summary()

With RGB images, the input depth is 3, so a 3 &times; 3 kernel consists of 27 (9 &times; 3) weights + 1 bias.

In [None]:
# One 3x3 filter over a 28x28 RGB input (depth 3).
# The summary should report 28 parameters (3*3*3 = 27 weights + 1 bias).
cnn = Sequential([
    Conv2D(1, (3,3), activation='relu', input_shape=(28,28,3)),
])
cnn.summary()

If we add another filter, the Conv2D layer will learn two independent features, doubling the number of parameters.

In [None]:
# Two 3x3 filters over a 28x28 RGB input.
# The summary should report 56 parameters (2 filters x 28 each).
cnn = Sequential([
    Conv2D(2, (3,3), activation='relu', input_shape=(28,28,3)),
])
cnn.summary()

We can include as many filters as we like in a layer.  Each *N* &times; *N* filter has its own kernel (set of weights + bias), which is shared by all the units in that filter's feature map.  Each unit receives inputs from its specific *N* &times; *N* patch of the input image, across the full depth (all channels) of the input.

In [None]:
# Ten 3x3 filters over a 28x28 RGB input.
# The summary should report 280 parameters (10 filters x 28 each).
cnn = Sequential([
    Conv2D(10, (3,3), activation='relu', input_shape=(28,28,3)),
])
cnn.summary()