Tensorflow / Keras – A fast non-introduction

This is a fast overview of how to get an MNIST example running under TF Keras.

If you are just about to start with Tensorflow / Keras (especially for a scientific project), you may want to reconsider using Keras. In that case, please check out (and use) PyTorch.

Questions to David Rotermund

Data loader / Data generator

|||
|---|---|
|keras.utils.Sequence|“Base object for fitting to a sequence of data, such as a dataset.”|
|tf.keras.utils.to_categorical|“Converts a class vector (integers) to binary class matrix.”|
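
As a quick illustration of to_categorical (a minimal standalone sketch, not tied to the MNIST files used below), integer class labels become one-hot rows:

import numpy as np
from tensorflow import keras

# Integer labels -> one-hot matrix with num_classes columns.
labels = np.array([0, 2, 9])
print(keras.utils.to_categorical(labels, num_classes=10))
# [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
#  [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
#  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]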

Basic

DataGenerator_no_dataugmentation.py

from tensorflow import keras
import numpy as np


class DataGenerator(keras.utils.Sequence):
    def __init__(
        self,
        train: bool = True,
        size_of_batch: int = 32,
        number_of_classes: int = 10,
        do_shuffle: bool = True,
    ) -> None:
        super().__init__()

        if train is True:
            self.pattern_storage: np.ndarray = np.load("./train_pattern_storage.npy")
            self.label_storage: np.ndarray = np.load("./train_label_storage.npy")
        else:
            self.pattern_storage = np.load("./test_pattern_storage.npy")
            self.label_storage = np.load("./test_label_storage.npy")

        self.pattern_storage = self.pattern_storage.astype(np.float32)
        self.pattern_storage /= np.max(self.pattern_storage)

        self.dimensions: tuple[int, int] = (
            self.pattern_storage.shape[1],
            self.pattern_storage.shape[2],
        )

        # How many patterns are there?
        self.number_of_pattern: int = self.label_storage.shape[0]

        self.size_of_batch: int = size_of_batch

        self.number_of_classes: int = number_of_classes
        self.do_shuffle: bool = do_shuffle

        if self.pattern_storage.ndim == 3:
            self.number_of_channel: int = 1
        else:
            self.number_of_channel = self.pattern_storage.shape[3]

        self.available_indices: np.ndarray = np.arange(self.number_of_pattern)

        self.on_epoch_end()

    def on_epoch_end(self) -> None:
        self.available_indices = np.arange(self.number_of_pattern)

        if self.do_shuffle is True:
            np.random.shuffle(self.available_indices)

    def __getitem__(self, index: int) -> tuple[np.ndarray, np.ndarray]:
        selected_indices: np.ndarray = self.available_indices[
            index * self.size_of_batch : (index + 1) * self.size_of_batch
        ]
        image, target = self.__data_generation(selected_indices)
        return image, target

    def __data_generation(
        self, list_of_indice: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        image = np.empty(
            (self.size_of_batch, *self.dimensions, self.number_of_channel),
            dtype=np.float32,
        )
        target = np.empty((self.size_of_batch), dtype=int)

        for i in range(0, len(list_of_indice)):
            # list_of_indice already contains the (shuffled) pattern indices
            # selected in __getitem__; use them directly.
            if self.pattern_storage.ndim == 3:
                image[i, :, :, 0] = self.pattern_storage[list_of_indice[i], :, :]
            else:
                image[i, :, :, :] = self.pattern_storage[list_of_indice[i], :, :, :]

            target[i] = self.label_storage[list_of_indice[i]]

        return image, keras.utils.to_categorical(
            target, num_classes=self.number_of_classes
        )

    def __len__(self):
        return int(np.floor(self.number_of_pattern / self.size_of_batch))


if __name__ == "__main__":
    pass
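
A minimal usage sketch (assuming the class above is saved as DataGenerator_no_dataugmentation.py and the four .npy files are in the working directory):

from DataGenerator_no_dataugmentation import DataGenerator

train_data = DataGenerator(train=True, size_of_batch=32)
image, target = train_data[0]  # first mini-batch via __getitem__
print(image.shape)   # e.g. (32, 28, 28, 1) for MNIST
print(target.shape)  # (32, 10): one-hot labels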

With data augmentation

You can add further preprocessing layers to the pre-processing chain self.data_augmentation; they are then applied to the input before it is handed to the network.
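
For example, a training chain extended by a small random rotation might look like this (a sketch; the layer parameters are illustrative, not tuned):

from tensorflow import keras

data_augmentation = keras.Sequential(
    [
        keras.layers.RandomCrop(height=24, width=24),
        keras.layers.RandomRotation(factor=0.05),  # up to +/- 5% of a full turn
    ]
)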

DataGenerator.py

from tensorflow import keras
import numpy as np


class DataGenerator(keras.utils.Sequence):
    def __init__(
        self,
        train: bool = True,
        size_of_batch: int = 32,
        number_of_classes: int = 10,
        do_shuffle: bool = True,
    ) -> None:
        super().__init__()

        if train is True:
            self.pattern_storage: np.ndarray = np.load("./train_pattern_storage.npy")
            self.label_storage: np.ndarray = np.load("./train_label_storage.npy")
        else:
            self.pattern_storage = np.load("./test_pattern_storage.npy")
            self.label_storage = np.load("./test_label_storage.npy")

        self.pattern_storage = self.pattern_storage.astype(np.float32)
        self.pattern_storage /= np.max(self.pattern_storage)

        self.dimensions: tuple[int, int] = (
            self.pattern_storage.shape[1],
            self.pattern_storage.shape[2],
        )
        reduction: tuple[int, int] = (4, 4)

        if train is True:
            self.data_augmentation = keras.Sequential(
                [
                    keras.layers.RandomCrop(
                        height=self.dimensions[0] - reduction[0],
                        width=self.dimensions[1] - reduction[1],
                    ),
                ]
            )
        else:
            self.data_augmentation = keras.Sequential(
                [
                    keras.layers.CenterCrop(
                        height=self.dimensions[0] - reduction[0],
                        width=self.dimensions[1] - reduction[1],
                    ),
                ]
            )

        # How many patterns are there?
        self.number_of_pattern: int = self.label_storage.shape[0]

        self.size_of_batch: int = size_of_batch

        self.number_of_classes: int = number_of_classes
        self.do_shuffle: bool = do_shuffle

        if self.pattern_storage.ndim == 3:
            self.number_of_channel: int = 1
        else:
            self.number_of_channel = self.pattern_storage.shape[3]

        self.available_indices: np.ndarray = np.arange(self.number_of_pattern)

        self.on_epoch_end()

    def on_epoch_end(self) -> None:
        self.available_indices = np.arange(self.number_of_pattern)

        if self.do_shuffle is True:
            np.random.shuffle(self.available_indices)

    def __getitem__(self, index: int) -> tuple[np.ndarray, np.ndarray]:
        selected_indices: np.ndarray = self.available_indices[
            index * self.size_of_batch : (index + 1) * self.size_of_batch
        ]
        image, target = self.__data_generation(selected_indices)
        return image, target

    def __data_generation(
        self, list_of_indice: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        image = np.empty(
            (self.size_of_batch, *self.dimensions, self.number_of_channel),
            dtype=np.float32,
        )
        target = np.empty((self.size_of_batch), dtype=int)

        for i in range(0, len(list_of_indice)):
            # list_of_indice already contains the (shuffled) pattern indices
            # selected in __getitem__; use them directly.
            if self.pattern_storage.ndim == 3:
                image[i, :, :, 0] = self.pattern_storage[list_of_indice[i], :, :]
            else:
                image[i, :, :, :] = self.pattern_storage[list_of_indice[i], :, :, :]

            target[i] = self.label_storage[list_of_indice[i]]

        image = self.data_augmentation(image)
        return image, keras.utils.to_categorical(
            target, num_classes=self.number_of_classes
        )

    def __len__(self):
        return int(np.floor(self.number_of_pattern / self.size_of_batch))


if __name__ == "__main__":
    pass

Train an example MNIST network

|||
|---|---|
|tf.keras.backend.clear_session|“Resets all state generated by Keras.”|
|tf.keras.Sequential|“Sequential groups a linear stack of layers into a tf.keras.Model.”|
|network.add()|“Adds a layer instance on top of the layer stack.”|
|tf.keras.layers.Conv2D|“2D convolution layer (e.g. spatial convolution over images).”|
|tf.keras.layers.MaxPool2D|“Max pooling operation for 2D spatial data.”|
|tf.keras.layers.Flatten|“Flattens the input. Does not affect the batch size.”|
|tf.keras.layers.Dense|“Just your regular densely-connected NN layer.”|
|network.compile()|“Configures the model for training.”|
|tf.keras.metrics.categorical_crossentropy|“Computes the categorical crossentropy loss.”|
|tf.keras.optimizers.Adam|“Optimizer that implements the Adam algorithm.”|
|network.fit()|“Trains the model for a fixed number of epochs (iterations on a dataset).”|
|network.summary()|“Prints a string summary of the network.”|
|network.save()|“Saves the model to Tensorflow SavedModel or a single HDF5 file.”|

Parameters for the layers:

|||
|---|---|
|padding|“One of "valid", "same" or "causal" (case-insensitive). "valid" means no padding. "same" results in padding with zeros evenly to the left/right or up/down of the input such that output has the same height/width dimension as the input. "causal" results in causal (dilated) convolutions, e.g. output[t] does not depend on input[t+1:].”|
|use_bias|“Boolean, whether the layer uses a bias vector.”|
|activation|“Activation function to use. If you don’t specify anything, no activation is applied (see keras.activations).”|
|data_format|“A string, one of channels_last (default) or channels_first.”|
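
As a quick sanity check of the padding parameter, here is a standalone sketch (the sizes are illustrative):

import numpy as np
from tensorflow import keras

# A 5x5 "valid" convolution shrinks a 28x28 input to 24x24;
# "same" keeps the spatial size.
x = np.zeros((1, 28, 28, 1), dtype=np.float32)
conv_valid = keras.layers.Conv2D(8, kernel_size=(5, 5), padding="valid")
conv_same = keras.layers.Conv2D(8, kernel_size=(5, 5), padding="same")
print(conv_valid(x).shape)  # (1, 24, 24, 8)
print(conv_same(x).shape)   # (1, 28, 28, 8)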

keras_learn.py

from tensorflow import keras
from DataGenerator import DataGenerator

epoch_max: int = 50
number_of_classes: int = 10
size_of_batch_train: int = 100

train_data = DataGenerator(
    train=True,
    size_of_batch=size_of_batch_train,
    number_of_classes=number_of_classes,
    do_shuffle=True,
)

number_of_channels: int = train_data.number_of_channel
input_dimensions = train_data.dimensions
number_of_pattern_train = train_data.number_of_pattern

# The DataGenerator's augmentation chain crops the images (see reduction in
# DataGenerator.py), so the network input is smaller than the raw patterns:
reduction: tuple[int, int] = (4, 4)
input_dimensions = (
    input_dimensions[0] - reduction[0],
    input_dimensions[1] - reduction[1],
)

number_of_output_channels_conv1: int = 32
number_of_output_channels_conv2: int = 64
number_of_neurons_flatten1: int = 1024

kernel_size_conv1: tuple[int, int] = (5, 5)
kernel_size_pool1: tuple[int, int] = (2, 2)
kernel_size_conv2: tuple[int, int] = (5, 5)
kernel_size_pool2: tuple[int, int] = (2, 2)

stride_conv1: tuple[int, int] = (1, 1)
stride_pool1: tuple[int, int] = (2, 2)
stride_conv2: tuple[int, int] = (1, 1)
stride_pool2: tuple[int, int] = (2, 2)


keras.backend.clear_session()

network = keras.Sequential()

# Conv 1
network.add(
    keras.layers.Conv2D(
        number_of_output_channels_conv1,
        kernel_size=kernel_size_conv1,
        activation="relu",
        input_shape=(input_dimensions[0], input_dimensions[1], number_of_channels),
        padding="valid",
        strides=stride_conv1,
        data_format="channels_last",
        use_bias=True,
    )
)

# Pool 1
network.add(
    keras.layers.MaxPooling2D(
        pool_size=kernel_size_pool1,
        padding="valid",
        strides=stride_pool1,
        data_format="channels_last",
    )
)

# Conv 2
network.add(
    keras.layers.Conv2D(
        number_of_output_channels_conv2,
        kernel_size=kernel_size_conv2,
        activation="relu",
        padding="valid",
        strides=stride_conv2,
        data_format="channels_last",
        use_bias=True,
    )
)

# Pool 2
network.add(
    keras.layers.MaxPooling2D(
        pool_size=kernel_size_pool2,
        padding="valid",
        strides=stride_pool2,
        data_format="channels_last",
    )
)

# Flatten
network.add(keras.layers.Flatten(data_format="channels_last"))

# Full layer
network.add(
    keras.layers.Dense(number_of_neurons_flatten1, activation="relu", use_bias=True)
)

# Output layer
network.add(keras.layers.Dense(number_of_classes, activation="softmax"))

network.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)

for epoch_id in range(0, epoch_max):
    print(f"Epoch: {epoch_id} of {epoch_max - 1}")
    network.fit(x=train_data)

    network.summary()
    network.save("Model_" + str(epoch_id) + ".h5")

Test the example network performance

|||
|---|---|
|tf.keras.models.load_model|“Loads a model saved via model.save().”|
|network.evaluate()|“Returns the loss value & metrics values for the model in test mode.”|

keras_test.py

from tensorflow import keras
from DataGenerator import DataGenerator

number_of_classes: int = 10
size_of_batch_test: int = 100
model_id: int = 49

test_data = DataGenerator(
    train=False,
    size_of_batch=size_of_batch_test,
    number_of_classes=number_of_classes,
    do_shuffle=False,
)

keras.backend.clear_session()

network = keras.models.load_model("./Model_" + str(model_id) + ".h5")

test_loss, test_acc = network.evaluate(x=test_data)

print(f"Correct: {test_acc * 100.0:.2f}%")

How to extract the activities from the network

from tensorflow import keras
from DataGenerator import DataGenerator
import numpy as np

number_of_classes: int = 10
size_of_batch_test: int = 100
model_id: int = 49
pattern_batch_id: int = 0
pattern_id: int = 42

test_data = DataGenerator(
    train=False,
    size_of_batch=size_of_batch_test,
    number_of_classes=number_of_classes,
    do_shuffle=False,
)

keras.backend.clear_session()

network = keras.models.load_model("./Model_" + str(model_id) + ".h5")

image, target = test_data[pattern_batch_id]
the_target = target[pattern_id]

layer_names = ["Conv1", "Pool1", "Conv2", "Pool2", "Flatten", "Full", "Output"]

# Push one pattern through the network, layer by layer:
activity = image[pattern_id : pattern_id + 1, :, :, :]
for layer_id, layer in enumerate(network.layers):
    print(f"Layer {layer_id + 1} ({layer_names[layer_id]})")
    print("Input Shape:")
    print(np.asarray(activity).shape)
    activity = layer(activity)
    print("Output Shape:")
    print(np.asarray(activity).shape)
    print("")

output_6 = activity  # softmax output of the last layer

print("\nEstimation")
print(np.round(output_6.numpy(), 4))
print("Strongest response is at " + str(np.argmax(output_6.numpy())))
print("Correct output is " + str(np.argmax(the_target)))

Extracting weights and biases

Here is one way to extract the weights and biases of the whole network. Alternatively, you can use get_weights from tf.keras.layers.Layer in combination with get_layer of tf.keras.Sequential, as sketched below.
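
A minimal sketch of that per-layer route (assuming a model file produced by keras_learn.py above; the shapes in the comments refer to the Conv1 layer of this network):

from tensorflow import keras

network = keras.models.load_model("./Model_49.h5")

# Per-layer access instead of one flat list:
first_conv = network.get_layer(index=0)
weights, bias = first_conv.get_weights()
print(weights.shape)  # (5, 5, 1, 32): kernel height, kernel width, in, out channels
print(bias.shape)     # (32,)

The flat route over the whole network looks like this: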

from tensorflow import keras

model_id: int = 49

keras.backend.clear_session()

network = keras.models.load_model("./Model_" + str(model_id) + ".h5")

weights_bias = network.get_weights()

counter_layer: int = 0
for i in range(0, len(weights_bias), 2):
    print("Weights " + str(counter_layer) + " weights_bias position: " + str(i) + " =>")
    print(weights_bias[i].shape)
    counter_layer += 1

print("")

counter_layer = 0
for i in range(1, len(weights_bias), 2):
    print("Bias " + str(counter_layer) + " weights_bias position: " + str(i) + " =>")
    print(weights_bias[i].shape)
    counter_layer += 1

Types of layers

A reduced list of the most relevant network layers

|||
|---|---|
|Activation|Applies an activation function to an output.|
|AveragePooling1D|Average pooling for temporal data.|
|AveragePooling2D|Average pooling operation for spatial data.|
|AveragePooling3D|Average pooling operation for 3D data (spatial or spatio-temporal).|
|BatchNormalization|Layer that normalizes its inputs.|
|Conv1D|1D convolution layer (e.g. temporal convolution).|
|Conv2D|2D convolution layer (e.g. spatial convolution over images).|
|Conv3D|3D convolution layer (e.g. spatial convolution over volumes).|
|Dense|Just your regular densely-connected NN layer.|
|Dropout|Applies Dropout to the input.|
|Flatten|Flattens the input. Does not affect the batch size.|
|MaxPooling1D|Max pooling operation for 1D temporal data.|
|MaxPooling2D|Max pooling operation for 2D spatial data.|
|MaxPooling3D|Max pooling operation for 3D data (spatial or spatio-temporal).|
|SpatialDropout1D|Spatial 1D version of Dropout.|
|SpatialDropout2D|Spatial 2D version of Dropout.|
|SpatialDropout3D|Spatial 3D version of Dropout.|
|ZeroPadding1D|Zero-padding layer for 1D input (e.g. temporal sequence).|
|ZeroPadding2D|Zero-padding layer for 2D input (e.g. picture).|
|ZeroPadding3D|Zero-padding layer for 3D data (spatial or spatio-temporal).|
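
A small sketch combining a few of these layers into a stack (illustrative layer sizes, not the network trained above):

from tensorflow import keras

model = keras.Sequential(
    [
        keras.layers.Conv2D(8, (3, 3), activation="relu", input_shape=(28, 28, 1)),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Dropout(0.25),
        keras.layers.Flatten(),
        keras.layers.Dense(10, activation="softmax"),
    ]
)
model.summary()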

Preprocessing layers

A reduced list of the most relevant preprocessing layers

|||
|---|---|
|CenterCrop|A preprocessing layer which crops images.|
|RandomContrast|A preprocessing layer which randomly adjusts contrast during training.|
|RandomCrop|A preprocessing layer which randomly crops images during training.|
|RandomFlip|A preprocessing layer which randomly flips images during training.|
|RandomHeight|A preprocessing layer which randomly varies image height during training.|
|RandomRotation|A preprocessing layer which randomly rotates images during training.|
|RandomTranslation|A preprocessing layer which randomly translates images during training.|
|RandomWidth|A preprocessing layer which randomly varies image width during training.|
|RandomZoom|A preprocessing layer which randomly zooms images during training.|
|Rescaling|A preprocessing layer which rescales input values to a new range.|
|Resizing|A preprocessing layer which resizes images.|
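
These layers can also be called directly on a batch, for instance (a standalone sketch with illustrative sizes):

import numpy as np
from tensorflow import keras

x = np.random.rand(1, 28, 28, 1).astype(np.float32) * 255.0
rescale = keras.layers.Rescaling(scale=1.0 / 255.0)  # map [0, 255] -> [0, 1]
resize = keras.layers.Resizing(height=14, width=14)
print(resize(rescale(x)).shape)  # (1, 14, 14, 1)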

Activation functions

A reduced list of the most relevant activation functions

|||
|---|---|
|hard_sigmoid(…)|Hard sigmoid activation function.|
|relu(…)|Applies the rectified linear unit activation function.|
|sigmoid(…)|Sigmoid activation function, sigmoid(x) = 1 / (1 + exp(-x)).|
|softmax(…)|Softmax converts a vector of values to a probability distribution.|
|softplus(…)|Softplus activation function, softplus(x) = log(exp(x) + 1).|
|softsign(…)|Softsign activation function, softsign(x) = x / (abs(x) + 1).|
|tanh(…)|Hyperbolic tangent activation function.|
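
Called directly, these functions live under keras.activations (a minimal sketch):

import tensorflow as tf
from tensorflow import keras

x = tf.constant([[-2.0, 0.0, 2.0]])
print(keras.activations.relu(x).numpy())     # [[0. 0. 2.]]
print(keras.activations.sigmoid(x).numpy())  # values in (0, 1)
print(keras.activations.softmax(x).numpy())  # each row sums to 1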

Loss functions

A reduced list of the most relevant loss functions

|||
|---|---|
|BinaryCrossentropy|Computes the cross-entropy loss between true labels and predicted labels.|
|CategoricalCrossentropy|Computes the crossentropy loss between the labels and predictions.|
|KLDivergence|Computes Kullback-Leibler divergence loss between y_true and y_pred.|
|MeanAbsoluteError|Computes the mean of absolute difference between labels and predictions.|
|MeanSquaredError|Computes the mean of squares of errors between labels and predictions.|
|Poisson|Computes the Poisson loss between y_true and y_pred.|
|SparseCategoricalCrossentropy|Computes the crossentropy loss between the labels and predictions.|
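
The class form is instantiated and then called on labels and predictions (a minimal sketch; the values are illustrative):

import numpy as np
from tensorflow import keras

y_true = np.array([[0.0, 1.0, 0.0]], dtype=np.float32)  # one-hot label
y_pred = np.array([[0.1, 0.8, 0.1]], dtype=np.float32)  # predicted probabilities
cce = keras.losses.CategoricalCrossentropy()
print(float(cce(y_true, y_pred)))  # ~0.223, i.e. -log(0.8)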

Optimizers

A reduced list of the most relevant optimizers

|||
|---|---|
|Adagrad|Optimizer that implements the Adagrad algorithm.|
|Adam|Optimizer that implements the Adam algorithm.|
|RMSprop|Optimizer that implements the RMSprop algorithm.|
|SGD|Gradient descent (with momentum) optimizer.|
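
Optimizers are instantiated and handed to network.compile(); learning_rate is the parameter you will most often touch (a sketch with illustrative values):

from tensorflow import keras

adam = keras.optimizers.Adam(learning_rate=1e-3)
sgd = keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9)
# e.g. network.compile(loss=..., optimizer=adam, metrics=["accuracy"])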

Metrics

A very reduced list of the most relevant metrics

|||
|---|---|
|Accuracy|Calculates how often predictions equal labels.|
|BinaryAccuracy|Calculates how often predictions match binary labels.|
|BinaryCrossentropy|Computes the crossentropy metric between the labels and predictions.|
|CategoricalAccuracy|Calculates how often predictions match one-hot labels.|
|CategoricalCrossentropy|Computes the crossentropy metric between the labels and predictions.|
|KLDivergence|Computes Kullback-Leibler divergence metric between y_true and y_pred.|
|Mean|Computes the (weighted) mean of the given values.|
|MeanAbsoluteError|Computes the mean absolute error between the labels and predictions.|
|MeanSquaredError|Computes the mean squared error between y_true and y_pred.|
|Poisson|Computes the Poisson metric between y_true and y_pred.|
|Precision|Computes the precision of the predictions with respect to the labels.|
|RootMeanSquaredError|Computes root mean squared error metric between y_true and y_pred.|
|SparseCategoricalAccuracy|Calculates how often predictions match integer labels.|
|SparseCategoricalCrossentropy|Computes the crossentropy metric between the labels and predictions.|
|SparseTopKCategoricalAccuracy|Computes how often integer targets are in the top K predictions.|
|Sum|Computes the (weighted) sum of the given values.|
|TopKCategoricalAccuracy|Computes how often targets are in the top K predictions.|
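
Metrics can also be used standalone via update_state / result (a minimal sketch):

from tensorflow import keras

m = keras.metrics.CategoricalAccuracy()
m.update_state([[0.0, 1.0]], [[0.2, 0.8]])  # one-hot target, prediction
print(float(m.result()))  # 1.0: argmax of the prediction matches the target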

The source code is Open Source and can be found on GitHub.