Tensorflow / Keras – A fast non-introduction

This is a fast overview of how to get an MNIST example running under TF Keras.

If you are just about to start with Tensorflow / Keras (especially for a scientific project), you may want to reconsider using Keras. In that case, please check out (and use) PyTorch.

Questions to David Rotermund

Data loader / Data generator

|||
|---|---|
|keras.utils.Sequence|“Base object for fitting to a sequence of data, such as a dataset.”|
|tf.keras.utils.to_categorical|“Converts a class vector (integers) to binary class matrix.”|
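
As a quick illustration of to_categorical (a minimal standalone sketch, not tied to the MNIST files used below), integer class labels become one-hot rows:

import numpy as np
from tensorflow import keras

# Integer labels -> one-hot matrix with num_classes columns.
labels = np.array([0, 2, 9])
print(keras.utils.to_categorical(labels, num_classes=10))
# [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
#  [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
#  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]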

Basic

DataGenerator_no_dataugmentation.py

from tensorflow import keras
import numpy as np


class DataGenerator(keras.utils.Sequence):
    def __init__(
        self,
        train: bool = True,
        size_of_batch: int = 32,
        number_of_classes: int = 10,
        do_shuffle: bool = True,
    ) -> None:
        super().__init__()

        if train is True:
            self.pattern_storage: np.ndarray = np.load("./train_pattern_storage.npy")
            self.label_storage: np.ndarray = np.load("./train_label_storage.npy")
        else:
            self.pattern_storage = np.load("./test_pattern_storage.npy")
            self.label_storage = np.load("./test_label_storage.npy")

        self.pattern_storage = self.pattern_storage.astype(np.float32)
        self.pattern_storage /= np.max(self.pattern_storage)

        self.dimensions: tuple[int, int] = (
            self.pattern_storage.shape[1],
            self.pattern_storage.shape[2],
        )

        # How many patterns are there?
        self.number_of_pattern: int = self.label_storage.shape[0]

        self.size_of_batch: int = size_of_batch

        self.number_of_classes: int = number_of_classes
        self.do_shuffle: bool = do_shuffle

        if self.pattern_storage.ndim == 3:
            self.number_of_channel: int = 1
        else:
            self.number_of_channel = self.pattern_storage.shape[3]

        self.available_indices: np.ndarray = np.arange(self.number_of_pattern)

        self.on_epoch_end()

    def on_epoch_end(self) -> None:
        self.available_indices = np.arange(self.number_of_pattern)

        if self.do_shuffle is True:
            np.random.shuffle(self.available_indices)

    def __getitem__(self, index: int) -> tuple[np.ndarray, np.ndarray]:
        selected_indices: np.ndarray = self.available_indices[
            index * self.size_of_batch : (index + 1) * self.size_of_batch
        ]
        image, target = self.__data_generation(selected_indices)
        return image, target

    def __data_generation(
        self, list_of_indice: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        image = np.empty(
            (self.size_of_batch, *self.dimensions, self.number_of_channel),
            dtype=np.float32,
        )
        target = np.empty((self.size_of_batch), dtype=int)

        for i in range(0, len(list_of_indice)):
            # list_of_indice already contains the (shuffled) pattern indices
            # selected in __getitem__; use them directly.
            if self.pattern_storage.ndim == 3:
                image[i, :, :, 0] = self.pattern_storage[list_of_indice[i], :, :]
            else:
                image[i, :, :, :] = self.pattern_storage[list_of_indice[i], :, :, :]

            target[i] = self.label_storage[list_of_indice[i]]

        return image, keras.utils.to_categorical(
            target, num_classes=self.number_of_classes
        )

    def __len__(self):
        return int(np.floor(self.number_of_pattern / self.size_of_batch))


if __name__ == "__main__":
    pass
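
A minimal usage sketch (assuming the class above is saved as DataGenerator_no_dataugmentation.py and the four .npy files are in the working directory):

from DataGenerator_no_dataugmentation import DataGenerator

train_data = DataGenerator(train=True, size_of_batch=32)
image, target = train_data[0]  # first mini-batch via __getitem__
print(image.shape)   # e.g. (32, 28, 28, 1) for MNIST
print(target.shape)  # (32, 10): one-hot labels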

With data augmentation

You can add further preprocessing layers to the pre-processing chain self.data_augmentation; they are then applied to the input before it is handed to the network.
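
For example, a training chain extended by a small random rotation might look like this (a sketch; the layer parameters are illustrative, not tuned):

from tensorflow import keras

data_augmentation = keras.Sequential(
    [
        keras.layers.RandomCrop(height=24, width=24),
        keras.layers.RandomRotation(factor=0.05),  # up to +/- 5% of a full turn
    ]
)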

DataGenerator.py

from tensorflow import keras
import numpy as np


class DataGenerator(keras.utils.Sequence):
    def __init__(
        self,
        train: bool = True,
        size_of_batch: int = 32,
        number_of_classes: int = 10,
        do_shuffle: bool = True,
    ) -> None:
        super().__init__()

        if train is True:
            self.pattern_storage: np.ndarray = np.load("./train_pattern_storage.npy")
            self.label_storage: np.ndarray = np.load("./train_label_storage.npy")
        else:
            self.pattern_storage = np.load("./test_pattern_storage.npy")
            self.label_storage = np.load("./test_label_storage.npy")

        self.pattern_storage = self.pattern_storage.astype(np.float32)
        self.pattern_storage /= np.max(self.pattern_storage)

        self.dimensions: tuple[int, int] = (
            self.pattern_storage.shape[1],
            self.pattern_storage.shape[2],
        )
        reduction: tuple[int, int] = (4, 4)

        if train is True:
            self.data_augmentation = keras.Sequential(
                [
                    keras.layers.RandomCrop(
                        height=self.dimensions[0] - reduction[0],
                        width=self.dimensions[1] - reduction[1],
                    ),
                ]
            )
        else:
            self.data_augmentation = keras.Sequential(
                [
                    keras.layers.CenterCrop(
                        height=self.dimensions[0] - reduction[0],
                        width=self.dimensions[1] - reduction[1],
                    ),
                ]
            )

        # How many patterns are there?
        self.number_of_pattern: int = self.label_storage.shape[0]

        self.size_of_batch: int = size_of_batch

        self.number_of_classes: int = number_of_classes
        self.do_shuffle: bool = do_shuffle

        if self.pattern_storage.ndim == 3:
            self.number_of_channel: int = 1
        else:
            self.number_of_channel = self.pattern_storage.shape[3]

        self.available_indices: np.ndarray = np.arange(self.number_of_pattern)

        self.on_epoch_end()

    def on_epoch_end(self) -> None:
        self.available_indices = np.arange(self.number_of_pattern)

        if self.do_shuffle is True:
            np.random.shuffle(self.available_indices)

    def __getitem__(self, index: int) -> tuple[np.ndarray, np.ndarray]:
        selected_indices: np.ndarray = self.available_indices[
            index * self.size_of_batch : (index + 1) * self.size_of_batch
        ]
        image, target = self.__data_generation(selected_indices)
        return image, target

    def __data_generation(
        self, list_of_indice: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        image = np.empty(
            (self.size_of_batch, *self.dimensions, self.number_of_channel),
            dtype=np.float32,
        )
        target = np.empty((self.size_of_batch), dtype=int)

        for i in range(0, len(list_of_indice)):
            # list_of_indice already contains the (shuffled) pattern indices
            # selected in __getitem__; use them directly.
            if self.pattern_storage.ndim == 3:
                image[i, :, :, 0] = self.pattern_storage[list_of_indice[i], :, :]
            else:
                image[i, :, :, :] = self.pattern_storage[list_of_indice[i], :, :, :]

            target[i] = self.label_storage[list_of_indice[i]]

        image = self.data_augmentation(image)
        return image, keras.utils.to_categorical(
            target, num_classes=self.number_of_classes
        )

    def __len__(self):
        return int(np.floor(self.number_of_pattern / self.size_of_batch))


if __name__ == "__main__":
    pass

Train an example MNIST network

|||
|---|---|
|tf.keras.backend.clear_session|“Resets all state generated by Keras.”|
|tf.keras.Sequential|“Sequential groups a linear stack of layers into a tf.keras.Model.”|
|network.add()|“Adds a layer instance on top of the layer stack.”|
|tf.keras.layers.Conv2D|“2D convolution layer (e.g. spatial convolution over images).”|
|tf.keras.layers.MaxPool2D|“Max pooling operation for 2D spatial data.”|
|tf.keras.layers.Flatten|“Flattens the input. Does not affect the batch size.”|
|tf.keras.layers.Dense|“Just your regular densely-connected NN layer.”|
|network.compile()|“Configures the model for training.”|
|tf.keras.metrics.categorical_crossentropy|“Computes the categorical crossentropy loss.”|
|tf.keras.optimizers.Adam|“Optimizer that implements the Adam algorithm.”|
|network.fit()|“Trains the model for a fixed number of epochs (iterations on a dataset).”|
|network.summary()|“Prints a string summary of the network.”|
|network.save()|“Saves the model to Tensorflow SavedModel or a single HDF5 file.”|

Parameters for the layers:

|||
|---|---|
|padding|“One of "valid", "same" or "causal" (case-insensitive). "valid" means no padding. "same" results in padding with zeros evenly to the left/right or up/down of the input such that output has the same height/width dimension as the input. "causal" results in causal (dilated) convolutions, e.g. output[t] does not depend on input[t+1:].”|
|use_bias|“Boolean, whether the layer uses a bias vector.”|
|activation|“Activation function to use. If you don’t specify anything, no activation is applied (see keras.activations).”|
|data_format|“A string, one of channels_last (default) or channels_first.”|
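
As a quick sanity check of the padding parameter, here is a standalone sketch (the sizes are illustrative):

import numpy as np
from tensorflow import keras

# A 5x5 "valid" convolution shrinks a 28x28 input to 24x24;
# "same" keeps the spatial size.
x = np.zeros((1, 28, 28, 1), dtype=np.float32)
conv_valid = keras.layers.Conv2D(8, kernel_size=(5, 5), padding="valid")
conv_same = keras.layers.Conv2D(8, kernel_size=(5, 5), padding="same")
print(conv_valid(x).shape)  # (1, 24, 24, 8)
print(conv_same(x).shape)   # (1, 28, 28, 8)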

keras_learn.py

from tensorflow import keras
from DataGenerator import DataGenerator

epoch_max: int = 50
number_of_classes: int = 10
size_of_batch_train: int = 100

train_data = DataGenerator(
    train=True,
    size_of_batch=size_of_batch_train,
    number_of_classes=number_of_classes,
    do_shuffle=True,
)

number_of_channels: int = train_data.number_of_channel
input_dimensions = train_data.dimensions
number_of_pattern_train = train_data.number_of_pattern

# The DataGenerator's augmentation chain crops the images (see reduction in
# DataGenerator.py), so the network input is smaller than the raw patterns:
reduction: tuple[int, int] = (4, 4)
input_dimensions = (
    input_dimensions[0] - reduction[0],
    input_dimensions[1] - reduction[1],
)

number_of_output_channels_conv1: int = 32
number_of_output_channels_conv2: int = 64
number_of_neurons_flatten1: int = 1024

kernel_size_conv1: tuple[int, int] = (5, 5)
kernel_size_pool1: tuple[int, int] = (2, 2)
kernel_size_conv2: tuple[int, int] = (5, 5)
kernel_size_pool2: tuple[int, int] = (2, 2)

stride_conv1: tuple[int, int] = (1, 1)
stride_pool1: tuple[int, int] = (2, 2)
stride_conv2: tuple[int, int] = (1, 1)
stride_pool2: tuple[int, int] = (2, 2)


keras.backend.clear_session()

network = keras.Sequential()

# Conv 1
network.add(
    keras.layers.Conv2D(
        number_of_output_channels_conv1,
        kernel_size=kernel_size_conv1,
        activation="relu",
        input_shape=(input_dimensions[0], input_dimensions[1], number_of_channels),
        padding="valid",
        strides=stride_conv1,
        data_format="channels_last",
        use_bias=True,
    )
)

# Pool 1
network.add(
    keras.layers.MaxPooling2D(
        pool_size=kernel_size_pool1,
        padding="valid",
        strides=stride_pool1,
        data_format="channels_last",
    )
)

# Conv 2
network.add(
    keras.layers.Conv2D(
        number_of_output_channels_conv2,
        kernel_size=kernel_size_conv2,
        activation="relu",
        padding="valid",
        strides=stride_conv2,
        data_format="channels_last",
        use_bias=True,
    )
)

# Pool 2
network.add(
    keras.layers.MaxPooling2D(
        pool_size=kernel_size_pool2,
        padding="valid",
        strides=stride_pool2,
        data_format="channels_last",
    )
)

# Flatten
network.add(keras.layers.Flatten(data_format="channels_last"))

# Full layer
network.add(
    keras.layers.Dense(number_of_neurons_flatten1, activation="relu", use_bias=True)
)

# Output layer
network.add(keras.layers.Dense(number_of_classes, activation="softmax"))

network.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)

for epoch_id in range(0, epoch_max):
    print(f"Epoch: {epoch_id} of {epoch_max - 1}")
    network.fit(x=train_data)

    network.summary()
    network.save("Model_" + str(epoch_id) + ".h5")

Test the example network performance

|||
|---|---|
|tf.keras.models.load_model|“Loads a model saved via model.save().”|
|network.evaluate()|“Returns the loss value & metrics values for the model in test mode.”|

keras_test.py

from tensorflow import keras
from DataGenerator import DataGenerator

number_of_classes: int = 10
size_of_batch_test: int = 100
model_id: int = 49

test_data = DataGenerator(
    train=False,
    size_of_batch=size_of_batch_test,
    number_of_classes=number_of_classes,
    do_shuffle=False,
)

keras.backend.clear_session()

network = keras.models.load_model("./Model_" + str(model_id) + ".h5")

test_loss, test_acc = network.evaluate(x=test_data)

print(f"Correct: {test_acc * 100.0:.2f}%")

How to extract the activities from the network

from tensorflow import keras
from DataGenerator import DataGenerator
import numpy as np

number_of_classes: int = 10
size_of_batch_test: int = 100
model_id: int = 49
pattern_batch_id: int = 0
pattern_id: int = 42

test_data = DataGenerator(
    train=False,
    size_of_batch=size_of_batch_test,
    number_of_classes=number_of_classes,
    do_shuffle=False,
)

keras.backend.clear_session()

network = keras.models.load_model("./Model_" + str(model_id) + ".h5")

image, target = test_data[pattern_batch_id]
the_target = target[pattern_id]

layer_names = ["Conv1", "Pool1", "Conv2", "Pool2", "Flatten", "Full", "Output"]

# Push one pattern through the network, layer by layer:
activity = image[pattern_id : pattern_id + 1, :, :, :]
for layer_id, layer in enumerate(network.layers):
    print(f"Layer {layer_id + 1} ({layer_names[layer_id]})")
    print("Input Shape:")
    print(np.asarray(activity).shape)
    activity = layer(activity)
    print("Output Shape:")
    print(np.asarray(activity).shape)
    print("")

output_6 = activity  # softmax output of the last layer

print("\nEstimation")
print(np.round(output_6.numpy(), 4))
print("Strongest response is at " + str(np.argmax(output_6.numpy())))
print("Correct output is " + str(np.argmax(the_target)))

Extracting weights and biases

Here is one way to extract the weights and biases of the whole network. Alternatively, you can use get_weights from tf.keras.layers.Layer in combination with get_layer of tf.keras.Sequential, as sketched below.
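
A minimal sketch of that per-layer route (assuming a model file produced by keras_learn.py above; the shapes in the comments refer to the Conv1 layer of this network):

from tensorflow import keras

network = keras.models.load_model("./Model_49.h5")

# Per-layer access instead of one flat list:
first_conv = network.get_layer(index=0)
weights, bias = first_conv.get_weights()
print(weights.shape)  # (5, 5, 1, 32): kernel height, kernel width, in, out channels
print(bias.shape)     # (32,)

The flat route over the whole network looks like this: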

from tensorflow import keras

model_id: int = 49

keras.backend.clear_session()

network = keras.models.load_model("./Model_" + str(model_id) + ".h5")

weights_bias = network.get_weights()

counter_layer: int = 0
for i in range(0, len(weights_bias), 2):
    print("Weights " + str(counter_layer) + " weights_bias position: " + str(i) + " =>")
    print(weights_bias[i].shape)
    counter_layer += 1

print("")

counter_layer = 0
for i in range(1, len(weights_bias), 2):
    print("Bias " + str(counter_layer) + " weights_bias position: " + str(i) + " =>")
    print(weights_bias[i].shape)
    counter_layer += 1

Types of layers

A reduced list of the most relevant network layers

|||
|---|---|
|Activation|Applies an activation function to an output.|
|AveragePooling1D|Average pooling for temporal data.|
|AveragePooling2D|Average pooling operation for spatial data.|
|AveragePooling3D|Average pooling operation for 3D data (spatial or spatio-temporal).|
|BatchNormalization|Layer that normalizes its inputs.|
|Conv1D|1D convolution layer (e.g. temporal convolution).|
|Conv2D|2D convolution layer (e.g. spatial convolution over images).|
|Conv3D|3D convolution layer (e.g. spatial convolution over volumes).|
|Dense|Just your regular densely-connected NN layer.|
|Dropout|Applies Dropout to the input.|
|Flatten|Flattens the input. Does not affect the batch size.|
|MaxPooling1D|Max pooling operation for 1D temporal data.|
|MaxPooling2D|Max pooling operation for 2D spatial data.|
|MaxPooling3D|Max pooling operation for 3D data (spatial or spatio-temporal).|
|SpatialDropout1D|Spatial 1D version of Dropout.|
|SpatialDropout2D|Spatial 2D version of Dropout.|
|SpatialDropout3D|Spatial 3D version of Dropout.|
|ZeroPadding1D|Zero-padding layer for 1D input (e.g. temporal sequence).|
|ZeroPadding2D|Zero-padding layer for 2D input (e.g. picture).|
|ZeroPadding3D|Zero-padding layer for 3D data (spatial or spatio-temporal).|
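
A small sketch combining a few of these layers into a stack (illustrative layer sizes, not the network trained above):

from tensorflow import keras

model = keras.Sequential(
    [
        keras.layers.Conv2D(8, (3, 3), activation="relu", input_shape=(28, 28, 1)),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Dropout(0.25),
        keras.layers.Flatten(),
        keras.layers.Dense(10, activation="softmax"),
    ]
)
model.summary()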

Preprocessing layers

A reduced list of the most relevant preprocessing layers

|||
|---|---|
|CenterCrop|A preprocessing layer which crops images.|
|RandomContrast|A preprocessing layer which randomly adjusts contrast during training.|
|RandomCrop|A preprocessing layer which randomly crops images during training.|
|RandomFlip|A preprocessing layer which randomly flips images during training.|
|RandomHeight|A preprocessing layer which randomly varies image height during training.|
|RandomRotation|A preprocessing layer which randomly rotates images during training.|
|RandomTranslation|A preprocessing layer which randomly translates images during training.|
|RandomWidth|A preprocessing layer which randomly varies image width during training.|
|RandomZoom|A preprocessing layer which randomly zooms images during training.|
|Rescaling|A preprocessing layer which rescales input values to a new range.|
|Resizing|A preprocessing layer which resizes images.|
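
These layers can also be called directly on a batch, for instance (a standalone sketch with illustrative sizes):

import numpy as np
from tensorflow import keras

x = np.random.rand(1, 28, 28, 1).astype(np.float32) * 255.0
rescale = keras.layers.Rescaling(scale=1.0 / 255.0)  # map [0, 255] -> [0, 1]
resize = keras.layers.Resizing(height=14, width=14)
print(resize(rescale(x)).shape)  # (1, 14, 14, 1)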

Activation functions

A reduced list of the most relevant activation functions

|||
|---|---|
|hard_sigmoid(…)|Hard sigmoid activation function.|
|relu(…)|Applies the rectified linear unit activation function.|
|sigmoid(…)|Sigmoid activation function, sigmoid(x) = 1 / (1 + exp(-x)).|
|softmax(…)|Softmax converts a vector of values to a probability distribution.|
|softplus(…)|Softplus activation function, softplus(x) = log(exp(x) + 1).|
|softsign(…)|Softsign activation function, softsign(x) = x / (abs(x) + 1).|
|tanh(…)|Hyperbolic tangent activation function.|
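
Called directly, these functions live under keras.activations (a minimal sketch):

import tensorflow as tf
from tensorflow import keras

x = tf.constant([[-2.0, 0.0, 2.0]])
print(keras.activations.relu(x).numpy())     # [[0. 0. 2.]]
print(keras.activations.sigmoid(x).numpy())  # values in (0, 1)
print(keras.activations.softmax(x).numpy())  # each row sums to 1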

Loss functions

A reduced list of the most relevant loss functions

|||
|---|---|
|BinaryCrossentropy|Computes the cross-entropy loss between true labels and predicted labels.|
|CategoricalCrossentropy|Computes the crossentropy loss between the labels and predictions.|
|KLDivergence|Computes Kullback-Leibler divergence loss between y_true and y_pred.|
|MeanAbsoluteError|Computes the mean of absolute difference between labels and predictions.|
|MeanSquaredError|Computes the mean of squares of errors between labels and predictions.|
|Poisson|Computes the Poisson loss between y_true and y_pred.|
|SparseCategoricalCrossentropy|Computes the crossentropy loss between the labels and predictions.|
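
The class form is instantiated and then called on labels and predictions (a minimal sketch; the values are illustrative):

import numpy as np
from tensorflow import keras

y_true = np.array([[0.0, 1.0, 0.0]], dtype=np.float32)  # one-hot label
y_pred = np.array([[0.1, 0.8, 0.1]], dtype=np.float32)  # predicted probabilities
cce = keras.losses.CategoricalCrossentropy()
print(float(cce(y_true, y_pred)))  # ~0.223, i.e. -log(0.8)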

Optimizers

A reduced list of the most relevant optimizers

|||
|---|---|
|Adagrad|Optimizer that implements the Adagrad algorithm.|
|Adam|Optimizer that implements the Adam algorithm.|
|RMSprop|Optimizer that implements the RMSprop algorithm.|
|SGD|Gradient descent (with momentum) optimizer.|
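
Optimizers are instantiated and handed to network.compile(); learning_rate is the parameter you will most often touch (a sketch with illustrative values):

from tensorflow import keras

adam = keras.optimizers.Adam(learning_rate=1e-3)
sgd = keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9)
# e.g. network.compile(loss=..., optimizer=adam, metrics=["accuracy"])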

Metrics

A very reduced list of the most relevant metrics

|||
|---|---|
|Accuracy|Calculates how often predictions equal labels.|
|BinaryAccuracy|Calculates how often predictions match binary labels.|
|BinaryCrossentropy|Computes the crossentropy metric between the labels and predictions.|
|CategoricalAccuracy|Calculates how often predictions match one-hot labels.|
|CategoricalCrossentropy|Computes the crossentropy metric between the labels and predictions.|
|KLDivergence|Computes Kullback-Leibler divergence metric between y_true and y_pred.|
|Mean|Computes the (weighted) mean of the given values.|
|MeanAbsoluteError|Computes the mean absolute error between the labels and predictions.|
|MeanSquaredError|Computes the mean squared error between y_true and y_pred.|
|Poisson|Computes the Poisson metric between y_true and y_pred.|
|Precision|Computes the precision of the predictions with respect to the labels.|
|RootMeanSquaredError|Computes root mean squared error metric between y_true and y_pred.|
|SparseCategoricalAccuracy|Calculates how often predictions match integer labels.|
|SparseCategoricalCrossentropy|Computes the crossentropy metric between the labels and predictions.|
|SparseTopKCategoricalAccuracy|Computes how often integer targets are in the top K predictions.|
|Sum|Computes the (weighted) sum of the given values.|
|TopKCategoricalAccuracy|Computes how often targets are in the top K predictions.|
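
Metrics can also be used standalone via update_state / result (a minimal sketch):

from tensorflow import keras

m = keras.metrics.CategoricalAccuracy()
m.update_state([[0.0, 1.0]], [[0.2, 0.8]])  # one-hot target, prediction
print(float(m.result()))  # 1.0: argmax of the prediction matches the target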

The source code is Open Source and can be found on GitHub.