Tensorflow / Keras – A fast non-introduction
Top
This is a fast overview of how to get an MNIST example running under TF Keras.
If you are just starting out with Tensorflow / Keras (especially for a scientific project), you may want to reconsider using Keras. In that case, please check out (& use) PyTorch instead.
Questions to David Rotermund
Data loader / Data generator
keras.utils.Sequence | “Base object for fitting to a sequence of data, such as a dataset.” |
tf.keras.utils.to_categorical | “Converts a class vector (integers) to binary class matrix.” |
Basic
DataGenerator_no_dataugmentation.py
from tensorflow import keras
import numpy as np
class DataGenerator(keras.utils.Sequence):
    """Serve batches of normalized patterns and one-hot labels to Keras.

    Patterns and labels are read from ``.npy`` files in the current working
    directory (``train_*`` or ``test_*`` depending on ``train``). The patterns
    are converted to ``float32`` and divided by their global maximum.

    Args:
        train: ``True`` loads the training files, anything else the test files.
        size_of_batch: Number of patterns per batch.
        number_of_classes: Width of the one-hot encoded label vectors.
        do_shuffle: Draw a new random pattern order on every
            ``on_epoch_end`` call (one such call happens during construction).
    """

    def __init__(
        self,
        train: bool = True,
        size_of_batch: int = 32,
        number_of_classes: int = 10,
        do_shuffle: bool = True,
    ) -> None:
        super().__init__()

        if train is True:
            self.pattern_storage: np.ndarray = np.load("./train_pattern_storage.npy")
            self.label_storage: np.ndarray = np.load("./train_label_storage.npy")
        else:
            self.pattern_storage = np.load("./test_pattern_storage.npy")
            self.label_storage = np.load("./test_label_storage.npy")

        # Scale the patterns by their global maximum. An all-zero data set is
        # left untouched instead of being turned into NaNs by a 0 / 0.
        self.pattern_storage = self.pattern_storage.astype(np.float32)
        peak_value = np.max(self.pattern_storage)
        if peak_value != 0:
            self.pattern_storage /= peak_value

        # Axes 1 and 2 of the pattern storage (axis 0 enumerates the patterns).
        self.dimensions: tuple[int, int] = (
            self.pattern_storage.shape[1],
            self.pattern_storage.shape[2],
        )

        # One label per pattern, so the label count is the pattern count.
        self.number_of_pattern: int = self.label_storage.shape[0]

        self.size_of_batch: int = size_of_batch
        self.number_of_classes: int = number_of_classes
        self.do_shuffle: bool = do_shuffle

        # A 3D storage has no explicit channel axis; treat it as one channel.
        if self.pattern_storage.ndim == 3:
            self.number_of_channel: int = 1
        else:
            self.number_of_channel = self.pattern_storage.shape[3]

        self.available_indices: np.ndarray = np.arange(self.number_of_pattern)
        self.on_epoch_end()

    def on_epoch_end(self) -> None:
        """Reset the pattern order and reshuffle it if shuffling is enabled."""
        self.available_indices = np.arange(self.number_of_pattern)
        if self.do_shuffle is True:
            np.random.shuffle(self.available_indices)

    def __getitem__(self, index: int) -> tuple[np.ndarray, np.ndarray]:
        """Return batch number ``index`` as ``(images, one_hot_targets)``."""
        selected_indices: np.ndarray = self.available_indices[
            index * self.size_of_batch : (index + 1) * self.size_of_batch
        ]
        return self.__data_generation(selected_indices)

    def __data_generation(
        self, list_of_indice: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        """Gather the patterns and labels stored at ``list_of_indice``.

        ``list_of_indice`` already holds positions inside the storage arrays
        (it is a slice of ``available_indices``), so it is used directly and
        not routed through ``available_indices`` a second time.

        Returns:
            ``images`` with shape ``(len(list_of_indice), height, width,
            channels)`` in ``float32`` and the matching one-hot targets.
        """
        # Fancy indexing copies exactly the selected rows, so the batch never
        # contains uninitialised memory, even for a short index list.
        image = self.pattern_storage[list_of_indice]
        if image.ndim == 3:
            # Add the missing channel axis: (batch, h, w) -> (batch, h, w, 1).
            image = image[..., np.newaxis]

        target = np.asarray(self.label_storage[list_of_indice], dtype=int).reshape(-1)

        return image, keras.utils.to_categorical(
            target, num_classes=self.number_of_classes
        )

    def __len__(self) -> int:
        """Number of complete batches per epoch (a trailing partial batch is dropped)."""
        return self.number_of_pattern // self.size_of_batch


if __name__ == "__main__":
    pass
With data augmentation
You can add further preprocessing layers to the pre-processing chain self.data_augmentation; they are then applied to the input before it is given to the network.
from tensorflow import keras
import numpy as np
class DataGenerator(keras.utils.Sequence):
    """Serve cropped batches of normalized patterns and one-hot labels to Keras.

    Patterns and labels are read from ``.npy`` files in the current working
    directory (``train_*`` or ``test_*`` depending on ``train``). The patterns
    are converted to ``float32`` and divided by their global maximum. Every
    batch is passed through ``self.data_augmentation`` before it is returned:
    a ``RandomCrop`` for training data, a ``CenterCrop`` for test data.

    Note that ``dimensions`` is the size of the stored patterns, while the
    batches handed out have the smaller size ``output_dimensions``.

    Args:
        train: ``True`` loads the training files, anything else the test files.
        size_of_batch: Number of patterns per batch.
        number_of_classes: Width of the one-hot encoded label vectors.
        do_shuffle: Draw a new random pattern order on every
            ``on_epoch_end`` call (one such call happens during construction).
    """

    def __init__(
        self,
        train: bool = True,
        size_of_batch: int = 32,
        number_of_classes: int = 10,
        do_shuffle: bool = True,
    ) -> None:
        super().__init__()

        # Remembered so the augmentation chain can be run in the right mode.
        self.is_train: bool = train is True

        if self.is_train:
            self.pattern_storage: np.ndarray = np.load("./train_pattern_storage.npy")
            self.label_storage: np.ndarray = np.load("./train_label_storage.npy")
        else:
            self.pattern_storage = np.load("./test_pattern_storage.npy")
            self.label_storage = np.load("./test_label_storage.npy")

        # Scale the patterns by their global maximum. An all-zero data set is
        # left untouched instead of being turned into NaNs by a 0 / 0.
        self.pattern_storage = self.pattern_storage.astype(np.float32)
        peak_value = np.max(self.pattern_storage)
        if peak_value != 0:
            self.pattern_storage /= peak_value

        # Axes 1 and 2 of the pattern storage (axis 0 enumerates the patterns).
        self.dimensions: tuple[int, int] = (
            self.pattern_storage.shape[1],
            self.pattern_storage.shape[2],
        )

        # The crop removes this many pixels along height and width.
        reduction: tuple[int, int] = (4, 4)
        # Size of the patterns after cropping, i.e. what a network fed by this
        # generator actually receives.
        self.output_dimensions: tuple[int, int] = (
            self.dimensions[0] - reduction[0],
            self.dimensions[1] - reduction[1],
        )

        # Further preprocessing layers can be appended to these lists.
        if self.is_train:
            self.data_augmentation = keras.Sequential(
                [
                    keras.layers.RandomCrop(
                        height=self.output_dimensions[0],
                        width=self.output_dimensions[1],
                    ),
                ]
            )
        else:
            self.data_augmentation = keras.Sequential(
                [
                    keras.layers.CenterCrop(
                        height=self.output_dimensions[0],
                        width=self.output_dimensions[1],
                    ),
                ]
            )

        # One label per pattern, so the label count is the pattern count.
        self.number_of_pattern: int = self.label_storage.shape[0]

        self.size_of_batch: int = size_of_batch
        self.number_of_classes: int = number_of_classes
        self.do_shuffle: bool = do_shuffle

        # A 3D storage has no explicit channel axis; treat it as one channel.
        if self.pattern_storage.ndim == 3:
            self.number_of_channel: int = 1
        else:
            self.number_of_channel = self.pattern_storage.shape[3]

        self.available_indices: np.ndarray = np.arange(self.number_of_pattern)
        self.on_epoch_end()

    def on_epoch_end(self) -> None:
        """Reset the pattern order and reshuffle it if shuffling is enabled."""
        self.available_indices = np.arange(self.number_of_pattern)
        if self.do_shuffle is True:
            np.random.shuffle(self.available_indices)

    def __getitem__(self, index: int) -> tuple[np.ndarray, np.ndarray]:
        """Return batch number ``index`` as ``(images, one_hot_targets)``."""
        selected_indices: np.ndarray = self.available_indices[
            index * self.size_of_batch : (index + 1) * self.size_of_batch
        ]
        return self.__data_generation(selected_indices)

    def __data_generation(
        self, list_of_indice: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        """Gather, crop and label the patterns stored at ``list_of_indice``.

        ``list_of_indice`` already holds positions inside the storage arrays
        (it is a slice of ``available_indices``), so it is used directly and
        not routed through ``available_indices`` a second time.

        Returns:
            The cropped image batch as returned by ``self.data_augmentation``
            (presumably a backend tensor rather than a NumPy array - verify
            for the Keras version in use) and the matching one-hot targets.
        """
        # Fancy indexing copies exactly the selected rows, so the batch never
        # contains uninitialised memory, even for a short index list.
        image = self.pattern_storage[list_of_indice]
        if image.ndim == 3:
            # Add the missing channel axis: (batch, h, w) -> (batch, h, w, 1).
            image = image[..., np.newaxis]

        target = np.asarray(self.label_storage[list_of_indice], dtype=int).reshape(-1)

        # The chain is called by hand, outside of fit(), so the training mode
        # is passed explicitly; random preprocessing layers only randomise
        # when they run in training mode.
        image = self.data_augmentation(image, training=self.is_train)

        return image, keras.utils.to_categorical(
            target, num_classes=self.number_of_classes
        )

    def __len__(self) -> int:
        """Number of complete batches per epoch (a trailing partial batch is dropped)."""
        return self.number_of_pattern // self.size_of_batch


if __name__ == "__main__":
    pass
Train an example MNIST network
tf.keras.backend.clear_session | “Resets all state generated by Keras.” |
tf.keras.Sequential | “Sequential groups a linear stack of layers into a tf.keras.Model.” |
network.add() | “Adds a layer instance on top of the layer stack.” |
tf.keras.layers.Conv2D | “2D convolution layer (e.g. spatial convolution over images).” |
tf.keras.layers.MaxPool2D | “Max pooling operation for 2D spatial data.” |
tf.keras.layers.Flatten | “Flattens the input. Does not affect the batch size.” |
tf.keras.layers.Dense | “Just your regular densely-connected NN layer.” |
network.compile() | “Configures the model for training.” |
tf.keras.losses.categorical_crossentropy | “Computes the categorical crossentropy loss.” |
tf.keras.optimizers.Adam | “Optimizer that implements the Adam algorithm.” |
network.fit() | “Trains the model for a fixed number of epochs (iterations on a dataset).” |
network.summary() | “Prints a string summary of the network.” |
network.save() | “Saves the model to Tensorflow SavedModel or a single HDF5 file.” |
Parameters for the layers:
padding | “One of “valid”, “same” or “causal” (case-insensitive). “valid” means no padding. “same” results in padding with zeros evenly to the left/right or up/down of the input such that output has the same height/width dimension as the input. “causal” results in causal (dilated) convolutions, e.g. output[t] does not depend on input[t+1:].” |
use_bias | “Boolean, whether the layer uses a bias vector.” |
activation | “Activation function to use. If you don’t specify anything, no activation is applied (see keras.activations).” |
data_format | “A string, one of channels_last (default) or channels_first.” |
from tensorflow import keras
from DataGenerator import DataGenerator
# Train a small convolutional network (2x conv/pool, 2x dense) on the data
# served by DataGenerator and save a snapshot of the model after every epoch.

# --- Training parameters ---------------------------------------------------
epoch_max: int = 50
number_of_classes: int = 10
size_of_batch_train: int = 100

train_data = DataGenerator(
    train=True,
    size_of_batch=size_of_batch_train,
    number_of_classes=number_of_classes,
    do_shuffle=True,
)

# The input layer is sized from what the data generator reports.
number_of_channels: int = train_data.number_of_channel
input_dimensions = train_data.dimensions
number_of_pattern_train = train_data.number_of_pattern

# --- Network parameters ----------------------------------------------------
number_of_output_channels_conv1: int = 32
number_of_output_channels_conv2: int = 64
number_of_neurons_flatten1: int = 1024

kernel_size_conv1: tuple[int, int] = (5, 5)
kernel_size_pool1: tuple[int, int] = (2, 2)
kernel_size_conv2: tuple[int, int] = (5, 5)
kernel_size_pool2: tuple[int, int] = (2, 2)

stride_conv1: tuple[int, int] = (1, 1)
stride_pool1: tuple[int, int] = (2, 2)
stride_conv2: tuple[int, int] = (1, 1)
stride_pool2: tuple[int, int] = (2, 2)

# --- Network definition ----------------------------------------------------
keras.backend.clear_session()

network = keras.Sequential()

# Conv 1 (the only layer that is told the input shape)
network.add(
    keras.layers.Conv2D(
        number_of_output_channels_conv1,
        kernel_size=kernel_size_conv1,
        activation="relu",
        input_shape=(input_dimensions[0], input_dimensions[1], number_of_channels),
        padding="valid",
        strides=stride_conv1,
        data_format="channels_last",
        use_bias=True,
    )
)

# Pool 1
network.add(
    keras.layers.MaxPooling2D(
        pool_size=kernel_size_pool1,
        padding="valid",
        strides=stride_pool1,
        data_format="channels_last",
    )
)

# Conv 2
network.add(
    keras.layers.Conv2D(
        number_of_output_channels_conv2,
        kernel_size=kernel_size_conv2,
        activation="relu",
        padding="valid",
        strides=stride_conv2,
        data_format="channels_last",
        use_bias=True,
    )
)

# Pool 2
network.add(
    keras.layers.MaxPooling2D(
        pool_size=kernel_size_pool2,
        padding="valid",
        strides=stride_pool2,
        data_format="channels_last",
    )
)

# Flatten
network.add(keras.layers.Flatten(data_format="channels_last"))

# Full layer
network.add(
    keras.layers.Dense(number_of_neurons_flatten1, activation="relu", use_bias=True)
)

# Output layer
network.add(keras.layers.Dense(number_of_classes, activation="softmax"))

network.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)

# The architecture does not change during training, so it is printed once
# here instead of once per epoch.
network.summary()

# --- Training --------------------------------------------------------------
# fit() is called once per epoch so that a model snapshot can be written
# after each of them (Model_0.h5 ... Model_<epoch_max - 1>.h5).
for epoch_id in range(epoch_max):
    print(f"Epoch: {epoch_id} of {epoch_max - 1}")
    network.fit(x=train_data)
    network.save(f"Model_{epoch_id}.h5")
Test the example network performance
tf.keras.models.load_model | “Loads a model saved via model.save().” |
network.evaluate() | “Returns the loss value & metrics values for the model in test mode.” |
from tensorflow import keras
from DataGenerator import DataGenerator
# Load one saved model snapshot and report its accuracy on the test data.

# Settings: which snapshot to load and how the test data is batched.
model_id: int = 49
size_of_batch_test: int = 100
number_of_classes: int = 10

# Test patterns, served in their stored order.
test_data = DataGenerator(
    do_shuffle=False,
    number_of_classes=number_of_classes,
    size_of_batch=size_of_batch_test,
    train=False,
)

keras.backend.clear_session()

model_file = "./Model_" + str(model_id) + ".h5"
network = keras.models.load_model(model_file)

# evaluate() yields the loss followed by the compiled metric (accuracy).
evaluation_result = network.evaluate(x=test_data)
test_loss, test_acc = evaluation_result

print(f"Correct: {test_acc * 100.0:.2f}%")
How to extract the activities from the network
from tensorflow import keras
from DataGenerator import DataGenerator
import numpy as np
# Push a single test pattern through the saved network layer by layer, keep
# the activity of every layer and print the shapes along the way.

number_of_classes: int = 10
size_of_batch_test: int = 100
model_id: int = 49

# Which pattern to inspect: batch number and position inside that batch.
pattern_batch_id: int = 0
pattern_id: int = 42

test_data = DataGenerator(
    train=False,
    size_of_batch=size_of_batch_test,
    number_of_classes=number_of_classes,
    do_shuffle=False,
)

keras.backend.clear_session()

network = keras.models.load_model("./Model_" + str(model_id) + ".h5")

image, target = test_data[pattern_batch_id]
the_target = target[pattern_id]

# Labels for the printout, in the order of network.layers.
layer_labels: list[str] = [
    "Conv1",
    "Pool1",
    "Conv2",
    "Pool2",
    "Flatten",
    "Full",
    "Output",
]

# activities[0] is the network input (sliced so the batch axis is kept),
# activities[k] is the output of layer k (counted from 1).
activities: list = [image[pattern_id : pattern_id + 1, :, :, :]]

for layer_number, (layer_label, layer) in enumerate(
    zip(layer_labels, network.layers), start=1
):
    print(f"Layer {layer_number} ({layer_label})")

    layer_input = activities[-1]
    layer_output = layer(layer_input)
    activities.append(layer_output)

    # tuple() gives the same printout for NumPy arrays and tensors.
    print("Input Shape:")
    print(tuple(layer_input.shape))
    print("Output Shape:")
    print(tuple(layer_output.shape))
    print("")

network_output = np.asarray(activities[-1])

print("\nEstimation")
print(np.round(network_output, 4))
print("Strongest reponse is at " + str(np.argmax(network_output)))
print("Correct output is " + str(np.argmax(the_target)))
Extracting weight and bias
Here is one way to extract the weights and bias of the whole network. Alternatively you can use get_weights from tf.keras.layers.Layer in combination with get_layer of tf.keras.Sequential.
from tensorflow import keras
from DataGenerator import DataGenerator
# Load a saved model snapshot and print the shapes of all weights and biases.
# No data is needed for this, so the test set is not loaded.

model_id: int = 49

keras.backend.clear_session()

network = keras.models.load_model("./Model_" + str(model_id) + ".h5")

# Flat list holding the arrays of all layers that have trainable parameters.
# The loops below rely on it alternating weights, bias, weights, bias, ...
# (assumes every such layer was built with a bias - verify for other models).
weights_bias = network.get_weights()

# Even positions: weights.
for counter_layer, i in enumerate(range(0, len(weights_bias), 2)):
    print("Layer " + str(counter_layer) + " weights_bias position: " + str(i) + " =>")
    print(weights_bias[i].shape)

print("")

# Odd positions: biases.
for counter_layer, i in enumerate(range(1, len(weights_bias), 2)):
    print("Bias " + str(counter_layer) + " weights_bias position: " + str(i) + " =>")
    print(weights_bias[i].shape)
Type of layers
Reduced list with the most relevant network layers
Activation | Applies an activation function to an output. |
AveragePooling1D | Average pooling for temporal data. |
AveragePooling2D | Average pooling operation for spatial data. |
AveragePooling3D | Average pooling operation for 3D data (spatial or spatio-temporal). |
BatchNormalization | Layer that normalizes its inputs. |
Conv1D | 1D convolution layer (e.g. temporal convolution). |
Conv2D | 2D convolution layer (e.g. spatial convolution over images). |
Conv3D | 3D convolution layer (e.g. spatial convolution over volumes). |
Dense | Just your regular densely-connected NN layer. |
Dropout | Applies Dropout to the input. |
Flatten | Flattens the input. Does not affect the batch size. |
MaxPooling1D | Max pooling operation for 1D temporal data. |
MaxPooling2D | Max pooling operation for 2D spatial data. |
MaxPooling3D | Max pooling operation for 3D data (spatial or spatio-temporal). |
SpatialDropout1D | Spatial 1D version of Dropout. |
SpatialDropout2D | Spatial 2D version of Dropout. |
SpatialDropout3D | Spatial 3D version of Dropout. |
ZeroPadding1D | Zero-padding layer for 1D input (e.g. temporal sequence). |
ZeroPadding2D | Zero-padding layer for 2D input (e.g. picture). |
ZeroPadding3D | Zero-padding layer for 3D data (spatial or spatio-temporal). |
Preprocessing layers
Reduced list with the most relevant preprocessing layers
CenterCrop | A preprocessing layer which crops images. |
RandomContrast | A preprocessing layer which randomly adjusts contrast during training. |
RandomCrop | A preprocessing layer which randomly crops images during training. |
RandomFlip | A preprocessing layer which randomly flips images during training. |
RandomHeight | A preprocessing layer which randomly varies image height during training. |
RandomRotation | A preprocessing layer which randomly rotates images during training. |
RandomTranslation | A preprocessing layer which randomly translates images during training. |
RandomWidth | A preprocessing layer which randomly varies image width during training. |
RandomZoom | A preprocessing layer which randomly zooms images during training. |
Rescaling | A preprocessing layer which rescales input values to a new range. |
Resizing | A preprocessing layer which resizes images. |
Activation functions
Reduced list with the most relevant activation functions
hard_sigmoid(…) | Hard sigmoid activation function. |
relu(…) | Applies the rectified linear unit activation function. |
sigmoid(…) | Sigmoid activation function, sigmoid(x) = 1 / (1 + exp(-x)). |
softmax(…) | Softmax converts a vector of values to a probability distribution. |
softplus(…) | Softplus activation function, softplus(x) = log(exp(x) + 1). |
softsign(…) | Softsign activation function, softsign(x) = x / (abs(x) + 1). |
tanh(…) | Hyperbolic tangent activation function. |
Loss-functions
Reduced list with the most relevant loss functions
BinaryCrossentropy | Computes the cross-entropy loss between true labels and predicted labels. |
CategoricalCrossentropy | Computes the crossentropy loss between the labels and predictions. |
KLDivergence | Computes Kullback-Leibler divergence loss between y_true and y_pred. |
MeanAbsoluteError | Computes the mean of absolute difference between labels and predictions. |
MeanSquaredError | Computes the mean of squares of errors between labels and predictions. |
Poisson | Computes the Poisson loss between y_true and y_pred. |
SparseCategoricalCrossentropy | Computes the crossentropy loss between the labels and predictions. |
Optimizer
Reduced list with the most relevant optimizers
Adagrad | Optimizer that implements the Adagrad algorithm. |
Adam | Optimizer that implements the Adam algorithm. |
RMSprop | Optimizer that implements the RMSprop algorithm. |
SGD | Gradient descent (with momentum) optimizer. |
Metrics
A very reduced list with the most relevant metrics
Accuracy | Calculates how often predictions equal labels. |
BinaryAccuracy | Calculates how often predictions match binary labels. |
BinaryCrossentropy | Computes the crossentropy metric between the labels and predictions. |
CategoricalAccuracy | Calculates how often predictions match one-hot labels. |
CategoricalCrossentropy | Computes the crossentropy metric between the labels and predictions. |
KLDivergence | Computes Kullback-Leibler divergence metric between y_true and y_pred. |
Mean | Computes the (weighted) mean of the given values. |
MeanAbsoluteError | Computes the mean absolute error between the labels and predictions. |
MeanSquaredError | Computes the mean squared error between y_true and y_pred. |
Poisson | Computes the Poisson metric between y_true and y_pred. |
Precision | Computes the precision of the predictions with respect to the labels. |
RootMeanSquaredError | Computes root mean squared error metric between y_true and y_pred. |
SparseCategoricalAccuracy | Calculates how often predictions match integer labels. |
SparseCategoricalCrossentropy | Computes the crossentropy metric between the labels and predictions. |
SparseTopKCategoricalAccuracy | Computes how often integer targets are in the top K predictions. |
Sum | Computes the (weighted) sum of the given values. |
TopKCategoricalAccuracy | Computes how often targets are in the top K predictions. |
The source code is Open Source and can be found on GitHub.