I'm going through some tutorials using the Keras functional API in TensorFlow 2, and I'm having some trouble including BatchNormalization layers when using the functional API.
Using roughly the same code:
- This network trains with the sequential API and batch normalization
- This network trains with the functional API, but commenting out the batch normalization layers
- This network does not train using the functional API and batch normalization layers
Am I missing a step somewhere? Do I need to set training=True or training=False somewhere in the code?
Working Sequential Code:
#subclassed layers in keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import SeparableConv2D
from tensorflow.keras.layers import BatchNormalization
import numpy as np
import logging
tf.get_logger().setLevel(logging.ERROR)
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import matplotlib.pyplot as plt
%matplotlib inline
cifar_dataset = keras.datasets.cifar10
(train_images, train_labels), (test_images,
test_labels) = cifar_dataset.load_data()
EPOCHS = 128
BATCH_SIZE = 128
#standardize dataset
mean = np.mean(train_images)
stdev = np.std(train_images)
train_images = (train_images - mean)/stdev
test_images = (test_images - mean)/stdev
#change labels to one-hot
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)
# Keras model subclassing: build your own layers
#CNN -> batch norm -> Relu
#create a class for this kind of block
class CNNBlock(layers.Layer): #inherits from layers.Layer - keeps track of what we need for backpropagation
    def __init__(self, out_channels, kernel_size=3, strides=(1,1)): #needs both __init__ and call methods
        super(CNNBlock, self).__init__() #initialize the parent layers.Layer class
        self.conv = layers.Conv2D(out_channels, kernel_size, strides=strides, padding='same',
                                  kernel_initializer='he_normal', bias_initializer='zeros') #initialize the conv portion of this block
        self.bn = layers.BatchNormalization() #initialize batch normalization in this block
    def call(self, input_tensor, training=False): #the forward pass (like forward in PyTorch); the training bool switches between training/evaluation behavior
        #take the input tensor and run it through the layers initialized in __init__
        x = self.conv(input_tensor) #run the convolution
        x = self.bn(x, training=training) #batch norm
        x = tf.nn.relu(x) #activation function for this block
        return x
class CNNBlock_init(layers.Layer): #same block, but takes an explicit input size for the first layer
    def __init__(self, out_channels, input_size, kernel_size=3):
        super(CNNBlock_init, self).__init__() #initialize the parent layers.Layer class - make sure the class name matches
        self.input_size = input_size
        self.conv = layers.Conv2D(out_channels, kernel_size,
                                  input_shape=input_size, #first layer needs the input shape to build properly
                                  padding='same') #initialize the conv portion of this block
        self.bn = layers.BatchNormalization() #initialize batch normalization in this block
    def call(self, input_tensor, training=False): #the forward pass; the training bool switches between training/evaluation behavior
        x = self.conv(input_tensor) #run the convolution (input_shape is a constructor argument, not a call argument)
        x = self.bn(x, training=training) #batch norm
        x = tf.nn.relu(x) #activation function for this block
        return x
#build the model with these blocks
model = keras.Sequential(
    [
        CNNBlock(64, kernel_size=4, strides=(2,2)),
        Dropout(0.2),
        CNNBlock(64, kernel_size=2, strides=(2,2)),
        Dropout(0.2),
        CNNBlock(32),
        Dropout(0.2),
        CNNBlock(32),
        MaxPooling2D(pool_size=(2,2), strides=2),
        Dropout(0.2),
        Flatten(),
        Dense(64, activation='relu', #dense layers to combine features
              kernel_initializer='he_normal',
              bias_initializer='zeros'),
        Dropout(0.2),
        Dense(10, activation='softmax', #softmax for classification
              kernel_initializer='glorot_uniform',
              bias_initializer='zeros')
    ])
#compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
#model.build(input_shape=(32,32,3))
#model.summary()
#train model
history = model.fit(
    train_images, train_labels,
    validation_data=(test_images, test_labels),
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    verbose=1, shuffle=True) #verbose=1 prints the time taken for each epoch
#evaluate model
import matplotlib.pyplot as plt
%matplotlib inline
def plot_error(history):
    history_dict_vals = history.history #Keras exposes the metrics dict as history.history
    history_x = history.epoch
    plt.plot(history_x, history_dict_vals['accuracy'], 'r-', label='training accuracy')
    plt.plot(history_x, history_dict_vals['val_accuracy'], 'g-', label='test accuracy')
    plt.axis([0, len(history_x), 0.0, 1])
    plt.xlabel('training epochs')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()
    print(f"Final test accuracy = {history_dict_vals['val_accuracy'][-1]}")
plot_error(history)
Working Functional Code:
# same convolutional structure but with the keras functional API
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import SeparableConv2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
import numpy as np
import logging
tf.get_logger().setLevel(logging.ERROR)
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import matplotlib.pyplot as plt
%matplotlib inline
cifar_dataset = keras.datasets.cifar10
(train_images, train_labels), (test_images,
test_labels) = cifar_dataset.load_data()
EPOCHS = 128
BATCH_SIZE = 128
#standardize dataset
mean = np.mean(train_images)
stdev = np.std(train_images)
train_images = (train_images - mean)/stdev
test_images = (test_images - mean)/stdev
#change labels to one-hot
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)
# Keras model subclassing: build your own layers
#CNN -> batch norm -> Relu
#create a class for this kind of block
class CNNBlock(layers.Layer): #inherits from layers.Layer - keeps track of what we need for backpropagation
    def __init__(self, out_channels, kernel_size=3, strides=(1,1)): #needs both __init__ and call methods
        super(CNNBlock, self).__init__() #initialize the parent layers.Layer class
        self.conv = layers.Conv2D(out_channels, kernel_size, strides=strides, padding='same',
                                  kernel_initializer='he_normal', bias_initializer='zeros') #initialize the conv portion of this block
        #self.bn = layers.BatchNormalization() #initialize batch normalization in this block
    def call(self, input_tensor, training=False): #the forward pass; the training bool switches between training/evaluation behavior
        x = self.conv(input_tensor) #run the convolution
        #x = self.bn(x, training=training) #batch norm
        x = tf.nn.relu(x) #activation function for this block
        return x
class CNNBlock_init(layers.Layer): #same block, but takes an explicit input size for the first layer
    def __init__(self, out_channels, input_size, kernel_size=3):
        super(CNNBlock_init, self).__init__() #initialize the parent layers.Layer class - make sure the class name matches
        self.input_size = input_size
        self.conv = layers.Conv2D(out_channels, kernel_size,
                                  input_shape=input_size, #first layer needs the input shape to build properly
                                  padding='same') #initialize the conv portion of this block
        #self.bn = layers.BatchNormalization() #initialize batch normalization in this block
    def call(self, input_tensor, training=False): #the forward pass; the training bool switches between training/evaluation behavior
        x = self.conv(input_tensor) #run the convolution (input_shape is a constructor argument, not a call argument)
        #x = self.bn(x, training=training) #batch norm
        x = tf.nn.relu(x) #activation function for this block
        return x
#build the model with the Keras functional API
input_shape = (32,32,3)
chanDim = -1
#define model with first inputs
inputs = Input(shape=input_shape)
#functional API passing layers through
x = CNNBlock(64,kernel_size=4,strides=(2,2))(inputs)
x = Dropout(0.2)(x)
x = CNNBlock(64,kernel_size=2,strides=(2,2))(x)
x = Dropout(0.2)(x)
x = CNNBlock(64)(x)
x = MaxPooling2D(pool_size=(2,2), strides=2)(x)
x = Dropout(0.2)(x)
x = Flatten()(x)
x = Dense(64, activation='relu', #dense layers to combine features
          kernel_initializer='he_normal',
          bias_initializer='zeros')(x)
x = Dropout(0.2)(x)
y = Dense(10, activation='softmax', #softmax for classification
          kernel_initializer='glorot_uniform',
          bias_initializer='zeros')(x)
#initialize model with inputs and outputs
model = Model(inputs, y, name='convnet_func')
#compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
#train model
history = model.fit(
    train_images, train_labels,
    validation_data=(test_images, test_labels),
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    verbose=1, shuffle=True) #verbose=1 prints the time taken for each epoch
#evaluate model
import matplotlib.pyplot as plt
%matplotlib inline
def plot_error(history):
    history_dict_vals = history.history #Keras exposes the metrics dict as history.history
    history_x = history.epoch
    plt.plot(history_x, history_dict_vals['accuracy'], 'r-', label='training accuracy')
    plt.plot(history_x, history_dict_vals['val_accuracy'], 'g-', label='test accuracy')
    plt.axis([0, len(history_x), 0.0, 1])
    plt.xlabel('training epochs')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()
    print(f"Final test accuracy = {history_dict_vals['val_accuracy'][-1]}")
plot_error(history)
Unfortunately, the model does not train when I uncomment the batch normalization layers.