I am trying to implement my own activation function using functions from the Keras backend or Tensorflow but I have trouble getting this function to learn properly.
My first approach was to rebuild an existing activation function (ELU) to see if the there is a problem with my own activation function, but even the rebuild function does not train like the activation function build into Keras or Tensorflow.
Tensorflow function:
def custom_activation(x):
cond = tf.greater(x, tf.constant(0.0))
return tf.where(cond,
x,
tf.subtract(tf.exp(x), tf.constant(1.0)))
Keras function:
def custom_activation(x):
cond = K.greater(x, 0)
return K.switch(cond, x, K.exp(x) - 1)
get_custom_objects().update({'custom_activation': Activation(custom_activation)})
I am using the mnist dataset and a simple 8 layer fully connected network with 128 nodes in each layer to test my activation function. This network is learning slightly with the build-in ELU function, but with the custom Keras or Tensorflow function the loss is instantly near zero and the accuracy doesn't improve at all.
What am I missing?
I followed How do you create a custom activation function with Keras? for the Keras function and this post for Tensorflow.
Full code (for copy / paste):
ELU in Keras (working normal)
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], 28*28)
x_test = x_test.reshape(x_test.shape[0], 28*28)
from keras.utils import to_categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
from keras import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
model = Sequential([
Dense(128, input_shape=x_train.shape[1:]),
Activation('elu'),
Dense(128),
Activation('elu'),
Dense(128),
Activation('elu'),
Dense(128),
Activation('elu'),
Dense(128),
Activation('elu'),
Dense(128),
Activation('elu'),
Dense(128),
Activation('elu'),
Dense(128),
Activation('elu'),
Dense(10),
Activation('sigmoid')
])
model.compile(SGD(lr=0.01), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x=x_train, y=y_train,
validation_data=[x_test, y_test],
batch_size=64, epochs=5)
custom ELU in Keras
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], 28*28)
x_test = x_test.reshape(x_test.shape[0], 28*28)
from keras.utils import to_categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
from keras import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects
def custom_activation(x):
cond = K.greater(x, 0)
return K.switch(cond, x, K.exp(x) - 1)
get_custom_objects().update({'custom_activation': Activation(custom_activation)})
model = Sequential([
Dense(128, input_shape=x_train.shape[1:]),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(10),
Activation('sigmoid')
])
model.compile(SGD(lr=0.01), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x=x_train, y=y_train,
validation_data=[x_test, y_test],
batch_size=64, epochs=5)
custom ELU in Tensorflow with Keras API
from tensorflow.keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], 28*28)
x_test = x_test.reshape(x_test.shape[0], 28*28)
from tensorflow.keras.utils import to_categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD
def custom_activation(x):
cond = tf.greater(x, tf.constant(0.0))
return tf.where(cond,
x,
tf.subtract(tf.exp(x), tf.constant(1.0)))
model = Sequential([
Dense(128, input_shape=x_train.shape[1:]),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(128),
Activation(custom_activation),
Dense(10),
Activation('sigmoid')
])
model.compile(SGD(lr=0.01), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x=x_train, y=y_train,
validation_data=[x_test, y_test],
batch_size=64, epochs=5)