I am facing a strange problem when adversarially training a resnet-50, and I am not sure whether is's a logical error, or a bug somewhere in the code/libraries. I am adversarially training a resnet-50 thats loaded from Keras, using the FastGradientMethod from cleverhans, and expecting the adversarial accuracy to rise at least above 90% (probably 99.x%). The training algorithm, training- and attack-params should be visible in the code. The problem, as already stated in the title is, that the accuracy is stuck at 5% after training ~3000 of 39002 training inputs in the first epoch. (GermanTrafficSignRecognitionBenchmark, GTSRB).
When training without and adversariy loss function, the accuracy does not get stuck after 3000 samples, but continues to rise > 0.95 in the first epoch.
When substituting the network with a lenet-5, alexnet and vgg19, the code works as expected, and an accuracy absolutely comparabele to the non-adversarial, categorical_corssentropy lossfunction is achieved. I've also tried running the procedure using solely tf-cpu and different versions of tensorflow, the result is always the same.
Code for obtaining ResNet-50:
def build_resnet50(num_classes, img_size):
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Flatten
resnet = ResNet50(weights='imagenet', include_top=False, input_shape=img_size)
x = Flatten(input_shape=resnet.output.shape)(resnet.output)
x = Dense(1024, activation='sigmoid')(x)
predictions = Dense(num_classes, activation='softmax', name='pred')(x)
model = Model(inputs=[resnet.input], outputs=[predictions])
return model
Training:
def lr_schedule(epoch):
# decreasing learning rate depending on epoch
return 0.001 * (0.1 ** int(epoch / 10))
def train_model(model, xtrain, ytrain, xtest, ytest, lr=0.001, batch_size=32,
epochs=10, result_folder=""):
from cleverhans.attacks import FastGradientMethod
from cleverhans.utils_keras import KerasModelWrapper
import tensorflow as tf
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
sgd = SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=True)
model(model.input)
wrap = KerasModelWrapper(model)
sess = tf.compat.v1.keras.backend.get_session()
fgsm = FastGradientMethod(wrap, sess=sess)
fgsm_params = {'eps': 0.01,
'clip_min': 0.,
'clip_max': 1.}
loss = get_adversarial_loss(model, fgsm, fgsm_params)
model.compile(loss=loss, optimizer=sgd, metrics=['accuracy'])
model.fit(xtrain, ytrain,
batch_size=batch_size,
validation_data=(xtest, ytest),
epochs=epochs,
callbacks=[LearningRateScheduler(lr_schedule)])
Loss-function:
def get_adversarial_loss(model, fgsm, fgsm_params):
def adv_loss(y, preds):
import tensorflow as tf
tf.keras.backend.set_learning_phase(False) #turn off dropout during input gradient calculation, to avoid unconnected gradients
# Cross-entropy on the legitimate examples
cross_ent = tf.keras.losses.categorical_crossentropy(y, preds)
# Generate adversarial examples
x_adv = fgsm.generate(model.input, **fgsm_params)
# Consider the attack to be constant
x_adv = tf.stop_gradient(x_adv)
# Cross-entropy on the adversarial examples
preds_adv = model(x_adv)
cross_ent_adv = tf.keras.losses.categorical_crossentropy(y, preds_adv)
tf.keras.backend.set_learning_phase(True) #turn back on
return 0.5 * cross_ent + 0.5 * cross_ent_adv
return adv_loss
Versions used: tf+tf-gpu: 1.14.0 keras: 2.3.1 cleverhans: > 3.0.1 - latest version pulled from github