0

I am training a Keras model with a data generator that reads the data in batches from a directory. This works great with model.fit(). However, when I call model.predict() on the generator, I only get the predictions back; I would like both the ypred and the ytrue values returned.

Can I enable/modify model.predict() to do this (maybe with a custom callback)?

class DataGenerator(tf.keras.utils.Sequence):
    '''Keras Sequence that serves one ``data/<id>.npy`` file per step.

    Each file is unpacked into an ``(X, y)`` pair when it is loaded.

    Args:
        ids: Array of file ids this generator serves.
        batch_size: Stored on the instance; it is not used when loading,
            because each file is read whole.
            NOTE(review): presumably every file already holds one batch
            of this size -- confirm against how the files were written.
    '''

    def __init__(self, ids, batch_size=256):
        super().__init__()
        self.batch_size = batch_size
        self.ids = ids

    def __len__(self):
        '''Return the number of batches, i.e. one per id.'''
        # Keras needs an integer here, not the id array itself.
        return len(self.ids)

    def __getitem__(self, index):
        '''Return the ``(X, y)`` pair for the batch at position ``index``.'''
        # Translate the position into a file id. Without this a generator
        # built from ids 10000..11999 would read data/0.npy .. data/1999.npy,
        # and the shuffle in on_epoch_end would have no effect.
        file_id = self.ids[index]
        # NOTE(review): allow_pickle=True unpickles the file contents; only
        # load files from a trusted source.
        X, y = np.load(f'data/{file_id}.npy', allow_pickle=True)
        return X, y

    def on_epoch_end(self):
        '''Shuffle ids in each epoch'''
        self.ids = np.random.choice(self.ids, len(self.ids), replace=False)



model = buildModel() # builds a multilayer perceptron
train_ids = np.arange(10000)  # training data are in data/0.npy, data/1.npy, ... data/9999.npy
# Validation ids are 10000 .. 11999.
val_ids = np.arange(10000, 12000)

# One generator per split; each is handed only the ids of its own split.
train_generator = DataGenerator(train_ids)
val_generator = DataGenerator(val_ids)

# Train model
history = model.fit(x=train_generator, epochs=100)

# Validate model: predict() returns predictions only, so ytrue is missing here.
# reshape(-1) flattens the predictions to 1-D.
ypred = model.predict(x=val_generator).reshape(-1)


# What I would like to achieve (hypothetical: `some_custom_callback` is a
# placeholder, not an existing object)
(ypred, ytrue) = model.predict(x=val_generator, callbacks=[some_custom_callback])

# Or (hypothetical: `some_fancy_method` is a placeholder for whatever would
# pull the labels out of the generator)
ypred = model.predict(x=val_generator)
ytrue = some_fancy_method(val_generator)
ProteinGuy
  • 1,754
  • 2
  • 17
  • 33
  • 1
    Check this https://stackoverflow.com/questions/58812921/getting-true-labels-for-keras-predictions and also this https://stackoverflow.com/questions/44970445/how-to-return-true-labels-of-items-when-using-predict-generator – Innat Mar 31 '21 at 02:37

1 Answer

0

This can be done by adding a method to your DataGenerator class that takes the fitted model as input, applies it to the generated data batches, and returns ytrue and ypred.

class DataGenerator(tf.keras.utils.Sequence):
    '''Keras Sequence that serves one ``data/<id>.npy`` file per step.

    Besides the usual Sequence protocol it offers ``predict``, which runs a
    fitted model over every file and returns the true labels together with
    the predictions.

    Args:
        ids: Array of file ids this generator serves.
        batch_size: Stored on the instance; it is not used when loading,
            because each file is read whole.
            NOTE(review): presumably every file already holds one batch
            of this size -- confirm against how the files were written.
    '''

    def __init__(self, ids, batch_size=256):
        super().__init__()
        self.batch_size = batch_size
        self.ids = ids

    def __len__(self):
        '''Return the number of batches, i.e. one per id.'''
        # Keras needs an integer here, not the id array itself.
        return len(self.ids)

    def __getitem__(self, index):
        '''Return the ``(X, y)`` pair for the batch at position ``index``.'''
        # load_data expects a file id (that is what predict passes), so map
        # the position through self.ids first. This also makes the shuffle
        # in on_epoch_end take effect.
        X, y = self.load_data(self.ids[index])
        return X, y

    def load_data(self, index):
        '''Load ``data/<index>.npy`` and return its ``(X, y)`` pair.

        Args:
            index: File id used to build the file name (not a position
                in ``self.ids``).
        '''
        # NOTE(review): allow_pickle=True unpickles the file contents; only
        # load files from a trusted source.
        X, y = np.load(f'data/{index}.npy', allow_pickle=True)
        return X, y

    def predict(self, model):
        '''Apply ``model`` to every file and collect labels and predictions.

        Args:
            model: Fitted model exposing ``predict(X)``.

        Returns:
            A tuple ``(ytrue, ypred)`` of flat lists, in that order, with
            entries aligned file by file in the current order of
            ``self.ids``.
        '''
        ytrue, ypred = [], []
        for file_id in self.ids:
            X, y = self.load_data(file_id)
            # Flatten so each prediction lines up with one label.
            pred = model.predict(X).reshape(-1)
            ytrue.extend(y)
            ypred.extend(pred)
        return ytrue, ypred

    def on_epoch_end(self):
        '''Shuffle ids in each epoch'''
        self.ids = np.random.choice(self.ids, len(self.ids), replace=False)



# One generator per split; each is handed only the ids of its own split.
train_generator = DataGenerator(train_ids)
val_generator = DataGenerator(val_ids)

# Train model
history = model.fit(x=train_generator, epochs=100)

# Validate model
# DataGenerator.predict returns (ytrue, ypred) in that order, so unpack the
# true labels first; the reverse order would silently swap the two lists.
ytrue, ypred = val_generator.predict(model)
ProteinGuy
  • 1,754
  • 2
  • 17
  • 33