I am using TensorFlow 2.0.0 and trying to create my own dataset with tf.data.Dataset.from_generator().
Here is my code:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

def trainDatagen():
    # train_list: paths to the .npy feature files; gbmlist: the files belonging to class 0
    for npy in train_list:
        x = tf.convert_to_tensor(np.load(npy), dtype=tf.float32)
        if npy in gbmlist:
            y = to_categorical(0, num_classes=2)
        else:
            y = to_categorical(1, num_classes=2)
        yield x, y
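For reference, the generator can be sanity-checked on its own (a minimal sketch that just pulls one sample eagerly):

x, y = next(trainDatagen())
print(x.shape)  # expected: (4000, 2048)
print(y.shape)  # expected: (2,)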
def tfDatasetGen(datagen, output_types, is_training, batch_size):
    dataset = tf.data.Dataset.from_generator(generator=datagen, output_types=output_types)
    if is_training:
        dataset.shuffle(buffer_size=100)
        dataset.repeat()
    dataset.batch(batch_size=batch_size)
    dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset
train_set = tfDatasetGen(
    datagen=trainDatagen,
    output_types=(tf.float32, tf.float32),
    is_training=True,
    batch_size=16)
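Before training, the pipeline itself can be inspected (a minimal sketch; element_spec and take() exist on tf.data.Dataset in TF 2.0):

print(train_set.element_spec)
for x, y in train_set.take(1):
    print(x.shape, y.shape)  # what the model will actually receive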
All those .npy files are np.arrays of shape [4000, 2048], obtained from large pathology slides cut into 4000 tiles each; the 2048-dim feature vector of each tile was computed with ResNet50.
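For context, this is roughly how such per-tile features can be produced (a sketch, not my actual extraction code; the random tiles and the filename are placeholders):

from tensorflow.keras.applications import ResNet50

# Global-average pooling turns ResNet50's final feature maps into one
# 2048-dim vector per tile; stacking 4000 tiles yields a (4000, 2048) array.
extractor = ResNet50(include_top=False, weights='imagenet', pooling='avg')
tiles = np.random.rand(8, 224, 224, 3).astype('float32')  # placeholder; 4000 tiles in practice
features = extractor.predict(tiles, batch_size=8)         # shape (8, 2048)
np.save('slide_features.npy', features)                   # hypothetical filename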
Here is my model:
def top_k(inputs, k):
    return tf.nn.top_k(inputs, k=k, sorted=True).values

def least_k(inputs, k):
    return -tf.nn.top_k(-inputs, k=k, sorted=True).values

def minmax_k(inputs, k):
    return tf.concat([top_k(inputs, k), least_k(inputs, k)], axis=-1)
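As a quick sanity check on these pooling helpers (a toy sketch):

x = tf.constant([[5., 1., 9., 3., 7., 2.]])
print(top_k(x, 2))     # [[9. 7.]]  -- the two largest values
print(least_k(x, 2))   # [[1. 2.]]  -- the two smallest, ascending
print(minmax_k(x, 2))  # [[9. 7. 1. 2.]]  -- 2k features per row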
from tensorflow import keras
from tensorflow.keras import layers

inputs = keras.Input(shape=(4000, 2048))
y = layers.Conv1D(1, 2048, use_bias=False, padding='same', data_format='channels_last')(inputs)
y = layers.Flatten()(y)
y = layers.Lambda(minmax_k, arguments={'k': 5})(y)
y = layers.Dense(units=200, activation=tf.nn.relu)(y)
y = layers.Dropout(rate=0.5)(y)
y = layers.Dense(units=100, activation=tf.nn.relu)(y)
y = layers.Dense(units=2, activation=tf.nn.softmax)(y)
model = keras.Model(inputs=inputs, outputs=y)
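For completeness, compiling the model and printing its summary shows the input signature it expects (a sketch; the optimizer and loss here are placeholders):

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()  # the input layer reports shape (None, 4000, 2048)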
When using model.fit() to train the model, I received this:
ValueError: Error when checking input: expected input_4 to have 3 dimensions, but got array with shape (4000, 2048)
The whole idea comes from the paper arXiv:1802.02212; here is the figure of the neural network I tried to reproduce.
I followed Mahsa Hassankashi's advice and reshaped the input to (4000, 2048, 1):
x = tf.convert_to_tensor(np.load(npy).reshape(4000,2048,1), dtype=tf.float32)
and, following a GitHub issue, modified this part to fix an error (with tfDatasetGen extended to pass output_shapes through to tf.data.Dataset.from_generator):
train_set = tfDatasetGen(
    datagen=trainDatagen,
    output_types=(tf.float32, tf.float32),
    output_shapes=(tf.TensorShape((None, None, None)), tf.TensorShape((2,))),  # <-- the added argument
    is_training=True,
    batch_size=16)
But I got this:
InvalidArgumentError: input and filter must have the same depth: 1 vs 2048
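If I understand Conv1D's convention correctly (an assumption on my part), the last axis is treated as the channel depth and must match the filter's depth; a toy sketch of that shape convention:

conv = layers.Conv1D(1, 2048, use_bias=False, padding='same')
x = tf.random.normal([1, 4000, 2048])  # (batch, steps, channels): depth 2048
print(conv(x).shape)                   # (1, 4000, 1): depth matches the filter
# A (batch, 4000, 2048, 1) tensor instead carries depth 1 on its last axis,
# which no longer matches the 2048-channel filter -- hence "1 vs 2048".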
Finally I tried reshaping the input to (1, 4000, 2048); this time a different kind of error came up:
InvalidArgumentError: Expected size[0] in [0, 1], but got 2