def _bytes_feature(value):
#Returns a bytes_list from a string / byte.
if isinstance(value, type(tf.constant(0))): # if value ist tensor
value = value.numpy() # get value of tensor
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _int64_feature(value):
#Returns an int64_list from a bool / enum / int / uint.
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def write_tfrecords(data_list, output_file):
"""
write data for nearest neighbor evaluation
"""
total_samples = 0
with tf.io.TFRecordWriter(output_file) as writer:
for image, label in tqdm(data_list):
data = {
"image": _bytes_feature(image.numpy().bytes()),
"label": _int64_feature(label)
}
out = tf.train.Example(features=tf.train.Features(feature=data))
writer.write(out.SerializeToString())
total_samples += 1
return total_samples
The image object at this line "image": _bytes_feature(image.numpy().bytes())
, is a tensor.
I used numpy bytes here but when I decode the data I found that the shape of the array is missing. I need to specify the tensor array shape. This is how numpy.bytes()
and numpy.frombuffer()
works: Convert byte array back to numpy array
Is there a better way that the array shape can be serialized as well?