I have an application in which I need to set up a pipeline using tf.data. The data is stored in .mat files created in MATLAB, each containing three variables: "s_matrix", a 224x224x3 double array; "frame", a 1024x1 complex double; and a numeric label. The pipeline should yield batches that can be fed directly to model.fit. The code I have been using so far to load and process the data is added below, but I keep getting several type-mismatch and unexpected-byte errors.
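For reference, this is how I sanity-check the contents of a single file (the file name below is just a placeholder for one of my .mat files):

import scipy.io

sample = scipy.io.loadmat('data/sample_0001.mat')  # placeholder path
print(sample['s_matrix'].shape, sample['s_matrix'].dtype)  # (224, 224, 3) float64
print(sample['frame'].shape, sample['frame'].dtype)        # (1024, 1) complex128
print(sample['numeric_label'])                             # scalar label, loaded by loadmat as a (1, 1) array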
Update 2: Making several modifications
I updated the code to include the changes suggested by Giorgos and also switched the dataset creation to tf.data.Dataset.from_generator. There is some apparent improvement, but now the issue is that only one of the two inputs is being passed to the model.
import os
import scipy.io
import tensorflow as tf

# Define the shape of the input image
input_shape = (224, 224, 3)
# Define the shape of the complex vector after conversion
complex_shape = (1024, 2, 1)

# Define a function to load and preprocess each sample
def load_and_preprocess_sample(sample_path):
    # Load the sample from the .mat file
    sample = scipy.io.loadmat(sample_path)
    matrix = sample['s_matrix']
    complex_vector = sample['frame']
    label = sample['numeric_label']
    # Split the complex vector into stacked real and imaginary channels
    real = tf.reshape(tf.math.real(complex_vector), [1024, 1])
    imag = tf.reshape(tf.math.imag(complex_vector), [1024, 1])
    signal_tensor = tf.concat([real, imag], axis=-1)
    signal = tf.reshape(signal_tensor, [1024, 2, 1])
    # Normalize the matrix values between 0 and 1
    matrix = matrix / 255.0
    return matrix, signal, label
# Define a generator function that yields ((matrix, signal), label) pairs
def sample_generator(file_paths):
    for file_path in file_paths:
        matrix, signal, label = load_and_preprocess_sample(file_path)
        yield (matrix, signal), label
# Modify the create_dataset() function to use from_generator
def create_dataset(file_paths):
    dataset = tf.data.Dataset.from_generator(
        generator=lambda: sample_generator(file_paths),
        output_signature=(
            tf.TensorSpec(shape=input_shape, dtype=tf.float32),
            tf.TensorSpec(shape=complex_shape, dtype=tf.float32),
            tf.TensorSpec(shape=(1,), dtype=tf.float32)
        )
    )
    dataset = dataset.shuffle(buffer_size=len(file_paths))
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset
# Get a list of all file paths in the data folder
file_paths = [os.path.join(data_path, f) for f in os.listdir(data_path) if f.endswith('.mat')]
# Split file paths into training and validation sets
train_file_paths = file_paths[:-num_val_samples]
val_file_paths = file_paths[-num_val_samples:]
The datasets are generated and the model is called as below:
# Create datasets for training and validation sets
train_dataset = create_dataset(train_file_paths)
val_dataset = create_dataset(val_file_paths)
...
...
...
model = tf.keras.Model(inputs=[input1, input2], outputs=output)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train your model
model.fit(train_dataset,
          epochs=5,
          steps_per_epoch=num_train_samples // batch_size,
          validation_data=val_dataset,
          validation_steps=num_val_samples // batch_size)
The current error output is shared below:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[7], line 90
87 model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
89 # Train your model
---> 90 model.fit(train_dataset,
91 epochs=5,
92 steps_per_epoch=num_train_samples // batch_size,
93 validation_data=val_dataset,
94 validation_steps=num_val_samples // batch_size)
File ~\miniconda3\envs\tf2\lib\site-packages\keras\utils\traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File ~\AppData\Local\Temp\__autograph_generated_filea4_9b7hv.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
ValueError: in user code:
File "C:\Users\Admin\miniconda3\envs\tf2\lib\site-packages\keras\engine\training.py", line 1160, in train_function *
return step_function(self, iterator)
File "C:\Users\Admin\miniconda3\envs\tf2\lib\site-packages\keras\engine\training.py", line 1146, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:\Users\Admin\miniconda3\envs\tf2\lib\site-packages\keras\engine\training.py", line 1135, in run_step **
outputs = model.train_step(data)
File "C:\Users\Admin\miniconda3\envs\tf2\lib\site-packages\keras\engine\training.py", line 993, in train_step
y_pred = self(x, training=True)
File "C:\Users\Admin\miniconda3\envs\tf2\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\Admin\miniconda3\envs\tf2\lib\site-packages\keras\engine\input_spec.py", line 216, in assert_input_compatibility
raise ValueError(
ValueError: Layer "model" expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 224, 224, 3) dtype=float32>]