I would like to convert the Tacotron 2 DDC model to ONNX, using the code below. Unfortunately, I get the error that is also listed below. Any ideas? Am I using a wrong input shape, or is conversion to ONNX currently not supported for Coqui Tacotron 2?
If you need more information or have questions, please don't hesitate to ask. I appreciate every correction or idea that helps me solve the problem.
import torch

from TTS.config import load_config
from TTS.tts.models.tacotron2 import Tacotron2

config_path = './config.json'
config = load_config(config_path)
ckpt = './model_file.pth'

model = Tacotron2.init_from_config(config)
model.load_checkpoint(config, ckpt, eval=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dummy batch: one sequence of 512 token ids.
sequences = torch.randint(low=0, high=131, size=(1, 512), dtype=torch.long)
# Lengths are per sample, so this should be size(1) (the sequence length),
# not size(0) (the batch size).
sequence_lengths = torch.LongTensor([sequences.size(1)])
dummy_input = (sequences, sequence_lengths)
input_names = ["sequences", "sequence_lengths"]
output_names = ["output"]

torch.onnx.export(
    model,
    dummy_input,
    "test.onnx",
    input_names=input_names,
    output_names=output_names,
    export_params=True,
    opset_version=13,
    verbose=True,
)
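One thing I am unsure about: the traceback below shows the signature Tacotron2.forward(self, text, text_lengths, mel_specs, mel_lengths, aux_input), and my export call only supplies the first two arguments, so mel_specs stays None. Should I also be passing dummy mel inputs? A minimal sketch of what I mean (the 80 mel channels and the B x T_mel x num_mels layout are my assumptions from the config, not verified):

# Hypothetical extra inputs so mel_specs is not None during tracing.
# 80 channels and the (B, T_mel, num_mels) layout are assumptions on my side;
# T_mel may also need to be divisible by the decoder's reduction factor r.
mel_specs = torch.randn(1, 512, 80)
mel_lengths = torch.LongTensor([512])
dummy_input = (sequences, sequence_lengths, mel_specs, mel_lengths)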
I get this error:
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/torch/nn/modules/module.py:1099, in Module._slow_forward(self, *input, **kwargs)
1097 recording_scopes = False
1098 try:
-> 1099 result = self.forward(*input, **kwargs)
1100 finally:
1101 if recording_scopes:
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/TTS/tts/models/tacotron2.py:206, in Tacotron2.forward(self, text, text_lengths, mel_specs, mel_lengths, aux_input)
203 encoder_outputs = encoder_outputs * input_mask.unsqueeze(2).expand_as(encoder_outputs)
205 # B x mel_dim x T_out -- B x T_out//r x T_in -- B x T_out//r
--> 206 decoder_outputs, alignments, stop_tokens = self.decoder(encoder_outputs, mel_specs, input_mask)
207 # sequence masking
208 if mel_lengths is not None:
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/torch/nn/modules/module.py:1111, in Module._call_impl(self, *input, **kwargs)
1107 # If we don't have any hooks, we want to skip the rest of the logic in
1108 # this function, and just call forward.
1109 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1110 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1111 return forward_call(*input, **kwargs)
1112 # Do not call functions when jit is used
1113 full_backward_hooks, non_full_backward_hooks = [], []
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/torch/nn/modules/module.py:1099, in Module._slow_forward(self, *input, **kwargs)
1097 recording_scopes = False
1098 try:
-> 1099 result = self.forward(*input, **kwargs)
1100 finally:
1101 if recording_scopes:
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/TTS/tts/layers/tacotron/tacotron2.py:314, in Decoder.forward(self, inputs, memories, mask)
312 memory = self.get_go_frame(inputs).unsqueeze(0)
313 print("IN forward memory", memory)
--> 314 memories = self._reshape_memory(memories)
315 print("after reshape", memories)
316 memories = torch.cat((memory, memories), dim=0)
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/TTS/tts/layers/tacotron/tacotron2.py:239, in Decoder._reshape_memory(self, memory)
237 print("MEMORY",memory)
238 # Grouping multiple frames if necessary
--> 239 if memory.size(-1) == self.frame_channels:
240 memory = memory.view(memory.shape[0], memory.size(1) // self.r, -1)
241 # Time first (T_decoder, B, frame_channels)
AttributeError: 'NoneType' object has no attribute 'size'
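In case it matters: I also wondered whether exporting the inference path instead of forward would sidestep the missing mel_specs. A minimal sketch of a wrapper I was considering, assuming Tacotron2.inference(text) exists and returns a dict with a "model_outputs" key (I have not verified this, and the autoregressive stop-token loop may not trace correctly anyway):

import torch.nn as nn

class InferenceWrapper(nn.Module):
    # Hypothetical wrapper that routes ONNX tracing through the
    # inference path so no ground-truth mel spectrograms are needed.
    def __init__(self, tacotron2):
        super().__init__()
        self.tacotron2 = tacotron2

    def forward(self, text):
        outputs = self.tacotron2.inference(text)
        return outputs["model_outputs"]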