
I would like to convert the Tacotron 2 DDC model to ONNX. I used the code below, but I get the error listed further down. Any ideas? Am I using a wrong input shape? Is conversion to ONNX currently not supported for Coqui Tacotron 2?

If you need more information or have questions, please don't hesitate to ask. I appreciate every correction or idea that helps me solve the problem.

import torch
from TTS.config import load_config
from TTS.tts.models.tacotron2 import Tacotron2

config_path = './config.json'
config = load_config(config_path)
ckpt = './model_file.pth'
model = Tacotron2.init_from_config(config)
model.load_checkpoint(config, ckpt, eval=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

sequences = torch.randint(low=0, high=131, size=(1, 512), dtype=torch.long)
sequence_lengths = torch.LongTensor([sequences.size(1)])  # length of each sequence, not the batch size

dummy_input = (sequences, sequence_lengths)
input_names = ["sequences", "sequence_lengths"]
output_names = ["output"]

torch.onnx.export(
    model,
    dummy_input,
    "test.onnx",
    input_names=input_names,
    output_names=output_names,
    export_params=True,
    opset_version=13,
    verbose=True,
)
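
For what it's worth, Tacotron2.forward in Coqui TTS looks like the training-time forward (it expects mel_specs), while torch.onnx.export calls model(*dummy_input), so mel_specs stays None. One alternative I can think of is exporting a thin wrapper around the model's inference method. This is only a sketch, assuming inference(text) is the right entry point and that it returns a dict with a "model_outputs" key; I have not verified it:

class Tacotron2InferenceWrapper(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, text):
        # Assumption: Coqui's Tacotron2.inference returns a dict that
        # holds the predicted mel spectrogram under "model_outputs".
        outputs = self.model.inference(text)
        return outputs["model_outputs"]

torch.onnx.export(
    Tacotron2InferenceWrapper(model),
    (sequences,),
    "test_inference.onnx",
    input_names=["sequences"],
    output_names=["mel"],
    opset_version=13,
)

Even then, tracing inference may fail or bake in a fixed number of decoder steps, since the autoregressive stop-token loop is data-dependent Python control flow.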

I get this error:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/torch/nn/modules/module.py:1099, in Module._slow_forward(self, *input, **kwargs)
   1097         recording_scopes = False
   1098 try:
-> 1099     result = self.forward(*input, **kwargs)
   1100 finally:
   1101     if recording_scopes:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/TTS/tts/models/tacotron2.py:206, in Tacotron2.forward(self, text, text_lengths, mel_specs, mel_lengths, aux_input)
    203 encoder_outputs = encoder_outputs * input_mask.unsqueeze(2).expand_as(encoder_outputs)
    205 # B x mel_dim x T_out -- B x T_out//r x T_in -- B x T_out//r
--> 206 decoder_outputs, alignments, stop_tokens = self.decoder(encoder_outputs, mel_specs, input_mask)
    207 # sequence masking
    208 if mel_lengths is not None:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/torch/nn/modules/module.py:1111, in Module._call_impl(self, *input, **kwargs)
   1107 # If we don't have any hooks, we want to skip the rest of the logic in
   1108 # this function, and just call forward.
   1109 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1110         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1111     return forward_call(*input, **kwargs)
   1112 # Do not call functions when jit is used
   1113 full_backward_hooks, non_full_backward_hooks = [], []

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/torch/nn/modules/module.py:1099, in Module._slow_forward(self, *input, **kwargs)
   1097         recording_scopes = False
   1098 try:
-> 1099     result = self.forward(*input, **kwargs)
   1100 finally:
   1101     if recording_scopes:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/TTS/tts/layers/tacotron/tacotron2.py:314, in Decoder.forward(self, inputs, memories, mask)
    312 memory = self.get_go_frame(inputs).unsqueeze(0)
    313 print("IN forward memory", memory)
--> 314 memories = self._reshape_memory(memories)
    315 print("after reshape", memories)
    316 memories = torch.cat((memory, memories), dim=0)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/TTS/tts/layers/tacotron/tacotron2.py:239, in Decoder._reshape_memory(self, memory)
    237 print("MEMORY",memory)
    238 # Grouping multiple frames if necessary
--> 239 if memory.size(-1) == self.frame_channels:
    240     memory = memory.view(memory.shape[0], memory.size(1) // self.r, -1)
    241 # Time first (T_decoder, B, frame_channels)

AttributeError: 'NoneType' object has no attribute 'size' 
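
From the traceback, the memory that reaches Decoder._reshape_memory is the mel_specs argument of Tacotron2.forward, which defaults to None when the model is called with only (sequences, sequence_lengths). If the training-style forward really is the graph to trace, a dummy mel target would have to be passed as well. A sketch, assuming the usual [B, T_mel, n_mel] layout with 80 mel channels and a frame count divisible by the reduction factor r (both hypothetical values taken from typical configs):

mel_specs = torch.randn(1, 96, 80)    # assumed: 96 frames, 80 mel channels
mel_lengths = torch.LongTensor([96])  # assumed frame count per batch item
dummy_input = (sequences, sequence_lengths, mel_specs, mel_lengths)

torch.onnx.export(
    model,
    dummy_input,
    "test.onnx",
    input_names=["sequences", "sequence_lengths", "mel_specs", "mel_lengths"],
    output_names=["output"],
    opset_version=13,
)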