I'm working with GANs and getting artifacts in the output. As many papers suggest, the best way to remove them is to replace each ConvTranspose2d in the generator's blocks with an upsample followed by a convolution.
I'm trying to generate ASL hand signs from my own dataset.
This is the current block code that uses ConvTranspose2d:
def _block(self, in_channels, out_channels, kernel_size, stride, padding):
    return nn.Sequential(
        nn.ConvTranspose2d(
            in_channels, out_channels, kernel_size, stride, padding, bias=False,
        ),
        nn.BatchNorm2d(out_channels),
        nn.LeakyReLU(0.2),
    )
What I'm trying to achieve is to change the block so that it upsamples the feature map and then applies a convolution, producing the same output size as the ConvTranspose2d.
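For reference, assuming my transposed convolutions use the usual DCGAN settings (kernel_size=4, stride=2, padding=1; an assumption, since the call sites aren't shown above), both paths should double the spatial size:

import torch
import torch.nn as nn

x = torch.randn(1, 256, 8, 8)  # example feature map; sizes are just for illustration

# transposed-conv path: kernel_size=4, stride=2, padding=1 doubles 8x8 to 16x16
tconv = nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1, bias=False)
print(tconv(x).shape)  # torch.Size([1, 128, 16, 16])

# upsample + conv path: scale_factor=2, then a size-preserving 3x3 convolution
upconv = nn.Sequential(
    nn.Upsample(scale_factor=2, mode='bilinear'),
    nn.ReflectionPad2d(1),
    nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=0),
)
print(upconv(x).shape)  # torch.Size([1, 128, 16, 16])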
This is what I tried:
nn.Upsample(scale_factor=2, mode='bilinear'),
nn.ReflectionPad2d(1),
nn.Conv2d(out_channels, int(out_channels / 2), kernel_size=3, stride=1, padding=0),
nn.BatchNorm2d(out_channels),
nn.LeakyReLU(0.2),
However, I was getting the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-12-4fa61054bc2c> in <module>
81 for _ in range(CRITIC_ITERATIONS):
82 noise = torch.randn(cur_batch_size, Z_DIM, 1, 1).to(device)
---> 83 fake = gen(noise,labels)
84 critic_real = critic(real,labels).reshape(-1)
85 critic_fake = critic(fake,labels).reshape(-1)
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-10-6af8005acf1f> in forward(self, x, labels)
77 embedding=self.embed(labels).unsqueeze(2).unsqueeze(3)
78 x=torch.cat([x,embedding],dim=1)
---> 79 return self.net(x)
80
81
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~\Anaconda3\lib\site-packages\torch\nn\modules\container.py in forward(self, input)
115 def forward(self, input):
116 for module in self:
--> 117 input = module(input)
118 return input
119
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~\Anaconda3\lib\site-packages\torch\nn\modules\container.py in forward(self, input)
115 def forward(self, input):
116 for module in self:
--> 117 input = module(input)
118 return input
119
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~\Anaconda3\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
421
422 def forward(self, input: Tensor) -> Tensor:
--> 423 return self._conv_forward(input, self.weight)
424
425 class Conv3d(_ConvNd):
~\Anaconda3\lib\site-packages\torch\nn\modules\conv.py in _conv_forward(self, input, weight)
418 _pair(0), self.dilation, self.groups)
419 return F.conv2d(input, weight, self.bias, self.stride,
--> 420 self.padding, self.dilation, self.groups)
421
422 def forward(self, input: Tensor) -> Tensor:
RuntimeError: Given groups=1, weight of size [1024, 2048, 1, 1], expected input[64, 200, 2, 2] to have 2048 channels, but got 200 channels instead
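A standalone version of the block reproduces the same class of error. The channel numbers below are assumptions read off the weight and input shapes in the traceback:

import torch
import torch.nn as nn

# minimal repro sketch; 2048 and 1024 come from the weight shape in the
# traceback, and 200 is the channel count of the concatenated noise +
# label embedding, so all of these numbers are assumptions
block = nn.Sequential(
    nn.Upsample(scale_factor=2, mode='bilinear'),
    nn.ReflectionPad2d(1),
    nn.Conv2d(2048, 1024, kernel_size=3, stride=1, padding=0),
    nn.BatchNorm2d(2048),
    nn.LeakyReLU(0.2),
)
x = torch.randn(64, 200, 1, 1)
block(x)  # RuntimeError: ... expected input ... to have 2048 channels, but got 200 ...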
Also, if I change the stride to 1, it gives me an embedding error instead. For reference, here are the generator and critic summaries, followed by that traceback.
Generator:
=================================================================
Layer (type:depth-idx) Param #
=================================================================
├─Sequential: 1-1 --
| └─Sequential: 2-1 --
| | └─Upsample: 3-1 --
| | └─Conv2d: 3-2 411,648
| | └─BatchNorm2d: 3-3 4,096
| | └─LeakyReLU: 3-4 --
| └─Sequential: 2-2 --
| | └─Upsample: 3-5 --
| | └─Conv2d: 3-6 2,098,176
| | └─BatchNorm2d: 3-7 2,048
| | └─LeakyReLU: 3-8 --
| └─Sequential: 2-3 --
| | └─Upsample: 3-9 --
| | └─Conv2d: 3-10 524,800
| | └─BatchNorm2d: 3-11 1,024
| | └─LeakyReLU: 3-12 --
| └─Sequential: 2-4 --
| | └─Upsample: 3-13 --
| | └─Conv2d: 3-14 131,328
| | └─BatchNorm2d: 3-15 512
| | └─LeakyReLU: 3-16 --
| └─Sequential: 2-5 --
| | └─Upsample: 3-17 --
| | └─Conv2d: 3-18 32,896
| | └─BatchNorm2d: 3-19 256
| | └─LeakyReLU: 3-20 --
| └─ConvTranspose2d: 2-6 6,147
| └─Tanh: 2-7 --
├─Embedding: 1-2 2,700
=================================================================
Total params: 3,215,631
Trainable params: 3,215,631
Non-trainable params: 0
=================================================================
Critic:
=================================================================
Layer (type:depth-idx) Param #
=================================================================
├─Sequential: 1-1 --
| └─Conv2d: 2-1 2,080
| └─LeakyReLU: 2-2 --
| └─Sequential: 2-3 --
| | └─Conv2d: 3-1 32,768
| | └─InstanceNorm2d: 3-2 128
| | └─LeakyReLU: 3-3 --
| └─Sequential: 2-4 --
| | └─Conv2d: 3-4 131,072
| | └─InstanceNorm2d: 3-5 256
| | └─LeakyReLU: 3-6 --
| └─Sequential: 2-5 --
| | └─Conv2d: 3-7 524,288
| | └─InstanceNorm2d: 3-8 512
| | └─LeakyReLU: 3-9 --
| └─Sequential: 2-6 --
| | └─Conv2d: 3-10 2,097,152
| | └─InstanceNorm2d: 3-11 1,024
| | └─LeakyReLU: 3-12 --
| └─Conv2d: 2-7 8,193
├─Embedding: 1-2 442,368
=================================================================
Total params: 3,239,841
Trainable params: 3,239,841
Non-trainable params: 0
=================================================================
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-3-4fa61054bc2c> in <module>
83 fake = gen(noise,labels)
84 critic_real = critic(real,labels).reshape(-1)
---> 85 critic_fake = critic(fake,labels).reshape(-1)
86 gp = gradient_penalty(critic,labels, real, fake, device=device)
87 loss_critic = (
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-1-4154f5f318f2> in forward(self, x, labels)
38 def forward(self, x,labels):
39 embedding=self.embed(labels).view(labels.shape[0],1,self.img_size,self.img_size)
---> 40 x=torch.cat([x,embedding],dim=1)
41 return self.disc(x)
42
RuntimeError: Sizes of tensors must match except in dimension 2. Got 128 and 64 (The offending index is 0)
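If I read the traceback right, the generator is now producing 128x128 images while the critic reshapes the label embedding to img_size = 64, so the concatenation fails. A sketch of that mismatch (sizes assumed from the error message):

import torch

fake = torch.randn(64, 3, 128, 128)     # generator output after the stride change (size assumed)
embedding = torch.randn(64, 1, 64, 64)  # label embedding reshaped with img_size = 64
torch.cat([fake, embedding], dim=1)     # RuntimeError: spatial sizes must match to concatenate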
Could someone explain how I would achieve this for my architecture?