| | import torch
|
| | import torch.nn as nn
|
| |
|
| |
|
def nonlinearity(x):
    """Apply the swish activation ``x * sigmoid(x)`` element-wise.

    Args:
        x: Input tensor.

    Returns:
        A tensor of the same shape as ``x``.
    """
    gate = torch.sigmoid(x)
    return x * gate
|
| |
|
| |
|
def Normalize(in_channels, num_groups=32):
    """Build the GroupNorm layer used by the blocks in this file.

    Args:
        in_channels: Number of channels of the tensor to be normalized.
            GroupNorm requires this to be divisible by ``num_groups``.
        num_groups: Number of normalization groups. Defaults to 32, the
            value that was previously hard-coded.

    Returns:
        A ``torch.nn.GroupNorm`` with ``eps=1e-6`` and learnable affine
        parameters.
    """
    return torch.nn.GroupNorm(
        num_groups=num_groups,
        num_channels=in_channels,
        eps=1e-6,
        affine=True,
    )
|
| |
|
| |
|
class Upsample(nn.Module):
    """Double the spatial size with nearest-neighbour interpolation.

    When ``with_conv`` is truthy, the interpolated tensor is additionally
    passed through a 3x3, stride-1, padding-1 convolution that keeps the
    channel count at ``in_channels``.
    """

    def __init__(self, in_channels, with_conv):
        super().__init__()
        self.with_conv = with_conv
        if not with_conv:
            # No learnable parameters in the pure-interpolation variant.
            return
        self.conv = torch.nn.Conv2d(
            in_channels, in_channels, kernel_size=3, stride=1, padding=1
        )

    def forward(self, x):
        """Return ``x`` upscaled by a factor of 2 (then convolved if enabled).

        Assumes ``x`` is laid out as (N, C, H, W) -- TODO confirm with callers.
        """
        upsampled = torch.nn.functional.interpolate(
            x, scale_factor=2.0, mode="nearest"
        )
        return self.conv(upsampled) if self.with_conv else upsampled
|
| |
|
| |
|
class Downsample(nn.Module):
    """Halve the spatial size, by strided convolution or by average pooling.

    With ``with_conv`` truthy, the input is zero-padded by one element on the
    right and bottom and passed through a 3x3, stride-2 convolution. Otherwise
    a 2x2, stride-2 average pool is applied and the module has no parameters.
    """

    def __init__(self, in_channels, with_conv):
        super().__init__()
        self.with_conv = with_conv
        if not with_conv:
            return
        # padding=0 here because forward() applies its own asymmetric padding.
        self.conv = torch.nn.Conv2d(
            in_channels, in_channels, kernel_size=3, stride=2, padding=0
        )

    def forward(self, x):
        """Return the downsampled version of ``x``."""
        if not self.with_conv:
            return torch.nn.functional.avg_pool2d(x, kernel_size=2, stride=2)
        # (left, right, top, bottom): pad only the right and bottom edges.
        padded = torch.nn.functional.pad(x, (0, 1, 0, 1), mode="constant", value=0)
        return self.conv(padded)
|
| |
|
| |
|
class ResnetBlock(nn.Module):
    """Residual block: two (GroupNorm -> swish -> bias-free 3x3 conv) stages.

    Args (keyword-only):
        in_channels: Channels of the input tensor.
        out_channels: Channels of the output; defaults to ``in_channels``.
        conv_shortcut: When the channel count changes, use a 3x3 shortcut
            convolution (``conv_shortcut``) instead of a 1x1 one
            (``nin_shortcut``).
        dropout: Dropout probability applied before the second convolution.
        temb_channels: Size of the optional embedding vector; a linear
            projection ``temb_proj`` is created only when this is > 0.
    """

    def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False,
                 dropout, temb_channels=512):
        super().__init__()
        if out_channels is None:
            out_channels = in_channels
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.use_conv_shortcut = conv_shortcut

        def bias_free_conv(channels_in, kernel_size):
            # "Same" padding for the odd kernel sizes used here (3 -> 1, 1 -> 0).
            return torch.nn.Conv2d(channels_in,
                                   out_channels,
                                   kernel_size=kernel_size,
                                   stride=1,
                                   padding=kernel_size // 2,
                                   bias=False)

        # Submodules are created in a fixed order so parameter registration
        # (and therefore random initialisation order) stays stable.
        self.norm1 = Normalize(in_channels)
        self.conv1 = bias_free_conv(in_channels, 3)
        if temb_channels > 0:
            self.temb_proj = torch.nn.Linear(temb_channels, out_channels)
        self.norm2 = Normalize(out_channels)
        self.dropout = torch.nn.Dropout(dropout)
        self.conv2 = bias_free_conv(out_channels, 3)

        if in_channels != out_channels:
            # The shortcut takes out_channels as input because forward()
            # feeds it the transformed tensor, not the block input.
            if conv_shortcut:
                self.conv_shortcut = bias_free_conv(out_channels, 3)
            else:
                self.nin_shortcut = bias_free_conv(out_channels, 1)

    def forward(self, x, temb):
        """Run the block on ``x``; ``temb`` may be ``None`` to skip the embedding.

        Passing a non-``None`` ``temb`` requires the block to have been built
        with ``temb_channels > 0`` (otherwise ``temb_proj`` does not exist).
        """
        h = self.conv1(nonlinearity(self.norm1(x)))

        if temb is not None:
            projected = self.temb_proj(nonlinearity(temb))
            # Broadcast the per-channel projection over the two spatial dims.
            h = h + projected[:, :, None, None]

        h = self.conv2(self.dropout(nonlinearity(self.norm2(h))))

        if self.in_channels == self.out_channels:
            return x + h

        # NOTE(review): the shortcut is applied to the transformed tensor `h`
        # rather than to the block input `x`, so `x` does not contribute to
        # the output when the channel count changes. This looks intentional
        # (the shortcut convs are sized out_channels -> out_channels) --
        # confirm against the checkpoints this module loads before changing.
        shortcut = self.conv_shortcut if self.use_conv_shortcut else self.nin_shortcut
        return shortcut(h) + h
|
| |
|
| |
|
class Decoder(nn.Module):
    """Convolutional decoder: latent ``z`` -> feature map with ``out_ch`` channels.

    Layout: input conv -> two middle ResnetBlocks -> one stage per entry of
    ``ch_mult`` (processed from the last entry down to the first, each stage
    followed by a 2x ``Upsample`` except stage 0) -> GroupNorm, swish and an
    output conv.

    Args (keyword-only):
        ch: Base channel count; stage ``i`` has ``ch * ch_mult[i]`` channels.
        out_ch: Channels produced by the final convolution.
        ch_mult: Per-stage channel multipliers.
        num_res_blocks: Number of ResnetBlocks in every stage.
        attn_resolutions: Accepted for signature compatibility; not used by
            this class.
        dropout: Dropout probability forwarded to every ResnetBlock.
        resamp_with_conv: Forwarded to ``Upsample`` as ``with_conv``.
        in_channels: Stored on the instance; not otherwise used here.
        resolution: Stored on the instance; not otherwise used here.
        z_channels: Channels of the latent fed to ``forward``.
        give_pre_end: If true, ``forward`` returns the features before the
            final norm / activation / conv.
        **ignorekwargs: Any extra keyword arguments are silently discarded.
    """

    def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks,
                 attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels,
                 resolution, z_channels, give_pre_end=False, **ignorekwargs):
        super().__init__()
        self.ch = ch
        # No timestep embedding: ResnetBlocks are built without temb_proj.
        self.temb_ch = 0
        self.num_resolutions = len(ch_mult)
        self.num_res_blocks = num_res_blocks
        self.resolution = resolution
        self.in_channels = in_channels
        self.give_pre_end = give_pre_end

        # Channel count at the lowest resolution (last multiplier).
        block_in = ch * ch_mult[self.num_resolutions - 1]

        # Latent -> feature map.
        self.conv_in = torch.nn.Conv2d(z_channels,
                                       block_in,
                                       kernel_size=3,
                                       stride=1,
                                       padding=1)

        # Middle blocks.
        self.mid = nn.Module()
        self.mid.block_1 = ResnetBlock(in_channels=block_in,
                                       out_channels=block_in,
                                       temb_channels=self.temb_ch,
                                       dropout=dropout)
        self.mid.block_2 = ResnetBlock(in_channels=block_in,
                                       out_channels=block_in,
                                       temb_channels=self.temb_ch,
                                       dropout=dropout)

        # Upsampling stages, built from the last level down to level 0.
        self.up = nn.ModuleList()
        for i_level in reversed(range(self.num_resolutions)):
            block = nn.ModuleList()
            block_out = ch * ch_mult[i_level]
            for _ in range(self.num_res_blocks):
                block.append(ResnetBlock(in_channels=block_in,
                                         out_channels=block_out,
                                         temb_channels=self.temb_ch,
                                         dropout=dropout))
                block_in = block_out
            up = nn.Module()
            up.block = block
            if i_level != 0:
                up.upsample = Upsample(block_in, resamp_with_conv)
            # Prepend so that self.up[i] corresponds to level i.
            self.up.insert(0, up)

        # Output head.
        self.norm_out = Normalize(block_in)
        self.conv_out = torch.nn.Conv2d(block_in,
                                        out_ch,
                                        kernel_size=3,
                                        stride=1,
                                        padding=1)

    def forward(self, z):
        """Decode the latent ``z``.

        Side effect: records ``z.shape`` in ``self.last_z_shape``.

        Returns:
            The output of ``conv_out``, or the pre-head feature map when
            ``give_pre_end`` is set.
        """
        self.last_z_shape = z.shape

        # The decoder carries no timestep embedding.
        temb = None

        h = self.conv_in(z)

        h = self.mid.block_1(h, temb)
        h = self.mid.block_2(h, temb)

        # Walk the stages in the order they were built: last level first.
        for i_level in reversed(range(self.num_resolutions)):
            level = self.up[i_level]
            for res_block in level.block:
                h = res_block(h, temb)
            if i_level != 0:
                h = level.upsample(h)

        if self.give_pre_end:
            return h

        h = self.norm_out(h)
        h = nonlinearity(h)
        h = self.conv_out(h)
        return h
|
| |
|