(1)基础框架使用了
https://github.com/WZMIAOMIAO/deep-learning-for-image-processing
(2)VGG16使用了
Pytorch搭建FCN网络_金渐层猫的博客-CSDN博客_pytorch 实现fcn
(1)原理见教程17
(17)语义分割(2)_FCN的原理_chencaw的博客-CSDN博客
(2)

(3)
(4)

(1)注意网络的FC6和FC7的输出在实现的时候没有使用Dropout,kernel size也使用了1。
(2)当然我们也可以按照上图的方法去实现
- from torch import nn
- from torchvision.models import vgg16
-
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG stage as a flat list of layers.

    The stage is ``num_convs`` (Conv2d 3x3, padding 1 -> ReLU) pairs
    followed by a single 2x2 max-pool, so the spatial resolution is
    halved while the channel count becomes ``out_channels``.

    Args:
        num_convs: number of convolution layers in this stage.
        in_channels: channels of the stage input.
        out_channels: channels produced by every conv in the stage.

    Returns:
        list[nn.Module]: layers in execution order, ready for
        ``nn.Sequential(*layers)``.
    """
    layers = []
    channels = in_channels
    for _ in range(num_convs):
        layers.append(nn.Conv2d(channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU(inplace=True))
        channels = out_channels  # after the first conv, width stays at out_channels
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))  # halves H and W
    return layers
-
class VGG16(nn.Module):
    """VGG-16 backbone for FCN-style semantic segmentation.

    ``forward`` exposes the intermediate feature maps needed for FCN
    skip connections: pool3 (stride 8), pool4 (stride 16) and the conv7
    output (stride 32).  The original fully connected layers fc6/fc7 are
    replaced by 1x1 convolutions without dropout.

    Args:
        pretrained: when True, copy the convolutional weights from
            ``torchvision.models.vgg16(pretrained=True)`` into
            ``self.features`` (the 1x1 conv6/conv7 stay randomly
            initialized).
    """

    def __init__(self, pretrained=True):
        super(VGG16, self).__init__()
        # (num_convs, in_channels, out_channels) for the five VGG stages.
        stage_cfg = [
            (2, 3, 64),
            (2, 64, 128),
            (3, 128, 256),
            (3, 256, 512),
            (3, 512, 512),
        ]
        features = []
        boundaries = []  # layer-list length after each stage's pool
        for num_convs, c_in, c_out in stage_cfg:
            features.extend(vgg_block(num_convs, c_in, c_out))
            boundaries.append(len(features))
        # Slice points used by forward() to tap pool3 / pool4 outputs.
        self.index_pool3 = boundaries[2]
        self.index_pool4 = boundaries[3]
        self.features = nn.Sequential(*features)

        # fc6/fc7 of VGG re-expressed as 1x1 convolutions (no dropout).
        self.conv6 = nn.Conv2d(512, 4096, kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.conv7 = nn.Conv2d(4096, 4096, kernel_size=1)

        if pretrained:
            # torchvision's vgg16 lists its feature weights first and in
            # the same layer order, so a positional remap of the keys is
            # enough; zip stops at our (shorter) key list, skipping the
            # classifier weights.
            source = vgg16(pretrained=pretrained).state_dict()
            source_keys = list(source.keys())
            remapped = {
                own_key: source[src_key]
                for own_key, src_key in zip(self.features.state_dict().keys(), source_keys)
            }
            self.features.load_state_dict(remapped)

    def forward(self, x):
        """Return (pool3, pool4, conv7) feature maps at strides 8/16/32."""
        pool3 = self.features[: self.index_pool3](x)                        # 1/8
        pool4 = self.features[self.index_pool3 : self.index_pool4](pool3)   # 1/16
        pool5 = self.features[self.index_pool4 :](pool4)                    # 1/32

        out = self.relu(self.conv6(pool5))   # 1/32
        out = self.relu(self.conv7(out))     # 1/32
        return pool3, pool4, out
(1)注意接在了conv7的输出上

(2)代码实现,注意输入图像的尺寸必须是32的整数倍
- from operator import imod
- from src.vgg_backbone import VGG16
- import torch.nn as nn
-
-
- class FCN_32S(nn.Module):
- def __init__(self, num_classes, backbone='vgg'):
- super(FCN_32S, self).__init__()
- if backbone == 'vgg':
- self.features = VGG16()
-
- self.scores1 = nn.Conv2d(4096, num_classes, kernel_size=1)
- self.relu = nn.ReLU(inplace=True)
- self.scores2 = nn.Conv2d(512, num_classes, kernel_size=1)
- self.scores3 = nn.Conv2d(256, num_classes, kernel_size=1)
-
- # torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size,
- # stride=1, padding=0, output_padding=0,
- # groups=1, bias=True, dilation=1, padding_mode='zeros')
-
- #输入图像是32的整数倍,如:224 输入模块前的h = 8 ,w = 8
- #公式:h_out=(h−1)×stride[0]−2×padding[0]+dilation[0]×(kernel_size[0]−1)+output_padding[0]+1
- # 计算 h_out = (h-1)*stride + kernel
-
- self.upsample_32x = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=32, stride=32)
- self.upsample_8x = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=8, stride=8)
- self.upsample_4x = nn.ConvTrans