Preface
When building a network, I noticed that the ReLU function can be used in several different ways. The different usages do not change the number of trainable parameters, but they consume different amounts of compute resources, and each has its pros and cons. The details are described below.
This can be read alongside the following article; the code here is a modified version of the code in that post. https://mp.csdn.net/mp_blog/creation/editor/126259211
def BasicBlock(in_ch, out_ch, stride):
    return nn.Sequential(
        nn.Conv2d(in_ch, out_ch, 3, stride, padding=1, bias=False),
        nn.BatchNorm2d(out_ch),
        nn.ReLU(inplace=True),  # inplace=True operates in place and saves GPU memory
        nn.Conv2d(out_ch, out_ch, 3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(out_ch),
        nn.ReLU(inplace=True),
    )

class ResidualBlock_old(nn.Module):
    # Sub-module: Residual Block (nn.ReLU version)
    def __init__(self, in_ch, out_ch, stride=1, shortcut=None):
        super(ResidualBlock_old, self).__init__()
        self.BasicBlock = BasicBlock(in_ch, out_ch, stride)
        self.downsample = shortcut

    def forward(self, x):
        out = self.BasicBlock(x)
        residual = x if self.downsample is None else self.downsample(x)
        out += residual
        return out

class ResidualBlock(nn.Module):
    # Sub-module: Residual Block (F.relu version)
    def __init__(self, in_ch, out_ch, stride=1, shortcut=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_ch, out_ch, 3, stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.conv2 = nn.Conv2d(out_ch, out_ch, 3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_ch)

        self.downsample = shortcut

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        residual = x if self.downsample is None else self.downsample(x)
        out += residual
        return F.relu(out)
These are the two implementations: nn.ReLU and F.relu.
nn.ReLU is a layer (a module): it has to be added to an nn.Module (for example inside an nn.Sequential container) before it can be used, while F.relu is simply called as a function.
Which style to use is largely a matter of coding preference. In PyTorch, nn.X layers generally have a functional counterpart F.X, but not every F.X is suitable for use in forward or in other code: when the trained model is saved, parameters that live only inside an F.X call in forward cannot be saved with the model.
In other words, the F.X functions used in forward should normally be stateless ones, such as F.relu and F.avg_pool2d, which have no parameters and can therefore be used in any code fragment.
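To make the difference concrete, here is a minimal sketch (my own illustration, not part of the original post; WithModuleReLU and WithFunctionalReLU are names introduced here). The two modules compute exactly the same thing; the only visible difference is whether the ReLU is registered as a layer.

import torch
import torch.nn as nn
from torch.nn import functional as F

class WithModuleReLU(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(8, 8)
        self.relu = nn.ReLU()          # registered as a child layer

    def forward(self, x):
        return self.relu(self.fc(x))

class WithFunctionalReLU(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(8, 8)      # only the parameterized layer is registered

    def forward(self, x):
        return F.relu(self.fc(x))      # stateless function call, no layer object

m1, m2 = WithModuleReLU(), WithFunctionalReLU()
m2.load_state_dict(m1.state_dict())    # works: ReLU holds no parameters, only fc.weight/fc.bias are saved
x = torch.randn(2, 8)
print(torch.allclose(m1(x), m2(x)))    # True: identical outputs
print(m1)                              # nn.ReLU shows up as a layer here...
print(m2)                              # ...while F.relu leaves no trace in the printed structure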
II. Trainable parameters and GPU memory usage of the two versions


The number of trainable parameters stays the same, but the memory usage changes: the original code reports 589.16 MB, while the modified version reports 479.16 MB.
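A hedged sketch of how one might check both claims (my addition; count_trainable and peak_forward_mb are helper names introduced here, and ResNet34 refers to the class defined in the full code below). Note that this measures the actual peak GPU allocation of a single forward pass, which is not necessarily the same quantity as the MB figures printed by torchsummary.

import torch
from torchvision import models

def count_trainable(model):
    # total number of trainable parameters
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def peak_forward_mb(model, shape=(1, 3, 512, 512)):
    # peak GPU memory (MB) allocated during one forward pass
    model = model.cuda()
    torch.cuda.reset_peak_memory_stats()
    with torch.no_grad():
        model(torch.randn(shape, device="cuda"))
    return torch.cuda.max_memory_allocated() / 1024 ** 2

resnet_tv = models.resnet34(pretrained=False)   # reference implementation (nn.ReLU layers)
resnet_my = ResNet34(num_classes=1000)          # the F.relu version defined below
print(count_trainable(resnet_tv), count_trainable(resnet_my))   # trainable parameters should match
print(peak_forward_mb(resnet_tv), peak_forward_mb(resnet_my))   # actual peak memory in MB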
Full code
import torch.nn as nn
import torch
from torch.nn import functional as F
from torchsummary import summary
from torchvision import models

def BasicBlock(in_ch, out_ch, stride):
    return nn.Sequential(
        nn.Conv2d(in_ch, out_ch, 3, stride, padding=1, bias=False),
        nn.BatchNorm2d(out_ch),
        nn.ReLU(inplace=True),  # inplace=True operates in place and saves GPU memory
        nn.Conv2d(out_ch, out_ch, 3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(out_ch),
        nn.ReLU(inplace=True),
    )

class ResidualBlock_old(nn.Module):
    # Sub-module: Residual Block (nn.ReLU version)
    def __init__(self, in_ch, out_ch, stride=1, shortcut=None):
        super(ResidualBlock_old, self).__init__()
        self.BasicBlock = BasicBlock(in_ch, out_ch, stride)
        self.downsample = shortcut

    def forward(self, x):
        out = self.BasicBlock(x)
        residual = x if self.downsample is None else self.downsample(x)
        out += residual
        return out


class ResidualBlock(nn.Module):
    # Sub-module: Residual Block (F.relu version)
    def __init__(self, in_ch, out_ch, stride=1, shortcut=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_ch, out_ch, 3, stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.conv2 = nn.Conv2d(out_ch, out_ch, 3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_ch)

        self.downsample = shortcut

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        residual = x if self.downsample is None else self.downsample(x)
        out += residual
        return F.relu(out)


class ResNet34(nn.Module):
    # Main module: ResNet34
    def __init__(self, num_classes=1):
        super(ResNet34, self).__init__()
        self.init_block = nn.Sequential(
            nn.Conv2d(3, 64, 7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, 1)
        )
        self.layer1 = self.make_layer(64, 64, 3)
        self.layer2 = self.make_layer(64, 128, 4, stride=2)
        self.layer3 = self.make_layer(128, 256, 6, stride=2)
        self.layer4 = self.make_layer(256, 512, 3, stride=2)
        # Fully connected layer for classification
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, in_ch, out_ch, block_num, stride=1):
        shortcut = None
        # Decide whether the shortcut needs downsampling / a channel increase
        if not in_ch == out_ch:
            shortcut = nn.Sequential(
                nn.Conv2d(in_ch, out_ch, 1, stride, bias=False),  # 1x1 conv to increase channels; stride=2 halves the size; bias omitted for simplicity
                nn.BatchNorm2d(out_ch))
        layers = []
        layers.append(ResidualBlock(in_ch, out_ch, stride, shortcut))
        for i in range(1, block_num):
            layers.append(ResidualBlock(out_ch, out_ch))  # the remaining ResidualBlocks add the shortcut directly
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.init_block(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return nn.Sigmoid()(x)  # squash the output into (0, 1)


if __name__ == '__main__':
    filters = [64, 128, 256, 512]
    resnet = models.resnet34(pretrained=False)
    summary(resnet.cuda(), (3, 512, 512))

    print('***************\n*****************\n')

    # MY RESNET
    resnet_my = ResNet34(num_classes=1000)

    print(resnet_my)
    summary(resnet_my.cuda(), (3, 512, 512))
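The main program above compares torchvision's resnet34 against the F.relu version. As a further hedged sketch (ResNet34Old is a name introduced here, not part of the original code), the nn.ReLU style can also be compared under exactly the same architecture by overriding make_layer so that it builds ResidualBlock_old instead of ResidualBlock:

class ResNet34Old(ResNet34):
    # Identical architecture, but assembled from ResidualBlock_old (the nn.ReLU version)
    def make_layer(self, in_ch, out_ch, block_num, stride=1):
        shortcut = None
        if not in_ch == out_ch:
            shortcut = nn.Sequential(
                nn.Conv2d(in_ch, out_ch, 1, stride, bias=False),
                nn.BatchNorm2d(out_ch))
        layers = [ResidualBlock_old(in_ch, out_ch, stride, shortcut)]
        for i in range(1, block_num):
            layers.append(ResidualBlock_old(out_ch, out_ch))
        return nn.Sequential(*layers)

resnet_old = ResNet34Old(num_classes=1000)
summary(resnet_old.cuda(), (3, 512, 512))   # compare with the two summaries printed above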