我正在尝试在自定义架构中重用一些resnet层,但遇到了一个我无法解决的问题。下面是一个简化的示例。当我运行以下代码时:
import torch
import torch.nn as nn
from torchsummary import summary
from torchvision import models
def convrelu(in_channels, out_channels, kernel, padding):
    """Build a 2-D convolution followed by an in-place ReLU.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernel: Kernel size forwarded to ``nn.Conv2d``.
        padding: Padding forwarded to ``nn.Conv2d``.

    Returns:
        An ``nn.Sequential`` containing the ``Conv2d`` and the ``ReLU``.
    """
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
        nn.ReLU(inplace=True),
    )
class ResNetUNet(nn.Module):
    """Toy module that applies the first three children of a ResNet-18.

    NOTE: ``base_model`` is stored as an attribute, which registers it as a
    child module, and ``layer0`` wraps the very same layer objects taken from
    it. Each of those layers is therefore reachable through two parents
    (``base_model`` and ``layer0``).
    """

    def __init__(self):
        super().__init__()
        self.base_model = models.resnet18(pretrained=False)
        self.base_layers = list(self.base_model.children())
        # Reuse the first three children of the backbone as the stem.
        self.layer0 = nn.Sequential(*self.base_layers[:3])

    def forward(self, x):
        """Print the input shape and return ``layer0`` applied to ``x``."""
        print(x.shape)
        output = self.layer0(x)
        return output
# Instantiate the toy model and move its parameters to the GPU.
base_model = ResNetUNet().cuda()
summary(base_model,(3,224,224))
得到的输出是:
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 112, 112] 9,408
Conv2d-2 [-1, 64, 112, 112] 9,408
BatchNorm2d-3 [-1, 64, 112, 112] 128
BatchNorm2d-4 [-1, 64, 112, 112] 128
ReLU-5 [-1, 64, 112, 112] 0
ReLU-6 [-1, 64, 112, 112] 0
================================================================
Total params: 19,072
Trainable params: 19,072
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 36.75
Params size (MB): 0.07
Estimated Total Size (MB): 37.40
----------------------------------------------------------------
这里每一层都被重复列出了(有2个卷积层、2个批归一化层、2个ReLU),而不是每层只出现一次。如果我打印出self.base_layers[:3],我得到:
[Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True)]
它只显示了三层,没有重复。为什么我的层会被重复列出?
我使用的是pytorch 1.4.0版本
发布于 2020-05-08 00:41:07
您的层实际上并没有被调用两次。这只是summary的实现方式造成的假象。
原因很简单:summary会递归地遍历模型的所有子模块,并为每个子模块注册一个前向钩子(forward hook)。由于您有重复的子模块(同一批层既在base_model中,又在layer0中),这些重复的模块就会被注册多个钩子。当summary执行前向传播时,每个这样的模块都会触发两个钩子,于是报告中的层就出现了重复。
对于您的玩具示例,一个解决方案就是不把base_model赋值为属性,因为它在前向传播(forward)过程中不会被用到。这样可以避免base_model被注册为子模块。
class ResNetUNet(nn.Module):
def __init__(self):
super().__init__()
base_model = models.resnet18(pretrained=False)
base_layers = list(base_model.children())
self.layer0 = nn.Sequential(*base_layers[:3])另一个解决方案是创建一个修改过的summary版本,它不会多次为同一个模块注册钩子。下面是一个扩展的summary,在这里我使用一个名为already_registered的集合来跟踪已经注册了钩子的模块,以避免注册多个钩子。
from collections import OrderedDict
import torch
import torch.nn as nn
import numpy as np
def summary(model, input_size, batch_size=-1, device="cuda"):
    """Print a per-layer table of output shapes and parameter counts.

    A forward hook is attached to every submodule that is not a
    ``nn.Sequential``, a ``nn.ModuleList`` or ``model`` itself. One forward
    pass is then run on random input, and the shapes recorded by the hooks are
    printed together with parameter totals and rough memory estimates.

    Each module object receives at most one hook, even when it is reachable
    through several parents, so layers shared between containers are reported
    once instead of once per parent.

    Args:
        model: The ``nn.Module`` to inspect; it is called as ``model(*x)``.
        input_size: Shape of one input without the batch dimension (a tuple),
            or a list of such tuples for models taking several inputs.
        batch_size: Value written into the batch position of the printed
            shapes and used for the input-size estimate.
        device: ``"cuda"`` or ``"cpu"`` (case-insensitive). The random input
            is created on the GPU only if CUDA is actually available.

    Raises:
        AssertionError: If ``device`` is neither ``"cuda"`` nor ``"cpu"``.
    """
    # Modules that already carry a hook, so that a module shared by several
    # parents is not registered (and therefore not reported) more than once.
    already_registered = set()
    layer_stats = OrderedDict()
    hooks = []

    def register_hook(module):
        def hook(module, inputs, output):
            class_name = module.__class__.__name__
            m_key = "%s-%i" % (class_name, len(layer_stats) + 1)
            entry = OrderedDict()
            layer_stats[m_key] = entry

            entry["input_shape"] = list(inputs[0].size())
            entry["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
                entry["output_shape"] = [
                    [-1] + list(o.size())[1:] for o in output
                ]
            else:
                entry["output_shape"] = list(output.size())
                entry["output_shape"][0] = batch_size

            # Only ``weight`` and ``bias`` are counted; the ``size`` check
            # skips attributes that are present but set to ``None``.
            params = 0
            if hasattr(module, "weight") and hasattr(module.weight, "size"):
                params += module.weight.numel()
                entry["trainable"] = module.weight.requires_grad
            if hasattr(module, "bias") and hasattr(module.bias, "size"):
                params += module.bias.numel()
            entry["nb_params"] = params

        if (
            not isinstance(module, (nn.Sequential, nn.ModuleList))
            and module is not model
            and module not in already_registered
        ):
            already_registered.add(module)
            hooks.append(module.register_forward_hook(hook))

    device = device.lower()
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    use_cuda = device == "cuda" and torch.cuda.is_available()
    tensor_device = torch.device("cuda" if use_cuda else "cpu")

    # A single shape is treated as a one-input network.
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # batch_size of 2 for batchnorm
    x = [
        torch.rand(2, *in_size, dtype=torch.float32, device=tensor_device)
        for in_size in input_size
    ]

    model.apply(register_hook)
    try:
        model(*x)
    finally:
        # Always detach the hooks, even if the forward pass raised, so the
        # model is left exactly as it was handed in.
        for h in hooks:
            h.remove()

    print("----------------------------------------------------------------")
    line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
    print(line_new)
    print("================================================================")
    total_params = 0
    total_output = 0
    trainable_params = 0
    for layer, entry in layer_stats.items():
        line_new = "{:>20} {:>25} {:>15}".format(
            layer,
            str(entry["output_shape"]),
            "{0:,}".format(entry["nb_params"]),
        )
        total_params += entry["nb_params"]
        total_output += np.prod(entry["output_shape"])
        if entry.get("trainable"):
            trainable_params += entry["nb_params"]
        print(line_new)

    # assume 4 bytes/number (float on cuda).
    total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
    total_params_size = abs(total_params * 4. / (1024 ** 2.))
    total_size = total_params_size + total_output_size + total_input_size

    print("================================================================")
    print("Total params: {0:,}".format(total_params))
    print("Trainable params: {0:,}".format(trainable_params))
    print("Non-trainable params: {0:,}".format(total_params - trainable_params))
    print("----------------------------------------------------------------")
    print("Input size (MB): %0.2f" % total_input_size)
    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
    print("Params size (MB): %0.2f" % total_params_size)
    print("Estimated Total Size (MB): %0.2f" % total_size)
    print("----------------------------------------------------------------")
# return summary
https://stackoverflow.com/questions/61668501
复制相似问题