Here is the error stack trace:
Traceback (most recent call last):
  File "my_train.py", line 55, in <module>
    test()
  File "my_train.py", line 51, in test
    train.train()
  File "my_train.py", line 37, in train
    outputs = self.model(inputs)
  File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/rs/andy/andy/MyFCN/model/FCN/FCN32.py", line 12, in forward
    y = self.up_sampling(feature, None, 512)
  File "/home/rs/andy/andy/MyFCN/model/FCN/FCN.py", line 47, in up_sampling
    y = self.bn(batch_norm)(y)
  File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/modules/batchnorm.py", line 168, in forward
    return F.batch_norm(
  File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/functional.py", line 2421, in batch_norm
    return torch.batch_norm(
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument weight in method wrapper__cudnn_batch_norm)

Here is all of my code. The data is moved to the GPU in the training script below, so I expected the failing line to be in this file. The training script:
import torch.cuda
from torch import nn, optim
from torch.utils.data import DataLoader

import dataset
from model.FCN import FCN32


class Train:
    def __init__(self, dataset_path, model, batch_size, shuffle):
        self.dataset = dataset.ObtTrainDataset(dataset_path)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"using {self.device}")
        self.model = model

    def train(self):
        self.model = self.model.to(self.device)
        epoch = 10
        criterion = nn.CrossEntropyLoss().to(self.device)
        optimizer = optim.Adam(self.model.parameters(), lr=0.0001)
        dl = DataLoader(dataset=self.dataset, batch_size=self.batch_size, shuffle=False)
        for i in range(epoch):
            print("------------{} begin--------------".format(i))
            self.model.train()
            running_loss = 0.0
            j = 0
            for data in dl:
                j += 1
                inputs, target = data
                inputs = inputs.to(self.device)
                target = target.to(self.device)
                target = torch.squeeze(target, 1).long().to(self.device)
                optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = criterion(outputs, target)
                loss.backward()
                optimizer.step()
                running_loss += loss.cpu().item()
            print(running_loss)
            torch.save(self.model.state_dict(), f"models/obt_10_{i}.pth")


def test():
    dataset = "data/obt/image"
    model = FCN32(256, 5)
    train = Train(dataset, model, 8, True)
    train.train()


if __name__ == '__main__':
    test()

Model code: this is the FCN32s implementation. I don't see anything in it that should fail, but the error stack points to line 47 of FCN.py. FCN:
from torch import nn

from model.FCN.vgg import VGG16


class FCN(nn.Module):
    def __init__(self, input_size, num_classes, backbone="vgg16"):
        super().__init__()
        all_backones = ["vgg16"]
        if backbone not in all_backones:
            raise ValueError(f"backbone must be one of the items in {all_backones}")
        if backbone == "vgg16":
            self.features = VGG16(input_size)
        self.num_classes = num_classes
        self.deconv1 = nn.ConvTranspose2d(512, 512, 3, 2, padding=1, output_padding=1)
        self.deconv2 = nn.ConvTranspose2d(512, 256, 3, 2, padding=1, output_padding=1)
        self.deconv3 = nn.ConvTranspose2d(256, 128, 3, 2, padding=1, output_padding=1)
        self.deconv4 = nn.ConvTranspose2d(128, 64, 3, 2, padding=1, output_padding=1)
        self.deconv5 = nn.ConvTranspose2d(64, 32, 3, 2, padding=1, output_padding=1)
        self.classifier = nn.Conv2d(32, num_classes, kernel_size=1, padding="same")
        self.bn = nn.BatchNorm2d
        self.relu = nn.ReLU()

    def forward(self, x):
        raise NotImplementedError("please implement it")

    def up_sampling(self, x1, x2=None, batch_norm=None):
        deconv = None
        assert batch_norm is not None
        if batch_norm == 512:
            deconv = self.deconv1
        elif batch_norm == 256:
            deconv = self.deconv2
        elif batch_norm == 128:
            deconv = self.deconv3
        elif batch_norm == 64:
            deconv = self.deconv4
        elif batch_norm == 32:
            deconv = self.deconv5
        y = deconv(x1)
        y = self.relu(y)
        if x2 is None:
            y = self.bn(batch_norm)(y)
        else:
            y = self.bn(batch_norm)(y + x2)
        return y


if __name__ == '__main__':
    pass

FCN32s:
import torch
from torch import nn

from model.FCN import FCN


class FCN32(FCN):
    def forward(self, x):
        feature = self.features(x)["pool32"]
        y = self.up_sampling(feature, None, 512)
        y = self.up_sampling(y, None, 256)
        y = self.up_sampling(y, None, 128)
        y = self.up_sampling(y, None, 64)
        y = self.up_sampling(y, None, 32)
        y = self.classifier(y)
        return y

Dataset:
import os
from glob import glob

import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T


class ObtTrainDataset(Dataset):
    def __init__(self, image_path=r"data/obt/image", mode="train"):
        assert mode in ("train", "val", "test")
        self.image_path = image_path
        self.image_list = glob(os.path.join(self.image_path, "*.npy"))
        self.mode = mode
        if mode in ("train", "val"):
            self.mask_path = self.image_path
        self.transform_x = T.Compose(
            [T.ToTensor(), T.Resize((256, 256))])
        self.transform_mask = T.Compose([T.ToTensor(), T.Resize((256, 256))])

    def __getitem__(self, index):
        if self.mode in ("train", "val"):
            image_name = os.path.basename(self.image_list[index])
            X = np.load(os.path.join(self.image_path, image_name))
            masks = np.load(os.path.join(self.image_path + "Masks", image_name))
            X = X / 1.0
            masks = masks / 1.0
            X = self.transform_x(X)
            masks = self.transform_mask(masks)
            X = X.type(torch.FloatTensor)
            masks = masks.type(torch.FloatTensor)
            return X, masks
        else:
            X = Image.open(self.image_list[index])
            X = self.transform_x(X)
            path = self.image_list[index]
            return X, path

    def __len__(self):
        return len(self.image_list)

I have been stuck on this for three hours. Any help is appreciated!
Answered on 2022-05-08 14:16:51
Your use of BatchNorm2d is very odd. In the base class FCN you never define an actual batch-norm layer; you only store a "placeholder": self.bn is not a layer but a callable that, given the argument num_features, returns a layer. Then, on every forward pass, you construct a brand-new BatchNorm2d layer from scratch and push your features through it.
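In other words (my own illustration of what the question's code does, not a quote from it):

    self.bn = nn.BatchNorm2d   # stores the class itself, not a module instance
    y = self.bn(512)(y)        # builds a fresh, untrained BatchNorm2d(512) on the CPU on every call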
Needless to say, a layer created in the middle of a forward pass is never moved to the GPU: .to(self.device) was called before the forward pass, before the batch-norm layer even existed. That is why PyTorch finds the batch-norm weight on the CPU while the input is on cuda:0.
But that is not even the worst part: because you create a new batch-norm layer on every forward pass, the layer can never learn the statistics of your data, so it cannot work properly.
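A minimal sketch of one possible fix, based on the FCN code above (the nn.ModuleDict and its name self.bns are my own choice, not part of the original): create one BatchNorm2d per channel size once in __init__ and register it as a sub-module, so that .to(self.device) moves it to the GPU with the rest of the model and its running statistics persist across batches.

    from torch import nn

    from model.FCN.vgg import VGG16


    class FCN(nn.Module):
        def __init__(self, input_size, num_classes, backbone="vgg16"):
            super().__init__()
            if backbone != "vgg16":
                raise ValueError("only the vgg16 backbone is supported here")
            self.features = VGG16(input_size)
            self.num_classes = num_classes
            # Deconvolutions and classifier as in the question.
            self.deconv1 = nn.ConvTranspose2d(512, 512, 3, 2, padding=1, output_padding=1)
            self.deconv2 = nn.ConvTranspose2d(512, 256, 3, 2, padding=1, output_padding=1)
            self.deconv3 = nn.ConvTranspose2d(256, 128, 3, 2, padding=1, output_padding=1)
            self.deconv4 = nn.ConvTranspose2d(128, 64, 3, 2, padding=1, output_padding=1)
            self.deconv5 = nn.ConvTranspose2d(64, 32, 3, 2, padding=1, output_padding=1)
            self.classifier = nn.Conv2d(32, num_classes, kernel_size=1, padding="same")
            # One BatchNorm2d per deconv output, registered once so that
            # model.to(device) moves them and their running stats are learned.
            self.bns = nn.ModuleDict({
                str(c): nn.BatchNorm2d(c) for c in (512, 256, 128, 64, 32)
            })
            self.relu = nn.ReLU()

        def forward(self, x):
            raise NotImplementedError("please implement it")

        def up_sampling(self, x1, x2=None, batch_norm=None):
            assert batch_norm is not None
            deconv = {512: self.deconv1, 256: self.deconv2, 128: self.deconv3,
                      64: self.deconv4, 32: self.deconv5}[batch_norm]
            y = self.relu(deconv(x1))
            bn = self.bns[str(batch_norm)]  # reuse the registered layer instead of building a new one
            return bn(y) if x2 is None else bn(y + x2)

With the batch-norm layers registered as sub-modules, model.to(device) and model.state_dict() both see them, and FCN32.forward can stay exactly as written in the question.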
https://stackoverflow.com/questions/72160962