首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >我很确定我已经把所有的数据和模型都放好了,但是我仍然得到了这个错误。

我很确定我已经把所有的数据和模型都放好了,但是我仍然得到了这个错误。
EN

Stack Overflow用户
提问于 2022-05-08 12:25:35
回答 1查看 130关注 0票数 0

这是错误堆栈。

代码语言:javascript
复制
Traceback (most recent call last):
  File "my_train.py", line 55, in <module>
    test()
  File "my_train.py", line 51, in test
    train.train()
  File "my_train.py", line 37, in train
    outputs = self.model(inputs)
  File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/rs/andy/andy/MyFCN/model/FCN/FCN32.py", line 12, in forward
    y = self.up_sampling(feature, None, 512)
  File "/home/rs/andy/andy/MyFCN/model/FCN/FCN.py", line 47, in up_sampling
    y = self.bn(batch_norm)(y)
  File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/modules/batchnorm.py", line 168, in forward
    return F.batch_norm(
  File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/functional.py", line 2421, in batch_norm
    return torch.batch_norm(
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument weight in method wrapper__cudnn_batch_norm)

这是我的全部代码:当我在本节中将数据放入GPU时,我认为这个文件中会出现错误行。火车:

代码语言:javascript
复制
import torch.cuda
from torch import nn, optim
from torch.utils.data import DataLoader

import dataset
from model.FCN import FCN32


class Train:
    def __init__(self, dataset_path, model, batch_size, shuffle):
        self.dataset = dataset.ObtTrainDataset(dataset_path)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"using {self.device}")
        self.model = model

    def train(self):
        self.model = self.model.to(self.device)
        epoch = 10
        criterion = nn.CrossEntropyLoss().to(self.device)
        optimizer = optim.Adam(self.model.parameters(), lr=0.0001)
        dl = DataLoader(dataset=self.dataset, batch_size=self.batch_size, shuffle=False)
        for i in range(epoch):
            print("------------{} begin--------------".format(i))
            self.model.train()
            running_loss = 0.0
            j = 0
            for data in dl:
                j += 1
                inputs, target = data
                inputs = inputs.to(self.device)
                target = target.to(self.device)
                target = torch.squeeze(target, 1).long().to(self.device)

                optimizer.zero_grad()

                outputs = self.model(inputs)
                loss = criterion(outputs, target)
                loss.backward()
                optimizer.step()
                running_loss += loss.cpu().item()
            print(running_loss)
            torch.save(self.model.state_dict(), f"models/obt_10_{i}.pth")



def test():
    dataset = "data/obt/image"
    model = FCN32(256, 5)
    train = Train(dataset, model, 8, True)
    train.train()


if __name__ == '__main__':
    test()

模型代码:这是FCN32s的实现,我认为没有什么可以运行的。但是错误堆栈表明FCN.py文件行47错误。FCN:

代码语言:javascript
复制
from torch import nn

from model.FCN.vgg import VGG16


class FCN(nn.Module):
    def __init__(self, input_size, num_classes, backbone="vgg16"):
        super().__init__()
        all_backones = ["vgg16"]
        if backbone not in all_backones:
            raise ValueError(f"backbone must be ont of the item in {all_backones}")

        if backbone == "vgg16":
            self.features = VGG16(input_size)
            self.num_classes = num_classes

            self.deconv1 = nn.ConvTranspose2d(512, 512, 3, 2, padding=1, output_padding=1)
            self.deconv2 = nn.ConvTranspose2d(512, 256, 3, 2, padding=1, output_padding=1)
            self.deconv3 = nn.ConvTranspose2d(256, 128, 3, 2, padding=1, output_padding=1)
            self.deconv4 = nn.ConvTranspose2d(128, 64, 3, 2, padding=1, output_padding=1)
            self.deconv5 = nn.ConvTranspose2d(64, 32, 3, 2, padding=1, output_padding=1)
            self.classifier = nn.Conv2d(32, num_classes, kernel_size=1, padding="same")
            self.bn = nn.BatchNorm2d
            self.relu = nn.ReLU()

    def forward(self, x):
        raise NotImplementedError("please implement it")

    def up_sampling(self, x1, x2=None, batch_norm=None):
        deconv = None
        assert batch_norm is not None
        if batch_norm == 512:
            deconv = self.deconv1
        elif batch_norm == 256:
            deconv = self.deconv2
        elif batch_norm == 128:
            deconv = self.deconv3
        elif batch_norm == 64:
            deconv = self.deconv4
        elif batch_norm == 32:
            deconv = self.deconv5
        y = deconv(x1)
        y = self.relu(y)
        if x2 is None:
            y = self.bn(batch_norm)(y)
        else:
            y = self.bn(batch_norm)(y + x2)
        return y


if __name__ == '__main__':
    pass

FCN32s

代码语言:javascript
复制
import torch
from torch import nn

from model.FCN import FCN


class FCN32(FCN):
    def forward(self, x):
        feature = self.features(x)["pool32"]
        y = self.up_sampling(feature, None, 512)
        y = self.up_sampling(y, None, 256)
        y = self.up_sampling(y, None, 128)
        y = self.up_sampling(y, None, 64)
        y = self.up_sampling(y, None, 32)
        y = self.classifier(y)
        return y

数据集:

代码语言:javascript
复制
import os
from glob import glob

import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T


class ObtTrainDataset(Dataset):
    def __init__(self, image_path=r"data/obt/image", mode="train"):
        assert mode in ("train", "val", "test")
        self.image_path = image_path
        self.image_list = glob(os.path.join(self.image_path, "*.npy"))
        self.mode = mode

        if mode in ("train", "val"):
            self.mask_path = self.image_path

        self.transform_x = T.Compose(
            [T.ToTensor(), T.Resize((256, 256))])
        self.transform_mask = T.Compose([T.ToTensor(), T.Resize((256, 256))])

    def __getitem__(self, index):
        if self.mode in ("train", "val"):
            image_name = os.path.basename(self.image_list[index])

            X = np.load(os.path.join(self.image_path, image_name))

            masks = np.load(os.path.join(self.image_path + "Masks", image_name))

            X = X / 1.0
            masks = masks / 1.0
            X = self.transform_x(X)
            masks = self.transform_mask(masks)
            X = X.type(torch.FloatTensor)
            masks = masks.type(torch.FloatTensor)

            return X, masks

        else:
            X = Image.open(self.image_list[index])
            X = self.transform_x(X)
            path = self.image_list[index]
            return X, path

    def __len__(self):
        return len(self.image_list)

我已经做了3个小时的帮助!

EN

回答 1

Stack Overflow用户

发布于 2022-05-08 14:16:51

您对BatchNorm2d的使用非常奇怪。在基类FCN中,您没有定义批处理规范层,而是定义了“位置保持器”:给定参数num_featuresself.bn不是层,而是返回层的函数。然后,在每次向前传递时,从零开始创建一个新的BatchNorm2d层,并通过该层转发您的特性。

不用说,在前向传递期间创建的新层不会移动到GPU:.to(self.device)是在前向传递之前和批处理规范层创建之前调用的。

但这并不是最糟糕的事情:您为每一次前向传递创建了一个新的批处理规范层,这意味着该层无法学习数据的统计信息,因此无法正常工作。

票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/72160962

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档