目前在学习pytorch,自己写了一些例子,在这里记录下来一些报错及总结
1. RuntimeError: Expected object of type torch.FloatTensor but found type torch.cuda.FloatTensor for argument #2 'weight'
详细报错信息
Traceback (most recent call last):
  File "dogvscat-resnet.py", line 105, in <module>
    outputs = net(inputs)
  File "/home/lzx/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 477, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/lzx/anaconda3/envs/pytorch/lib/python3.6/site-packages/torchvision-0.2.1-py3.6.egg/torchvision/models/resnet.py", line 139, in forward
  File "/home/lzx/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 477, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/lzx/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/conv.py", line 301, in forward
    self.padding, self.dilation, self.groups)
RuntimeError: Expected object of type torch.FloatTensor but found type torch.cuda.FloatTensor for argument #2 'weight'
参考:
这个报错其实比较隐蔽,用Google搜索的第一页都没什么参考价值,只有上面的这个链接里提醒了我,
在GPU上进行训练时,需要把模型和数据都加上.cuda(),如
model.cuda()
但是对于数据,这个.cuda()并非是inplace操作,就是说不单单是在变量名后面加上.cuda()就可以了
还必须显式地赋值回去,即:
data.cuda()是不行的,而
data = data.cuda()才是可以的。
这样的显式赋值的细节非常重要。
示例代码:用LeNet做猫狗的二分类,自己写的代码
请重点关注以下行的写法:46 47 57 58 96 97(即定义 device,并对模型和数据调用 .to(device) 的那几行;注意数据必须写成 x = x.to(device) 把结果赋值回去)
"""Train a LeNet-style CNN to classify cat vs. dog images with PyTorch.

The script demonstrates how to run both the model and the data on the same
device (GPU when available, CPU otherwise):

* ``net.to(device)`` moves the module's parameters in place.
* ``tensor.to(device)`` is NOT in-place; the result must be assigned back,
  e.g. ``inputs = inputs.to(device)``.
"""
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision as tv
from PIL import Image
from torch import optim
from torch.utils import data
from torch.utils.data import DataLoader
from torchvision import transforms as T
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToPILImage

show = ToPILImage()  # converts a Tensor back into a PIL Image for visualization


transform = T.Compose([
    T.Resize(32),      # scale the image so its shorter side is 32 px (aspect ratio kept)
    T.CenterCrop(32),  # crop a 32x32 patch from the center
    T.ToTensor(),      # PIL Image -> Tensor, values scaled to [0, 1]
    T.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),  # shift/scale to [-1, 1]
])


class Net(nn.Module):
    """LeNet-style network for 3-channel 32x32 inputs with 2 output classes.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, reduce a
    32x32 image to 16 feature maps of 5x5, which feed three linear layers.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(x.size()[0], -1)  # flatten everything except the batch dim
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# Define the device unconditionally so the script also runs on CPU-only
# machines (previously `device` only existed when CUDA was available).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("Using GPU")

net = Net()
# Key point: Module.to() moves the parameters in place, no re-assignment needed.
net.to(device)


def test():
    """Evaluate ``net`` on ``testloader`` and print the accuracy in percent.

    Reads the module-level ``net``, ``device`` and ``testloader``.
    """
    correct = 0  # number of correctly predicted images
    total = 0    # number of images seen
    # Gradients are not needed for evaluation; disabling autograd saves
    # time and memory.
    with torch.no_grad():
        for images, labels in testloader:
            # Key point: Tensor.to() returns a new tensor; assign it back.
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            # .item() yields a Python int, so the division below is ordinary
            # float division rather than (possibly truncating) tensor division.
            correct += (predicted == labels).sum().item()

    print('Accuracy in the test dataset: %.1f %%' % (100 * correct / total))


# The guard keeps DataLoader worker processes (num_workers > 0) from
# re-running the training code on platforms that spawn instead of fork.
if __name__ == "__main__":
    train_dataset = ImageFolder('/home/lzx/datasets/dogcat/sub-train/', transform=transform)
    test_dataset = ImageFolder('/home/lzx/datasets/dogcat/sub-test/', transform=transform)
    # Alternative dataset definitions kept from the original script:
    # dataset = DogCat('/home/lzx/datasets/dogcat/sub-train/', transforms=transform)
    # train_dataset = ImageFolder('/Users/lizhixuan/PycharmProjects/pytorch_learning/Chapter5/sub-train/', transform=transform)
    # test_dataset = ImageFolder('/Users/lizhixuan/PycharmProjects/pytorch_learning/Chapter5/sub-test/', transform=transform)

    trainloader = DataLoader(
        train_dataset,
        batch_size=512,
        shuffle=True,
        num_workers=4)
    testloader = DataLoader(
        test_dataset,
        batch_size=512,
        shuffle=False,
        num_workers=4)
    classes = ('cat', 'dog')

    criterion = nn.CrossEntropyLoss()  # cross-entropy loss
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    print("Starting to train")
    torch.set_num_threads(8)
    print_gap = 10  # report the running loss every `print_gap` batches
    for epoch in range(1000):

        running_loss = 0.0
        for i, batch in enumerate(trainloader, 0):

            # Input data.
            inputs, labels = batch
            # Key point: Tensor.to() returns a new tensor; assign it back.
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Reset accumulated gradients.
            optimizer.zero_grad()

            # forward + backward
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            # print("outputs %s labels %s" % (outputs, labels))
            loss.backward()

            # Update the parameters.
            optimizer.step()

            # Logging. `loss` is a scalar tensor: read its value with
            # loss.item(), not loss[0].
            running_loss += loss.item()
            if i % print_gap == (print_gap - 1):
                print('[%d, %5d] loss: %.3f'
                      % (epoch + 1, i + 1, running_loss / print_gap))
                running_loss = 0.0
        # NOTE(review): the original indentation was lost in the paste;
        # test() is assumed to run once per epoch -- confirm.
        test()
    print('Finished Training')
这样一来,就完全明白了如何把代码放在GPU上运行了,哈哈