import matplotlib.pyplot as plt
import numpy as np
# functions to show an image
def imshow(img):
    """Display a (C, H, W) image tensor, undoing the (x - 0.5) / 0.5 normalization."""
    unnormalized = img / 2 + 0.5  # map values from [-1, 1] back to [0, 1]
    arr = unnormalized.numpy()
    # matplotlib expects channels-last layout: (H, W, C)
    plt.imshow(np.transpose(arr, (1, 2, 0)))
    plt.show()
# Fetch one batch of training images for visualization.
dataiter = iter(trainloader)
# DataLoader iterators are plain Python iterators: use the builtin next().
# The legacy `.next()` method does not exist in Python 3 / recent PyTorch
# and raises AttributeError.
images, labels = next(dataiter)
# show the batch as a single image grid
imshow(torchvision.utils.make_grid(images))
# print the class name of each image in the batch
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))
这里必须做一个扩展。
在2.2中我们可以看到神经网络中,每个层的输出都经过了激活函数的激活作用。但是在输出层后却缺少了激活函数而貌似“直接作用了损失函数”。
简单地说,原因就在于torch.nn.CrossEntropyLoss()将nn.LogSoftmax()激活函数和nn.NLLLoss()负对数似然损失函数集成在一起。 logsoftmax是argmax => softargmax => softmax => logsoftmax逐步优化的求极大值的index的期望的方法。 负对数似然损失函数(Negative Log Likelihood)就是计算最小化真实分布\(P(y|x)\)与模型输出分布\(P(\hat{y}|x)\)的距离,等价于最小化两者的交叉熵。实际使用函数时,是one-hot编码后的标签与logsoftmax结果相乘再求均值再取反,这个过程博主“不愿透露姓名的王建森”在他的博客中做过实验[7]讲解。
上述结论的详尽说明请参考知乎上Cassie的创作《吃透torch.nn.CrossEntropyLoss()》[8]、知乎上Gordon Lee的创作《交叉熵和极大似然估计的再理解》 [9]。
P.S. 对于torch.nn.CrossEntropyLoss()的官网Doc中提到的"This is particularly useful when you have an unbalanced training set."关于如何处理不均衡样本的几个解决办法,可以参考Quora上的问答《In classification, how do you handle an unbalanced training set?》[10]以及热心网友对此问答的翻译[11]。
2.4 训练神经网络
事情变得有趣起来了!我们只需要遍历我们的迭代器,将其输入进神经网络和优化器即可。
如果想在GPU上训练请参考文章开头给出的链接中的末尾部分有教授如何修改代码的部分。
--snip--
# 在③后插入代码
for epoch in range(5):  # number of full passes over the dataset
    running_loss = 0.0  # re-initialize the accumulated loss before each epoch
    for batch_idx, data in enumerate(trainloader, 0):
        inputs, labels = data  # unpack the mini-batch
        optimizer.zero_grad()  # clear gradients carried over from the previous batch

        # forward pass + backward pass + weight update
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # logging: Tensor.item() converts a 0-dim tensor to a Python number
        running_loss += loss.item()
        if batch_idx % 2000 == 1999:  # report once every 2000 mini-batches
            print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0
print('Finished Training')
Out:
model will be trained on device: 'cuda:0'
某一次输出结果日志整理一下如下表:
--snip--
# torch.max(input, dim) returns both the max values along dim and their indices;
# only the indices (the predicted class ids) are needed here
predicted = torch.max(outputs, 1)[1]
print('Predicted: ', ' '.join(f'{classes[predicted[j]]:5s}' for j in range(batch_size)))
Out:
Predicted: cat ship ship ship
看起来不错。下面就试一试在全部测试集上的表现:
correct = 0
total = 0
# Evaluation only: no gradient bookkeeping / backprop is needed
with torch.no_grad():
    for batch in testloader:
        test_images, test_labels = batch
        logits = net(test_images)  # forward pass through the network
        predicted = torch.max(logits.data, 1)[1]  # index of the highest score
        total += test_labels.size(0)  # count predictions made
        correct += (predicted == test_labels).sum().item()  # count correct ones
print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
Out:
Accuracy of the network on the 10000 test images: 61 %
--snip--
# two dicts tracking, per class, the number of correct and of total predictions
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}
# run inference; gradients are not needed
with torch.no_grad():
    for batch in testloader:
        batch_images, batch_labels = batch
        logits = net(batch_images)
        predictions = torch.max(logits, 1)[1]
        # tally per-class counts
        for label, prediction in zip(batch_labels, predictions):
            correct_pred[classes[label]] += int(label == prediction)
            total_pred[classes[label]] += 1
# report the accuracy of each class separately
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
Out:
Accuracy for class: plane is 66.2 %
Accuracy for class: car is 80.7 %
Accuracy for class: bird is 39.1 %
Accuracy for class: cat is 53.4 %
Accuracy for class: deer is 64.6 %
Accuracy for class: dog is 35.8 %
Accuracy for class: frog is 67.9 %
Accuracy for class: horse is 69.5 %
Accuracy for class: ship is 75.0 %
Accuracy for class: truck is 65.5 %
import matplotlib.pyplot as plt
import numpy as np
# functions to show an image
def imshow(img):
    """Display a (C, H, W) image tensor, undoing the (x - 0.5) / 0.5 normalization."""
    unnormalized = img / 2 + 0.5  # map values from [-1, 1] back to [0, 1]
    arr = unnormalized.numpy()
    # matplotlib expects channels-last layout: (H, W, C)
    plt.imshow(np.transpose(arr, (1, 2, 0)))
    plt.show()
# Fetch one batch of training images for visualization.
dataiter = iter(trainloader)
# DataLoader iterators are plain Python iterators: use the builtin next().
# The legacy `.next()` method does not exist in Python 3 / recent PyTorch
# and raises AttributeError.
images, labels = next(dataiter)
# show the batch as a single image grid
imshow(torchvision.utils.make_grid(images))
# print the class name of each image in the batch
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))
这里必须做一个扩展。
在2.2中我们可以看到神经网络中,每个层的输出都经过了激活函数的激活作用。但是在输出层后却缺少了激活函数而貌似“直接作用了损失函数”。
简单地说,原因就在于torch.nn.CrossEntropyLoss()将nn.LogSoftmax()激活函数和nn.NLLLoss()负对数似然损失函数集成在一起。 logsoftmax是argmax => softargmax => softmax => logsoftmax逐步优化的求极大值的index的期望的方法。 负对数似然损失函数(Negative Log Likelihood)就是计算最小化真实分布\(P(y|x)\)与模型输出分布\(P(\hat{y}|x)\)的距离,等价于最小化两者的交叉熵。实际使用函数时,是one-hot编码后的标签与logsoftmax结果相乘再求均值再取反,这个过程博主“不愿透露姓名的王建森”在他的博客中做过实验[7]讲解。
上述结论的详尽说明请参考知乎上Cassie的创作《吃透torch.nn.CrossEntropyLoss()》[8]、知乎上Gordon Lee的创作《交叉熵和极大似然估计的再理解》 [9]。
P.S. 对于torch.nn.CrossEntropyLoss()的官网Doc中提到的"This is particularly useful when you have an unbalanced training set."关于如何处理不均衡样本的几个解决办法,可以参考Quora上的问答《In classification, how do you handle an unbalanced training set?》[10]以及热心网友对此问答的翻译[11]。
2.4 训练神经网络
事情变得有趣起来了!我们只需要遍历我们的迭代器,将其输入进神经网络和优化器即可。
如果想在GPU上训练请参考文章开头给出的链接中的末尾部分有教授如何修改代码的部分。
--snip--
# 在③后插入代码
for epoch in range(5):  # number of full passes over the dataset
    running_loss = 0.0  # re-initialize the accumulated loss before each epoch
    for batch_idx, data in enumerate(trainloader, 0):
        inputs, labels = data  # unpack the mini-batch
        optimizer.zero_grad()  # clear gradients carried over from the previous batch

        # forward pass + backward pass + weight update
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # logging: Tensor.item() converts a 0-dim tensor to a Python number
        running_loss += loss.item()
        if batch_idx % 2000 == 1999:  # report once every 2000 mini-batches
            print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0
print('Finished Training')
Out:
model will be trained on device: 'cuda:0'
某一次输出结果日志整理一下如下表:
--snip--
# torch.max(input, dim) returns both the max values along dim and their indices;
# only the indices (the predicted class ids) are needed here
predicted = torch.max(outputs, 1)[1]
print('Predicted: ', ' '.join(f'{classes[predicted[j]]:5s}' for j in range(batch_size)))
Out:
Predicted: cat ship ship ship
看起来不错。下面就试一试在全部测试集上的表现:
correct = 0
total = 0
# Evaluation only: no gradient bookkeeping / backprop is needed
with torch.no_grad():
    for batch in testloader:
        test_images, test_labels = batch
        logits = net(test_images)  # forward pass through the network
        predicted = torch.max(logits.data, 1)[1]  # index of the highest score
        total += test_labels.size(0)  # count predictions made
        correct += (predicted == test_labels).sum().item()  # count correct ones
print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
Out:
Accuracy of the network on the 10000 test images: 61 %
--snip--
# two dicts tracking, per class, the number of correct and of total predictions
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}
# run inference; gradients are not needed
with torch.no_grad():
    for batch in testloader:
        batch_images, batch_labels = batch
        logits = net(batch_images)
        predictions = torch.max(logits, 1)[1]
        # tally per-class counts
        for label, prediction in zip(batch_labels, predictions):
            correct_pred[classes[label]] += int(label == prediction)
            total_pred[classes[label]] += 1
# report the accuracy of each class separately
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
Out:
Accuracy for class: plane is 66.2 %
Accuracy for class: car is 80.7 %
Accuracy for class: bird is 39.1 %
Accuracy for class: cat is 53.4 %
Accuracy for class: deer is 64.6 %
Accuracy for class: dog is 35.8 %
Accuracy for class: frog is 67.9 %
Accuracy for class: horse is 69.5 %
Accuracy for class: ship is 75.0 %
Accuracy for class: truck is 65.5 %