All code in this article assumes the default parameters.
nn.MSELoss() computes the mean squared error (MSE) between the input $x$ and the target $y$.
torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean')
(1) reduction = 'none'

$\ell(x,y)=L=\{l_1,\cdots,l_N\}^T$, where $N$ is the batch size, and

$l_n=(x_n-y_n)^2$

Here the input $x$ and target $y$ are tensors of arbitrary (but matching) shapes, each containing a total of $n$ elements.
(2) reduction is not 'none'

$\ell(x,y)=\begin{cases}\text{mean}(L), & \text{reduction='mean'}\\ \text{sum}(L), & \text{reduction='sum'}\end{cases}$
The default is reduction='mean', which means:

(a) first compute $l_n=(x_n-y_n)^2$;

(b) then average over $L=\{l_1,\cdots,l_N\}^T$,

i.e. $\ell(x,y)=\text{mean}(L)=\dfrac{1}{N}\displaystyle\sum_{n=1}^N(x_n-y_n)^2$
import torch
import torch.nn as nn
import numpy as np
def MSE_func(outputs, targets):  # outputs and targets must have the same shape
    outputs = outputs.numpy()
    targets = targets.numpy()
    loss = np.sum((outputs - targets)**2) / outputs.size
    return loss
batch = 3
criterion = nn.MSELoss()
outputs = torch.rand(batch, 5)
targets = torch.rand(batch, 5)
loss = criterion(outputs, targets)
print("网络输出:\n\t",outputs)
print("目标输出:\n\t",targets)
print("nn.MSELoss()值:\n\t",loss)
print("MSE_func()值:\n\t",MSE_func(outputs, targets))
Output:

Network output:
    tensor([[0.4415, 0.5011, 0.5475, 0.4359, 0.4999],
            [0.2731, 0.8872, 0.8158, 0.1117, 0.0668],
            [0.9196, 0.1141, 0.6827, 0.5296, 0.2487]])
Target output:
    tensor([[0.2042, 0.0540, 0.4651, 0.1866, 0.3254],
            [0.7488, 0.2806, 0.3361, 0.3648, 0.5692],
            [0.8242, 0.7141, 0.5642, 0.5478, 0.1543]])
nn.MSELoss() value:
    tensor(0.1259)
MSE_func() value:
    0.1259116570154826
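As a side note, the other two reduction modes can be checked the same way. Here is a minimal sketch that reuses outputs and targets from the example above (reduction='none' and reduction='sum' are documented parameter values):

# Minimal sketch of the other reduction modes, reusing outputs/targets from above
loss_none = nn.MSELoss(reduction='none')(outputs, targets)  # element-wise (x_n - y_n)^2, same shape as inputs
loss_sum = nn.MSELoss(reduction='sum')(outputs, targets)    # sum over all element-wise losses
loss_mean = nn.MSELoss(reduction='mean')(outputs, targets)  # the default
print(torch.allclose(loss_mean, loss_none.mean()))  # expected: True
print(torch.allclose(loss_sum, loss_none.sum()))    # expected: True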
nn.BCELoss() computes the binary cross entropy (BCE) between the target probabilities and the input probabilities.
torch.nn.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean')
(1) reduction = 'none'

$\ell(x,y)=L=\{l_1,\cdots,l_N\}^T$, where $N$ is the batch size, and

$l_n=-w_n\left[y_n\cdot\log x_n+(1-y_n)\cdot\log(1-x_n)\right],\quad y_n\in[0,1]$

(2) reduction is not 'none'

$\ell(x,y)=\begin{cases}\text{mean}(L), & \text{reduction='mean'}\\ \text{sum}(L), & \text{reduction='sum'}\end{cases}$
Since the defaults are weight=None and reduction='mean', this means:

(a) first compute $l_n=-\left[y_n\cdot\log x_n+(1-y_n)\cdot\log(1-x_n)\right]$;

(b) then average over $L=\{l_1,\cdots,l_N\}^T$,

i.e. $\ell(x,y)=\text{mean}(L)=-\dfrac{1}{N}\displaystyle\sum_{n=1}^N\left[y_n\cdot\log x_n+(1-y_n)\cdot\log(1-x_n)\right]$
import torch
import torch.nn as nn
import numpy as np
def sig_fun(inputs):
    return 1/(1+np.exp(-inputs))

def BCE_func(outputs, targets):  # outputs and targets must have the same shape
    x = outputs.detach().numpy()
    y = targets.detach().numpy()
    loss = -np.mean(y*np.log(x)+(1-y)*np.log(1-x))
    return loss
batch = 3
criterion = nn.BCELoss()
m = nn.Sigmoid()  # Can't call numpy() on a Tensor that requires grad
inputs = torch.randn(batch, 10, requires_grad=True)  # convert to numpy via tensor.detach().numpy()
outputs = m(inputs)
targets = torch.empty(batch, dtype=torch.long).random_(10)  # random class indices
targets = torch.tensor(np.eye(10)[targets], dtype=torch.float32).reshape(batch, 10)  # one-hot encode
loss = criterion(outputs, targets)
print("输入:\n\t",inputs)
print("sig_fun()输出:\n\t", sig_fun(inputs.detach().numpy())) # 测试nn.Sigmoid()
print("nn.Sigmoid()输出:\n\t", outputs)
print("目标输出:\n",targets)
print("nn.BCELoss()值:\n\t",loss)
print("BCE_func()值:\n\t",BCE_func(outputs, targets))
Output:

Input:
    tensor([[ 0.0021,  0.6016, -0.1959,  0.4794,  0.5754,  0.5058, -0.0619,  1.8369, -0.7354, -0.3700],
            [ 0.2142, -0.6579,  1.1027, -1.0919, -0.2439,  1.5869,  0.7264,  0.1992, -0.5063, -0.1620],
            [-0.1333,  0.0282, -0.2834, -1.1821, -2.0245, -1.2802,  1.7498,  0.3211, -0.1836, -1.0151]], requires_grad=True)
sig_fun() output:
    [[0.50052077 0.6460125  0.45117807 0.61759907 0.64000136 0.6238103  0.48454005 0.8625772  0.32400492 0.40854385]
     [0.55335456 0.34121954 0.75076365 0.25125644 0.43933427 0.8301807  0.67400527 0.549633   0.37605163 0.45957667]
     [0.46673262 0.5070478  0.42961934 0.23467043 0.11665367 0.21751124 0.8519326  0.57959706 0.45423016 0.26597494]]
nn.Sigmoid() output:
    tensor([[0.5005, 0.6460, 0.4512, 0.6176, 0.6400, 0.6238, 0.4845, 0.8626, 0.3240, 0.4085],
            [0.5534, 0.3412, 0.7508, 0.2513, 0.4393, 0.8302, 0.6740, 0.5496, 0.3761, 0.4596],
            [0.4667, 0.5070, 0.4296, 0.2347, 0.1167, 0.2175, 0.8519, 0.5796, 0.4542, 0.2660]], grad_fn=<SigmoidBackward0>)
Target output:
    tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
            [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
            [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])
nn.BCELoss() value:
    tensor(0.8323, grad_fn=<BinaryCrossEntropyBackward0>)
BCE_func() value:
    0.83233833
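As an aside, PyTorch also provides nn.BCEWithLogitsLoss, which folds the Sigmoid into the loss in a more numerically stable way. A minimal sketch of the equivalence, reusing inputs, targets, and loss from the example above:

# Sketch: BCEWithLogitsLoss on raw logits should match BCELoss on sigmoid outputs
criterion_logits = nn.BCEWithLogitsLoss()
loss_logits = criterion_logits(inputs, targets)  # takes the raw logits, no explicit Sigmoid needed
print(torch.allclose(loss_logits, loss))         # expected: True (up to floating-point error)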
nn.CrossEntropyLoss() computes the cross entropy between the input logits and the target.
torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean', label_smoothing=0.0)

The default is reduction='mean'.
(1) Target with class indices

Suppose $x$ is the network output, $y$ is the desired output (target), $C$ is the number of classes, and $N$ is the batch size. Then

$\ell(x,y)=L=\{l_1,\cdots,l_N\}^T$

$l_n=-\log\dfrac{\exp(x_{n,y_n})}{\sum_{c=1}^C\exp(x_{n,c})}$

$\ell(x,y)=\begin{cases}\text{mean}(L), & \text{reduction='mean'}\\ \text{sum}(L), & \text{reduction='sum'}\end{cases}$
(2) Target with class probabilities

$\ell(x,y)=L=\{l_1,\cdots,l_N\}^T$

$l_n=-\displaystyle\sum_{c=1}^C\log\dfrac{\exp(x_{n,c})}{\sum_{i=1}^C\exp(x_{n,i})}\,y_{n,c}$

$\ell(x,y)=\begin{cases}\text{mean}(L), & \text{reduction='mean'}\\ \text{sum}(L), & \text{reduction='sum'}\end{cases}$
Suppose the batch size is 3 and the number of classes is 5, and let the network output matrix be

$x=\begin{bmatrix}x_{11}&x_{12}&x_{13}&x_{14}&x_{15}\\x_{21}&x_{22}&x_{23}&x_{24}&x_{25}\\x_{31}&x_{32}&x_{33}&x_{34}&x_{35}\end{bmatrix}$
(1) The target is class indices (target with class indices), e.g. tensor([1, 0, 4]). The corresponding softmax-style class probabilities use the one-hot encoding:

tensor([[0., 1., 0., 0., 0.],    # y_1 = 2 (counting from 1 for convenience)
        [1., 0., 0., 0., 0.],    # y_2 = 1
        [0., 0., 0., 0., 1.]])   # y_3 = 5
$l_1=-\log\dfrac{\exp(x_{1,y_1})}{\sum_{c=1}^5\exp(x_{1,c})}=-\log\dfrac{\exp(x_{1,2})}{\sum_{c=1}^5\exp(x_{1,c})}$

$l_2=-\log\dfrac{\exp(x_{2,y_2})}{\sum_{c=1}^5\exp(x_{2,c})}=-\log\dfrac{\exp(x_{2,1})}{\sum_{c=1}^5\exp(x_{2,c})}$

$l_3=-\log\dfrac{\exp(x_{3,y_3})}{\sum_{c=1}^5\exp(x_{3,c})}=-\log\dfrac{\exp(x_{3,5})}{\sum_{c=1}^5\exp(x_{3,c})}$

$\ell(x,y)=\dfrac{l_1+l_2+l_3}{3}$
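These per-sample values $l_1,l_2,l_3$ can be checked directly with reduction='none'. A minimal sketch (the input values are random; note that PyTorch class indices are 0-based, while the counting above starts from 1):

import torch
import torch.nn as nn

x = torch.randn(3, 5)                  # batch = 3, classnum = 5
y = torch.tensor([1, 0, 4])            # the class indices from the example above (0-based)
l = nn.CrossEntropyLoss(reduction='none')(x, y)      # tensor [l_1, l_2, l_3]
manual = -x.log_softmax(dim=1)[torch.arange(3), y]   # -log( exp(x_{n,y_n}) / sum_c exp(x_{n,c}) )
print(torch.allclose(l, manual))       # expected: True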
(2) The target is softmax-style class probabilities (target with class probabilities), with matrix entries $y_{n,i}$, e.g.:

tensor([[0.6114, 0.0574, 0.0403, 0.2432, 0.0478],    # entries y_{1,i}
        [0.2474, 0.0323, 0.3698, 0.1311, 0.2193],    # entries y_{2,i}
        [0.0700, 0.1692, 0.0484, 0.5735, 0.1389]])   # entries y_{3,i}
$l_1=-\displaystyle\sum_{c=1}^5\log\dfrac{\exp(x_{1,c})}{\sum_{i=1}^5\exp(x_{1,i})}\,y_{1,c}=-\log\dfrac{\exp(x_{1,1})}{\sum_{i=1}^5\exp(x_{1,i})}\,y_{1,1}-\cdots-\log\dfrac{\exp(x_{1,5})}{\sum_{i=1}^5\exp(x_{1,i})}\,y_{1,5}$

$l_2=-\displaystyle\sum_{c=1}^5\log\dfrac{\exp(x_{2,c})}{\sum_{i=1}^5\exp(x_{2,i})}\,y_{2,c}=-\log\dfrac{\exp(x_{2,1})}{\sum_{i=1}^5\exp(x_{2,i})}\,y_{2,1}-\cdots-\log\dfrac{\exp(x_{2,5})}{\sum_{i=1}^5\exp(x_{2,i})}\,y_{2,5}$

$l_3=-\displaystyle\sum_{c=1}^5\log\dfrac{\exp(x_{3,c})}{\sum_{i=1}^5\exp(x_{3,i})}\,y_{3,c}=-\log\dfrac{\exp(x_{3,1})}{\sum_{i=1}^5\exp(x_{3,i})}\,y_{3,1}-\cdots-\log\dfrac{\exp(x_{3,5})}{\sum_{i=1}^5\exp(x_{3,i})}\,y_{3,5}$

$\ell(x,y)=\dfrac{l_1+l_2+l_3}{3}$
A PyTorch example follows:
import torch
import torch.nn as nn

batch, classnum = 3, 5            # assumed setup so the snippet runs standalone
criterion = nn.CrossEntropyLoss()

## Example of target with class indices
inputs = torch.randn(batch, classnum, requires_grad=True)
print('inputs:\n\t', inputs.data)
targets = torch.empty(batch, dtype=torch.long).random_(classnum)
print('targets:\n\t', targets.data)
loss = criterion(inputs, targets)
print("nn.CrossEntropyLoss(): ", loss)

## Example of target with class probabilities
inputs = torch.randn(batch, classnum, requires_grad=True)
print('inputs:\n\t',inputs.data)
targets = torch.randn(batch, classnum).softmax(dim=1)
print('targets:\n\t',targets.data)
loss = criterion(inputs, targets)
print('loss: ',loss.item())
And CrossEntropy_func(outputs, targets), implemented directly from the formulas above:
import torch
import torch.nn as nn
import numpy as np
def CrossEntropy_func(outputs, targets):
    yn = outputs.detach().numpy()
    if targets.dim() == 1:  # class indices have targets.dim()==1; class probabilities have targets.dim()==2
        # convert class indices to one-hot class probabilities
        targets = torch.tensor(np.eye(classnum)[targets], dtype=torch.float32).reshape(batch, classnum)
    cn = targets.detach().numpy()
    loss = -np.mean(np.log(np.exp(np.sum(yn*cn, axis=1))/np.sum(np.exp(yn), axis=1)))
    return loss
batch = 3
classnum = 5
criterion = nn.CrossEntropyLoss()
outputs = torch.rand(batch, classnum, requires_grad=True)
targets = torch.empty(batch, dtype=torch.long).random_(classnum) # target with class indices
# targets = torch.randn(batch, classnum).softmax(dim=1) # target with class probabilities
loss = criterion(outputs, targets)
print("网络输出:\n\t", outputs)
print("目标输出:\n",targets)#, "\n目标输出:\n",np.where(targets==1)[1])
print("nn.CrossEntropyLoss()值:\n\t",loss)
print("CrossEntropy_func()值:\n\t",CrossEntropy_func(outputs, targets))
Output:

Network output:
    tensor([[0.1748, 0.4633, 0.4517, 0.2411, 0.7795],
            [0.2201, 0.0267, 0.6075, 0.7097, 0.5620],
            [0.8562, 0.4230, 0.8201, 0.3789, 0.3674]], requires_grad=True)
Target output:
    tensor([3, 2, 0])  # target with class indices
nn.CrossEntropyLoss() value:
    tensor(1.5399, grad_fn=<NllLossBackward0>)
CrossEntropy_func() value:
    1.5399135
And:

Network output:
    tensor([[0.3374, 0.1705, 0.5405, 0.1642, 0.6645],
            [0.9041, 0.4213, 0.9672, 0.2670, 0.1970],
            [0.7261, 0.9160, 0.3956, 0.2560, 0.4922]], requires_grad=True)
Target output:  # target with class probabilities (softmax: each row sums to 1)
    tensor([[0.1267, 0.1977, 0.4202, 0.1212, 0.1342],
            [0.1167, 0.4284, 0.0611, 0.3156, 0.0782],
            [0.1842, 0.0455, 0.0133, 0.1828, 0.5743]])
nn.CrossEntropyLoss() value:
    tensor(1.6820, grad_fn=<...>)
CrossEntropy_func() value:
    1.6819801
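Finally, for class-index targets, nn.CrossEntropyLoss is equivalent to nn.LogSoftmax followed by nn.NLLLoss. A minimal sketch of this equivalence, reusing outputs, the class-index targets, and loss from the example above:

# Sketch: CrossEntropyLoss == NLLLoss applied to log-softmax outputs (class-index targets only)
log_probs = nn.LogSoftmax(dim=1)(outputs)
loss_nll = nn.NLLLoss()(log_probs, targets)
print(torch.allclose(loss, loss_nll))  # expected: True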