def sgd(params, lr, batch_size):  #@save
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()
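To see what a single `sgd` call does, here is a minimal sketch (the toy tensors `w_demo` and `b_demo` are illustrative and not part of the original example): `backward()` populates the gradients, and one call then updates the parameters in place and resets the gradients.

import torch

# Illustrative toy parameters; any tensors with requires_grad=True would work.
w_demo = torch.tensor([1.0, 2.0], requires_grad=True)
b_demo = torch.tensor([0.5], requires_grad=True)

# A dummy objective whose gradients with respect to both parameters are all ones.
(w_demo.sum() + b_demo.sum()).backward()

sgd([w_demo, b_demo], lr=0.1, batch_size=1)  # one in-place update, gradients zeroed afterwards
print(w_demo, b_demo)  # each value moves opposite to its gradient, scaled by lr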
3. Training
lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)  # Minibatch loss for X and y
        # Because l has shape (batch_size, 1) rather than being a scalar, sum its
        # elements to get a scalar and compute the gradients with respect to [w, b]
        l.sum().backward()
        sgd([w, b], lr, batch_size)  # Update the parameters using their gradients
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
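The loop above relies on names introduced earlier in the chapter: `data_iter`, `linreg`, `squared_loss`, the synthetic `features` and `labels`, and the initialized parameters `w` and `b`. For reference, the following sketch shows one plausible form of those pieces; it is an assumption for self-containment, not a reproduction of the earlier text.

import random
import torch

def data_iter(batch_size, features, labels):
    """Yield random minibatches of (features, labels)."""
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(indices[i: i + batch_size])
        yield features[batch_indices], labels[batch_indices]

def linreg(X, w, b):
    """Linear regression model: Xw + b."""
    return torch.matmul(X, w) + b

def squared_loss(y_hat, y):
    """Elementwise squared loss (not yet averaged over the batch)."""
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2

# Assumed synthetic data and initial parameters (shapes chosen for illustration).
true_w, true_b = torch.tensor([2.0, -3.4]), 4.2
features = torch.normal(0, 1, (1000, 2))
labels = (torch.matmul(features, true_w) + true_b
          + torch.normal(0, 0.01, (1000,))).reshape(-1, 1)
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
batch_size = 10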
# Define the optimization algorithm: minibatch stochastic gradient descent
# lr is the learning rate
trainer = torch.optim.SGD(net.parameters(), lr=0.03)
# Training
num_epochs = 3  # number of epochs
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')
w = net[0].weight.data
print('error in estimating w:', true_w - w.reshape(true_w.shape))
b = net[0].bias.data
print('error in estimating b:', true_b - b)
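The concise loop above presupposes that `net`, `loss`, and `data_iter` were built earlier with PyTorch's high-level APIs. A minimal sketch of that setup, assuming the same data-generating process as the from-scratch version, might look like this:

import torch
from torch import nn
from torch.utils import data

# Assumed synthetic data (same generating process as the from-scratch version).
true_w, true_b = torch.tensor([2.0, -3.4]), 4.2
features = torch.normal(0, 1, (1000, 2))
labels = (torch.matmul(features, true_w) + true_b
          + torch.normal(0, 0.01, (1000,))).reshape(-1, 1)

# Data iterator built on TensorDataset and DataLoader.
dataset = data.TensorDataset(features, labels)
data_iter = data.DataLoader(dataset, batch_size=10, shuffle=True)

# Single-layer linear model and mean squared error loss.
net = nn.Sequential(nn.Linear(2, 1))
loss = nn.MSELoss()

Because `nn.MSELoss()` already returns a scalar mean over the batch, the concise loop calls `l.backward()` directly, whereas the from-scratch loop first sums the per-example losses.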