先定义一个丢弃层

Dropout（暂退法）是指在训练的前向传播过程中，随机将一部分节点的输出（激活值）置零，而不是丢弃权重本身；被保留的输出会除以 (1 - p)，使输出的期望保持不变。

Dropout 仅在训练期间生效；每次前向传播都会重新随机生成掩码（mask），推理（评估）时不会丢弃任何节点。

手动实现

1
# First, define a dropout layer.
def dropout_layer(X, dropout):
    """Apply inverted dropout to ``X`` with drop probability ``dropout``.

    Each element is independently zeroed with probability ``dropout`` and
    the surviving elements are divided by ``1 - dropout``. A fresh random
    mask is sampled on every call.

    Args:
        X: Input tensor.
        dropout: Drop probability; must lie in ``[0, 1]``.

    Returns:
        A tensor with the same shape as ``X``. When ``dropout == 0`` the
        input tensor itself is returned unchanged.

    Raises:
        AssertionError: If ``dropout`` is outside ``[0, 1]``.
    """
    assert 0 <= dropout <= 1
    # In this case every element is dropped.
    if dropout == 1:
        return torch.zeros_like(X)
    # In this case every element is kept.
    if dropout == 0:
        return X
    # Sample the mask on the same device as X; a mask created on the default
    # device would make the multiplication fail for inputs that live on an
    # accelerator.
    mask = (torch.rand(X.shape, device=X.device) > dropout).float()
    return mask * X / (1.0 - dropout)
12

13
# Drop probabilities used after the first and second hidden layers.
dropout1 = 0.2
dropout2 = 0.5
14

15
class Net(nn.Module):
    """MLP with two hidden layers and hand-written dropout after each one.

    The drop probabilities are read from the module-level names ``dropout1``
    and ``dropout2``, and the masking itself is done by ``dropout_layer``.
    """

    def __init__(self, num_inputs, num_outputs, num_hiddens1, num_hiddens2,
                 is_training=True):
        """Build the three linear layers and record the initial mode.

        Args:
            num_inputs: Number of features per example after flattening.
            num_outputs: Size of the output layer.
            num_hiddens1: Width of the first hidden layer.
            num_hiddens2: Width of the second hidden layer.
            is_training: Initial value of ``self.training``; dropout is only
                applied while it is truthy.
        """
        super().__init__()
        self.num_inputs = num_inputs
        # ``training`` is the same attribute that nn.Module.train()/eval()
        # toggle, so switching modes later also switches dropout on and off.
        self.training = is_training
        self.lin1 = nn.Linear(num_inputs, num_hiddens1)
        self.lin2 = nn.Linear(num_hiddens1, num_hiddens2)
        self.lin3 = nn.Linear(num_hiddens2, num_outputs)
        self.relu = nn.ReLU()

    def forward(self, X):
        """Flatten ``X`` to ``(-1, num_inputs)`` and run the network.

        Returns:
            The output of the final linear layer (no activation applied).
        """
        H1 = self.relu(self.lin1(X.reshape((-1, self.num_inputs))))
        # Dropout is used only while training the model.
        if self.training:
            # Dropout after the first fully connected layer.
            H1 = dropout_layer(H1, dropout1)
        H2 = self.relu(self.lin2(H1))
        if self.training:
            # Dropout after the second fully connected layer.
            H2 = dropout_layer(H2, dropout2)
        out = self.lin3(H2)
        return out
38

39

40
# Instantiate the hand-written dropout network. The four size variables are
# expected to be defined before this point (not visible in this section).
net = Net(
    num_inputs=num_inputs,
    num_outputs=num_outputs,
    num_hiddens1=num_hiddens1,
    num_hiddens2=num_hiddens2,
)

简洁实现

1
# Concise implementation: the same architecture built from stock layers.
# nn.Dropout takes the drop probability as a plain real number.
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    # Dropout after the first fully connected layer.
    nn.Dropout(dropout1),
    nn.Linear(256, 256),
    nn.ReLU(),
    # Dropout after the second fully connected layer.
    nn.Dropout(dropout2),
    nn.Linear(256, 10),
)
12

13
def init_weights(m):
    """Initialise the weight of a linear layer from N(0, 0.01^2).

    Intended to be passed to ``nn.Module.apply``. Modules that are not
    ``nn.Linear`` (or a subclass of it) are left untouched, and biases are
    not modified.

    Args:
        m: The module to (possibly) initialise in place.
    """
    # isinstance also covers subclasses of nn.Linear, which an exact
    # ``type(m) == nn.Linear`` comparison would silently skip.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)
2 collapsed lines
16

17
# Run init_weights on net and each of its submodules. The trailing semicolon
# is kept as-is (presumably to suppress the echoed repr in a notebook cell;
# confirm before removing).
net.apply(init_weights);