Positional Encoding

PositionalEncoding

Bases: Module

Source code in src/transformer/modules/positional_encoding.py
import math

import torch
from torch import nn


class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, seq_len: int, dropout: float):
        """Trigonometric Positional Encoding

        Represents the position of a word within a sequence following
        Section 3.5 of "Attention Is All You Need".

        Args:
            d_model: the Transformer model dimension
            seq_len: the sequence length of our data
            dropout: the dropout probability
        """
        super().__init__()
        self.d_model = d_model
        self.seq_len = seq_len
        self.dropout = nn.Dropout(dropout)
        self._register_positional_encoding(d_model, seq_len)

    def _register_positional_encoding(self, d_model: int, seq_len: int):
        """Create a positional encoding

        Using trigonometric functions, we create a positional encoding
        for every token in the sequence. Even-indexed dimensions of the
        encoding use `sin()`, and odd-indexed dimensions use `cos()`.

        Args:
            d_model: the model embedding dimension
            seq_len: the sequence length of our data

        Note:
            We register the positional encoding as an nn.Module buffer rather
            than create a set of parameters. This coding is fixed, and we do
            not want to learn it.
        """
        position_encoding = torch.zeros(seq_len, d_model)
        # Position vector of shape (seq_len, 1)
        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)
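        # Equivalent to 1 / 10000^(2i / d_model) from the paper;
        # arange(0, d_model, 2) supplies the 2i index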
        denominator = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10_000) / d_model))
        # Even-indexed dimensions of the encoding use sin()
        position_encoding[:, 0::2] = torch.sin(position * denominator)
        # Odd-indexed dimensions use cos()
        position_encoding[:, 1::2] = torch.cos(position * denominator)
        # Reshape the position encoding to be of shape (1, seq_len, d_model)
        position_encoding = position_encoding.unsqueeze(0)
        # Store the positional encoding in the module, not as a parameter
        self.register_buffer('positional_encoding', position_encoding)

    def forward(self, x):
        # Slice the buffered encoding to the input's sequence length and add it
        # to the embeddings. The buffer never requires gradients, so wrapping
        # the addition in torch.no_grad() is unnecessary and would block
        # gradients from flowing back to the embeddings.
        x = x + self.positional_encoding[:, :x.shape[1], :]
        return self.dropout(x)
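
The denominator computed above is the log-space form of the scaling term from Section 3.5, 1 / 10000^(2i / d_model). A minimal sketch, not part of the package and using illustrative sizes, that checks the two forms agree and rebuilds the same encoding table:

import math

import torch

d_model, seq_len = 8, 16  # illustrative sizes

position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)  # (seq_len, 1)
two_i = torch.arange(0, d_model, 2).float()  # the "2i" index from the paper

# Log-space form used in _register_positional_encoding
denominator = torch.exp(two_i * (-math.log(10_000) / d_model))
# Direct closed form from the paper
direct = 1.0 / (10_000 ** (two_i / d_model))
assert torch.allclose(denominator, direct)

encoding = torch.zeros(seq_len, d_model)
encoding[:, 0::2] = torch.sin(position * denominator)
encoding[:, 1::2] = torch.cos(position * denominator)
print(encoding.shape)  # torch.Size([16, 8])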

__init__(d_model, seq_len, dropout)

Trigonometric Positional Encoding

Represents the position of a word within a sequence following Section 3.5 of "Attention Is All You Need".

Parameters:

    d_model (int): the Transformer model dimension. Required.
    seq_len (int): the sequence length of our data. Required.
    dropout (float): the dropout probability. Required.
Source code in src/transformer/modules/positional_encoding.py
def __init__(self, d_model: int, seq_len: int, dropout: float):
    """Trigonometric Positional Encoding

    Represents the position of a word within a sequence following
    Section 3.5 of "Attention Is All You Need".

    Args:
        d_model: the Transformer model dimension
        seq_len: the sequence length of our data
        dropout: the dropout probability
    """
    super().__init__()
    self.d_model = d_model
    self.seq_len = seq_len
    self.dropout = nn.Dropout(dropout)
    self._register_positional_encoding(d_model, seq_len)
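
A minimal usage sketch; the import path is assumed from the source file shown above, and the tensor sizes are illustrative:

import torch

from src.transformer.modules.positional_encoding import PositionalEncoding

pos_enc = PositionalEncoding(d_model=512, seq_len=128, dropout=0.1)

# Token embeddings of shape (batch, seq_len, d_model), random here for illustration
embeddings = torch.randn(2, 128, 512)
encoded = pos_enc(embeddings)
print(encoded.shape)  # torch.Size([2, 128, 512])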