Skip to content

Embedding

Embedding

Bases: Module

Source code in src/transformer/modules/embedding.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
class Embedding(nn.Module):
    """Transformer input embedding whose output is scaled by sqrt(d_model)."""

    def __init__(self, d_model: int, vocab_size: int):
        """Create the embedding table used for the Transformer inputs.

        Args:
            d_model: the Transformer model dimension; used as the size of
                each embedding vector
            vocab_size: number of terms in our vocabulary; used as the
                number of entries in the embedding table
        """
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=d_model,
        )
        self.vocab_size = vocab_size
        self.d_model = d_model

    def forward(self, x):
        """Look up embeddings for tokenized inputs and rescale them.

        Args:
            x: tokenized inputs, handed unchanged to the underlying
                ``nn.Embedding`` (presumably a tensor of integer token
                ids -- confirm against callers)

        Returns:
            The looked-up embeddings multiplied by ``sqrt(d_model)``.

        Note:
            The scaling follows section 3.4 of "Attention is All You
            Need", which multiplies the embedding weights by the square
            root of the model's dimension.
        """
        looked_up = self.embedding(x)
        scale = math.sqrt(self.d_model)
        return looked_up * scale

__init__(d_model, vocab_size)

Transformer Input Embedding

Parameters:

Name Type Description Default
d_model int

the Transformer model dimension

required
vocab_size int

number of terms in our vocabulary

required
Source code in src/transformer/modules/embedding.py
 8
 9
10
11
12
13
14
15
16
17
18
def __init__(self, d_model: int, vocab_size: int):
    """Create the embedding table used for the Transformer inputs.

    Args:
        d_model: the Transformer model dimension; used as the size of
            each embedding vector
        vocab_size: number of terms in our vocabulary; used as the
            number of entries in the embedding table
    """
    super().__init__()
    self.embedding = nn.Embedding(
        num_embeddings=vocab_size,
        embedding_dim=d_model,
    )
    self.vocab_size = vocab_size
    self.d_model = d_model

forward(x)

Embed our tokenized inputs

Note

Following Section 3.4 of "Attention Is All You Need", we multiply the embeddings by the square root of the model's dimension.

Source code in src/transformer/modules/embedding.py
20
21
22
23
24
25
26
27
28
def forward(self, x):
    """Look up embeddings for tokenized inputs and rescale them.

    Args:
        x: tokenized inputs, handed unchanged to ``self.embedding``
            (presumably a tensor of integer token ids -- confirm
            against callers)

    Returns:
        The looked-up embeddings multiplied by ``sqrt(d_model)``.

    Note:
        The scaling follows section 3.4 of "Attention is All You
        Need", which multiplies the embedding weights by the square
        root of the model's dimension.
    """
    looked_up = self.embedding(x)
    scale = math.sqrt(self.d_model)
    return looked_up * scale