#####################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2021.02 #
#####################################################
import math

import torch
import torch.nn as nn


class PositionalEncoder(nn.Module):
    # Attention Is All You Need: https://arxiv.org/pdf/1706.03762.pdf
    # https://github.com/pytorch/examples/blob/master/word_language_model/model.py#L65

    def __init__(self, d_model, max_seq_len, dropout=0.1):
        super(PositionalEncoder, self).__init__()
        self.d_model = d_model
        # Create the constant 'pe' matrix with values dependent on
        # the position (pos) and the dimension index (i).
        pe = torch.zeros(max_seq_len, d_model)
        for pos in range(max_seq_len):
            for i in range(0, d_model):
                # Paired even/odd dimensions share the same frequency:
                # 10000 ** (2 * (i // 2) / d_model), as in the original paper.
                div = 10000 ** ((i // 2) * 2 / d_model)
                value = pos / div
                if i % 2 == 0:
                    pe[pos, i] = math.sin(value)
                else:
                    pe[pos, i] = math.cos(value)
        pe = pe.unsqueeze(0)  # shape: (1, max_seq_len, d_model)
        self.dropout = nn.Dropout(p=dropout)
        self.register_buffer("pe", pe)

    def forward(self, x):
        # x is expected to have shape (batch, seq, d_model); the positional
        # encodings are sliced to the current sequence length and feature dim.
        batch, seq, fdim = x.shape[:3]
        embeddings = self.pe[:, :seq, :fdim]
        outs = self.dropout(x + embeddings)
        return outs
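

if __name__ == "__main__":
    # Minimal usage sketch (not part of the original module). The sizes below
    # (batch=2, seq=16, d_model=32, max_seq_len=100) are illustrative
    # assumptions, not values taken from the original code.
    encoder = PositionalEncoder(d_model=32, max_seq_len=100, dropout=0.1)
    x = torch.zeros(2, 16, 32)  # (batch, seq, d_model)
    out = encoder(x)
    print(out.shape)  # expected: torch.Size([2, 16, 32])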