The model is very straightforward to implement if one refers to the GloVe objective function.
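For reference, the objective being minimized is the one from the original GloVe paper (Pennington et al., 2014):

J = \sum_{i,j=1}^{V} f(X_{ij}) \left( w_i^\top \tilde{w}_j + b_i + \tilde{b}_j - \log X_{ij} \right)^2,
\qquad
f(x) = \min\!\left( (x / x_{\max})^{\alpha},\, 1 \right),

where X_{ij} is the co-occurrence count of words i and j. The clamp in weighting_func below implements f.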
import torch
import torch.nn as nn


class GloVe(nn.Module):
    def __init__(self, vocab_size, embedding_size, x_max, alpha):
        super().__init__()
        # Center and context word vectors; sparse=True because each batch
        # only touches the rows indexed by i and j.
        self.weight = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_size,
            sparse=True,
        )
        self.weight_tilde = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_size,
            sparse=True,
        )
        # Per-word biases b_i and b~_j.
        self.bias = nn.Parameter(
            torch.randn(vocab_size, dtype=torch.float)
        )
        self.bias_tilde = nn.Parameter(
            torch.randn(vocab_size, dtype=torch.float)
        )
        # Weighting function f(x) = min((x / x_max)^alpha, 1).
        self.weighting_func = lambda x: (x / x_max).float_power(alpha).clamp(0, 1)

    def forward(self, i, j, x):
        # Dot product between the center and context vectors of each pair.
        loss = torch.mul(self.weight(i), self.weight_tilde(j)).sum(dim=1)
        # Squared difference to the log co-occurrence count, with biases added.
        loss = (loss + self.bias[i] + self.bias_tilde[j] - x.log()).square()
        # Weight each term by f(x) and average over the batch.
        loss = torch.mul(self.weighting_func(x), loss).mean()
        return loss

Notice that we set sparse=True when creating the embeddings, since the gradient updates are sparse by nature. In forward(...), the average loss over the batch is returned.
The final word embeddings are obtained by summing weight and weight_tilde.
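Concretely, after training, the summed embedding matrix can be read off the two embedding tables; this is just a small sketch of the summation described above:

# Sum the two learned embedding tables into the final word vectors.
with torch.no_grad():
    embeddings = model.weight.weight + model.weight_tilde.weight  # (vocab_size, embedding_size)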