def topk_similar_tokens(roberta, index, k, normalize=False, beta=100.):
    # Token embedding matrix of the pretrained RoBERTa model (vocab_size x dim).
    embed_tokens = roberta.get_parameter('model.encoder.sentence_encoder.embed_tokens.weight')
    if normalize:
        # Unit-norm rows, so the dot products below are cosine similarities.
        embed_tokens = embed_tokens / embed_tokens.norm(dim=1, keepdim=True)
    # Softmax over the similarity to every token, with beta as the inverse temperature.
    prob = (beta * embed_tokens[index] @ embed_tokens.T).softmax(dim=-1)
    values, indices = prob.topk(k)
    # Print the result
    print("\nTop predictions:\n")
    for value, idx in zip(values, indices):
        token = roberta.decode(idx.unsqueeze(0)) if idx.item() != roberta.task.source_dictionary.pad() else '<pad>'
        print(f"{token}: {100 * value.item():.2f}%")
>>> topk_similar_tokens(roberta, roberta.task.mask_idx, 10, normalize=True, beta=10.)
Top predictions:
<mask>: 0.54%
: 0.02%
the: 0.01%
and: 0.01%
,: 0.01%
to: 0.01%
.: 0.01%
that: 0.01%
in: 0.01%
GG: 0.01%
>>> topk_similar_tokens(roberta, roberta.task.source_dictionary.bos(), 10, normalize=True, beta=10.)
Top predictions:
: 2.59%
<mask>: 0.02%
: 0.01%
.: 0.01%
the: 0.01%
,: 0.01%
a: 0.01%
!: 0.01%
。: 0.01%
?: 0.01%
>>> topk_similar_tokens(roberta, roberta.task.source_dictionary.pad(), 10, normalize=True, beta=10.)
Top predictions:
<pad>: 74.20%
: 0.02%
channelAvailability: 0.02%
PsyNetMessage: 0.01%
guiIcon: 0.01%
NetMessage: 0.01%
: 0.01%
?????-?????-: 0.01%
0.01%
EStreamFrame: 0.01%
>>> topk_similar_tokens(roberta, roberta.task.source_dictionary.eos(), 10, normalize=True, beta=10.)
Top predictions:
: 0.79%
.: 0.03%
<mask>: 0.03%
,: 0.02%
(: 0.02%
": 0.02%
and: 0.02%
the: 0.02%
The: 0.02%
-: 0.01%
Nothing shocking here. Other than for <pad>, though, beta needs to get quite large (i.e. the softmax quite cold) before a token's own, aligned embedding stands out.
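To see how cold it has to get, here is a small sketch that sweeps beta and reports how much probability mass lands on the query token's own embedding. The helper self_similarity_mass is just a convenience name reusing the same lookup as above; the exact numbers will depend on the checkpoint.

def self_similarity_mass(roberta, index, beta, normalize=True):
    # Same similarity softmax as in topk_similar_tokens, but only return
    # the probability assigned to the query token itself.
    embed_tokens = roberta.get_parameter('model.encoder.sentence_encoder.embed_tokens.weight')
    if normalize:
        embed_tokens = embed_tokens / embed_tokens.norm(dim=1, keepdim=True)
    prob = (beta * embed_tokens[index] @ embed_tokens.T).softmax(dim=-1)
    return prob[index].item()

for beta in (1., 10., 100.):
    mass = self_similarity_mass(roberta, roberta.task.mask_idx, beta)
    print(f"beta={beta:g}: mass on <mask> itself = {100 * mass:.2f}%")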