Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-03 21:00:08 +06:00
Fix loss
Please review @thomwolf, but I think this is equivalent (and it mimics the loss computation of the original implementation).
This commit is contained in:
parent
25d5ca48e0
commit
72ab10399f
@@ -492,9 +492,9 @@ class BertForQuestionAnswering(nn.Module):
             def compute_loss(logits, positions):
                 max_position = positions.max().item()
-                one_hot = torch.FloatTensor(batch_size, max(max_position, seq_length) +1, device=input_ids.device).zero_()
+                one_hot = torch.FloatTensor(batch_size, max(max_position, seq_length) +1).zero_()
                 one_hot = one_hot.scatter(1, positions.cpu(), 1) # Second argument need to be LongTensor and not cuda.LongTensor
-                one_hot = one_hot[:, :seq_length]
+                one_hot = one_hot[:, :seq_length].to(input_ids.device)
                 log_probs = nn.functional.log_softmax(logits, dim = -1).view(batch_size, seq_length)
                 loss = -torch.mean(torch.sum(one_hot*log_probs), dim = -1)
                 return loss
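For reference, here is a minimal, self-contained sketch of the one-hot loss from the diff, runnable on its own. The names batch_size, seq_length, start_logits and start_positions are invented example values, not the surrounding code of BertForQuestionAnswering, and the dim=-1 is placed inside torch.sum so the log-probability is summed per example before averaging over the batch; treat it as a sketch of the idea under those assumptions, not as the library code. For positions that fall inside the sequence it matches standard cross-entropy, shown as a quick check at the end.

# Hedged sketch: batch_size, seq_length, start_logits and start_positions are
# invented example values; this is not the library code from the diff above.
import torch
import torch.nn.functional as F

batch_size, seq_length = 2, 8
start_logits = torch.randn(batch_size, seq_length)  # one score per token
start_positions = torch.tensor([[3], [9]])          # shape (batch_size, 1); 9 is out of range

def compute_loss(logits, positions):
    # Build the one-hot target on CPU: scatter wants a plain LongTensor index.
    max_position = positions.max().item()
    one_hot = torch.zeros(batch_size, max(max_position, seq_length) + 1)
    one_hot = one_hot.scatter(1, positions.cpu(), 1)
    # Drop the columns for out-of-range positions (their rows become all zeros,
    # so they contribute nothing to the loss) and move to the logits' device.
    one_hot = one_hot[:, :seq_length].to(logits.device)
    log_probs = F.log_softmax(logits, dim=-1).view(batch_size, seq_length)
    # Negative log-likelihood of the target position, averaged over the batch.
    # Note: dim=-1 goes inside the sum here.
    return -torch.mean(torch.sum(one_hot * log_probs, dim=-1))

print(compute_loss(start_logits, start_positions))

# For in-range positions this matches standard cross-entropy:
in_range = torch.tensor([[3], [5]])
print(torch.allclose(compute_loss(start_logits, in_range),
                     F.cross_entropy(start_logits, in_range.squeeze(-1))))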