diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py index afc79463bd7..b69b910a76d 100644 --- a/src/transformers/models/gpt2/modeling_gpt2.py +++ b/src/transformers/models/gpt2/modeling_gpt2.py @@ -1670,9 +1670,9 @@ class GPT2ForQuestionAnswering(GPT2PreTrainedModel): if start_positions is not None and end_positions is not None: # If we are on multi-GPU, split add a dimension if len(start_positions.size()) > 1: - start_positions = start_positions.squeeze(-1) + start_positions = start_positions.squeeze(-1).to(start_logits.device) if len(end_positions.size()) > 1: - end_positions = end_positions.squeeze(-1) + end_positions = end_positions.squeeze(-1).to(end_logits.device) # sometimes the start/end positions are outside our model inputs, we ignore these terms ignored_index = start_logits.size(1) start_positions = start_positions.clamp(0, ignored_index)