Vasqu 2025-07-02 13:01:34 +02:00
parent 8fa32ca900
commit 786230b463
2 changed files with 432 additions and 1159 deletions

File diff suppressed because it is too large.


@@ -504,7 +504,7 @@ class RobertaAttention(nn.Module):
         head_mask: Optional[torch.FloatTensor] = None,
         encoder_hidden_states: Optional[torch.FloatTensor] = None,
         encoder_attention_mask: Optional[torch.FloatTensor] = None,
-        past_key_value: Optional[tuple[tuple[torch.FloatTensor]]] = None,
+        past_key_value: Optional[Cache] = None,
         cache_position: Optional[torch.Tensor] = None,
         **kwargs,
     ) -> tuple[torch.Tensor]:
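
The hunk above swaps the legacy per-layer tuple of key/value tensors for a Cache object. A minimal sketch of what that type change means in practice, using the public DynamicCache class (the shapes and the single-layer setup here are illustrative, not taken from this commit):

import torch
from transformers import DynamicCache

batch, heads, past_len, head_dim = 1, 12, 5, 64
key = torch.randn(batch, heads, past_len, head_dim)
value = torch.randn(batch, heads, past_len, head_dim)

# Legacy format: tuple[tuple[torch.FloatTensor]], one (key, value) pair per layer.
legacy_past = ((key, value),)

# New format: a Cache object that owns the per-layer update logic.
cache = DynamicCache.from_legacy_cache(legacy_past)
new_key = torch.randn(batch, heads, 1, head_dim)    # one decoding step
new_value = torch.randn(batch, heads, 1, head_dim)
key_states, value_states = cache.update(new_key, new_value, layer_idx=0)
print(key_states.shape)  # torch.Size([1, 12, 6, 64]) -- past + current step

Instead of the caller concatenating tensors and rebuilding the tuple each step, the attention layer hands its new states to cache.update() and gets back the full keys/values.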
@@ -1045,6 +1045,7 @@ class RobertaForCausalLM(RobertaPreTrainedModel, GenerationMixin):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        cache_position: Optional[torch.Tensor] = None,
         **kwargs,
     ) -> Union[tuple[torch.Tensor], CausalLMOutputWithCrossAttentions]:
         r"""
@@ -1096,6 +1097,7 @@ class RobertaForCausalLM(RobertaPreTrainedModel, GenerationMixin):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
+            cache_position=cache_position,
         )
         sequence_output = outputs[0]
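
A hedged end-to-end sketch of calling the updated forward with a Cache and an explicit cache_position; in normal use generate() supplies both arguments, and the roberta-base checkpoint here is only illustrative:

import torch
from transformers import AutoTokenizer, DynamicCache, RobertaConfig, RobertaForCausalLM

# RobertaForCausalLM requires a decoder config.
config = RobertaConfig.from_pretrained("roberta-base", is_decoder=True)
model = RobertaForCausalLM.from_pretrained("roberta-base", config=config)
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

inputs = tokenizer("Hello", return_tensors="pt")
seq_len = inputs.input_ids.shape[1]
outputs = model(
    **inputs,
    past_key_values=DynamicCache(),            # new Cache object, filled in place
    cache_position=torch.arange(seq_len),      # first pass: positions 0..seq_len-1
    use_cache=True,
)
print(outputs.logits.shape)  # (batch, seq_len, vocab_size)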