Effectively allow encoder_outputs input to be a tuple in pix2struct (#23932)

consistency
This commit is contained in:
fxmarty 2023-06-01 22:07:57 +09:00 committed by GitHub
parent 9603ef890a
commit 9a35a7b9e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -1734,6 +1734,12 @@ class Pix2StructForConditionalGeneration(Pix2StructPreTrainedModel):
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
elif return_dict and not isinstance(encoder_outputs, BaseModelOutput):
encoder_outputs = BaseModelOutput(
last_hidden_state=encoder_outputs[0],
hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None,
attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,
)
hidden_states = encoder_outputs[0]