mirror of https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
parent 3a24ba82ad
commit fd70464fa7
@@ -2214,7 +2214,7 @@ class MllamaForConditionalGeneration(MllamaPreTrainedModel, GenerationMixin):
        # If we're in pre-fill or cacheless decoding step, then we need pixel_values and aspect ratios
        # to compute image hidden states, otherwise they are cached within each cross attn layer
        if (input_ids == self.config.image_token_index).any():
            if cache_position[0] == 0:
                model_inputs["pixel_values"] = pixel_values
                model_inputs["aspect_ratio_ids"] = aspect_ratio_ids
                model_inputs["aspect_ratio_mask"] = aspect_ratio_mask
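The logic in this hunk is worth unpacking: vision tensors are forwarded only when the prompt actually contains image placeholder tokens, and even then only on the pre-fill step (cache position 0), because each cross-attention layer caches the image hidden states for all later decoding steps. Below is a minimal, self-contained sketch of that gating pattern; the helper name `gate_vision_inputs`, the placeholder token id, and the toy tensor shapes are illustrative assumptions, not the actual Mllama implementation.

```python
import torch

IMAGE_TOKEN_INDEX = 128256  # hypothetical placeholder id, not taken from the diff

def gate_vision_inputs(model_inputs, input_ids, cache_position,
                       pixel_values, aspect_ratio_ids, aspect_ratio_mask):
    # Vision tensors are needed only if the prompt contains image tokens...
    if (input_ids == IMAGE_TOKEN_INDEX).any():
        # ...and only on the pre-fill step: afterwards the image hidden
        # states are served from the cross-attention layers' cache.
        if cache_position[0] == 0:
            model_inputs["pixel_values"] = pixel_values
            model_inputs["aspect_ratio_ids"] = aspect_ratio_ids
            model_inputs["aspect_ratio_mask"] = aspect_ratio_mask
    return model_inputs

# Toy usage: pre-fill with an image token attaches the vision tensors,
# a later decoding step does not.
ids = torch.tensor([[IMAGE_TOKEN_INDEX, 1, 2]])
prefill = gate_vision_inputs({}, ids, torch.tensor([0]),
                             torch.rand(1, 1, 4, 3, 32, 32),
                             torch.zeros(1, 1, dtype=torch.long),
                             torch.ones(1, 1, 4))
decode = gate_vision_inputs({}, torch.tensor([[3]]), torch.tensor([5]),
                            None, None, None)
print("pixel_values" in prefill, "pixel_values" in decode)  # True False
```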
@@ -243,6 +243,7 @@ def check_attribute_being_used(config_class, attributes, default_value, source_s
        "pad_index",
        "unk_index",
        "mask_index",
        "image_token_index",  # for VLMs
        "image_size",
        "use_cache",
        "out_features",
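For context, `check_attribute_being_used` scans a config class's modeling sources for literal uses of each config attribute and flags unused ones; the list extended here is an allowlist of attributes that count as used by convention even when no textual match is found. A simplified sketch of that kind of check follows; `attribute_is_used` and its substring-scan heuristic are assumptions for illustration, not the checker's actual implementation.

```python
ATTRIBUTES_TO_ALLOW = [
    "pad_index",
    "unk_index",
    "mask_index",
    "image_token_index",  # for VLMs
    "image_size",
    "use_cache",
    "out_features",
]

def attribute_is_used(attribute: str, modeling_sources: list[str]) -> bool:
    # An attribute counts as used if it is conventionally allowed (the scan
    # cannot detect every usage pattern), or if any modeling file references
    # it as `config.<attribute>` / `self.config.<attribute>`.
    if attribute in ATTRIBUTES_TO_ALLOW:
        return True
    needle = f"config.{attribute}"
    return any(needle in source for source in modeling_sources)

# Toy usage: `image_token_index` passes even without a literal reference.
print(attribute_is_used("image_token_index", ["def forward(self): ..."]))  # True
print(attribute_is_used("hidden_bias", ["def forward(self): ..."]))        # False
```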