Enabling imageGPT auto feature extractor. (#16871)

* Enabling `imageGPT` auto feature extractor.

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Small updates.

* Update after rebase to use `input_ids` instead of `pixel_values`.

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Nicolas Patry 2022-05-24 12:30:46 +02:00 committed by GitHub
parent 31ee80d556
commit d980929803
3 changed files with 18 additions and 2 deletions

src/transformers/models/auto/feature_extraction_auto.py

@@ -50,6 +50,7 @@ FEATURE_EXTRACTOR_MAPPING_NAMES = OrderedDict(
         ("flava", "FlavaFeatureExtractor"),
         ("glpn", "GLPNFeatureExtractor"),
         ("hubert", "Wav2Vec2FeatureExtractor"),
+        ("imagegpt", "ImageGPTFeatureExtractor"),
         ("layoutlmv2", "LayoutLMv2FeatureExtractor"),
         ("layoutlmv3", "LayoutLMv3FeatureExtractor"),
         ("maskformer", "MaskFormerFeatureExtractor"),

src/transformers/pipelines/base.py

@@ -75,14 +75,19 @@ def _pad(items, key, padding_value, padding_side):
         # Others include `attention_mask` etc...
         shape = items[0][key].shape
         dim = len(shape)
-        if dim == 4:
+        if key == "pixel_values":
             # This is probable image so padding shouldn't be necessary
             # B, C, H, W
             return torch.cat([item[key] for item in items], dim=0)
         max_length = max(item[key].shape[1] for item in items)
+        min_length = min(item[key].shape[1] for item in items)
         dtype = items[0][key].dtype
         if dim == 2:
+            if max_length == min_length:
+                # Bypass for `ImageGPT` which doesn't provide a padding value, yet
+                # we can consistently pad since the size should be matching
+                return torch.cat([item[key] for item in items], dim=0)
             tensor = torch.zeros((batch_size, max_length), dtype=dtype) + padding_value
         elif dim == 3:
             tensor = torch.zeros((batch_size, max_length, shape[-1]), dtype=dtype) + padding_value
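
The `max_length == min_length` bypass can be illustrated standalone. A minimal sketch with toy tensors (not the pipeline code itself): ImageGPT's feature extractor produces fixed-length `input_ids`, one id per pixel, so every item in a batch has the same length and can be concatenated without any padding value:

```python
import torch

# Four items, each a (1, 1024) tensor of color-cluster ids; 1024 = 32 * 32 pixels.
items = [{"input_ids": torch.randint(0, 513, (1, 1024))} for _ in range(4)]

max_length = max(item["input_ids"].shape[1] for item in items)
min_length = min(item["input_ids"].shape[1] for item in items)
if max_length == min_length:
    # All sequences already share one length: stack along the batch dimension.
    batch = torch.cat([item["input_ids"] for item in items], dim=0)
    print(batch.shape)  # torch.Size([4, 1024])
```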
@@ -146,7 +151,11 @@ def pad_collate_fn(tokenizer, feature_extractor):
         padded = {}
         for key in keys:
             if key in {"input_ids"}:
-                _padding_value = t_padding_value
+                # ImageGPT uses a feature extractor
+                if feature_extractor is not None:
+                    _padding_value = f_padding_value
+                else:
+                    _padding_value = t_padding_value
             elif key in {"input_values", "pixel_values", "input_features"}:
                 _padding_value = f_padding_value
             elif key in {"p_mask", "special_tokens_mask"}:

tests/models/imagegpt/test_modeling_imagegpt.py

@@ -171,6 +171,12 @@ class ImageGPTModelTester:
             reorder_and_upcast_attn=reorder_and_upcast_attn,
         )
 
+    def get_pipeline_config(self):
+        config = self.get_config()
+        config.vocab_size = 513
+        config.max_position_embeddings = 1024
+        return config
+
     def prepare_config_and_inputs_for_decoder(self):
         (
             config,
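
The `get_pipeline_config` override gives pipeline tests a config that matches what the feature extractor emits: 512 color clusters plus one start-of-sequence token yield the 513-entry vocabulary, and 32 * 32 pixels per image yield 1024 positions. A minimal sketch of the equivalent construction:

```python
from transformers import ImageGPTConfig

config = ImageGPTConfig()
config.vocab_size = 513                 # 512 color clusters + 1 SOS token
config.max_position_embeddings = 1024   # 32 * 32 pixels per image
print(config.vocab_size, config.max_position_embeddings)  # 513 1024
```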