Mirror of https://github.com/huggingface/transformers.git
Enabling imageGPT auto feature extractor. (#16871)

* Enabling `imageGPT` auto feature extractor.

* Small updates.

* Update after rebase to use `input_ids` instead of `pixel_values`.

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 31ee80d556
commit d980929803
@@ -50,6 +50,7 @@ FEATURE_EXTRACTOR_MAPPING_NAMES = OrderedDict(
         ("flava", "FlavaFeatureExtractor"),
         ("glpn", "GLPNFeatureExtractor"),
         ("hubert", "Wav2Vec2FeatureExtractor"),
+        ("imagegpt", "ImageGPTFeatureExtractor"),
         ("layoutlmv2", "LayoutLMv2FeatureExtractor"),
         ("layoutlmv3", "LayoutLMv3FeatureExtractor"),
         ("maskformer", "MaskFormerFeatureExtractor"),
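With this mapping entry in place, `AutoFeatureExtractor` can resolve ImageGPT checkpoints directly. A minimal usage sketch, assuming the public `openai/imagegpt-small` checkpoint:

from transformers import AutoFeatureExtractor

# Resolves to ImageGPTFeatureExtractor via the "imagegpt" mapping entry above.
feature_extractor = AutoFeatureExtractor.from_pretrained("openai/imagegpt-small")
print(type(feature_extractor).__name__)  # ImageGPTFeatureExtractor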
@@ -75,14 +75,19 @@ def _pad(items, key, padding_value, padding_side):
         # Others include `attention_mask` etc...
         shape = items[0][key].shape
         dim = len(shape)
-        if dim == 4:
+        if key == "pixel_values":
             # This is probable image so padding shouldn't be necessary
             # B, C, H, W
             return torch.cat([item[key] for item in items], dim=0)
         max_length = max(item[key].shape[1] for item in items)
+        min_length = min(item[key].shape[1] for item in items)
         dtype = items[0][key].dtype

         if dim == 2:
+            if max_length == min_length:
+                # Bypass for `ImageGPT` which doesn't provide a padding value, yet
+                # we can consistently pad since the size should be matching
+                return torch.cat([item[key] for item in items], dim=0)
             tensor = torch.zeros((batch_size, max_length), dtype=dtype) + padding_value
         elif dim == 3:
             tensor = torch.zeros((batch_size, max_length, shape[-1]), dtype=dtype) + padding_value
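The `dim == 2` bypass works because `ImageGPTFeatureExtractor` encodes every image to the same number of color-cluster ids (1024 tokens for a 32 x 32 grid), so `max_length == min_length` and a batch can be concatenated without ever needing a padding value. A minimal sketch of that case with hypothetical toy tensors:

import torch

# Hypothetical batch: every ImageGPT sample encodes to the same sequence length.
items = [{"input_ids": torch.randint(0, 513, (1, 1024))} for _ in range(4)]
lengths = {item["input_ids"].shape[1] for item in items}
assert len(lengths) == 1  # all sequences share one length; no padding value needed
batch = torch.cat([item["input_ids"] for item in items], dim=0)
print(batch.shape)  # torch.Size([4, 1024])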
@@ -146,7 +151,11 @@ def pad_collate_fn(tokenizer, feature_extractor):
         padded = {}
         for key in keys:
             if key in {"input_ids"}:
-                _padding_value = t_padding_value
+                # ImageGPT uses a feature extractor
+                if feature_extractor is not None:
+                    _padding_value = f_padding_value
+                else:
+                    _padding_value = t_padding_value
             elif key in {"input_values", "pixel_values", "input_features"}:
                 _padding_value = f_padding_value
             elif key in {"p_mask", "special_tokens_mask"}:
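A condensed standalone sketch of the selection logic above; `select_padding_value` is a hypothetical helper, and `t_padding_value` / `f_padding_value` stand in for the tokenizer and feature-extractor padding values computed earlier in the real function:

def select_padding_value(key, t_padding_value, f_padding_value, feature_extractor=None):
    # `input_ids` usually comes from a tokenizer, but ImageGPT emits it from its
    # feature extractor, so the feature extractor's value takes precedence here.
    if key == "input_ids":
        if feature_extractor is not None:
            return f_padding_value
        return t_padding_value
    if key in {"input_values", "pixel_values", "input_features"}:
        return f_padding_value
    raise ValueError(f"no padding rule sketched for {key!r}")  # p_mask etc. elided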
@@ -171,6 +171,12 @@ class ImageGPTModelTester:
             reorder_and_upcast_attn=reorder_and_upcast_attn,
         )

+    def get_pipeline_config(self):
+        config = self.get_config()
+        config.vocab_size = 513
+        config.max_position_embeddings = 1024
+        return config
+
     def prepare_config_and_inputs_for_decoder(self):
         (
             config,
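The pipeline config pins `vocab_size` to 513 because ImageGPT quantizes pixels into 512 color clusters plus one start-of-sequence token, so the ids produced by the feature extractor range over [0, 512]. A sketch of the equivalent standalone construction, mirroring `get_pipeline_config` above:

from transformers import ImageGPTConfig

# 512 color clusters + 1 SOS token; 1024 positions match the 32 x 32 token grid.
config = ImageGPTConfig()
config.vocab_size = 513
config.max_position_embeddings = 1024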