Make AutoProcessor a magic loading class for all modalities (#18963)

* Make AutoProcessor a magic loading class for all modalities

* Quality
This commit is contained in:
Sylvain Gugger 2022-09-14 07:36:12 -04:00 committed by GitHub
parent a2a3afbc8d
commit 6f8f2f6a77
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 28 additions and 4 deletions

View File

@ -23,7 +23,7 @@ from ...configuration_utils import PretrainedConfig
from ...dynamic_module_utils import get_class_from_dynamic_module
from ...feature_extraction_utils import FeatureExtractionMixin
from ...tokenization_utils import TOKENIZER_CONFIG_FILE
from ...utils import CONFIG_NAME, FEATURE_EXTRACTOR_NAME, get_file_from_repo, logging
from ...utils import FEATURE_EXTRACTOR_NAME, get_file_from_repo, logging
from .auto_factory import _LazyAutoMapping
from .configuration_auto import (
CONFIG_MAPPING_NAMES,
@ -31,6 +31,8 @@ from .configuration_auto import (
model_type_to_module_name,
replace_list_option_in_docstrings,
)
from .feature_extraction_auto import AutoFeatureExtractor
from .tokenization_auto import AutoTokenizer
logger = logging.get_logger(__name__)
@ -250,10 +252,24 @@ class AutoProcessor:
if type(config) in PROCESSOR_MAPPING:
return PROCESSOR_MAPPING[type(config)].from_pretrained(pretrained_model_name_or_path, **kwargs)
# At this stage, there doesn't seem to be a `Processor` class available for this model, so let's try a
# tokenizer.
try:
return AutoTokenizer.from_pretrained(
pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
)
except Exception:
try:
return AutoFeatureExtractor.from_pretrained(
pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
)
except Exception:
pass
raise ValueError(
f"Unrecognized processor in {pretrained_model_name_or_path}. Should have a `processor_type` key in "
f"its {FEATURE_EXTRACTOR_NAME}, or one of the following `model_type` keys in its {CONFIG_NAME}: "
f"{', '.join(c for c in PROCESSOR_MAPPING_NAMES.keys())}"
f"Unrecognized processing class in {pretrained_model_name_or_path}. Can't instantiate a processor, a "
"tokenizer or a feature extractor for this model. Make sure the repository contains the files of at least "
"one of those processing classes."
)
@staticmethod

View File

@ -202,6 +202,14 @@ class AutoFeatureExtractorTest(unittest.TestCase):
if CustomConfig in PROCESSOR_MAPPING._extra_content:
del PROCESSOR_MAPPING._extra_content[CustomConfig]
def test_auto_processor_creates_tokenizer(self):
processor = AutoProcessor.from_pretrained("hf-internal-testing/tiny-random-bert")
self.assertEqual(processor.__class__.__name__, "BertTokenizerFast")
def test_auto_processor_creates_feature_extractor(self):
processor = AutoProcessor.from_pretrained("hf-internal-testing/tiny-random-convnext")
self.assertEqual(processor.__class__.__name__, "ConvNextFeatureExtractor")
@is_staging_test
class ProcessorPushToHubTester(unittest.TestCase):