mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
Don't save processor_config.json
if a processor has no extra attribute (#28584)
* not save if empty
* fix
* fix
* fix
* fix
* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
parent
772307be76
commit
db9a7e9d3d
@ -234,8 +234,11 @@ class ProcessorMixin(PushToHubMixin):
|
||||
# If we save using the predefined names, we can load using `from_pretrained`
|
||||
output_processor_file = os.path.join(save_directory, PROCESSOR_NAME)
|
||||
|
||||
self.to_json_file(output_processor_file)
|
||||
logger.info(f"processor saved in {output_processor_file}")
|
||||
# For now, let's not save to `processor_config.json` if the processor doesn't have extra attributes and
|
||||
# `auto_map` is not specified.
|
||||
if set(self.to_dict().keys()) != {"processor_class"}:
|
||||
self.to_json_file(output_processor_file)
|
||||
logger.info(f"processor saved in {output_processor_file}")
|
||||
|
||||
if push_to_hub:
|
||||
self._upload_modified_files(
|
||||
@ -246,6 +249,8 @@ class ProcessorMixin(PushToHubMixin):
|
||||
token=kwargs.get("token"),
|
||||
)
|
||||
|
||||
if set(self.to_dict().keys()) == {"processor_class"}:
|
||||
return []
|
||||
return [output_processor_file]
|
||||
|
||||
@classmethod
|
||||
|
@ -101,6 +101,12 @@ class AutoFeatureExtractorTest(unittest.TestCase):
|
||||
# save in new folder
|
||||
processor.save_pretrained(tmpdirname)
|
||||
|
||||
if not os.path.isfile(os.path.join(tmpdirname, PROCESSOR_NAME)):
|
||||
# create one manually in order to perform this test's objective
|
||||
config_dict = {"processor_class": "Wav2Vec2Processor"}
|
||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as fp:
|
||||
json.dump(config_dict, fp)
|
||||
|
||||
# drop `processor_class` in tokenizer config
|
||||
with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "r") as f:
|
||||
config_dict = json.load(f)
|
||||
@ -123,13 +129,14 @@ class AutoFeatureExtractorTest(unittest.TestCase):
|
||||
# save in new folder
|
||||
processor.save_pretrained(tmpdirname)
|
||||
|
||||
# drop `processor_class` in processor
|
||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f:
|
||||
config_dict = json.load(f)
|
||||
config_dict.pop("processor_class")
|
||||
if os.path.isfile(os.path.join(tmpdirname, PROCESSOR_NAME)):
|
||||
# drop `processor_class` in processor
|
||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f:
|
||||
config_dict = json.load(f)
|
||||
config_dict.pop("processor_class")
|
||||
|
||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f:
|
||||
f.write(json.dumps(config_dict))
|
||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f:
|
||||
f.write(json.dumps(config_dict))
|
||||
|
||||
# drop `processor_class` in tokenizer
|
||||
with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "r") as f:
|
||||
@ -153,13 +160,14 @@ class AutoFeatureExtractorTest(unittest.TestCase):
|
||||
# save in new folder
|
||||
processor.save_pretrained(tmpdirname)
|
||||
|
||||
# drop `processor_class` in processor
|
||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f:
|
||||
config_dict = json.load(f)
|
||||
config_dict.pop("processor_class")
|
||||
if os.path.isfile(os.path.join(tmpdirname, PROCESSOR_NAME)):
|
||||
# drop `processor_class` in processor
|
||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f:
|
||||
config_dict = json.load(f)
|
||||
config_dict.pop("processor_class")
|
||||
|
||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f:
|
||||
f.write(json.dumps(config_dict))
|
||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f:
|
||||
f.write(json.dumps(config_dict))
|
||||
|
||||
# drop `processor_class` in feature extractor
|
||||
with open(os.path.join(tmpdirname, FEATURE_EXTRACTOR_NAME), "r") as f:
|
||||
|
@ -75,11 +75,12 @@ class ProcessorTesterMixin:
|
||||
processor_first = self.get_processor()
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
saved_file = processor_first.save_pretrained(tmpdirname)[0]
|
||||
check_json_file_has_correct_format(saved_file)
|
||||
processor_second = self.processor_class.from_pretrained(tmpdirname)
|
||||
saved_files = processor_first.save_pretrained(tmpdirname)
|
||||
if len(saved_files) > 0:
|
||||
check_json_file_has_correct_format(saved_files[0])
|
||||
processor_second = self.processor_class.from_pretrained(tmpdirname)
|
||||
|
||||
self.assertEqual(processor_second.to_dict(), processor_first.to_dict())
|
||||
self.assertEqual(processor_second.to_dict(), processor_first.to_dict())
|
||||
|
||||
|
||||
class MyProcessor(ProcessorMixin):
|
||||
|
Loading…
Reference in New Issue
Block a user