Copy object instead of passing the reference

This commit is contained in:
Lysandre 2020-01-29 16:15:39 -05:00
parent adb8c93134
commit 217349016a
2 changed files with 14 additions and 1 deletions

View File

@ -326,7 +326,7 @@ class PreTrainedTokenizer(object):
cls.pretrained_init_configuration
and pretrained_model_name_or_path in cls.pretrained_init_configuration
):
init_configuration = cls.pretrained_init_configuration[pretrained_model_name_or_path]
init_configuration = cls.pretrained_init_configuration[pretrained_model_name_or_path].copy()
else:
# Get the vocabulary from local files
logger.info(

View File

@ -495,3 +495,16 @@ class TokenizerTesterMixin:
assert [token_type_padding_idx] * padding_size + token_type_ids == padded_token_type_ids
assert [0] * padding_size + attention_mask == padded_attention_mask
assert [1] * padding_size + special_tokens_mask == padded_special_tokens_mask
def test_separate_tokenizers(self):
    """Check that tokenizers built with different kwargs keep separate ``init_kwargs``.

    Two tokenizers are created through ``self.get_tokenizer``, the first with
    ``random_argument=True`` and the second with ``random_argument=False``.
    The value recorded on the first tokenizer is checked both before and after
    the second one is created, so any state leaking between the two instances
    makes the test fail.
    """
    # This tests that tokenizers don't impact others. Unfortunately the case where it fails is when
    # we're loading an S3 configuration from a pre-trained identifier, and we have no way of testing
    # those today.
    tokenizer = self.get_tokenizer(random_argument=True)
    assert tokenizer.init_kwargs['random_argument'] is True

    new_tokenizer = self.get_tokenizer(random_argument=False)

    # Re-check the first tokenizer after building the second: its recorded value must be unchanged.
    assert tokenizer.init_kwargs['random_argument'] is True
    assert new_tokenizer.init_kwargs['random_argument'] is False