Fix deprecation warnings due to invalid escape sequences. (#4924)

This commit is contained in:
Karthikeyan Singaravelan 2020-06-18 03:16:58 +05:30 committed by GitHub
parent f1a3d03741
commit 38bba9cdd5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -114,7 +114,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
self.delimiter = delimiter
self.vocab_file = vocab_file
self.never_split = never_split
self.punctuation_symbols = '!"#$%&()*+,-./\:;<=>?@[\\]^_`{|}~' # noqa: W605
self.punctuation_symbols = '!"#$%&()*+,-./\\:;<=>?@[\\]^_`{|}~'
self.punction_without_space_before_pattern = re.compile(r"[^\s][{}]".format(self.punctuation_symbols))
self.punctuation_with_space_around_pattern = self._compile_space_around_punctuation_pattern()
@ -141,7 +141,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
def _compile_space_around_punctuation_pattern(self):
look_ahead_for_special_token = "(?=[{}])".format(self.punctuation_symbols)
look_ahead_to_match_all_except_space = "(?=[^\s])" # noqa: W605
look_ahead_to_match_all_except_space = r"(?=[^\s])"
return re.compile(r"" + look_ahead_for_special_token + look_ahead_to_match_all_except_space)
def count_file(self, path, verbose=False, add_eos=False):