mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
Merge pull request #1059 from GuillemGSubies/master
Better use of the spaCy tokenizer in the OpenAI and XLM tokenizers
This commit is contained in:
commit
41789c6c3d
@@ -89,8 +89,9 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
|
||||
|
||||
try:
|
||||
import ftfy
|
||||
import spacy
|
||||
self.nlp = spacy.load('en', disable=['parser', 'tagger', 'ner', 'textcat'])
|
||||
from spacy.lang.en import English
|
||||
_nlp = English()
|
||||
self.nlp = _nlp.Defaults.create_tokenizer(_nlp)
|
||||
self.fix_text = ftfy.fix_text
|
||||
except ImportError:
|
||||
logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")
|
||||
|
@@ -124,8 +124,9 @@ class XLMTokenizer(PreTrainedTokenizer):
|
||||
**kwargs)
|
||||
try:
|
||||
import ftfy
|
||||
import spacy
|
||||
self.nlp = spacy.load('en', disable=['parser', 'tagger', 'ner', 'textcat'])
|
||||
from spacy.lang.en import English
|
||||
_nlp = English()
|
||||
self.nlp = _nlp.Defaults.create_tokenizer(_nlp)
|
||||
self.fix_text = ftfy.fix_text
|
||||
except ImportError:
|
||||
logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")
|
||||
|
Loading…
Reference in New Issue
Block a user