From bfd75056b0a080addafb7f3d7c9336d27b883a0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guillem=20Garc=C3=ADa=20Subies?=
 <37592763+GuillemGSubies@users.noreply.github.com>
Date: Tue, 20 Aug 2019 14:06:17 +0200
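Subject: [PATCH 1/5] Update tokenization_xlm.py

In XLMTokenizer, stop calling spacy.load('en', ...) and instead build
the tokenizer from spacy.lang.en.English via
Defaults.create_tokenizer().

Note: the added line calls `nlp.Defaults...` rather than
`_nlp.Defaults...` (the variable assigned on the line above it);
PATCH 5/5 corrects the name.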
---
 pytorch_transformers/tokenization_xlm.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pytorch_transformers/tokenization_xlm.py b/pytorch_transformers/tokenization_xlm.py
index b690a3a9458..8e7c2954f2c 100644
--- a/pytorch_transformers/tokenization_xlm.py
+++ b/pytorch_transformers/tokenization_xlm.py
@@ -124,8 +124,9 @@ class XLMTokenizer(PreTrainedTokenizer):
                                            **kwargs)
         try:
             import ftfy
-            import spacy
-            self.nlp = spacy.load('en', disable=['parser', 'tagger', 'ner', 'textcat'])
+            from spacy.lang.en import English
+            _nlp = English()
+            self.nlp = nlp.Defaults.create_tokenizer(_nlp)
             self.fix_text = ftfy.fix_text
         except ImportError:
             logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")

From bb04446285be43059050406b3bc4938807c63c25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guillem=20Garc=C3=ADa=20Subies?=
 <37592763+GuillemGSubies@users.noreply.github.com>
Date: Tue, 20 Aug 2019 14:07:40 +0200
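Subject: [PATCH 2/5] Update tokenization_openai.py

In OpenAIGPTTokenizer, stop calling spacy.load('en', ...) and instead
build the tokenizer from spacy.lang.en.English via
Defaults.create_tokenizer().

Notes:
 * This hunk also removes the `self.fix_text = ftfy.fix_text`
   assignment; PATCH 3/5 adds it back.
 * The added line calls `nlp.Defaults...` rather than
   `_nlp.Defaults...` (the variable assigned on the line above it);
   PATCH 4/5 corrects the name.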
---
 pytorch_transformers/tokenization_openai.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pytorch_transformers/tokenization_openai.py b/pytorch_transformers/tokenization_openai.py
index 0eb5281d399..0f6a8f1daec 100644
--- a/pytorch_transformers/tokenization_openai.py
+++ b/pytorch_transformers/tokenization_openai.py
@@ -89,9 +89,9 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
 
         try:
             import ftfy
-            import spacy
-            self.nlp = spacy.load('en', disable=['parser', 'tagger', 'ner', 'textcat'])
-            self.fix_text = ftfy.fix_text
+            from spacy.lang.en import English
+            _nlp = English()
+            self.nlp = nlp.Defaults.create_tokenizer(_nlp)
         except ImportError:
             logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")
             self.nlp = BasicTokenizer(do_lower_case=True)

From 562b998366c7a4a2bd0addf1a860fbee0aa04d74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guillem=20Garc=C3=ADa=20Subies?=
 <37592763+GuillemGSubies@users.noreply.github.com>
Date: Tue, 20 Aug 2019 14:10:19 +0200
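Subject: [PATCH 3/5] Update tokenization_openai.py

Re-add the `self.fix_text = ftfy.fix_text` assignment in
OpenAIGPTTokenizer that PATCH 2/5 removed.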
---
 pytorch_transformers/tokenization_openai.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pytorch_transformers/tokenization_openai.py b/pytorch_transformers/tokenization_openai.py
index 0f6a8f1daec..79eb023a8d1 100644
--- a/pytorch_transformers/tokenization_openai.py
+++ b/pytorch_transformers/tokenization_openai.py
@@ -92,6 +92,7 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
             from spacy.lang.en import English
             _nlp = English()
             self.nlp = nlp.Defaults.create_tokenizer(_nlp)
+            self.fix_text = ftfy.fix_text
         except ImportError:
             logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")
             self.nlp = BasicTokenizer(do_lower_case=True)

From f5e2ed0fd89d5730126d71c03324fa07ae674ca7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guillem=20Garc=C3=ADa=20Subies?=
 <37592763+GuillemGSubies@users.noreply.github.com>
Date: Tue, 20 Aug 2019 14:19:25 +0200
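Subject: [PATCH 4/5] Update tokenization_openai.py

In OpenAIGPTTokenizer, call create_tokenizer() on `_nlp` (the English()
instance assigned on the preceding line) instead of on `nlp`, the name
introduced by PATCH 2/5.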
---
 pytorch_transformers/tokenization_openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_transformers/tokenization_openai.py b/pytorch_transformers/tokenization_openai.py
index 79eb023a8d1..51b418ebd36 100644
--- a/pytorch_transformers/tokenization_openai.py
+++ b/pytorch_transformers/tokenization_openai.py
@@ -91,7 +91,7 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
             import ftfy
             from spacy.lang.en import English
             _nlp = English()
-            self.nlp = nlp.Defaults.create_tokenizer(_nlp)
+            self.nlp = _nlp.Defaults.create_tokenizer(_nlp)
             self.fix_text = ftfy.fix_text
         except ImportError:
             logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")

From 388e3251fa95b892949968dc89065e464a93b69f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guillem=20Garc=C3=ADa=20Subies?=
 <37592763+GuillemGSubies@users.noreply.github.com>
Date: Tue, 20 Aug 2019 14:19:39 +0200
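Subject: [PATCH 5/5] Update tokenization_xlm.py

In XLMTokenizer, call create_tokenizer() on `_nlp` (the English()
instance assigned on the preceding line) instead of on `nlp`, the name
introduced by PATCH 1/5.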
---
 pytorch_transformers/tokenization_xlm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_transformers/tokenization_xlm.py b/pytorch_transformers/tokenization_xlm.py
index 8e7c2954f2c..2d2f3a8cd4d 100644
--- a/pytorch_transformers/tokenization_xlm.py
+++ b/pytorch_transformers/tokenization_xlm.py
@@ -126,7 +126,7 @@ class XLMTokenizer(PreTrainedTokenizer):
             import ftfy
             from spacy.lang.en import English
             _nlp = English()
-            self.nlp = nlp.Defaults.create_tokenizer(_nlp)
+            self.nlp = _nlp.Defaults.create_tokenizer(_nlp)
             self.fix_text = ftfy.fix_text
         except ImportError:
             logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")