diff --git a/transformers/__init__.py b/transformers/__init__.py
index 9a767913b3b..f3f81f1dbec 100644
--- a/transformers/__init__.py
+++ b/transformers/__init__.py
@@ -27,7 +27,7 @@ from .data import (is_sklearn_available,
                    glue_output_modes, glue_convert_examples_to_features,
                    glue_processors, glue_tasks_num_labels,
                    squad_convert_examples_to_features, SquadFeatures,
-                   SquadExample, read_squad_examples)
+                   SquadExample)
 
 if is_sklearn_available():
     from .data import glue_compute_metrics
diff --git a/transformers/data/__init__.py b/transformers/data/__init__.py
index 50f2e768f4a..b351bf625eb 100644
--- a/transformers/data/__init__.py
+++ b/transformers/data/__init__.py
@@ -1,6 +1,6 @@
 from .processors import InputExample, InputFeatures, DataProcessor, SquadFeatures
 from .processors import glue_output_modes, glue_processors, glue_tasks_num_labels, glue_convert_examples_to_features
-from .processors import squad_convert_examples_to_features, SquadExample, read_squad_examples
+from .processors import squad_convert_examples_to_features, SquadExample
 
 from .metrics import is_sklearn_available
 if is_sklearn_available():
diff --git a/transformers/data/processors/__init__.py b/transformers/data/processors/__init__.py
index 924b4a12453..1e527766295 100644
--- a/transformers/data/processors/__init__.py
+++ b/transformers/data/processors/__init__.py
@@ -1,4 +1,4 @@
 from .utils import InputExample, InputFeatures, DataProcessor
 from .glue import glue_output_modes, glue_processors, glue_tasks_num_labels, glue_convert_examples_to_features
-from .squad import squad_convert_examples_to_features, SquadFeatures, SquadExample, read_squad_examples
+from .squad import squad_convert_examples_to_features, SquadFeatures, SquadExample
diff --git a/transformers/data/processors/squad.py b/transformers/data/processors/squad.py
index 3d8f48c1bb5..39ee00ae566 100644
--- a/transformers/data/processors/squad.py
+++ b/transformers/data/processors/squad.py
@@ -46,7 +46,6 @@ def _check_is_max_context(doc_spans, cur_span_index, position):
 
     return cur_span_index == best_span_index
 
-
 def _new_check_is_max_context(doc_spans, cur_span_index, position):
     """Check if this is the 'max context' doc span for the token."""
     # if len(doc_spans) == 1:
@@ -92,7 +91,7 @@ def squad_convert_examples_to_features(examples, tokenizer, max_seq_length,
     features = []
     new_features = []
     for (example_index, example) in enumerate(tqdm(examples)):
-        if is_training:
+        if is_training and not example.is_impossible:
             # Get start and end position
             answer_length = len(example.answer_text)
             start_position = example.start_position
@@ -105,6 +104,7 @@ def squad_convert_examples_to_features(examples, tokenizer, max_seq_length,
                 logger.warning("Could not find answer: '%s' vs. '%s'", actual_text, cleaned_answer_text)
                 continue
 
+
         tok_to_orig_index = []
         orig_to_tok_index = []
         all_doc_tokens = []
@@ -115,6 +115,18 @@ def squad_convert_examples_to_features(examples, tokenizer, max_seq_length,
                 tok_to_orig_index.append(i)
                 all_doc_tokens.append(sub_token)
 
+
+        if is_training and not example.is_impossible:
+            tok_start_position = orig_to_tok_index[example.start_position]
+            if example.end_position < len(example.doc_tokens) - 1:
+                tok_end_position = orig_to_tok_index[example.end_position + 1] - 1
+            else:
+                tok_end_position = len(all_doc_tokens) - 1
+
+            (tok_start_position, tok_end_position) = _improve_answer_span(
+                all_doc_tokens, tok_start_position, tok_end_position, tokenizer, example.answer_text
+            )
+
         spans = []
 
         truncated_query = tokenizer.encode(example.question_text, add_special_tokens=False, max_length=max_query_length)
@@ -187,6 +199,34 @@ def squad_convert_examples_to_features(examples, tokenizer, max_seq_length,
             # Set the CLS index to '0'
             p_mask[cls_index] = 0
 
+
+            span_is_impossible = example.is_impossible
+            start_position = 0
+            end_position = 0
+            if is_training and not span_is_impossible:
+                # For training, if our document chunk does not contain an annotation
+                # we throw it out, since there is nothing to predict.
+                doc_start = span["start"]
+                doc_end = span["start"] + span["length"] - 1
+                out_of_span = False
+
+                if not (tok_start_position >= doc_start and tok_end_position <= doc_end):
+                    out_of_span = True
+
+                if out_of_span:
+                    start_position = cls_index
+                    end_position = cls_index
+                    span_is_impossible = True
+                else:
+                    if sequence_a_is_doc:
+                        doc_offset = 0
+                    else:
+                        doc_offset = len(truncated_query) + sequence_added_tokens
+
+                    start_position = tok_start_position - doc_start + doc_offset
+                    end_position = tok_end_position - doc_start + doc_offset
+
+
             new_features.append(NewSquadFeatures(
                 span['input_ids'],
                 span['attention_mask'],
@@ -199,7 +239,10 @@ def squad_convert_examples_to_features(examples, tokenizer, max_seq_length,
                 paragraph_len=span['paragraph_len'],
                 token_is_max_context=span["token_is_max_context"],
                 tokens=span["tokens"],
-                token_to_orig_map=span["token_to_orig_map"]
+                token_to_orig_map=span["token_to_orig_map"],
+
+                start_position=start_position,
+                end_position=end_position
             ))
 
             unique_id += 1
@@ -207,86 +250,10 @@ def squad_convert_examples_to_features(examples, tokenizer, max_seq_length,
     return new_features
 
 
-def read_squad_examples(input_file, is_training, version_2_with_negative):
-    """Read a SQuAD json file into a list of SquadExample."""
-    with open(input_file, "r", encoding='utf-8') as reader:
-        input_data = json.load(reader)["data"]
-
-    def is_whitespace(c):
-        if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
-            return True
-        return False
-
-    examples = []
-    for entry in input_data:
-        for paragraph in entry["paragraphs"]:
-            paragraph_text = paragraph["context"]
-            doc_tokens = []
-            char_to_word_offset = []
-            prev_is_whitespace = True
-            for c in paragraph_text:
-                if is_whitespace(c):
-                    prev_is_whitespace = True
-                else:
-                    if prev_is_whitespace:
-                        doc_tokens.append(c)
-                    else:
-                        doc_tokens[-1] += c
-                    prev_is_whitespace = False
-                char_to_word_offset.append(len(doc_tokens) - 1)
-
-            for qa in paragraph["qas"]:
-                qas_id = qa["id"]
-                question_text = qa["question"]
-                start_position = None
-                end_position = None
-                orig_answer_text = None
-                is_impossible = False
-                if is_training:
-                    if version_2_with_negative:
-                        is_impossible = qa["is_impossible"]
-                    if (len(qa["answers"]) != 1) and (not is_impossible):
-                        raise ValueError(
-                            "For training, each question should have exactly 1 answer.")
-                    if not is_impossible:
-                        answer = qa["answers"][0]
-                        orig_answer_text = answer["text"]
-                        answer_offset = answer["answer_start"]
-                        answer_length = len(orig_answer_text)
-                        start_position = char_to_word_offset[answer_offset]
-                        end_position = char_to_word_offset[answer_offset + answer_length - 1]
-                        # Only add answers where the text can be exactly recovered from the
-                        # document. If this CAN'T happen it's likely due to weird Unicode
-                        # stuff so we will just skip the example.
-                        #
-                        # Note that this means for training mode, every example is NOT
-                        # guaranteed to be preserved.
-                        actual_text = " ".join(doc_tokens[start_position:(end_position + 1)])
-                        cleaned_answer_text = " ".join(
-                            whitespace_tokenize(orig_answer_text))
-                        if actual_text.find(cleaned_answer_text) == -1:
-                            logger.warning("Could not find answer: '%s' vs. '%s'",
-                                           actual_text, cleaned_answer_text)
-                            continue
-                    else:
-                        start_position = -1
-                        end_position = -1
-                        orig_answer_text = ""
-
-                example = SquadExample(
-                    qas_id=qas_id,
-                    question_text=question_text,
-                    doc_tokens=doc_tokens,
-                    orig_answer_text=orig_answer_text,
-                    start_position=start_position,
-                    end_position=end_position,
-                    is_impossible=is_impossible)
-                examples.append(example)
-    return examples
-
-
-class SquadV1Processor(DataProcessor):
+class SquadProcessor(DataProcessor):
     """Processor for the SQuAD data set."""
+    train_file = None
+    dev_file = None
 
     def get_example_from_tensor_dict(self, tensor_dict):
         """See base class."""
@@ -301,13 +268,19 @@ class SquadV1Processor(DataProcessor):
 
     def get_train_examples(self, data_dir, only_first=None):
         """See base class."""
-        with open(os.path.join(data_dir, "train-v1.1.json"), "r", encoding='utf-8') as reader:
+        if self.train_file is None:
+            raise ValueError("SquadProcessor should be instantiated via SquadV1Processor or SquadV2Processor")
+
+        with open(os.path.join(data_dir, self.train_file), "r", encoding='utf-8') as reader:
             input_data = json.load(reader)["data"]
         return self._create_examples(input_data, "train", only_first)
 
     def get_dev_examples(self, data_dir, only_first=None):
         """See base class."""
-        with open(os.path.join(data_dir, "dev-v1.1.json"), "r", encoding='utf-8') as reader:
+        if self.dev_file is None:
+            raise ValueError("SquadProcessor should be instantiated via SquadV1Processor or SquadV2Processor")
+
+        with open(os.path.join(data_dir, self.dev_file), "r", encoding='utf-8') as reader:
             input_data = json.load(reader)["data"]
         return self._create_examples(input_data, "dev", only_first)
 
@@ -329,7 +302,13 @@ class SquadV1Processor(DataProcessor):
                     question_text = qa["question"]
                     start_position_character = None
                     answer_text = None
-                    if is_training:
+
+                    if "is_impossible" in qa:
+                        is_impossible = qa["is_impossible"]
+                    else:
+                        is_impossible = False
+
+                    if not is_impossible and is_training:
                         if (len(qa["answers"]) != 1):
                             raise ValueError(
                                 "For training, each question should have exactly 1 answer.")
@@ -343,15 +322,25 @@ class SquadV1Processor(DataProcessor):
                         context_text=context_text,
                         answer_text=answer_text,
                         start_position_character=start_position_character,
-                        title=title
+                        title=title,
+                        is_impossible=is_impossible
                     )
+
                     examples.append(example)
             if only_first is not None and len(examples) > only_first:
                 return examples
         return examples
 
-
+class SquadV1Processor(SquadProcessor):
+    train_file = "train-v1.1.json"
+    dev_file = "dev-v1.1.json"
+
+
+class SquadV2Processor(SquadProcessor):
+    train_file = "train-v2.0.json"
+    dev_file = "dev-v2.0.json"
+
 
 class NewSquadExample(object):
     """
@@ -364,13 +353,16 @@ class NewSquadExample(object):
                  context_text,
                  answer_text,
                  start_position_character,
-                 title):
+                 title,
+                 is_impossible=False):
         self.qas_id = qas_id
         self.question_text = question_text
         self.context_text = context_text
         self.answer_text = answer_text
         self.title = title
-        self.is_impossible = False
+        self.is_impossible = is_impossible
+
+        self.start_position, self.end_position = 0, 0
 
         doc_tokens = []
         char_to_word_offset = []
@@ -392,7 +384,7 @@ class NewSquadExample(object):
         self.char_to_word_offset = char_to_word_offset
 
         # Start end end positions only has a value during evaluation.
-        if start_position_character is not None:
+        if start_position_character is not None and not is_impossible:
             self.start_position = char_to_word_offset[start_position_character]
             self.end_position = char_to_word_offset[start_position_character + len(answer_text) - 1]
 
@@ -415,7 +407,10 @@ class NewSquadFeatures(object):
                  paragraph_len,
                  token_is_max_context,
                  tokens,
-                 token_to_orig_map
+                 token_to_orig_map,
+
+                 start_position,
+                 end_position
     ):
         self.input_ids = input_ids
         self.attention_mask = attention_mask
@@ -430,6 +425,9 @@ class NewSquadFeatures(object):
         self.tokens = tokens
         self.token_to_orig_map = token_to_orig_map
 
+        self.start_position = start_position
+        self.end_position = end_position
+
 
 class SquadExample(object):
     """
     A single training/test example for the Squad dataset.
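For reference, a minimal usage sketch of the refactored API after this diff. Names not visible in the hunks above are assumptions: the `data/squad` directory, the tokenizer choice, and the `doc_stride` keyword (only `max_query_length`, `is_training`, and `sequence_a_is_doc` are referenced in the function body shown here).

```python
from transformers import BertTokenizer
from transformers.data.processors.squad import (
    SquadV2Processor,
    squad_convert_examples_to_features,
)

# SquadV2Processor only pins train_file/dev_file; the loading logic lives
# in the shared SquadProcessor base class.
processor = SquadV2Processor()
# Hypothetical data_dir; get_train_examples expects train-v2.0.json inside it.
examples = processor.get_train_examples("data/squad")

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# With is_training=True, each returned feature carries span-relative
# start_position/end_position (set to the CLS index for document chunks
# that do not contain the answer, per the out_of_span handling above).
features = squad_convert_examples_to_features(
    examples,
    tokenizer,
    max_seq_length=384,
    doc_stride=128,        # assumed keyword; not shown in the hunks
    max_query_length=64,
    is_training=True,
)

print(features[0].start_position, features[0].end_position)
```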