diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3a4bae29845..89819c10095 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -10,7 +10,7 @@ jobs:
             - run: sudo pip install pytest codecov pytest-cov
             - run: sudo pip install spacy ftfy==4.4.3
             - run: sudo python -m spacy download en
-            - run: python -m pytest -sv tests/ --cov
+            - run: python -m pytest -sv ./pytorch_pretrained_bert/tests/ --cov  # tests live inside the package directory
             - run: codecov
     build_py2:
         working_directory: ~/pytorch-pretrained-BERT
@@ -22,7 +22,7 @@ jobs:
             - run: sudo pip install pytest codecov pytest-cov
             - run: sudo pip install spacy ftfy==4.4.3
             - run: sudo python -m spacy download en
-            - run: python -m pytest -sv tests/ --cov
+            - run: python -m pytest -sv ./pytorch_pretrained_bert/tests/ --cov  # tests live inside the package directory
             - run: codecov
 workflows:
   version: 2
diff --git a/pytorch_pretrained_bert/modeling_gpt2.py b/pytorch_pretrained_bert/modeling_gpt2.py
index 688512ae805..85ec85c16c2 100644
--- a/pytorch_pretrained_bert/modeling_gpt2.py
+++ b/pytorch_pretrained_bert/modeling_gpt2.py
@@ -175,6 +175,18 @@ class GPT2Config(PretrainedConfig):
     def total_tokens_embeddings(self):
         return self.vocab_size + self.n_special
 
+    @property
+    def hidden_size(self):
+        return self.n_embd  # common-name alias for n_embd
+
+    @property
+    def num_attention_heads(self):
+        return self.n_head  # common-name alias for n_head
+
+    @property
+    def num_hidden_layers(self):
+        return self.n_layer  # common-name alias for n_layer
+
 
 class Attention(nn.Module):
     def __init__(self, nx, n_ctx, config, scale=False):
diff --git a/pytorch_pretrained_bert/modeling_openai.py b/pytorch_pretrained_bert/modeling_openai.py
index 5ee4e9224af..f394723d102 100644
--- a/pytorch_pretrained_bert/modeling_openai.py
+++ b/pytorch_pretrained_bert/modeling_openai.py
@@ -206,6 +206,18 @@ class OpenAIGPTConfig(PretrainedConfig):
     def total_tokens_embeddings(self):
         return self.vocab_size + self.n_special
 
+    @property
+    def hidden_size(self):
+        return self.n_embd  # common-name alias for n_embd
+
+    @property
+    def num_attention_heads(self):
+        return self.n_head  # common-name alias for n_head
+
+    @property
+    def num_hidden_layers(self):
+        return self.n_layer  # common-name alias for n_layer
+
 
 class Attention(nn.Module):
     def __init__(self, nx, n_ctx, config, scale=False):
diff --git a/pytorch_pretrained_bert/modeling_transfo_xl.py b/pytorch_pretrained_bert/modeling_transfo_xl.py
index 84df603a53a..1b129544a34 100644
--- a/pytorch_pretrained_bert/modeling_transfo_xl.py
+++ b/pytorch_pretrained_bert/modeling_transfo_xl.py
@@ -289,6 +289,17 @@ class TransfoXLConfig(PretrainedConfig):
             raise ValueError("First argument must be either a vocabulary size (int)"
                              "or the path to a pretrained model config file (str)")
 
+    @property
+    def hidden_size(self):
+        return self.d_model  # common-name alias for d_model
+
+    @property
+    def num_attention_heads(self):
+        return self.n_head  # common-name alias for n_head
+
+    @property
+    def num_hidden_layers(self):
+        return self.n_layer  # common-name alias for n_layer
 
 
 class PositionalEmbedding(nn.Module):
diff --git a/pytorch_pretrained_bert/modeling_xlnet.py b/pytorch_pretrained_bert/modeling_xlnet.py
index 754a03f37dc..33fd78c61e1 100644
--- a/pytorch_pretrained_bert/modeling_xlnet.py
+++ b/pytorch_pretrained_bert/modeling_xlnet.py
@@ -313,6 +313,18 @@ class XLNetConfig(PretrainedConfig):
             raise ValueError("First argument must be either a vocabulary size (int)"
                              "or the path to a pretrained model config file (str)")
 
+    @property
+    def hidden_size(self):
+        return self.d_model  # common-name alias for d_model
+
+    @property
+    def num_attention_heads(self):
+        return self.n_head  # common-name alias for n_head
+
+    @property
+    def num_hidden_layers(self):
+        return self.n_layer  # common-name alias for n_layer
+
 
 try:
     from apex.normalization.fused_layer_norm import FusedLayerNorm as XLNetLayerNorm
diff --git a/pytorch_pretrained_bert/tests/model_tests_commons.py b/pytorch_pretrained_bert/tests/model_tests_commons.py
index 759b31aa0a8..6ebc9dbaa65 100644
--- a/pytorch_pretrained_bert/tests/model_tests_commons.py
+++ b/pytorch_pretrained_bert/tests/model_tests_commons.py
@@ -184,6 +184,12 @@ class ConfigTester(object):
         self.config_class = config_class
         self.inputs_dict = kwargs
 
+    def create_and_test_config_common_properties(self):
+        """Check that the config exposes the common property names."""
+        config = self.config_class(**self.inputs_dict)
+        for name in ('hidden_size', 'num_attention_heads', 'num_hidden_layers'):
+            self.parent.assertTrue(hasattr(config, name), msg='missing ' + name)
+
     def create_and_test_config_to_json_string(self):
         config = self.config_class(**self.inputs_dict)
         obj = json.loads(config.to_json_string())
@@ -199,6 +205,7 @@ class ConfigTester(object):
         self.parent.assertEqual(config_second.to_dict(), config_first.to_dict())
 
     def run_common_tests(self):
+        self.create_and_test_config_common_properties()
         self.create_and_test_config_to_json_string()
         self.create_and_test_config_to_json_file()