Replace assertions with value errors on distilbert model (#20463)

* Changed asserts into exceptions in 7-8 places (the pattern is sketched after the commit metadata below)

* Fixed a syntax error

* Updated an error message

* Updated the file (Co-author: Batese2001)

* Successful test run on test_modeling_distilbert.py

The revised code raises the expected errors and exceptions in test_modeling_distilbert.py.

Co-credit: @batese2001

* Delete test_modeling_distilbert.ipynb

* Update modeling_distilbert.py

* Exceptions are now raised whenever the conditions that the removed assert statements enforced are violated (Co-author: Batese2001)

* Reformatted the DistilBERT model with black

* Improved the comments explaining the exceptions raised when the number of attention heads does not evenly divide the hidden dimension

* Updated based on review feedback

* Changed line 833 based on the suggestion from @younesbelkada

* Changed line 833 based on the suggestion from @younesbelkada (draft 2)

* Reformatted the file

* Update src/transformers/models/distilbert/modeling_distilbert.py

Co-authored-by: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
JuheonChu committed 2022-11-28 09:44:03 -05:00 (committed by GitHub)
commit 98122794d4
parent 134a8e21ae
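The motivation, in brief: `assert` statements are stripped when Python runs with the `-O` flag and raise a bare `AssertionError` with no message, whereas an explicit `ValueError` always fires and says what went wrong. A minimal sketch of the pattern applied throughout this commit (hypothetical helper, not part of the diff):

```python
def validate_heads(dim: int, n_heads: int) -> None:
    # Before: `assert dim % n_heads == 0` -- skipped entirely under
    # `python -O` and uninformative when it fails.
    # After: always enforced, with an actionable message.
    if dim % n_heads != 0:
        raise ValueError(f"n_heads: {n_heads} must divide dim: {dim} evenly")


validate_heads(768, 12)  # OK: DistilBERT's defaults give 64 dims per head
```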

@@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
PyTorch DistilBERT model adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) and in
part from HuggingFace PyTorch version of Google AI Bert model (https://github.com/google-research/bert)
@@ -141,7 +142,10 @@ class MultiHeadSelfAttention(nn.Module):
         self.dim = config.dim
         self.dropout = nn.Dropout(p=config.attention_dropout)
-        assert self.dim % self.n_heads == 0
+        # The number of attention heads must evenly divide the hidden dimension
+        if self.dim % self.n_heads != 0:
+            # Raise a ValueError instead of a bare assert so the failure is explicit
+            raise ValueError(f"self.n_heads: {self.n_heads} must divide self.dim: {self.dim} evenly")
 
         self.q_lin = nn.Linear(in_features=config.dim, out_features=config.dim)
         self.k_lin = nn.Linear(in_features=config.dim, out_features=config.dim)
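To see the new check in action, a hedged sketch (which of the two divisibility checks fires first depends on construction order inside the model, so the exact message may vary):

```python
from transformers import DistilBertConfig, DistilBertModel

# dim=10 is not divisible by n_heads=3, so building the model should now
# raise a descriptive ValueError instead of a bare AssertionError.
config = DistilBertConfig(dim=10, n_heads=3)
try:
    DistilBertModel(config)
except ValueError as err:
    print(err)
```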
@@ -255,7 +259,9 @@ class TransformerBlock(nn.Module):
     def __init__(self, config: PretrainedConfig):
         super().__init__()
-        assert config.dim % config.n_heads == 0
+        # config.n_heads must evenly divide config.dim
+        if config.dim % config.n_heads != 0:
+            raise ValueError(f"config.n_heads {config.n_heads} must divide config.dim {config.dim} evenly")
 
         self.attention = MultiHeadSelfAttention(config)
         self.sa_layer_norm = nn.LayerNorm(normalized_shape=config.dim, eps=1e-12)
@@ -291,7 +297,9 @@ class TransformerBlock(nn.Module):
         if output_attentions:
             sa_output, sa_weights = sa_output  # (bs, seq_length, dim), (bs, n_heads, seq_length, seq_length)
         else:  # To handle these `output_attentions` or `output_hidden_states` cases returning tuples
-            assert type(sa_output) == tuple
+            if type(sa_output) != tuple:
+                raise TypeError(f"sa_output must be a tuple but it is {type(sa_output)} type")
+
             sa_output = sa_output[0]
 
         sa_output = self.sa_layer_norm(sa_output + x)  # (bs, seq_length, dim)
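The guard above uses `TypeError` rather than `ValueError` because the failure concerns the object's type, not its value. A standalone sketch of the same unwrapping pattern (hypothetical helper; `isinstance` is the more idiomatic check and also accepts tuple subclasses):

```python
from typing import Any


def unwrap_first(output: Any) -> Any:
    # Refuse to index into anything that is not the expected tuple,
    # failing loudly here instead of with a confusing error downstream.
    if not isinstance(output, tuple):
        raise TypeError(f"output must be a tuple but it is {type(output)} type")
    return output[0]
```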
@@ -320,6 +328,7 @@
output_hidden_states: bool = False,
return_dict: Optional[bool] = None,
) -> Union[BaseModelOutput, Tuple[torch.Tensor, ...]]: # docstyle-ignore
"""
Parameters:
x: torch.tensor(bs, seq_length, dim) Input sequence embedded.
@@ -348,11 +357,14 @@ class Transformer(nn.Module):
             hidden_state = layer_outputs[-1]
 
             if output_attentions:
-                assert len(layer_outputs) == 2
+                if len(layer_outputs) != 2:
+                    raise ValueError(f"The length of the layer_outputs should be 2, but it is {len(layer_outputs)}")
+
                 attentions = layer_outputs[0]
                 all_attentions = all_attentions + (attentions,)
             else:
-                assert len(layer_outputs) == 1
+                if len(layer_outputs) != 1:
+                    raise ValueError(f"The length of the layer_outputs should be 1, but it is {len(layer_outputs)}")
 
         # Add last layer
         if output_hidden_states:
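As a quick sanity check of the two-element branch guarded above (a sketch assuming a randomly initialized model with the default `DistilBertConfig`):

```python
import torch
from transformers import DistilBertConfig, DistilBertModel

model = DistilBertModel(DistilBertConfig())  # randomly initialized, no download
input_ids = torch.randint(0, model.config.vocab_size, (1, 8))
outputs = model(input_ids, output_attentions=True)
# Each layer contributed one attention tensor via the guarded branch above.
print(len(outputs.attentions))  # 6 with the default 6-layer config
```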
@@ -810,7 +822,9 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
         self.distilbert = DistilBertModel(config)
         self.qa_outputs = nn.Linear(config.dim, config.num_labels)
-        assert config.num_labels == 2
+        if config.num_labels != 2:
+            raise ValueError(f"config.num_labels should be 2, but it is {config.num_labels}")
+
         self.dropout = nn.Dropout(config.qa_dropout)
 
         # Initialize weights and apply final processing
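The question-answering head's check can be exercised the same way (a sketch; extractive QA predicts exactly a start logit and an end logit, hence the hard requirement of two labels):

```python
from transformers import DistilBertConfig, DistilBertForQuestionAnswering

try:
    DistilBertForQuestionAnswering(DistilBertConfig(num_labels=3))
except ValueError as err:
    print(err)  # config.num_labels should be 2, but it is 3
```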