Mirror of https://github.com/huggingface/transformers.git, synced 2025-08-02 19:21:31 +06:00
Replace assertions with value errors on distilbert model (#20463)
* Changed assert into 7-8 exceptions
* Updated syntax error
* Updated error
* Updated file (Co-author: Batese2001)
* Successful test on test_modeling_distilbert.py: the revised code raises errors and exceptions as expected. Co-credit: @batese2001
* Delete test_modeling_distilbert.ipynb
* Update modeling_distilbert.py
* Successfully raise exceptions when the conditions checked by the former assert statements are violated (Co-author: Batese2001)
* Successfully raise exceptions when the conditions checked by the former assert statements are violated (Co-author: Batese2001)
* Committed the reformatted distilbert model
* Reformatted distilbert model
* Reformatted distilbert model
* Reformatted distilbert model
* Reformatted distilbert model with black
* Changed comments to better explain that an exception is raised when the number of heads does not evenly divide the dimensions
* Changed comments to better explain that an exception is raised when the number of heads does not evenly divide the dimensions
* Changed based on the feedback
* Changed line 833 based on the suggestion made by @younesbelkada
* Changed line 833 based on the suggestion made by @younesbelkada, draft 2
* Reformatted file
* Update src/transformers/models/distilbert/modeling_distilbert.py
* Update src/transformers/models/distilbert/modeling_distilbert.py

Co-authored-by: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
This commit is contained in:
parent 134a8e21ae · commit 98122794d4
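The diff below applies one pattern throughout: each bare assert is replaced by an explicit condition check that raises a ValueError (or, for the sa_output check, a TypeError) carrying an f-string message. A minimal, self-contained sketch of that pattern (the helper name and call sites are illustrative only, not part of the commit):

def _check_heads_divide_dim(dim: int, n_heads: int) -> None:
    # Before: `assert dim % n_heads == 0` raises an uninformative AssertionError
    # and is skipped entirely when Python runs with optimizations (python -O).
    # After: the same condition raises a ValueError with a descriptive message.
    if dim % n_heads != 0:
        raise ValueError(f"n_heads: {n_heads} must divide dim: {dim} evenly")


_check_heads_divide_dim(768, 12)  # passes: 768 / 12 == 64

try:
    _check_heads_divide_dim(768, 7)
except ValueError as err:
    print(err)  # n_heads: 7 must divide dim: 768 evenly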
src/transformers/models/distilbert/modeling_distilbert.py

@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 """
  PyTorch DistilBERT model adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) and in
  part from HuggingFace PyTorch version of Google AI Bert model (https://github.com/google-research/bert)
@@ -141,7 +142,10 @@ class MultiHeadSelfAttention(nn.Module):
         self.dim = config.dim
         self.dropout = nn.Dropout(p=config.attention_dropout)
 
-        assert self.dim % self.n_heads == 0
+        # Have an even number of multi heads that divide the dimensions
+        if self.dim % self.n_heads != 0:
+            # Raise value errors for even multi-head attention nodes
+            raise ValueError(f"self.n_heads: {self.n_heads} must divide self.dim: {self.dim} evenly")
 
         self.q_lin = nn.Linear(in_features=config.dim, out_features=config.dim)
         self.k_lin = nn.Linear(in_features=config.dim, out_features=config.dim)
@@ -255,7 +259,9 @@ class TransformerBlock(nn.Module):
     def __init__(self, config: PretrainedConfig):
         super().__init__()
 
-        assert config.dim % config.n_heads == 0
+        # Have an even number of Configure multi-heads
+        if config.dim % config.n_heads != 0:
+            raise ValueError(f"config.n_heads {config.n_heads} must divide config.dim {config.dim} evenly")
 
         self.attention = MultiHeadSelfAttention(config)
         self.sa_layer_norm = nn.LayerNorm(normalized_shape=config.dim, eps=1e-12)
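For context, a usage sketch of how the new TransformerBlock check surfaces when a model is built with a head count that does not divide the hidden size (assuming an installed transformers version that includes this change; the printed message follows the f-string above):

from transformers import DistilBertConfig, DistilBertModel

# 768 is not divisible by 7, so TransformerBlock.__init__ now rejects the
# config with a descriptive ValueError instead of a bare AssertionError.
config = DistilBertConfig(dim=768, n_heads=7)
try:
    DistilBertModel(config)
except ValueError as err:
    print(err)  # config.n_heads 7 must divide config.dim 768 evenly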
@@ -291,7 +297,9 @@ class TransformerBlock(nn.Module):
         if output_attentions:
             sa_output, sa_weights = sa_output  # (bs, seq_length, dim), (bs, n_heads, seq_length, seq_length)
         else:  # To handle these `output_attentions` or `output_hidden_states` cases returning tuples
-            assert type(sa_output) == tuple
+            if type(sa_output) != tuple:
+                raise TypeError(f"sa_output must be a tuple but it is {type(sa_output)} type")
+
             sa_output = sa_output[0]
         sa_output = self.sa_layer_norm(sa_output + x)  # (bs, seq_length, dim)
@@ -320,6 +328,7 @@ class Transformer(nn.Module):
         output_hidden_states: bool = False,
         return_dict: Optional[bool] = None,
     ) -> Union[BaseModelOutput, Tuple[torch.Tensor, ...]]:  # docstyle-ignore
+
         """
         Parameters:
             x: torch.tensor(bs, seq_length, dim) Input sequence embedded.
@@ -348,11 +357,14 @@ class Transformer(nn.Module):
             hidden_state = layer_outputs[-1]
 
             if output_attentions:
-                assert len(layer_outputs) == 2
+                if len(layer_outputs) != 2:
+                    raise ValueError(f"The length of the layer_outputs should be 2, but it is {len(layer_outputs)}")
+
                 attentions = layer_outputs[0]
                 all_attentions = all_attentions + (attentions,)
             else:
-                assert len(layer_outputs) == 1
+                if len(layer_outputs) != 1:
+                    raise ValueError(f"The length of the layer_outputs should be 1, but it is {len(layer_outputs)}")
 
         # Add last layer
         if output_hidden_states:
@@ -810,7 +822,9 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
 
         self.distilbert = DistilBertModel(config)
         self.qa_outputs = nn.Linear(config.dim, config.num_labels)
-        assert config.num_labels == 2
+        if config.num_labels != 2:
+            raise ValueError(f"config.num_labels should be 2, but it is {config.num_labels}")
+
         self.dropout = nn.Dropout(config.qa_dropout)
 
         # Initialize weights and apply final processing
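Similarly, a usage sketch for the question-answering head, which emits exactly two sets of logits (start and end positions) and therefore rejects any other num_labels (again assuming a build that includes this change):

from transformers import DistilBertConfig, DistilBertForQuestionAnswering

# num_labels defaults to 2 for this head; any other value is now rejected
# with the ValueError added above rather than a bare AssertionError.
config = DistilBertConfig(num_labels=3)
try:
    DistilBertForQuestionAnswering(config)
except ValueError as err:
    print(err)  # config.num_labels should be 2, but it is 3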