Mirror of https://github.com/huggingface/transformers.git, synced 2025-08-01 02:31:11 +06:00
Fix deprecation warnings for int div (#15180)
* Fix deprecation warnings for int div
* Fix import
* ensure that tensor output is python scalar
* make backward compatible
* make code more readable
* adapt test functions

Co-authored-by: mgoldey <matthew.goldey@gmail.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
Commit: 531336bbfd (parent: f6d3fee855)
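The deprecation in question: on recent PyTorch versions, floor division of tensors via the // operator emits "__floordiv__ is deprecated" warnings, and torch.div(..., rounding_mode="floor") is the supported replacement (the rounding_mode argument was added in torch 1.8, hence the version gate introduced below). A minimal before/after sketch, with illustrative values:

    import torch

    lengths = torch.tensor([16000, 8000])

    # old spelling: warns on newer PyTorch
    old = (lengths - 10) // 5

    # new spelling: same result, no warning (requires torch >= 1.8)
    new = torch.div(lengths - 10, 5, rounding_mode="floor")

    assert torch.equal(old, new)  # tensor([3198, 1598])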
examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py

@@ -302,6 +302,8 @@ class DataCollatorForWav2Vec2Pretraining:
         batch_size = batch["input_values"].shape[0]
 
         mask_indices_seq_length = self.model._get_feat_extract_output_lengths(batch["input_values"].shape[-1])
+        # make sure masked sequence length is a Python scalar
+        mask_indices_seq_length = int(mask_indices_seq_length)
 
         # make sure that no loss is computed on padded inputs
         if batch.get("attention_mask") is not None:
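Why the int(...) cast is added: once lengths are computed through torch.div, _get_feat_extract_output_lengths can hand back a 0-dim tensor rather than a Python int, and the collator builds shapes and comparisons from that value. A small sketch of the normalization (numbers are illustrative):

    import torch

    # torch.div returns a 0-dim tensor here, not a Python int
    seq_length = torch.div(torch.tensor(16000), 320, rounding_mode="floor")
    print(seq_length)       # tensor(50)
    print(int(seq_length))  # 50 -- a plain scalar, safe for shapes and indexing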
src/transformers/modeling_utils.py

@@ -23,6 +23,7 @@ from functools import partial
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
 
 import torch
+from packaging import version
 from torch import Tensor, device, nn
 from torch.nn import CrossEntropyLoss
 
@@ -2362,3 +2363,13 @@ def apply_chunking_to_forward(
         return torch.cat(output_chunks, dim=chunk_dim)
 
     return forward_fn(*input_tensors)
+
+
+def torch_int_div(tensor1, tensor2):
+    """
+    A function that performs integer division across different versions of PyTorch.
+    """
+    if version.parse(torch.__version__) < version.parse("1.8.0"):
+        return tensor1 // tensor2
+    else:
+        return torch.div(tensor1, tensor2, rounding_mode="floor")
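A quick sanity check of the new helper (results worked out by hand; the import path is the one this commit adds the function to):

    import torch
    from transformers.modeling_utils import torch_int_div

    print(torch_int_div(torch.tensor([7, 9]), 2))  # tensor([3, 4])
    print(torch_int_div(torch.tensor(4000), 320))  # tensor(12)

On torch < 1.8.0 the helper falls back to //, so callers get identical results either way; that is the "make backward compatible" bullet from the commit message.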
src/transformers/models/hubert/modeling_hubert.py

@@ -33,7 +33,7 @@ from ...file_utils import (
     replace_return_docstrings,
 )
 from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_hubert import HubertConfig
 
@@ -829,7 +829,7 @@ class HubertPreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
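For intuition, the conv output-length formula chained over the feature-encoder layers turns raw samples into feature frames. A worked example in plain Python, assuming the usual wav2vec2-style defaults conv_kernel=(10, 3, 3, 3, 3, 2, 2) and conv_stride=(5, 2, 2, 2, 2, 2, 2):

    def conv_out_length(input_length, kernel_size, stride):
        # same formula as _conv_out_length above, on plain ints
        return (input_length - kernel_size) // stride + 1

    length = 16000  # one second of 16 kHz audio
    for kernel_size, stride in zip((10, 3, 3, 3, 3, 2, 2), (5, 2, 2, 2, 2, 2, 2)):
        length = conv_out_length(length, kernel_size, stride)
    print(length)  # 49 feature frames

The identical import and _conv_out_length changes are repeated for SEW, SEW-D, UniSpeech, UniSpeech-SAT, Wav2Vec2, and WavLM below.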
src/transformers/models/sew/modeling_sew.py

@@ -29,7 +29,7 @@ from transformers.deepspeed import is_deepspeed_zero3_enabled
 from ...activations import ACT2FN
 from ...file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_sew import SEWConfig
 
@@ -735,7 +735,7 @@ class SEWPreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
src/transformers/models/sew_d/modeling_sew_d.py

@@ -30,7 +30,7 @@ from transformers.deepspeed import is_deepspeed_zero3_enabled
 from ...activations import ACT2FN
 from ...file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_sew_d import SEWDConfig
 
@@ -1266,7 +1266,7 @@ class SEWDPreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
src/transformers/models/unispeech/modeling_unispeech.py

@@ -35,7 +35,7 @@ from ...file_utils import (
     replace_return_docstrings,
 )
 from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_unispeech import UniSpeechConfig
 
@@ -969,7 +969,7 @@ class UniSpeechPreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
src/transformers/models/unispeech_sat/modeling_unispeech_sat.py

@@ -35,7 +35,7 @@ from ...file_utils import (
     replace_return_docstrings,
 )
 from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput, TokenClassifierOutput
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_unispeech_sat import UniSpeechSatConfig
 
@@ -1003,7 +1003,7 @@ class UniSpeechSatPreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
src/transformers/models/wav2vec2/modeling_wav2vec2.py

@@ -41,7 +41,7 @@ from ...modeling_outputs import (
     SequenceClassifierOutput,
     TokenClassifierOutput,
 )
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_wav2vec2 import Wav2Vec2Config
 
@@ -1104,7 +1104,7 @@ class Wav2Vec2PreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
src/transformers/models/wavlm/modeling_wavlm.py

@@ -35,7 +35,7 @@ from ...file_utils import (
     add_start_docstrings_to_model_forward,
 )
 from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput, TokenClassifierOutput
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_wavlm import WavLMConfig
 
@@ -1057,7 +1057,7 @@ class WavLMPreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
tests/test_modeling_wav2vec2.py

@@ -794,10 +794,10 @@ class Wav2Vec2RobustModelTest(ModelTesterMixin, unittest.TestCase):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         model = Wav2Vec2ForPreTraining(config).to(torch_device)
 
-        features_shape = (
-            inputs_dict["input_values"].shape[0],
-            model._get_feat_extract_output_lengths(inputs_dict["input_values"].shape[1]),
-        )
+        batch_size = inputs_dict["input_values"].shape[0]
+        feature_seq_length = int(model._get_feat_extract_output_lengths(inputs_dict["input_values"].shape[1]))
+
+        features_shape = (batch_size, feature_seq_length)
 
         mask_time_indices = _compute_mask_indices(
             features_shape,
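The shape tuple built here feeds _compute_mask_indices, so its entries should be plain Python ints. A hedged usage sketch, assuming the signature _compute_mask_indices(shape, mask_prob, mask_length) from this era of the codebase:

    from transformers.models.wav2vec2.modeling_wav2vec2 import _compute_mask_indices

    # (batch_size, feature_seq_length) as computed in the test above
    mask = _compute_mask_indices((2, 49), mask_prob=0.5, mask_length=2)
    print(mask.shape)  # (2, 49) boolean mask over feature frames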
@@ -1158,10 +1158,10 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
 
         inputs_dict = feature_extractor(input_speech, return_tensors="pt", padding=True)
 
-        features_shape = (
-            inputs_dict["input_values"].shape[0],
-            model._get_feat_extract_output_lengths(torch.tensor(inputs_dict["input_values"].shape[1])),
-        )
+        batch_size = inputs_dict["input_values"].shape[0]
+        feature_seq_length = int(model._get_feat_extract_output_lengths(inputs_dict["input_values"].shape[1]))
+
+        features_shape = (batch_size, feature_seq_length)
 
         np.random.seed(4)
         mask_time_indices = _compute_mask_indices(
@@ -1208,10 +1208,10 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
 
         inputs_dict = feature_extractor(input_speech, return_tensors="pt", padding=True)
 
-        features_shape = (
-            inputs_dict["input_values"].shape[0],
-            model._get_feat_extract_output_lengths(torch.tensor(inputs_dict["input_values"].shape[1])),
-        )
+        batch_size = inputs_dict["input_values"].shape[0]
+        feature_seq_length = int(model._get_feat_extract_output_lengths(inputs_dict["input_values"].shape[1]))
+
+        features_shape = (batch_size, feature_seq_length)
 
         torch.manual_seed(0)
         mask_time_indices = _compute_mask_indices(
@@ -1279,10 +1279,10 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
 
         inputs_dict = feature_extractor(input_speech, return_tensors="pt", padding=True)
 
-        features_shape = (
-            inputs_dict["input_values"].shape[0],
-            model._get_feat_extract_output_lengths(inputs_dict["input_values"].shape[1]),
-        )
+        batch_size = inputs_dict["input_values"].shape[0]
+        feature_seq_length = int(model._get_feat_extract_output_lengths(inputs_dict["input_values"].shape[1]))
+
+        features_shape = (batch_size, feature_seq_length)
 
         torch.manual_seed(0)
         np.random.seed(0)
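If one wanted a focused regression test for the new helper (not part of this commit; a hypothetical sketch), the backward-compatibility claim reduces to torch_int_div agreeing with the old // spelling on integer tensors:

    import torch
    from transformers.modeling_utils import torch_int_div

    def test_torch_int_div_matches_floor_division():
        a = torch.arange(1, 100)
        b = torch.tensor(7)
        # must hold on every supported torch version
        assert torch.equal(torch_int_div(a, b), a // b)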