Remove old code for PyTorch, Accelerator and tokenizers (#37234)

* Remove unneeded library version checks

Signed-off-by: cyy <cyyever@outlook.com>

* Remove PyTorch condition

Signed-off-by: cyy <cyyever@outlook.com>

* Remove PyTorch condition

Signed-off-by: cyy <cyyever@outlook.com>

* Fix ROCm get_device_capability

Signed-off-by: cyy <cyyever@outlook.com>

* Revert "Fix ROCm get_device_capability"

This reverts commit 0e756434bd.

* Remove unnecessary check

Signed-off-by: cyy <cyyever@outlook.com>

* Revert changes

Signed-off-by: cyy <cyyever@outlook.com>

---------

Signed-off-by: cyy <cyyever@outlook.com>
cyyever 2025-04-11 02:54:21 +08:00 committed by GitHub
parent 7ff896c0f2
commit 371c44d0ef
42 changed files with 53 additions and 178 deletions

View File

@@ -16,7 +16,6 @@ import math
 from collections import OrderedDict

 import torch
-from packaging import version
 from torch import Tensor, nn

 from .utils import logging
@@ -34,14 +33,6 @@ class PytorchGELUTanh(nn.Module):
     match due to rounding errors.
     """

-    def __init__(self):
-        super().__init__()
-        if version.parse(torch.__version__) < version.parse("1.12.0"):
-            raise ImportError(
-                f"You are using torch=={torch.__version__}, but torch>=1.12.0 is required to use "
-                "PytorchGELUTanh. Please upgrade torch."
-            )
-
     def forward(self, input: Tensor) -> Tensor:
         return nn.functional.gelu(input, approximate="tanh")
@@ -145,10 +136,7 @@ class MishActivation(nn.Module):
     def __init__(self):
         super().__init__()
-        if version.parse(torch.__version__) < version.parse("1.9.0"):
-            self.act = self._mish_python
-        else:
-            self.act = nn.functional.mish
+        self.act = nn.functional.mish

     def _mish_python(self, input: Tensor) -> Tensor:
         return input * torch.tanh(nn.functional.softplus(input))
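
A quick sanity check of the simplified MishActivation path (an illustrative sketch, assuming the torch versions the library now requires): `nn.functional.mish` is always present and matches the removed pure-Python fallback.

    import torch
    from torch import nn

    x = torch.randn(4)
    fallback = x * torch.tanh(nn.functional.softplus(x))  # the removed _mish_python path
    assert torch.allclose(nn.functional.mish(x), fallback, atol=1e-6)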

View File

@@ -1500,7 +1500,6 @@ class ModuleUtilsMixin:
         seq_ids = torch.arange(seq_length, device=device)
         causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]
         # in case past_key_values are used we need to add a prefix ones mask to the causal mask
-        # causal and attention masks must have same type with pytorch version < 1.3
         causal_mask = causal_mask.to(attention_mask.dtype)

         if causal_mask.shape[1] < attention_mask.shape[1]:

View File

@@ -633,7 +633,6 @@ class BlipTextModel(BlipTextPreTrainedModel):
         seq_ids = torch.arange(seq_length, device=device)
         causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]
         # in case past_key_values are used we need to add a prefix ones mask to the causal mask
-        # causal and attention masks must have same type with pytorch version < 1.3
         causal_mask = causal_mask.to(attention_mask.dtype)

         if causal_mask.shape[1] < attention_mask.shape[1]:

View File

@@ -20,11 +20,8 @@ from tokenizers import normalizers, processors
 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import is_sentencepiece_available, logging
-from ...utils.versions import require_version
-require_version("tokenizers>=0.13.3")

 if is_sentencepiece_available():
     from .tokenization_code_llama import CodeLlamaTokenizer
 else:

View File

@@ -23,11 +23,8 @@ from tokenizers import processors
 from ...tokenization_utils_base import BatchEncoding
 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import logging
-from ...utils.versions import require_version
-require_version("tokenizers>=0.13.3")

 logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"tokenizer_file": "tokenizer.json"}

View File

@@ -20,11 +20,8 @@ from tokenizers import processors
 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import is_sentencepiece_available, logging
-from ...utils.versions import require_version
-require_version("tokenizers>=0.13.3")

 if is_sentencepiece_available():
     from .tokenization_gemma import GemmaTokenizer
 else:

View File

@@ -42,7 +42,6 @@ from ...utils import (
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
     is_torch_flex_attn_available,
-    is_torch_fx_available,
     logging,
 )
 from .configuration_gpt_neo import GPTNeoConfig
@@ -60,8 +59,7 @@ if is_flash_attn_available():
 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
-if is_torch_fx_available():
-    _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)
+_prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)

 logger = logging.get_logger(__name__)
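
For context, a minimal sketch of what the now-unconditional `torch.fx.wrap` call does (illustrative names, public torch.fx APIs only): the wrapped function shows up in the traced graph as a single leaf node instead of being traced through.

    import torch
    from torch import fx, nn

    def scale(x):
        return x * 2

    scale = fx.wrap(scale)  # mark as a leaf; must be called at module level

    class M(nn.Module):
        def forward(self, x):
            return scale(x) + 1

    print(fx.symbolic_trace(M()).graph)  # `scale` appears as one call_function node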

View File

@@ -20,11 +20,8 @@ from tokenizers import processors
 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import is_sentencepiece_available, logging
-from ...utils.versions import require_version
-require_version("tokenizers>=0.13.3")

 if is_sentencepiece_available():
     from .tokenization_llama import LlamaTokenizer
 else:

View File

@@ -42,7 +42,6 @@ from ...utils import (
     replace_return_docstrings,
 )
 from ...utils.deprecation import deprecate_kwarg
-from ...utils.import_utils import is_torch_fx_available
 from .configuration_phimoe import PhimoeConfig
@@ -51,8 +50,7 @@ if is_flash_attn_available():
 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
-if is_torch_fx_available():
-    _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)
+_prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)

 logger = logging.get_logger(__name__)

View File

@@ -171,7 +171,7 @@ class ViltEmbeddings(nn.Module):
         select = torch.cat(select, dim=0)
         x = x[select[:, 0], select[:, 1]].view(batch_size, -1, num_channels)
         x_mask = x_mask[select[:, 0], select[:, 1]].view(batch_size, -1)
-        # `patch_index` should be on the same device as `select` (for torch>=1.13), which is ensured at definition time.
+        # `patch_index` should be on the same device as `select`, which is ensured at definition time.
         patch_index = patch_index[select[:, 0], select[:, 1]].view(batch_size, -1, 2)
         pos_embed = pos_embed[select[:, 0], select[:, 1]].view(batch_size, -1, num_channels)

View File

@@ -25,7 +25,6 @@ from torch.optim.lr_scheduler import LambdaLR, ReduceLROnPlateau
 from .trainer_pt_utils import LayerWiseDummyOptimizer, LayerWiseDummyScheduler
 from .trainer_utils import SchedulerType
 from .utils import logging
-from .utils.versions import require_version

 logger = logging.get_logger(__name__)
@@ -701,7 +700,6 @@ class Adafactor(Optimizer):
         relative_step=True,
         warmup_init=False,
     ):
-        require_version("torch>=1.5.0")  # add_ with alpha
         if lr is not None and relative_step:
             raise ValueError("Cannot combine manual `lr` and `relative_step=True` options")
         if warmup_init and not relative_step:

View File

@@ -138,7 +138,6 @@ from .utils import (
     is_tokenizers_available,
     is_torch_available,
     is_torch_bf16_available_on_device,
-    is_torch_bf16_cpu_available,
     is_torch_bf16_gpu_available,
     is_torch_deterministic,
     is_torch_fp16_available_on_device,
@@ -1073,14 +1072,6 @@ def require_torch_bf16_gpu(test_case):
     )(test_case)

-def require_torch_bf16_cpu(test_case):
-    """Decorator marking a test that requires torch>=1.10, using CPU."""
-    return unittest.skipUnless(
-        is_torch_bf16_cpu_available(),
-        "test requires torch>=1.10, using CPU",
-    )(test_case)

 def require_deterministic_for_xpu(test_case):
     if is_torch_xpu_available():
         return unittest.skipUnless(is_torch_deterministic(), "test requires torch to use deterministic algorithms")(

View File

@@ -164,7 +164,6 @@ from .utils import (
     is_sagemaker_dp_enabled,
     is_sagemaker_mp_enabled,
     is_schedulefree_available,
-    is_torch_compile_available,
     is_torch_hpu_available,
     is_torch_mlu_available,
     is_torch_mps_available,
@@ -257,7 +256,7 @@ if is_accelerate_available("0.28.0"):
 def _is_peft_model(model):
     if is_peft_available():
-        classes_to_check = (PeftModel,) if is_peft_available() else ()
+        classes_to_check = (PeftModel,)
         # Here we also check if the model is an instance of `PeftMixedModel` introduced in peft>=0.7.0: https://github.com/huggingface/transformers/pull/28321
         if version.parse(importlib.metadata.version("peft")) >= version.parse("0.7.0"):
             from peft import PeftMixedModel
@@ -797,10 +796,6 @@ class Trainer:
         # very last
         self._memory_tracker.stop_and_update_metrics()

-        # torch.compile
-        if args.torch_compile and not is_torch_compile_available():
-            raise RuntimeError("Using torch.compile requires PyTorch 2.0 or higher.")

         self.is_fsdp_xla_v2_enabled = args.fsdp_config.get("xla_fsdp_v2", False)
         if self.is_fsdp_xla_v2_enabled:
             if not IS_XLA_FSDPV2_POST_2_2:
@@ -1987,7 +1982,7 @@ class Trainer:
         if self.accelerator.unwrap_model(model) is not model:
             return model

-        # Mixed precision training with apex (torch < 1.6)
+        # Mixed precision training with apex
         if self.use_apex and training:
             model, self.optimizer = amp.initialize(model, self.optimizer, opt_level=self.args.fp16_opt_level)
@@ -3739,7 +3734,7 @@ class Trainer:
             torch.musa.empty_cache()
         elif is_torch_npu_available():
             torch.npu.empty_cache()
-        elif is_torch_mps_available(min_version="2.0"):
+        elif is_torch_mps_available():
             torch.mps.empty_cache()
         elif is_torch_hpu_available():
             logger.warning(

View File

@@ -44,7 +44,6 @@ from .utils import (
     is_sagemaker_dp_enabled,
     is_sagemaker_mp_enabled,
     is_torch_available,
-    is_torch_bf16_cpu_available,
     is_torch_bf16_gpu_available,
     is_torch_hpu_available,
     is_torch_mlu_available,
@@ -1161,7 +1160,6 @@ class TrainingArguments:
             "help": (
                 "Number of batches loaded in advance by each worker. "
                 "2 means there will be a total of 2 * num_workers batches prefetched across all workers. "
-                "Default is 2 for PyTorch < 2.0.0 and otherwise None."
             )
         },
     )
@@ -1681,7 +1679,7 @@ class TrainingArguments:
             self.half_precision_backend = self.fp16_backend

         if self.bf16 or self.bf16_full_eval:
-            if self.use_cpu and not is_torch_bf16_cpu_available() and not is_torch_xla_available():
+            if self.use_cpu and not is_torch_available() and not is_torch_xla_available():
                 # cpu
                 raise ValueError("Your setup doesn't support bf16/(cpu, tpu, neuroncore). You need torch>=1.10")
             elif not self.use_cpu:
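
The bf16 branch above now only needs torch itself to be importable; a minimal sketch of why (assumption: torch>=2.1, where CPU autocast handles bfloat16 out of the box):

    import torch

    with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
        y = torch.ones(8, 8) @ torch.ones(8, 8)
    print(y.dtype)  # torch.bfloat16 inside the autocast region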

View File

@@ -61,10 +61,7 @@ from ..models.auto.modeling_auto import (
 )
 from .import_utils import (
     ENV_VARS_TRUE_VALUES,
-    TORCH_FX_REQUIRED_VERSION,
-    get_torch_version,
     is_peft_available,
-    is_torch_fx_available,
 )
@@ -891,12 +888,6 @@ class HFTracer(Tracer):
     def __init__(self, autowrap_modules=(math,), autowrap_functions=()):
         super().__init__(autowrap_modules=autowrap_modules, autowrap_functions=autowrap_functions)

-        if not is_torch_fx_available():
-            raise ImportError(
-                f"Found an incompatible version of torch. Found version {get_torch_version()}, but only version "
-                f"{TORCH_FX_REQUIRED_VERSION} is supported."
-            )

     def _generate_dummy_input(
         self, model: "PreTrainedModel", input_name: str, shape: list[int], input_names: list[str]
     ) -> dict[str, torch.Tensor]:
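
HFTracer remains the entry point behind `transformers.utils.fx.symbolic_trace`; a hedged usage sketch (the checkpoint name is illustrative):

    from transformers import AutoModel
    from transformers.utils.fx import symbolic_trace

    model = AutoModel.from_pretrained("bert-base-uncased")
    traced = symbolic_trace(model, input_names=["input_ids", "attention_mask"])
    print(traced.graph)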

View File

@@ -222,6 +222,10 @@ _torch_version = "N/A"
 _torch_available = False
 if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TF not in ENV_VARS_TRUE_VALUES:
     _torch_available, _torch_version = _is_package_available("torch", return_version=True)
+    if _torch_available:
+        _torch_available = version.parse(_torch_version) >= version.parse("2.1.0")
+        if not _torch_available:
+            logger.warning(f"Disabling PyTorch because PyTorch >= 2.1 is required but found {_torch_version}")
 else:
     logger.info("Disabling PyTorch because USE_TF is set")
     _torch_available = False
@@ -310,15 +314,6 @@ if USE_JAX in ENV_VARS_TRUE_AND_AUTO_VALUES:
     _jax_version = _flax_version = "N/A"

-_torch_fx_available = False
-if _torch_available:
-    torch_version = version.parse(_torch_version)
-    _torch_fx_available = (torch_version.major, torch_version.minor) >= (
-        TORCH_FX_REQUIRED_VERSION.major,
-        TORCH_FX_REQUIRED_VERSION.minor,
-    )

 _torch_xla_available = False
 if USE_TORCH_XLA in ENV_VARS_TRUE_VALUES:
     _torch_xla_available, _torch_xla_version = _is_package_available("torch_xla", return_version=True)
@@ -526,19 +521,8 @@ def is_torch_bf16_gpu_available():
     return torch.cuda.is_available() and torch.cuda.is_bf16_supported()

-def is_torch_bf16_cpu_available():
-    if not is_torch_available():
-        return False
-    import torch
-    try:
-        # multiple levels of AttributeError depending on the pytorch version so do them all in one check
-        _ = torch.cpu.amp.autocast
-    except AttributeError:
-        return False
-    return True
+def is_torch_bf16_cpu_available() -> bool:
+    return is_torch_available()

 def is_torch_bf16_available():
@@ -618,16 +602,11 @@ def is_torch_tf32_available():
         return False
     if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8:
         return False
-    if int(torch.version.cuda.split(".")[0]) < 11:
-        return False
-    if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.7"):
-        return False
     return True

 def is_torch_fx_available():
-    return _torch_fx_available
+    return is_torch_available()

 def is_peft_available():
@@ -832,21 +811,11 @@ def is_habana_gaudi1():
 def is_torchdynamo_available():
-    if not is_torch_available():
-        return False
-    return True
+    return is_torch_available()

 def is_torch_compile_available():
-    if not is_torch_available():
-        return False
-    import torch
-    # We don't do any version check here to support nighlies marked as 1.14. Ultimately needs to check version against
-    # 2.0 but let's do it later.
-    return hasattr(torch, "compile")
+    return is_torch_available()

 def is_torchdynamo_compiling():
@@ -979,10 +948,10 @@ def is_torch_xpu_available(check_device=False):
         return False
     torch_version = version.parse(_torch_version)
-    if torch_version.major < 2 or (torch_version.major == 2 and torch_version.minor < 6):
+    if torch_version.major == 2 and torch_version.minor < 6:
         if is_ipex_available():
             import intel_extension_for_pytorch  # noqa: F401
-        elif torch_version.major < 2 or (torch_version.major == 2 and torch_version.minor < 4):
+        elif torch_version.major == 2 and torch_version.minor < 4:
             return False

     import torch
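
The hunk at the top of this file is now the single gate: torch is reported as available only when it is installed and at least 2.1. A standalone sketch of the same pattern (the helper name is mine, not the library's):

    import importlib.metadata
    from packaging import version

    def torch_meets_floor(minimum: str = "2.1.0") -> bool:
        try:
            found = importlib.metadata.version("torch")
        except importlib.metadata.PackageNotFoundError:
            return False
        return version.parse(found) >= version.parse(minimum)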

View File

@@ -323,7 +323,6 @@ class TrainerIntegrationFSDP(TestCasePlus, TrainerIntegrationCommon):
     @require_torch_multi_accelerator
     @slow
-    @require_fsdp
     @require_fsdp_v2_version
     @require_accelerate_fsdp2
     def test_accelerate_fsdp2_integration(self):

View File

@@ -510,7 +510,6 @@ class BertModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -273,7 +273,6 @@ class BertGenerationEncoderTest(ModelTesterMixin, GenerationTesterMixin, Pipelin
         self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -506,7 +506,6 @@ class BigBirdModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -354,7 +354,6 @@ class ChineseCLIPTextModelTest(ModelTesterMixin, unittest.TestCase):
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -409,7 +409,6 @@ class Data2VecTextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTes
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -492,7 +492,6 @@ class ErnieModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -306,7 +306,6 @@ class GPTNeoXModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
         self.model_tester.create_and_check_model_as_decoder(config, input_ids, input_mask)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         config, input_ids, input_mask, token_labels = self.model_tester.prepare_config_and_inputs_for_decoder()

         input_mask = None

View File

@@ -223,7 +223,6 @@ class GPTNeoXModelJapaneseTest(ModelTesterMixin, GenerationTesterMixin, Pipeline
         self.model_tester.create_and_check_model_as_decoder(config, input_ids, input_mask)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         config, input_ids, input_mask, token_labels = self.model_tester.prepare_config_and_inputs_for_decoder()

         input_mask = None

View File

@@ -23,7 +23,6 @@ import pytest
 from transformers import HubertConfig, is_torch_available
 from transformers.testing_utils import require_soundfile, require_torch, slow, torch_device
-from transformers.utils import is_torch_fx_available

 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import (
@@ -48,8 +47,7 @@ if is_torch_available():
     )
     from transformers.models.hubert.modeling_hubert import _compute_mask_indices

-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace
+from transformers.utils.fx import symbolic_trace

 class HubertModelTester:
@@ -438,8 +436,8 @@ class HubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
             # TODO: fix it
             self.skipTest(reason="torch 2.1 breaks torch fx tests for wav2vec2/hubert.")

-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch fx is not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch fx is not compatible with this model")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False

View File

@@ -27,7 +27,7 @@ from transformers.testing_utils import (
     slow,
     torch_device,
 )
-from transformers.utils import is_torch_fx_available
+from transformers.utils.fx import symbolic_trace

 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -35,9 +35,6 @@ from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_ten
 from ...test_pipeline_mixin import PipelineTesterMixin

-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace

 if is_torch_available():
     import torch
     import torch.nn.functional as F
@@ -598,8 +595,8 @@ class MT5ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
         return False

     def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch.fx is not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch.fx is not compatible with this model")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False

View File

@@ -416,7 +416,6 @@ class RemBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -417,7 +417,6 @@ class RobertaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -421,7 +421,6 @@ class RobertaPreLayerNormModelTest(ModelTesterMixin, GenerationTesterMixin, Pipe
     # Copied from tests.models.roberta.test_modeling_roberta.RobertaModelTest.test_model_as_decoder_with_default_input_mask
     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -664,7 +664,6 @@ class RoCBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -433,7 +433,6 @@ class RoFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -32,7 +32,8 @@ from transformers.testing_utils import (
     slow,
     torch_device,
 )
-from transformers.utils import cached_property, is_torch_fx_available
+from transformers.utils import cached_property
+from transformers.utils.fx import symbolic_trace

 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -40,10 +41,6 @@ from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_ten
 from ...test_pipeline_mixin import PipelineTesterMixin

-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace

 if is_torch_available():
     import torch
     import torch.nn.functional as F
@@ -603,8 +600,8 @@ class T5ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
         return False

     def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch.fx is not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch.fx is not compatible with this model")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False

View File

@@ -27,7 +27,7 @@ from transformers.testing_utils import (
     slow,
     torch_device,
 )
-from transformers.utils import is_torch_fx_available
+from transformers.utils.fx import symbolic_trace

 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -35,10 +35,6 @@ from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_ten
 from ...test_pipeline_mixin import PipelineTesterMixin

-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace

 if is_torch_available():
     import torch
     import torch.nn.functional as F
@@ -300,8 +296,8 @@ class UMT5ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
         return False

     def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch fx is not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch fx is not compatible with this model")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False

View File

@@ -42,7 +42,6 @@ from transformers.testing_utils import (
     slow,
     torch_device,
 )
-from transformers.utils import is_torch_fx_available

 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import (
@@ -90,8 +89,7 @@ if is_pyctcdecode_available():
     from transformers.models.wav2vec2_with_lm import processing_wav2vec2_with_lm

-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace
+from transformers.utils.fx import symbolic_trace

 def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
@@ -716,8 +714,8 @@ class Wav2Vec2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
             # TODO: fix it
             self.skipTest(reason="torch 2.1 breaks torch fx tests for wav2vec2/hubert.")

-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch fx not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch fx is not compatible with this model")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False

View File

@@ -425,7 +425,6 @@ class XLMRobertaXLModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTes
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -420,7 +420,6 @@ class XmodModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,

View File

@@ -101,7 +101,6 @@ from transformers.utils import (
     is_accelerate_available,
     is_torch_bf16_available_on_device,
     is_torch_fp16_available_on_device,
-    is_torch_fx_available,
     is_torch_sdpa_available,
 )
 from transformers.utils.generic import ContextManagers
@@ -125,8 +124,8 @@ if is_torch_available():
     from transformers.modeling_utils import load_state_dict, no_init_weights
     from transformers.pytorch_utils import id_tensor_storage

-if is_torch_fx_available():
-    from transformers.utils.fx import _FX_SUPPORTED_MODELS_WITH_KV_CACHE, symbolic_trace
+from transformers.utils.fx import _FX_SUPPORTED_MODELS_WITH_KV_CACHE, symbolic_trace

 if is_deepspeed_available():
     import deepspeed
@@ -1190,10 +1189,8 @@ class ModelTesterMixin:
         self._create_and_check_torch_fx_tracing(config, inputs_dict, output_loss=True)

     def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(
-                f"Either torch.fx is not available, or the model type {config.model_type} is not compatible with torch.fx"
-            )
+        if not self.fx_compatible:
+            self.skipTest(f"The model type {config.model_type} is not compatible with torch.fx")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False

View File

@@ -99,7 +99,6 @@ from transformers.testing_utils import (
     require_torch_tensorrt_fx,
     require_torch_tf32,
     require_torch_up_to_2_accelerators,
-    require_torchdynamo,
     require_vision,
     require_wandb,
     run_first,
@@ -3994,10 +3993,9 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
     @require_non_xpu
     @require_torch_non_multi_gpu
-    @require_torchdynamo
     @require_torch_tensorrt_fx
     def test_torchdynamo_full_eval(self):
-        import torchdynamo
+        from torch import _dynamo as torchdynamo

         # torchdynamo at the moment doesn't support DP/DDP, therefore require a single gpu
         n_gpus = get_gpu_count()
@@ -4017,30 +4015,35 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
             del trainer

             # 2. TorchDynamo eager
-            trainer = get_regression_trainer(a=a, b=b, eval_len=eval_len, torchdynamo="eager", output_dir=tmp_dir)
+            trainer = get_regression_trainer(
+                a=a, b=b, eval_len=eval_len, torch_compile_backend="eager", output_dir=tmp_dir
+            )
             metrics = trainer.evaluate()
             self.assertAlmostEqual(metrics["eval_loss"], original_eval_loss)
             del trainer
             torchdynamo.reset()

             # 3. TorchDynamo nvfuser
-            trainer = get_regression_trainer(a=a, b=b, eval_len=eval_len, torchdynamo="nvfuser", output_dir=tmp_dir)
+            trainer = get_regression_trainer(
+                a=a, b=b, eval_len=eval_len, torch_compile_backend="nvfuser", output_dir=tmp_dir
+            )
             metrics = trainer.evaluate()
             self.assertAlmostEqual(metrics["eval_loss"], original_eval_loss)
             torchdynamo.reset()

             # 4. TorchDynamo fx2trt
-            trainer = get_regression_trainer(a=a, b=b, eval_len=eval_len, torchdynamo="fx2trt", output_dir=tmp_dir)
+            trainer = get_regression_trainer(
+                a=a, b=b, eval_len=eval_len, torch_compile_backend="fx2trt", output_dir=tmp_dir
+            )
             metrics = trainer.evaluate()
             self.assertAlmostEqual(metrics["eval_loss"], original_eval_loss)
             torchdynamo.reset()

-    @unittest.skip(reason="torch 2.0.0 gives `ModuleNotFoundError: No module named 'torchdynamo'`.")
     @require_torch_non_multi_gpu
-    @require_torchdynamo
+    @require_torch_gpu
     def test_torchdynamo_memory(self):
         # torchdynamo at the moment doesn't support DP/DDP, therefore require a single gpu
-        import torchdynamo
+        from torch import _dynamo as torchdynamo

         class CustomTrainer(Trainer):
             def compute_loss(self, model, inputs, return_outputs=False):
@@ -4085,7 +4088,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         with tempfile.TemporaryDirectory() as tmp_dir:
             a = torch.ones(1024, 1024, device="cuda", requires_grad=True)
             a.grad = None
-            args = TrainingArguments(output_dir=tmp_dir, torchdynamo="nvfuser")
+            args = TrainingArguments(output_dir=tmp_dir, torch_compile_backend="nvfuser")
             trainer = CustomTrainer(model=mod, args=args)
             # warmup
             for _ in range(10):
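
The tests above switch from the removed standalone `torchdynamo` package to `torch._dynamo`, and from the older `torchdynamo=...` argument to `torch_compile_backend=...`. A minimal usage sketch with the public API (the output directory is a placeholder):

    from transformers import TrainingArguments

    args = TrainingArguments(output_dir="out", torch_compile_backend="eager")
    # Setting a backend is expected to route the Trainer run through torch.compile.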

View File

@@ -21,7 +21,6 @@ from transformers.testing_utils import (
     get_torch_dist_unique_port,
     require_accelerate,
     require_fp8,
-    require_fsdp,
     require_torch_multi_accelerator,
     run_first,
     torch_device,
@@ -68,7 +67,6 @@ if is_torch_available():
 class TestFSDPTrainer(TestCasePlus):
     @require_torch_multi_accelerator
     @require_accelerate
-    @require_fsdp
     @run_first
     def test_trainer(self):
         output_dir = self.get_auto_remove_tmp_dir()
@@ -95,7 +93,6 @@ class TestFSDPTrainer(TestCasePlus):
 class TestFSDPTrainerFP8(TestCasePlus):
     @require_torch_multi_accelerator
     @require_accelerate
-    @require_fsdp
     @require_fp8
     @run_first
     def test_trainer(self):
@@ -125,7 +122,6 @@ class TestFSDPTrainerFP8(TestCasePlus):
 class TestFSDPTrainerWrap(TestCasePlus):
     @require_torch_multi_accelerator
     @require_accelerate
-    @require_fsdp
     @run_first
     def test_trainer(self):
         output_dir = self.get_auto_remove_tmp_dir()

View File

@@ -81,7 +81,6 @@ from transformers.utils.import_utils import (
     is_tf_available,
     is_torch_npu_available,
     is_torch_sdpa_available,
-    is_torchdynamo_available,
 )
@@ -1483,8 +1482,6 @@ class ModelUtilsTest(TestCasePlus):
             model.warn_if_padding_and_no_attention_mask(input_ids, attention_mask=None)
             self.assertIn("You may ignore this warning if your `pad_token_id`", cl.out)

-        if not is_torchdynamo_available():
-            self.skipTest(reason="torchdynamo is not available")
         with self.subTest("Ensure that the warning code is skipped when compiling with torchdynamo."):
             logger.warning_once.cache_clear()
             from torch._dynamo import config, testing

View File

@@ -86,7 +86,7 @@ class DependencyVersionCheckTest(TestCasePlus):
     def test_python(self):
         # matching requirement
-        require_version("python>=3.6.0")
+        require_version("python>=3.9.0")

         # not matching requirements
         for req in ["python>9.9.9", "python<3.0.0"]: