Mirror of https://github.com/huggingface/transformers.git (synced 2025-08-03 03:31:05 +06:00)
Remove old code for PyTorch, Accelerator and tokenizers (#37234)
* Remove unneeded library version checks
Signed-off-by: cyy <cyyever@outlook.com>
* Remove PyTorch condition
Signed-off-by: cyy <cyyever@outlook.com>
* Remove PyTorch condition
Signed-off-by: cyy <cyyever@outlook.com>
* Fix ROCm get_device_capability
Signed-off-by: cyy <cyyever@outlook.com>
* Revert "Fix ROCm get_device_capability"
This reverts commit 0e756434bd.
* Remove unnecessary check
Signed-off-by: cyy <cyyever@outlook.com>
* Revert changes
Signed-off-by: cyy <cyyever@outlook.com>
---------
Signed-off-by: cyy <cyyever@outlook.com>
This commit is contained in:
parent 7ff896c0f2
commit 371c44d0ef
@@ -16,7 +16,6 @@ import math
 from collections import OrderedDict

 import torch
-from packaging import version
 from torch import Tensor, nn

 from .utils import logging
@@ -34,14 +33,6 @@ class PytorchGELUTanh(nn.Module):
     match due to rounding errors.
     """

-    def __init__(self):
-        super().__init__()
-        if version.parse(torch.__version__) < version.parse("1.12.0"):
-            raise ImportError(
-                f"You are using torch=={torch.__version__}, but torch>=1.12.0 is required to use "
-                "PytorchGELUTanh. Please upgrade torch."
-            )
-
     def forward(self, input: Tensor) -> Tensor:
         return nn.functional.gelu(input, approximate="tanh")

@@ -145,10 +136,7 @@ class MishActivation(nn.Module):

     def __init__(self):
         super().__init__()
-        if version.parse(torch.__version__) < version.parse("1.9.0"):
-            self.act = self._mish_python
-        else:
-            self.act = nn.functional.mish
+        self.act = nn.functional.mish

     def _mish_python(self, input: Tensor) -> Tensor:
         return input * torch.tanh(nn.functional.softplus(input))
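The version guards dropped above date from PyTorch releases that lacked the tanh-approximated GELU (added in torch 1.12) and a native Mish (added in torch 1.9); with torch >= 2.1 as the new floor, both classes can call the nn.functional implementations directly. A minimal sketch (not part of this diff) of the two activations:

    import torch
    from torch import nn

    x = torch.randn(4)

    # tanh-approximated GELU, natively available since torch 1.12
    gelu_tanh = nn.functional.gelu(x, approximate="tanh")

    # Mish, natively available since torch 1.9; reference formula shown for comparison
    mish_reference = x * torch.tanh(nn.functional.softplus(x))
    assert torch.allclose(mish_reference, nn.functional.mish(x), atol=1e-6)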
@@ -1500,7 +1500,6 @@ class ModuleUtilsMixin:
         seq_ids = torch.arange(seq_length, device=device)
         causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]
         # in case past_key_values are used we need to add a prefix ones mask to the causal mask
-        # causal and attention masks must have same type with pytorch version < 1.3
         causal_mask = causal_mask.to(attention_mask.dtype)

         if causal_mask.shape[1] < attention_mask.shape[1]:
@@ -633,7 +633,6 @@ class BlipTextModel(BlipTextPreTrainedModel):
         seq_ids = torch.arange(seq_length, device=device)
         causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]
         # in case past_key_values are used we need to add a prefix ones mask to the causal mask
-        # causal and attention masks must have same type with pytorch version < 1.3
         causal_mask = causal_mask.to(attention_mask.dtype)

         if causal_mask.shape[1] < attention_mask.shape[1]:
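These two hunks only drop a comment about a PyTorch < 1.3 dtype quirk; the mask construction itself is unchanged. For reference, a standalone sketch of what the surrounding lines compute, with toy sizes:

    import torch

    batch_size, seq_length = 2, 4
    seq_ids = torch.arange(seq_length)

    # entry [b, i, j] is True when position j is visible from position i (j <= i)
    causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]
    assert torch.equal(causal_mask[0], torch.tril(torch.ones(seq_length, seq_length, dtype=torch.bool)))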
@@ -20,11 +20,8 @@ from tokenizers import normalizers, processors

 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import is_sentencepiece_available, logging
-from ...utils.versions import require_version


-require_version("tokenizers>=0.13.3")
-
 if is_sentencepiece_available():
     from .tokenization_code_llama import CodeLlamaTokenizer
 else:
@@ -23,11 +23,8 @@ from tokenizers import processors
 from ...tokenization_utils_base import BatchEncoding
 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import logging
-from ...utils.versions import require_version


-require_version("tokenizers>=0.13.3")
-
 logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {"tokenizer_file": "tokenizer.json"}

@@ -20,11 +20,8 @@ from tokenizers import processors

 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import is_sentencepiece_available, logging
-from ...utils.versions import require_version


-require_version("tokenizers>=0.13.3")
-
 if is_sentencepiece_available():
     from .tokenization_gemma import GemmaTokenizer
 else:
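The require_version("tokenizers>=0.13.3") guards removed from these fast tokenizers were runtime checks; once the package's install requirements already demand a newer tokenizers release, repeating the check at import time adds nothing. A hedged sketch of how the helper is used (the hint string here is illustrative):

    from transformers.utils.versions import require_version

    # raises an error if the installed tokenizers is older than 0.13.3
    require_version("tokenizers>=0.13.3", "To fix: pip install -U tokenizers")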
@@ -42,7 +42,6 @@ from ...utils import (
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
     is_torch_flex_attn_available,
-    is_torch_fx_available,
     logging,
 )
 from .configuration_gpt_neo import GPTNeoConfig
@@ -60,8 +59,7 @@ if is_flash_attn_available():

 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
-if is_torch_fx_available():
-    _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)
+_prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)


 logger = logging.get_logger(__name__)
@@ -20,11 +20,8 @@ from tokenizers import processors

 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import is_sentencepiece_available, logging
-from ...utils.versions import require_version


-require_version("tokenizers>=0.13.3")
-
 if is_sentencepiece_available():
     from .tokenization_llama import LlamaTokenizer
 else:
@@ -42,7 +42,6 @@ from ...utils import (
     replace_return_docstrings,
 )
 from ...utils.deprecation import deprecate_kwarg
-from ...utils.import_utils import is_torch_fx_available
 from .configuration_phimoe import PhimoeConfig


@@ -51,8 +50,7 @@ if is_flash_attn_available():

 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
-if is_torch_fx_available():
-    _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)
+_prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)


 logger = logging.get_logger(__name__)
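Both modeling files above now call torch.fx.wrap unconditionally, since torch.fx ships with every supported PyTorch. A toy sketch (assumed names, not from the diff) of what wrapping a helper does during symbolic tracing:

    import torch
    import torch.fx

    def _make_causal_mask(size: int):
        # helper whose internals we do not want traced through
        return torch.tril(torch.ones(size, size))

    # same pattern as in the diff: register the helper as an FX leaf function
    _make_causal_mask = torch.fx.wrap(_make_causal_mask)

    class Toy(torch.nn.Module):
        def forward(self, x):
            return x + _make_causal_mask(4)

    traced = torch.fx.symbolic_trace(Toy())
    print(traced.graph)  # _make_causal_mask appears as a single call_function node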
@@ -171,7 +171,7 @@ class ViltEmbeddings(nn.Module):
         select = torch.cat(select, dim=0)
         x = x[select[:, 0], select[:, 1]].view(batch_size, -1, num_channels)
         x_mask = x_mask[select[:, 0], select[:, 1]].view(batch_size, -1)
-        # `patch_index` should be on the same device as `select` (for torch>=1.13), which is ensured at definition time.
+        # `patch_index` should be on the same device as `select`, which is ensured at definition time.
         patch_index = patch_index[select[:, 0], select[:, 1]].view(batch_size, -1, 2)
         pos_embed = pos_embed[select[:, 0], select[:, 1]].view(batch_size, -1, num_channels)

@@ -25,7 +25,6 @@ from torch.optim.lr_scheduler import LambdaLR, ReduceLROnPlateau
 from .trainer_pt_utils import LayerWiseDummyOptimizer, LayerWiseDummyScheduler
 from .trainer_utils import SchedulerType
 from .utils import logging
-from .utils.versions import require_version


 logger = logging.get_logger(__name__)
@@ -701,7 +700,6 @@ class Adafactor(Optimizer):
         relative_step=True,
         warmup_init=False,
     ):
-        require_version("torch>=1.5.0")  # add_ with alpha
         if lr is not None and relative_step:
             raise ValueError("Cannot combine manual `lr` and `relative_step=True` options")
         if warmup_init and not relative_step:
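The require_version("torch>=1.5.0") line removed from Adafactor.__init__ only guarded the keyword form of in-place addition, which every supported PyTorch has. A tiny sketch of the call that comment referred to:

    import torch

    p = torch.zeros(3)
    update = torch.ones(3)

    # in-place update scaled by alpha, i.e. p <- p + (-0.1) * update
    p.add_(update, alpha=-0.1)
    print(p)  # tensor([-0.1000, -0.1000, -0.1000])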
@@ -138,7 +138,6 @@ from .utils import (
     is_tokenizers_available,
     is_torch_available,
     is_torch_bf16_available_on_device,
-    is_torch_bf16_cpu_available,
     is_torch_bf16_gpu_available,
     is_torch_deterministic,
     is_torch_fp16_available_on_device,
@@ -1073,14 +1072,6 @@ def require_torch_bf16_gpu(test_case):
     )(test_case)


-def require_torch_bf16_cpu(test_case):
-    """Decorator marking a test that requires torch>=1.10, using CPU."""
-    return unittest.skipUnless(
-        is_torch_bf16_cpu_available(),
-        "test requires torch>=1.10, using CPU",
-    )(test_case)
-
-
 def require_deterministic_for_xpu(test_case):
     if is_torch_xpu_available():
         return unittest.skipUnless(is_torch_deterministic(), "test requires torch to use deterministic algorithms")(
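With require_torch_bf16_cpu gone, bf16 on CPU is treated as available whenever torch itself is. A hedged sketch of the autocast entry point the old decorator effectively probed (illustrative, not part of the diff):

    import torch

    # bf16 autocast on CPU; the modern spelling of the old torch.cpu.amp.autocast probe
    with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
        x = torch.randn(8, 8)
        y = x @ x
        print(y.dtype)  # expected: torch.bfloat16 under autocast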
@@ -164,7 +164,6 @@ from .utils import (
     is_sagemaker_dp_enabled,
     is_sagemaker_mp_enabled,
     is_schedulefree_available,
-    is_torch_compile_available,
     is_torch_hpu_available,
     is_torch_mlu_available,
     is_torch_mps_available,
@@ -257,7 +256,7 @@ if is_accelerate_available("0.28.0"):

 def _is_peft_model(model):
     if is_peft_available():
-        classes_to_check = (PeftModel,) if is_peft_available() else ()
+        classes_to_check = (PeftModel,)
         # Here we also check if the model is an instance of `PeftMixedModel` introduced in peft>=0.7.0: https://github.com/huggingface/transformers/pull/28321
         if version.parse(importlib.metadata.version("peft")) >= version.parse("0.7.0"):
             from peft import PeftMixedModel
@@ -797,10 +796,6 @@ class Trainer:
         # very last
         self._memory_tracker.stop_and_update_metrics()

-        # torch.compile
-        if args.torch_compile and not is_torch_compile_available():
-            raise RuntimeError("Using torch.compile requires PyTorch 2.0 or higher.")
-
         self.is_fsdp_xla_v2_enabled = args.fsdp_config.get("xla_fsdp_v2", False)
         if self.is_fsdp_xla_v2_enabled:
             if not IS_XLA_FSDPV2_POST_2_2:
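The removed Trainer guard is unnecessary once torch >= 2.1 is the floor, because torch.compile always exists there. A minimal sketch of the call the guard used to protect, outside of Trainer:

    import torch
    from torch import nn

    model = nn.Linear(16, 4)

    # torch.compile ships with every PyTorch >= 2.0, so no availability check is needed
    compiled = torch.compile(model)
    out = compiled(torch.randn(2, 16))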
@@ -1987,7 +1982,7 @@ class Trainer:
         if self.accelerator.unwrap_model(model) is not model:
             return model

-        # Mixed precision training with apex (torch < 1.6)
+        # Mixed precision training with apex
         if self.use_apex and training:
             model, self.optimizer = amp.initialize(model, self.optimizer, opt_level=self.args.fp16_opt_level)

@@ -3739,7 +3734,7 @@ class Trainer:
             torch.musa.empty_cache()
         elif is_torch_npu_available():
             torch.npu.empty_cache()
-        elif is_torch_mps_available(min_version="2.0"):
+        elif is_torch_mps_available():
             torch.mps.empty_cache()
         elif is_torch_hpu_available():
             logger.warning(
@@ -44,7 +44,6 @@ from .utils import (
     is_sagemaker_dp_enabled,
     is_sagemaker_mp_enabled,
     is_torch_available,
-    is_torch_bf16_cpu_available,
     is_torch_bf16_gpu_available,
     is_torch_hpu_available,
     is_torch_mlu_available,
@@ -1161,7 +1160,6 @@ class TrainingArguments:
             "help": (
                 "Number of batches loaded in advance by each worker. "
                 "2 means there will be a total of 2 * num_workers batches prefetched across all workers. "
-                "Default is 2 for PyTorch < 2.0.0 and otherwise None."
             )
         },
     )
@@ -1681,7 +1679,7 @@ class TrainingArguments:
             self.half_precision_backend = self.fp16_backend

         if self.bf16 or self.bf16_full_eval:
-            if self.use_cpu and not is_torch_bf16_cpu_available() and not is_torch_xla_available():
+            if self.use_cpu and not is_torch_available() and not is_torch_xla_available():
                 # cpu
                 raise ValueError("Your setup doesn't support bf16/(cpu, tpu, neuroncore). You need torch>=1.10")
             elif not self.use_cpu:
@@ -61,10 +61,7 @@ from ..models.auto.modeling_auto import (
 )
 from .import_utils import (
     ENV_VARS_TRUE_VALUES,
-    TORCH_FX_REQUIRED_VERSION,
-    get_torch_version,
     is_peft_available,
-    is_torch_fx_available,
 )


@@ -891,12 +888,6 @@ class HFTracer(Tracer):
     def __init__(self, autowrap_modules=(math,), autowrap_functions=()):
         super().__init__(autowrap_modules=autowrap_modules, autowrap_functions=autowrap_functions)

-        if not is_torch_fx_available():
-            raise ImportError(
-                f"Found an incompatible version of torch. Found version {get_torch_version()}, but only version "
-                f"{TORCH_FX_REQUIRED_VERSION} is supported."
-            )
-
     def _generate_dummy_input(
         self, model: "PreTrainedModel", input_name: str, shape: list[int], input_names: list[str]
     ) -> dict[str, torch.Tensor]:
@@ -222,6 +222,10 @@ _torch_version = "N/A"
 _torch_available = False
 if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TF not in ENV_VARS_TRUE_VALUES:
     _torch_available, _torch_version = _is_package_available("torch", return_version=True)
+    if _torch_available:
+        _torch_available = version.parse(_torch_version) >= version.parse("2.1.0")
+        if not _torch_available:
+            logger.warning(f"Disabling PyTorch because PyTorch >= 2.1 is required but found {_torch_version}")
 else:
     logger.info("Disabling PyTorch because USE_TF is set")
     _torch_available = False
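The lines added in the last hunk gate _torch_available on torch >= 2.1 using packaging.version. A standalone sketch of the same pattern (the helper name here is illustrative):

    import importlib.metadata

    from packaging import version

    def _meets_minimum(package: str, minimum: str) -> bool:
        # mirrors the gate above: the package must be installed *and* new enough
        try:
            installed = importlib.metadata.version(package)
        except importlib.metadata.PackageNotFoundError:
            return False
        return version.parse(installed) >= version.parse(minimum)

    print(_meets_minimum("torch", "2.1.0"))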
@@ -310,15 +314,6 @@ if USE_JAX in ENV_VARS_TRUE_AND_AUTO_VALUES:
     _jax_version = _flax_version = "N/A"


-_torch_fx_available = False
-if _torch_available:
-    torch_version = version.parse(_torch_version)
-    _torch_fx_available = (torch_version.major, torch_version.minor) >= (
-        TORCH_FX_REQUIRED_VERSION.major,
-        TORCH_FX_REQUIRED_VERSION.minor,
-    )
-
-
 _torch_xla_available = False
 if USE_TORCH_XLA in ENV_VARS_TRUE_VALUES:
     _torch_xla_available, _torch_xla_version = _is_package_available("torch_xla", return_version=True)
@@ -526,19 +521,8 @@ def is_torch_bf16_gpu_available():
     return torch.cuda.is_available() and torch.cuda.is_bf16_supported()


-def is_torch_bf16_cpu_available():
-    if not is_torch_available():
-        return False
-
-    import torch
-
-    try:
-        # multiple levels of AttributeError depending on the pytorch version so do them all in one check
-        _ = torch.cpu.amp.autocast
-    except AttributeError:
-        return False
-
-    return True
+def is_torch_bf16_cpu_available() -> bool:
+    return is_torch_available()


 def is_torch_bf16_available():
@@ -618,16 +602,11 @@ def is_torch_tf32_available():
         return False
     if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8:
         return False
-    if int(torch.version.cuda.split(".")[0]) < 11:
-        return False
-    if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.7"):
-        return False
-
     return True


 def is_torch_fx_available():
-    return _torch_fx_available
+    return is_torch_available()


 def is_peft_available():
@@ -832,21 +811,11 @@ def is_habana_gaudi1():


 def is_torchdynamo_available():
-    if not is_torch_available():
-        return False
-
-    return True
+    return is_torch_available()


 def is_torch_compile_available():
-    if not is_torch_available():
-        return False
-
-    import torch
-
-    # We don't do any version check here to support nighlies marked as 1.14. Ultimately needs to check version against
-    # 2.0 but let's do it later.
-    return hasattr(torch, "compile")
+    return is_torch_available()


 def is_torchdynamo_compiling():
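After this change the fx, TF32, dynamo and compile helpers no longer probe individual features: every torch that passes the 2.1 import gate already ships them, so the checks collapse into "is torch importable at all". A hedged sketch of the resulting shape of such helpers:

    import importlib.util

    def is_torch_available() -> bool:
        return importlib.util.find_spec("torch") is not None

    # before: feature probes such as hasattr(torch, "compile") or version-tuple comparisons
    # after:  the minimum supported torch already has fx, dynamo and compile,
    #         so the helpers simply defer to is_torch_available()
    def is_torch_compile_available() -> bool:
        return is_torch_available()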
@@ -979,10 +948,10 @@ def is_torch_xpu_available(check_device=False):
         return False

     torch_version = version.parse(_torch_version)
-    if torch_version.major < 2 or (torch_version.major == 2 and torch_version.minor < 6):
+    if torch_version.major == 2 and torch_version.minor < 6:
         if is_ipex_available():
             import intel_extension_for_pytorch  # noqa: F401
-        elif torch_version.major < 2 or (torch_version.major == 2 and torch_version.minor < 4):
+        elif torch_version.major == 2 and torch_version.minor < 4:
             return False

     import torch
@@ -323,7 +323,6 @@ class TrainerIntegrationFSDP(TestCasePlus, TrainerIntegrationCommon):

     @require_torch_multi_accelerator
     @slow
-    @require_fsdp
     @require_fsdp_v2_version
     @require_accelerate_fsdp2
     def test_accelerate_fsdp2_integration(self):
@@ -510,7 +510,6 @@ class BertModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -273,7 +273,6 @@ class BertGenerationEncoderTest(ModelTesterMixin, GenerationTesterMixin, Pipelin
         self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -506,7 +506,6 @@ class BigBirdModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -354,7 +354,6 @@ class ChineseCLIPTextModelTest(ModelTesterMixin, unittest.TestCase):
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -409,7 +409,6 @@ class Data2VecTextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTes
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -492,7 +492,6 @@ class ErnieModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -306,7 +306,6 @@ class GPTNeoXModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
         self.model_tester.create_and_check_model_as_decoder(config, input_ids, input_mask)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         config, input_ids, input_mask, token_labels = self.model_tester.prepare_config_and_inputs_for_decoder()

         input_mask = None
@@ -223,7 +223,6 @@ class GPTNeoXModelJapaneseTest(ModelTesterMixin, GenerationTesterMixin, Pipeline
         self.model_tester.create_and_check_model_as_decoder(config, input_ids, input_mask)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         config, input_ids, input_mask, token_labels = self.model_tester.prepare_config_and_inputs_for_decoder()

         input_mask = None
@@ -23,7 +23,6 @@ import pytest

 from transformers import HubertConfig, is_torch_available
 from transformers.testing_utils import require_soundfile, require_torch, slow, torch_device
-from transformers.utils import is_torch_fx_available

 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import (
@@ -48,8 +47,7 @@ if is_torch_available():
     )
     from transformers.models.hubert.modeling_hubert import _compute_mask_indices

-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace
+from transformers.utils.fx import symbolic_trace


 class HubertModelTester:
@@ -438,8 +436,8 @@ class HubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
             # TODO: fix it
             self.skipTest(reason="torch 2.1 breaks torch fx tests for wav2vec2/hubert.")

-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch fx is not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch fx is not compatible with this model")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False
@@ -27,7 +27,7 @@ from transformers.testing_utils import (
     slow,
     torch_device,
 )
-from transformers.utils import is_torch_fx_available
+from transformers.utils.fx import symbolic_trace

 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -35,9 +35,6 @@ from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_ten
 from ...test_pipeline_mixin import PipelineTesterMixin


-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace
-
 if is_torch_available():
     import torch
     import torch.nn.functional as F
@@ -598,8 +595,8 @@ class MT5ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
         return False

     def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch.fx is not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch.fx is not compatible with this model")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False
@@ -416,7 +416,6 @@ class RemBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -417,7 +417,6 @@ class RobertaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -421,7 +421,6 @@ class RobertaPreLayerNormModelTest(ModelTesterMixin, GenerationTesterMixin, Pipe

     # Copied from tests.models.roberta.test_modeling_roberta.RobertaModelTest.test_model_as_decoder_with_default_input_mask
     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -664,7 +664,6 @@ class RoCBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -433,7 +433,6 @@ class RoFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -32,7 +32,8 @@ from transformers.testing_utils import (
     slow,
     torch_device,
 )
-from transformers.utils import cached_property, is_torch_fx_available
+from transformers.utils import cached_property
+from transformers.utils.fx import symbolic_trace

 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -40,10 +41,6 @@ from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_ten
 from ...test_pipeline_mixin import PipelineTesterMixin


-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace
-
-
 if is_torch_available():
     import torch
     import torch.nn.functional as F
@@ -603,8 +600,8 @@ class T5ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
         return False

     def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch.fx is not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch.fx is not compatible with this model")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False
@@ -27,7 +27,7 @@ from transformers.testing_utils import (
     slow,
     torch_device,
 )
-from transformers.utils import is_torch_fx_available
+from transformers.utils.fx import symbolic_trace

 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -35,10 +35,6 @@ from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_ten
 from ...test_pipeline_mixin import PipelineTesterMixin


-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace
-
-
 if is_torch_available():
     import torch
     import torch.nn.functional as F
@@ -300,8 +296,8 @@ class UMT5ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
         return False

     def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch fx is not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch fx is not compatible with this model")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False
@@ -42,7 +42,6 @@ from transformers.testing_utils import (
     slow,
     torch_device,
 )
-from transformers.utils import is_torch_fx_available

 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import (
@@ -90,8 +89,7 @@ if is_pyctcdecode_available():
     from transformers.models.wav2vec2_with_lm import processing_wav2vec2_with_lm


-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace
+from transformers.utils.fx import symbolic_trace


 def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
@@ -716,8 +714,8 @@ class Wav2Vec2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
             # TODO: fix it
             self.skipTest(reason="torch 2.1 breaks torch fx tests for wav2vec2/hubert.")

-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch fx not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch fx is not compatible with this model")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False
@@ -425,7 +425,6 @@ class XLMRobertaXLModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTes
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -420,7 +420,6 @@ class XmodModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

     def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
         (
             config,
             input_ids,
@@ -101,7 +101,6 @@ from transformers.utils import (
     is_accelerate_available,
     is_torch_bf16_available_on_device,
     is_torch_fp16_available_on_device,
-    is_torch_fx_available,
     is_torch_sdpa_available,
 )
 from transformers.utils.generic import ContextManagers
@@ -125,8 +124,8 @@ if is_torch_available():
     from transformers.modeling_utils import load_state_dict, no_init_weights
     from transformers.pytorch_utils import id_tensor_storage

-    if is_torch_fx_available():
-        from transformers.utils.fx import _FX_SUPPORTED_MODELS_WITH_KV_CACHE, symbolic_trace
+    from transformers.utils.fx import _FX_SUPPORTED_MODELS_WITH_KV_CACHE, symbolic_trace

 if is_deepspeed_available():
     import deepspeed
@@ -1190,10 +1189,8 @@ class ModelTesterMixin:
         self._create_and_check_torch_fx_tracing(config, inputs_dict, output_loss=True)

     def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(
-                f"Either torch.fx is not available, or the model type {config.model_type} is not compatible with torch.fx"
-            )
+        if not self.fx_compatible:
+            self.skipTest(f"The model type {config.model_type} is not compatible with torch.fx")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False
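The model tests now import symbolic_trace unconditionally and skip only on the per-model fx_compatible flag. A hedged sketch of the kind of tracing these tests exercise; BERT and the tiny config are used purely as an example of an fx-compatible architecture:

    import torch
    from transformers import BertConfig, BertModel
    from transformers.utils.fx import symbolic_trace

    config = BertConfig(num_hidden_layers=1, hidden_size=32, num_attention_heads=2, intermediate_size=64)
    model = BertModel(config)
    traced = symbolic_trace(model, input_names=["input_ids", "attention_mask"])

    input_ids = torch.randint(0, config.vocab_size, (1, 8))
    attention_mask = torch.ones_like(input_ids)
    outputs = traced(input_ids=input_ids, attention_mask=attention_mask)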
@@ -99,7 +99,6 @@ from transformers.testing_utils import (
     require_torch_tensorrt_fx,
     require_torch_tf32,
     require_torch_up_to_2_accelerators,
-    require_torchdynamo,
     require_vision,
     require_wandb,
     run_first,
@@ -3994,10 +3993,9 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):

     @require_non_xpu
     @require_torch_non_multi_gpu
-    @require_torchdynamo
     @require_torch_tensorrt_fx
     def test_torchdynamo_full_eval(self):
-        import torchdynamo
+        from torch import _dynamo as torchdynamo

         # torchdynamo at the moment doesn't support DP/DDP, therefore require a single gpu
         n_gpus = get_gpu_count()
@@ -4017,30 +4015,35 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
             del trainer

             # 2. TorchDynamo eager
-            trainer = get_regression_trainer(a=a, b=b, eval_len=eval_len, torchdynamo="eager", output_dir=tmp_dir)
+            trainer = get_regression_trainer(
+                a=a, b=b, eval_len=eval_len, torch_compile_backend="eager", output_dir=tmp_dir
+            )
             metrics = trainer.evaluate()
             self.assertAlmostEqual(metrics["eval_loss"], original_eval_loss)
             del trainer
             torchdynamo.reset()

             # 3. TorchDynamo nvfuser
-            trainer = get_regression_trainer(a=a, b=b, eval_len=eval_len, torchdynamo="nvfuser", output_dir=tmp_dir)
+            trainer = get_regression_trainer(
+                a=a, b=b, eval_len=eval_len, torch_compile_backend="nvfuser", output_dir=tmp_dir
+            )
             metrics = trainer.evaluate()
             self.assertAlmostEqual(metrics["eval_loss"], original_eval_loss)
             torchdynamo.reset()

             # 4. TorchDynamo fx2trt
-            trainer = get_regression_trainer(a=a, b=b, eval_len=eval_len, torchdynamo="fx2trt", output_dir=tmp_dir)
+            trainer = get_regression_trainer(
+                a=a, b=b, eval_len=eval_len, torch_compile_backend="fx2trt", output_dir=tmp_dir
+            )
             metrics = trainer.evaluate()
             self.assertAlmostEqual(metrics["eval_loss"], original_eval_loss)
             torchdynamo.reset()

-    @unittest.skip(reason="torch 2.0.0 gives `ModuleNotFoundError: No module named 'torchdynamo'`.")
     @require_torch_non_multi_gpu
-    @require_torchdynamo
+    @require_torch_gpu
     def test_torchdynamo_memory(self):
         # torchdynamo at the moment doesn't support DP/DDP, therefore require a single gpu
-        import torchdynamo
+        from torch import _dynamo as torchdynamo

         class CustomTrainer(Trainer):
             def compute_loss(self, model, inputs, return_outputs=False):
@@ -4085,7 +4088,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         with tempfile.TemporaryDirectory() as tmp_dir:
             a = torch.ones(1024, 1024, device="cuda", requires_grad=True)
             a.grad = None
-            args = TrainingArguments(output_dir=tmp_dir, torchdynamo="nvfuser")
+            args = TrainingArguments(output_dir=tmp_dir, torch_compile_backend="nvfuser")
             trainer = CustomTrainer(model=mod, args=args)
             # warmup
             for _ in range(10):
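The integration tests now reach dynamo through torch._dynamo and select the backend through the torch_compile_backend training argument instead of the retired torchdynamo one. A short sketch of both idioms (the output directory is illustrative):

    from torch import _dynamo as torchdynamo

    from transformers import TrainingArguments

    # clear any previously compiled graphs between experiments
    torchdynamo.reset()

    # select the TorchDynamo/torch.compile backend via TrainingArguments
    args = TrainingArguments(output_dir="/tmp/dynamo-test", torch_compile_backend="eager")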
@@ -21,7 +21,6 @@ from transformers.testing_utils import (
     get_torch_dist_unique_port,
     require_accelerate,
     require_fp8,
-    require_fsdp,
     require_torch_multi_accelerator,
     run_first,
     torch_device,
@@ -68,7 +67,6 @@ if is_torch_available():
 class TestFSDPTrainer(TestCasePlus):
     @require_torch_multi_accelerator
     @require_accelerate
-    @require_fsdp
     @run_first
     def test_trainer(self):
         output_dir = self.get_auto_remove_tmp_dir()
@@ -95,7 +93,6 @@ class TestFSDPTrainerFP8(TestCasePlus):
 class TestFSDPTrainerFP8(TestCasePlus):
     @require_torch_multi_accelerator
     @require_accelerate
-    @require_fsdp
     @require_fp8
     @run_first
     def test_trainer(self):
@@ -125,7 +122,6 @@ class TestFSDPTrainerFP8(TestCasePlus):
 class TestFSDPTrainerWrap(TestCasePlus):
     @require_torch_multi_accelerator
     @require_accelerate
-    @require_fsdp
     @run_first
     def test_trainer(self):
         output_dir = self.get_auto_remove_tmp_dir()
@@ -81,7 +81,6 @@ from transformers.utils.import_utils import (
     is_tf_available,
     is_torch_npu_available,
     is_torch_sdpa_available,
-    is_torchdynamo_available,
 )


@@ -1483,8 +1482,6 @@ class ModelUtilsTest(TestCasePlus):
             model.warn_if_padding_and_no_attention_mask(input_ids, attention_mask=None)
         self.assertIn("You may ignore this warning if your `pad_token_id`", cl.out)

-        if not is_torchdynamo_available():
-            self.skipTest(reason="torchdynamo is not available")
         with self.subTest("Ensure that the warning code is skipped when compiling with torchdynamo."):
             logger.warning_once.cache_clear()
             from torch._dynamo import config, testing
@@ -86,7 +86,7 @@ class DependencyVersionCheckTest(TestCasePlus):

     def test_python(self):
         # matching requirement
-        require_version("python>=3.6.0")
+        require_version("python>=3.9.0")

         # not matching requirements
         for req in ["python>9.9.9", "python<3.0.0"]: