mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 10:12:23 +06:00
Byebye pytorch 1.9 (#24080)
byebye

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
parent
62d71f4083
commit
896a58de15
@ -15,7 +15,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
version: ["1.13", "1.12", "1.11", "1.10", "1.9"]
|
||||
version: ["1.13", "1.12", "1.11", "1.10"]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
|
@ -67,21 +67,10 @@ jobs:
|
||||
sha: ${{ github.sha }}
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-9:
|
||||
name: PyTorch 1.9
|
||||
if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
|
||||
needs: [run_past_ci_pytorch_1-10]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.9"
|
||||
sha: ${{ github.sha }}
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-11:
|
||||
name: TensorFlow 2.11
|
||||
if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
|
||||
needs: [run_past_ci_pytorch_1-9]
|
||||
needs: [run_past_ci_pytorch_1-10]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
|
@ -24,7 +24,7 @@ ARG FRAMEWORK
|
||||
ARG VERSION
|
||||
|
||||
# Control `setuptools` version to avoid some issues
|
||||
RUN [ "$VERSION" != "1.9" -a "$VERSION" != "1.10" ] && python3 -m pip install -U setuptools || python3 -m pip install -U "setuptools<=59.5"
|
||||
RUN [ "$VERSION" != "1.10" ] && python3 -m pip install -U setuptools || python3 -m pip install -U "setuptools<=59.5"
|
||||
|
||||
# Remove all frameworks
|
||||
RUN python3 -m pip uninstall -y torch torchvision torchaudio tensorflow jax flax
|
||||
|
@ -33,19 +33,13 @@ from ...modeling_outputs import (
|
||||
SequenceClassifierOutput,
|
||||
)
|
||||
from ...modeling_utils import PreTrainedModel, apply_chunking_to_forward
|
||||
from ...pytorch_utils import find_pruneable_heads_and_indices, is_torch_greater_or_equal_than_1_10, prune_linear_layer
|
||||
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
|
||||
from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
|
||||
from .configuration_bridgetower import BridgeTowerConfig, BridgeTowerTextConfig, BridgeTowerVisionConfig
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
if not is_torch_greater_or_equal_than_1_10:
|
||||
logger.warning(
|
||||
f"You are using torch=={torch.__version__}, but torch>=1.10.0 is required to use "
|
||||
"BridgeTowerModel. Please upgrade torch."
|
||||
)
|
||||
|
||||
_CONFIG_FOR_DOC = "BridgeTowerConfig"
|
||||
_CHECKPOINT_FOR_DOC = "BridgeTower/bridgetower-base"
|
||||
_TOKENIZER_FOR_DOC = "RobertaTokenizer"
|
||||
|
@ -36,7 +36,6 @@ from ...modeling_outputs import (
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...pytorch_utils import (
|
||||
find_pruneable_heads_and_indices,
|
||||
is_torch_greater_or_equal_than_1_10,
|
||||
meshgrid,
|
||||
prune_linear_layer,
|
||||
)
|
||||
@ -46,12 +45,6 @@ from .configuration_vilt import ViltConfig
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
if not is_torch_greater_or_equal_than_1_10:
|
||||
logger.warning(
|
||||
f"You are using torch=={torch.__version__}, but torch>=1.10.0 is required to use "
|
||||
"ViltModel. Please upgrade torch."
|
||||
)
|
||||
|
||||
_CONFIG_FOR_DOC = "ViltConfig"
|
||||
_CHECKPOINT_FOR_DOC = "dandelin/vilt-b32-mlm"
|
||||
|
||||
|
@ -31,7 +31,6 @@ parsed_torch_version_base = version.parse(version.parse(torch.__version__).base_
|
||||
is_torch_greater_or_equal_than_2_0 = parsed_torch_version_base >= version.parse("2.0")
|
||||
is_torch_greater_or_equal_than_1_12 = parsed_torch_version_base >= version.parse("1.12")
|
||||
is_torch_greater_or_equal_than_1_11 = parsed_torch_version_base >= version.parse("1.11")
|
||||
is_torch_greater_or_equal_than_1_10 = parsed_torch_version_base >= version.parse("1.10")
|
||||
is_torch_less_than_1_11 = parsed_torch_version_base < version.parse("1.11")
|
||||
|
||||
|
||||
@ -275,12 +274,7 @@ def meshgrid(
|
||||
|
||||
Reference: https://pytorch.org/docs/1.13/generated/torch.meshgrid.html
|
||||
"""
|
||||
if is_torch_greater_or_equal_than_1_10:
|
||||
return torch.meshgrid(*tensors, indexing=indexing)
|
||||
else:
|
||||
if indexing != "ij":
|
||||
raise ValueError('torch.meshgrid only supports `indexing="ij"` for torch<1.10.')
|
||||
return torch.meshgrid(*tensors)
|
||||
return torch.meshgrid(*tensors, indexing=indexing)
|
||||
|
||||
|
||||
def id_tensor_storage(tensor: torch.Tensor) -> Tuple[torch.device, int, int]:
|
||||
|
@ -70,7 +70,7 @@ from .modelcard import TrainingSummary
|
||||
from .modeling_utils import PreTrainedModel, load_sharded_checkpoint, unwrap_model
|
||||
from .models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_MAPPING_NAMES
|
||||
from .optimization import Adafactor, get_scheduler
|
||||
from .pytorch_utils import ALL_LAYERNORM_LAYERS, is_torch_greater_or_equal_than_1_10
|
||||
from .pytorch_utils import ALL_LAYERNORM_LAYERS
|
||||
from .tokenization_utils_base import PreTrainedTokenizerBase
|
||||
from .trainer_callback import (
|
||||
CallbackHandler,
|
||||
@ -155,8 +155,6 @@ from .utils import (
|
||||
from .utils.generic import ContextManagers
|
||||
|
||||
|
||||
_is_native_cpu_amp_available = is_torch_greater_or_equal_than_1_10
|
||||
|
||||
DEFAULT_CALLBACKS = [DefaultFlowCallback]
|
||||
DEFAULT_PROGRESS_CALLBACK = ProgressCallback
|
||||
|
||||
@ -621,10 +619,8 @@ class Trainer:
|
||||
if args.device == torch.device("cpu"):
|
||||
if args.fp16:
|
||||
raise ValueError("Tried to use `fp16` but it is not supported on cpu")
|
||||
elif _is_native_cpu_amp_available:
|
||||
args.half_precision_backend = "cpu_amp"
|
||||
else:
|
||||
raise ValueError("Tried to use cpu amp but native cpu amp is not available")
|
||||
args.half_precision_backend = "cpu_amp"
|
||||
else:
|
||||
args.half_precision_backend = "cuda_amp"
|
||||
|
||||
@ -2595,14 +2591,11 @@ class Trainer:
|
||||
arguments, depending on the situation.
|
||||
"""
|
||||
if self.use_cuda_amp or self.use_cpu_amp:
|
||||
if is_torch_greater_or_equal_than_1_10:
|
||||
ctx_manager = (
|
||||
torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
|
||||
if self.use_cpu_amp
|
||||
else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
|
||||
)
|
||||
else:
|
||||
ctx_manager = torch.cuda.amp.autocast()
|
||||
ctx_manager = (
|
||||
torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
|
||||
if self.use_cpu_amp
|
||||
else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
|
||||
)
|
||||
else:
|
||||
ctx_manager = contextlib.nullcontext() if sys.version_info >= (3, 7) else contextlib.suppress()
|
||||
|
||||
|
@ -258,16 +258,12 @@ def is_torch_bf16_gpu_available():
|
||||
# since currently no utility function is available we build our own.
|
||||
# some bits come from https://github.com/pytorch/pytorch/blob/2289a12f21c54da93bf5d696e3f9aea83dd9c10d/torch/testing/_internal/common_cuda.py#L51
|
||||
# with additional check for torch version
|
||||
# to succeed:
|
||||
# 1. torch >= 1.10 (1.9 should be enough for AMP API has changed in 1.10, so using 1.10 as minimal)
|
||||
# 2. the hardware needs to support bf16 (GPU arch >= Ampere, or CPU)
|
||||
# 3. if using gpu, CUDA >= 11
|
||||
# 4. torch.autocast exists
|
||||
# to succeed: (torch is required to be >= 1.10 anyway)
|
||||
# 1. the hardware needs to support bf16 (GPU arch >= Ampere, or CPU)
|
||||
# 2. if using gpu, CUDA >= 11
|
||||
# 3. torch.autocast exists
|
||||
# XXX: one problem here is that it may give invalid results on mixed gpus setup, so it's
|
||||
# really only correct for the 0th gpu (or currently set default device if different from 0)
|
||||
if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.10"):
|
||||
return False
|
||||
|
||||
if torch.cuda.is_available() and torch.version.cuda is not None:
|
||||
if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8:
|
||||
return False
|
||||
@ -287,9 +283,6 @@ def is_torch_bf16_cpu_available():
|
||||
|
||||
import torch
|
||||
|
||||
if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.10"):
|
||||
return False
|
||||
|
||||
try:
|
||||
# multiple levels of AttributeError depending on the pytorch version so do them all in one check
|
||||
_ = torch.cpu.amp.autocast
|
||||
@ -526,8 +519,6 @@ def is_optimum_neuron_available():
|
||||
|
||||
|
||||
def is_safetensors_available():
|
||||
if is_torch_available() and version.parse(_torch_version) < version.parse("1.10"):
|
||||
return False
|
||||
return _safetensors_available
|
||||
|
||||
|
||||
|
@ -38,9 +38,6 @@ if is_torch_available():
|
||||
BloomModel,
|
||||
BloomTokenizerFast,
|
||||
)
|
||||
from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
|
||||
else:
|
||||
is_torch_greater_or_equal_than_1_10 = False
|
||||
|
||||
|
||||
@require_torch
|
||||
@ -518,10 +515,6 @@ class BloomEmbeddingTest(unittest.TestCase):
|
||||
super().setUp()
|
||||
self.path_bigscience_model = "bigscience/bigscience-small-testing"
|
||||
|
||||
@unittest.skipIf(
|
||||
not is_torch_greater_or_equal_than_1_10,
|
||||
"Test failed with torch < 1.10 (`LayerNormKernelImpl` not implemented for `BFloat16`)",
|
||||
)
|
||||
@require_torch
|
||||
def test_embeddings(self):
|
||||
# The config in this checkpoint has `bfloat16` as `torch_dtype` -> model in `bfloat16`
|
||||
|
@ -50,9 +50,6 @@ if is_torch_available():
|
||||
BridgeTowerModel,
|
||||
)
|
||||
from transformers.models.bridgetower.modeling_bridgetower import BRIDGETOWER_PRETRAINED_MODEL_ARCHIVE_LIST
|
||||
from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
|
||||
else:
|
||||
is_torch_greater_or_equal_than_1_10 = False
|
||||
|
||||
if is_vision_available():
|
||||
from PIL import Image
|
||||
@ -298,7 +295,6 @@ class BridgeTowerModelTester:
|
||||
|
||||
|
||||
@require_torch
|
||||
@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "BridgeTower is only available in torch v1.10+")
|
||||
class BridgeTowerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
all_model_classes = (
|
||||
(
|
||||
@ -516,7 +512,6 @@ def prepare_img():
|
||||
|
||||
@require_torch
|
||||
@require_vision
|
||||
@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "BridgeTower is only available in torch v1.10+")
|
||||
class BridgeTowerModelIntegrationTest(unittest.TestCase):
|
||||
@cached_property
|
||||
def default_processor(self):
|
||||
@ -601,7 +596,6 @@ class BridgeTowerModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
@require_torch
|
||||
@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "BridgeTower is only available in torch v1.10+")
|
||||
class BridgeTowerModelTrainingTest(unittest.TestCase):
|
||||
all_training_supported_model_classes = (
|
||||
(BridgeTowerForImageAndTextRetrieval, BridgeTowerForMaskedLM, BridgeTowerForContrastiveLearning)
|
||||
|
@ -20,8 +20,6 @@ import unittest
|
||||
import warnings
|
||||
from math import ceil, floor
|
||||
|
||||
from packaging import version
|
||||
|
||||
from transformers import LevitConfig
|
||||
from transformers.file_utils import cached_property, is_torch_available, is_vision_available
|
||||
from transformers.models.auto import get_values
|
||||
@ -346,10 +344,6 @@ class LevitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
loss.backward()
|
||||
|
||||
def test_problem_types(self):
|
||||
parsed_torch_version_base = version.parse(version.parse(torch.__version__).base_version)
|
||||
if parsed_torch_version_base.base_version.startswith("1.9"):
|
||||
self.skipTest(reason="This test fails with PyTorch 1.9.x: some CUDA issue")
|
||||
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
problem_types = [
|
||||
|
@ -42,9 +42,6 @@ if is_torch_available():
|
||||
|
||||
from transformers import TvltForAudioVisualClassification, TvltForPreTraining, TvltModel
|
||||
from transformers.models.tvlt.modeling_tvlt import TVLT_PRETRAINED_MODEL_ARCHIVE_LIST
|
||||
from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
|
||||
else:
|
||||
is_torch_greater_or_equal_than_1_10 = False
|
||||
|
||||
|
||||
if is_datasets_available():
|
||||
@ -322,7 +319,6 @@ class TvltModelTester:
|
||||
|
||||
|
||||
@require_torch
|
||||
@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "TVLT is only available in torch v1.10+")
|
||||
class TvltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
all_model_classes = (
|
||||
(TvltModel, TvltForPreTraining, TvltForAudioVisualClassification) if is_torch_available() else ()
|
||||
|
@ -42,9 +42,6 @@ if is_torch_available():
|
||||
ViltModel,
|
||||
)
|
||||
from transformers.models.vilt.modeling_vilt import VILT_PRETRAINED_MODEL_ARCHIVE_LIST
|
||||
from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
|
||||
else:
|
||||
is_torch_greater_or_equal_than_1_10 = False
|
||||
|
||||
if is_vision_available():
|
||||
import PIL
|
||||
@ -218,7 +215,6 @@ class ViltModelTester:
|
||||
|
||||
|
||||
@require_torch
|
||||
@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "Vilt is only available in torch v1.10+")
|
||||
class ViltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
all_model_classes = (
|
||||
(
|
||||
@ -520,7 +516,6 @@ class ViltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
|
||||
@require_torch
|
||||
@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "Vilt is only available in torch v1.10+")
|
||||
class ViltForImagesAndTextClassificationModelTest(ViltModelTest, unittest.TestCase):
|
||||
all_model_classes = (ViltForImagesAndTextClassification,) if is_torch_available() else ()
|
||||
|
||||
@ -545,7 +540,6 @@ def prepare_img():
|
||||
|
||||
@require_torch
|
||||
@require_vision
|
||||
@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "Vilt is only available in torch v1.10+")
|
||||
class ViltModelIntegrationTest(unittest.TestCase):
|
||||
@cached_property
|
||||
def default_processor(self):
|
||||
|
Loading…
Reference in New Issue
Block a user