Byebye pytorch 1.9 (#24080)

byebye

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-07-31 10:12:23 +06:00 · 2023-06-16 16:38:23 +02:00 · 2023-06-16 16:38:23 +02:00 · 896a58de15
commit 896a58de15
parent 62d71f4083
13 changed files with 16 additions and 91 deletions
--- a/.github/workflows/build-past-ci-docker-images.yml
+++ b/.github/workflows/build-past-ci-docker-images.yml
@@ -15,7 +15,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        version: ["1.13", "1.12", "1.11", "1.10", "1.9"]
+        version: ["1.13", "1.12", "1.11", "1.10"]
    runs-on: ubuntu-latest
    steps:
      -
--- a/.github/workflows/self-nightly-past-ci-caller.yml
+++ b/.github/workflows/self-nightly-past-ci-caller.yml
@@ -67,21 +67,10 @@ jobs:
      sha: ${{ github.sha }}
    secrets: inherit

-  run_past_ci_pytorch_1-9:
-    name: PyTorch 1.9
-    if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
-    needs: [run_past_ci_pytorch_1-10]
-    uses: ./.github/workflows/self-past.yml
-    with:
-      framework: pytorch
-      version: "1.9"
-      sha: ${{ github.sha }}
-    secrets: inherit
-
  run_past_ci_tensorflow_2-11:
    name: TensorFlow 2.11
    if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
-    needs: [run_past_ci_pytorch_1-9]
+    needs: [run_past_ci_pytorch_1-10]
    uses: ./.github/workflows/self-past.yml
    with:
      framework: tensorflow
--- a/docker/transformers-past-gpu/Dockerfile
+++ b/docker/transformers-past-gpu/Dockerfile
@@ -24,7 +24,7 @@ ARG FRAMEWORK
 ARG VERSION

 # Control `setuptools` version to avoid some issues
-RUN [ "$VERSION" != "1.9" -a "$VERSION" != "1.10" ] && python3 -m pip install -U setuptools || python3 -m pip install -U "setuptools<=59.5"
+RUN [ "$VERSION" != "1.10" ] && python3 -m pip install -U setuptools || python3 -m pip install -U "setuptools<=59.5"

 # Remove all frameworks
 RUN python3 -m pip uninstall -y torch torchvision torchaudio tensorflow jax flax
--- a/src/transformers/models/bridgetower/modeling_bridgetower.py
+++ b/src/transformers/models/bridgetower/modeling_bridgetower.py
@@ -33,19 +33,13 @@ from ...modeling_outputs import (
    SequenceClassifierOutput,
 )
 from ...modeling_utils import PreTrainedModel, apply_chunking_to_forward
-from ...pytorch_utils import find_pruneable_heads_and_indices, is_torch_greater_or_equal_than_1_10, prune_linear_layer
+from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
 from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
 from .configuration_bridgetower import BridgeTowerConfig, BridgeTowerTextConfig, BridgeTowerVisionConfig


 logger = logging.get_logger(__name__)

-if not is_torch_greater_or_equal_than_1_10:
-    logger.warning(
-        f"You are using torch=={torch.__version__}, but torch>=1.10.0 is required to use "
-        "BridgeTowerModel. Please upgrade torch."
-    )
-
 _CONFIG_FOR_DOC = "BridgeTowerConfig"
 _CHECKPOINT_FOR_DOC = "BridgeTower/bridgetower-base"
 _TOKENIZER_FOR_DOC = "RobertaTokenizer"
--- a/src/transformers/models/vilt/modeling_vilt.py
+++ b/src/transformers/models/vilt/modeling_vilt.py
@@ -36,7 +36,6 @@ from ...modeling_outputs import (
 from ...modeling_utils import PreTrainedModel
 from ...pytorch_utils import (
    find_pruneable_heads_and_indices,
-    is_torch_greater_or_equal_than_1_10,
    meshgrid,
    prune_linear_layer,
 )
@@ -46,12 +45,6 @@ from .configuration_vilt import ViltConfig

 logger = logging.get_logger(__name__)

-if not is_torch_greater_or_equal_than_1_10:
-    logger.warning(
-        f"You are using torch=={torch.__version__}, but torch>=1.10.0 is required to use "
-        "ViltModel. Please upgrade torch."
-    )
-
 _CONFIG_FOR_DOC = "ViltConfig"
 _CHECKPOINT_FOR_DOC = "dandelin/vilt-b32-mlm"

--- a/src/transformers/pytorch_utils.py
+++ b/src/transformers/pytorch_utils.py
@@ -31,7 +31,6 @@ parsed_torch_version_base = version.parse(version.parse(torch.__version__).base_
 is_torch_greater_or_equal_than_2_0 = parsed_torch_version_base >= version.parse("2.0")
 is_torch_greater_or_equal_than_1_12 = parsed_torch_version_base >= version.parse("1.12")
 is_torch_greater_or_equal_than_1_11 = parsed_torch_version_base >= version.parse("1.11")
-is_torch_greater_or_equal_than_1_10 = parsed_torch_version_base >= version.parse("1.10")
 is_torch_less_than_1_11 = parsed_torch_version_base < version.parse("1.11")


@@ -275,12 +274,7 @@ def meshgrid(

    Reference: https://pytorch.org/docs/1.13/generated/torch.meshgrid.html
    """
-    if is_torch_greater_or_equal_than_1_10:
-        return torch.meshgrid(*tensors, indexing=indexing)
-    else:
-        if indexing != "ij":
-            raise ValueError('torch.meshgrid only supports `indexing="ij"` for torch<1.10.')
-        return torch.meshgrid(*tensors)
+    return torch.meshgrid(*tensors, indexing=indexing)


 def id_tensor_storage(tensor: torch.Tensor) -> Tuple[torch.device, int, int]:
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -70,7 +70,7 @@ from .modelcard import TrainingSummary
 from .modeling_utils import PreTrainedModel, load_sharded_checkpoint, unwrap_model
 from .models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_MAPPING_NAMES
 from .optimization import Adafactor, get_scheduler
-from .pytorch_utils import ALL_LAYERNORM_LAYERS, is_torch_greater_or_equal_than_1_10
+from .pytorch_utils import ALL_LAYERNORM_LAYERS
 from .tokenization_utils_base import PreTrainedTokenizerBase
 from .trainer_callback import (
    CallbackHandler,
@@ -155,8 +155,6 @@ from .utils import (
 from .utils.generic import ContextManagers


-_is_native_cpu_amp_available = is_torch_greater_or_equal_than_1_10
-
 DEFAULT_CALLBACKS = [DefaultFlowCallback]
 DEFAULT_PROGRESS_CALLBACK = ProgressCallback

@@ -621,10 +619,8 @@ class Trainer:
                if args.device == torch.device("cpu"):
                    if args.fp16:
                        raise ValueError("Tried to use `fp16` but it is not supported on cpu")
-                    elif _is_native_cpu_amp_available:
-                        args.half_precision_backend = "cpu_amp"
                    else:
-                        raise ValueError("Tried to use cpu amp but native cpu amp is not available")
+                        args.half_precision_backend = "cpu_amp"
                else:
                    args.half_precision_backend = "cuda_amp"

@@ -2595,14 +2591,11 @@ class Trainer:
        arguments, depending on the situation.
        """
        if self.use_cuda_amp or self.use_cpu_amp:
-            if is_torch_greater_or_equal_than_1_10:
-                ctx_manager = (
-                    torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
-                    if self.use_cpu_amp
-                    else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
-                )
-            else:
-                ctx_manager = torch.cuda.amp.autocast()
+            ctx_manager = (
+                torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
+                if self.use_cpu_amp
+                else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
+            )
        else:
            ctx_manager = contextlib.nullcontext() if sys.version_info >= (3, 7) else contextlib.suppress()

--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@@ -258,16 +258,12 @@ def is_torch_bf16_gpu_available():
    # since currently no utility function is available we build our own.
    # some bits come from https://github.com/pytorch/pytorch/blob/2289a12f21c54da93bf5d696e3f9aea83dd9c10d/torch/testing/_internal/common_cuda.py#L51
    # with additional check for torch version
-    # to succeed:
-    # 1. torch >= 1.10 (1.9 should be enough for AMP API has changed in 1.10, so using 1.10 as minimal)
-    # 2. the hardware needs to support bf16 (GPU arch >= Ampere, or CPU)
-    # 3. if using gpu, CUDA >= 11
-    # 4. torch.autocast exists
+    # to succeed: (torch is required to be >= 1.10 anyway)
+    # 1. the hardware needs to support bf16 (GPU arch >= Ampere, or CPU)
+    # 2. if using gpu, CUDA >= 11
+    # 3. torch.autocast exists
    # XXX: one problem here is that it may give invalid results on mixed gpus setup, so it's
    # really only correct for the 0th gpu (or currently set default device if different from 0)
-    if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.10"):
-        return False
-
    if torch.cuda.is_available() and torch.version.cuda is not None:
        if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8:
            return False
@@ -287,9 +283,6 @@ def is_torch_bf16_cpu_available():

    import torch

-    if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.10"):
-        return False
-
    try:
        # multiple levels of AttributeError depending on the pytorch version so do them all in one check
        _ = torch.cpu.amp.autocast
@@ -526,8 +519,6 @@ def is_optimum_neuron_available():


 def is_safetensors_available():
-    if is_torch_available() and version.parse(_torch_version) < version.parse("1.10"):
-        return False
    return _safetensors_available


--- a/tests/models/bloom/test_modeling_bloom.py
+++ b/tests/models/bloom/test_modeling_bloom.py
@@ -38,9 +38,6 @@ if is_torch_available():
        BloomModel,
        BloomTokenizerFast,
    )
-    from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
-else:
-    is_torch_greater_or_equal_than_1_10 = False


@require_torch
@@ -518,10 +515,6 @@ class BloomEmbeddingTest(unittest.TestCase):
        super().setUp()
        self.path_bigscience_model = "bigscience/bigscience-small-testing"

-    @unittest.skipIf(
-        not is_torch_greater_or_equal_than_1_10,
-        "Test failed with torch < 1.10 (`LayerNormKernelImpl` not implemented for `BFloat16`)",
-    )
    @require_torch
    def test_embeddings(self):
        # The config in this checkpoint has `bfloat16` as `torch_dtype` -> model in `bfloat16`
--- a/tests/models/bridgetower/test_modeling_bridgetower.py
+++ b/tests/models/bridgetower/test_modeling_bridgetower.py
@@ -50,9 +50,6 @@ if is_torch_available():
        BridgeTowerModel,
    )
    from transformers.models.bridgetower.modeling_bridgetower import BRIDGETOWER_PRETRAINED_MODEL_ARCHIVE_LIST
-    from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
-else:
-    is_torch_greater_or_equal_than_1_10 = False

 if is_vision_available():
    from PIL import Image
@@ -298,7 +295,6 @@ class BridgeTowerModelTester:


@require_torch
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "BridgeTower is only available in torch v1.10+")
 class BridgeTowerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    all_model_classes = (
        (
@@ -516,7 +512,6 @@ def prepare_img():

@require_torch
@require_vision
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "BridgeTower is only available in torch v1.10+")
 class BridgeTowerModelIntegrationTest(unittest.TestCase):
    @cached_property
    def default_processor(self):
@@ -601,7 +596,6 @@ class BridgeTowerModelIntegrationTest(unittest.TestCase):

@slow
@require_torch
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "BridgeTower is only available in torch v1.10+")
 class BridgeTowerModelTrainingTest(unittest.TestCase):
    all_training_supported_model_classes = (
        (BridgeTowerForImageAndTextRetrieval, BridgeTowerForMaskedLM, BridgeTowerForContrastiveLearning)
--- a/tests/models/levit/test_modeling_levit.py
+++ b/tests/models/levit/test_modeling_levit.py
@@ -20,8 +20,6 @@ import unittest
 import warnings
 from math import ceil, floor

-from packaging import version
-
 from transformers import LevitConfig
 from transformers.file_utils import cached_property, is_torch_available, is_vision_available
 from transformers.models.auto import get_values
@@ -346,10 +344,6 @@ class LevitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
            loss.backward()

    def test_problem_types(self):
-        parsed_torch_version_base = version.parse(version.parse(torch.__version__).base_version)
-        if parsed_torch_version_base.base_version.startswith("1.9"):
-            self.skipTest(reason="This test fails with PyTorch 1.9.x: some CUDA issue")
-
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        problem_types = [
--- a/tests/models/tvlt/test_modeling_tvlt.py
+++ b/tests/models/tvlt/test_modeling_tvlt.py
@@ -42,9 +42,6 @@ if is_torch_available():

    from transformers import TvltForAudioVisualClassification, TvltForPreTraining, TvltModel
    from transformers.models.tvlt.modeling_tvlt import TVLT_PRETRAINED_MODEL_ARCHIVE_LIST
-    from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
-else:
-    is_torch_greater_or_equal_than_1_10 = False


 if is_datasets_available():
@@ -322,7 +319,6 @@ class TvltModelTester:


@require_torch
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "TVLT is only available in torch v1.10+")
 class TvltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    all_model_classes = (
        (TvltModel, TvltForPreTraining, TvltForAudioVisualClassification) if is_torch_available() else ()
--- a/tests/models/vilt/test_modeling_vilt.py
+++ b/tests/models/vilt/test_modeling_vilt.py
@@ -42,9 +42,6 @@ if is_torch_available():
        ViltModel,
    )
    from transformers.models.vilt.modeling_vilt import VILT_PRETRAINED_MODEL_ARCHIVE_LIST
-    from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
-else:
-    is_torch_greater_or_equal_than_1_10 = False

 if is_vision_available():
    import PIL
@@ -218,7 +215,6 @@ class ViltModelTester:


@require_torch
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "Vilt is only available in torch v1.10+")
 class ViltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    all_model_classes = (
        (
@@ -520,7 +516,6 @@ class ViltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):


@require_torch
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "Vilt is only available in torch v1.10+")
 class ViltForImagesAndTextClassificationModelTest(ViltModelTest, unittest.TestCase):
    all_model_classes = (ViltForImagesAndTextClassification,) if is_torch_available() else ()

@@ -545,7 +540,6 @@ def prepare_img():

@require_torch
@require_vision
-@unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "Vilt is only available in torch v1.10+")
 class ViltModelIntegrationTest(unittest.TestCase):
    @cached_property
    def default_processor(self):