Remove deprecated code (#37059)

* Remove deprecated code

* fix get_loading_attributes

* fix error

* skip test

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
Author: cyyever
Date: 2025-03-31 17:15:35 +08:00 (committed by GitHub)
Parent: d1efaf0318
Commit: f99c279d20
8 changed files with 13 additions and 66 deletions

View File

@@ -47,7 +47,7 @@ from transformers import (
     Trainer,
     TrainingArguments,
     default_data_collator,
-    is_torch_tpu_available,
+    is_torch_xla_available,
     set_seed,
 )
 from transformers.integrations import is_deepspeed_zero3_enabled
@@ -525,7 +525,7 @@ def main():
     if torch.cuda.is_availble():
         pad_factor = 8
-    elif is_torch_tpu_available():
+    elif is_torch_xla_available(check_is_tpu=True):
         pad_factor = 128

     # Add the new tokens to the tokenizer
@@ -795,9 +795,13 @@ def main():
         processing_class=tokenizer,
         # Data collator will default to DataCollatorWithPadding, so we change it.
         data_collator=default_data_collator,
-        compute_metrics=compute_metrics if training_args.do_eval and not is_torch_tpu_available() else None,
+        compute_metrics=compute_metrics
+        if training_args.do_eval and not is_torch_xla_available(check_is_tpu=True)
+        else None,
         preprocess_logits_for_metrics=(
-            preprocess_logits_for_metrics if training_args.do_eval and not is_torch_tpu_available() else None
+            preprocess_logits_for_metrics
+            if training_args.do_eval and not is_torch_xla_available(check_is_tpu=True)
+            else None
         ),
     )
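For scripts outside this repository that still call the removed helper, the change above is the whole migration. A minimal sketch, assuming a transformers version where only is_torch_xla_available remains; the fallback branch is an assumption added for completeness:

import torch
from transformers import is_torch_xla_available

# Pick a padding multiple for the embedding resize, mirroring the example scripts:
# 8 suits CUDA tensor cores, 128 suits TPU/XLA tiling.
if torch.cuda.is_available():
    pad_factor = 8
elif is_torch_xla_available(check_is_tpu=True):  # replaces the removed is_torch_tpu_available()
    pad_factor = 128
else:
    pad_factor = 1  # assumption: no special padding requirement on other backends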

View File

@@ -52,7 +52,7 @@ from transformers import (
     SchedulerType,
     default_data_collator,
     get_scheduler,
-    is_torch_tpu_available,
+    is_torch_xla_available,
 )
 from transformers.integrations import is_deepspeed_zero3_enabled
 from transformers.utils import check_min_version, send_example_telemetry
@@ -492,7 +492,7 @@ def main():
     if torch.cuda.is_availble():
         pad_factor = 8
-    elif is_torch_tpu_available():
+    elif is_torch_xla_available(check_is_tpu=True):
         pad_factor = 128

     # Add the new tokens to the tokenizer

View File

@@ -1037,7 +1037,6 @@ _import_structure = {
        "is_torch_musa_available",
        "is_torch_neuroncore_available",
        "is_torch_npu_available",
-        "is_torch_tpu_available",
        "is_torchvision_available",
        "is_torch_xla_available",
        "is_torch_xpu_available",
@@ -6341,7 +6340,6 @@ if TYPE_CHECKING:
        is_torch_musa_available,
        is_torch_neuroncore_available,
        is_torch_npu_available,
-        is_torch_tpu_available,
        is_torch_xla_available,
        is_torch_xpu_available,
        is_torchvision_available,
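Downstream code that imported the deprecated name from the top-level package will now fail at import time; the remaining public helper covers the same check. A hedged before/after sketch:

# Before (import removed by this commit):
# from transformers import is_torch_tpu_available
# on_tpu = is_torch_tpu_available()

# After:
from transformers import is_torch_xla_available

on_tpu = is_torch_xla_available(check_is_tpu=True)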

View File

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import warnings
 from collections.abc import Collection, Iterable
 from math import ceil
 from typing import Optional, Union
@@ -453,7 +452,6 @@ def center_crop(
     size: tuple[int, int],
     data_format: Optional[Union[str, ChannelDimension]] = None,
     input_data_format: Optional[Union[str, ChannelDimension]] = None,
-    return_numpy: Optional[bool] = None,
 ) -> np.ndarray:
     """
     Crops the `image` to the specified `size` using a center crop. Note that if the image is too small to be cropped to
@@ -474,22 +472,11 @@ def center_crop(
                 - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                 - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
             If unset, will use the inferred format of the input image.
-        return_numpy (`bool`, *optional*):
-            Whether or not to return the cropped image as a numpy array. Used for backwards compatibility with the
-            previous ImageFeatureExtractionMixin method.
-            - Unset: will return the same type as the input image.
-            - `True`: will return a numpy array.
-            - `False`: will return a `PIL.Image.Image` object.

     Returns:
         `np.ndarray`: The cropped image.
     """
     requires_backends(center_crop, ["vision"])

-    if return_numpy is not None:
-        warnings.warn("return_numpy is deprecated and will be removed in v.4.33", FutureWarning)
-
-    return_numpy = True if return_numpy is None else return_numpy
-
     if not isinstance(image, np.ndarray):
         raise TypeError(f"Input image must be of type np.ndarray, got {type(image)}")
@@ -541,9 +528,6 @@ def center_crop(
     new_image = new_image[..., max(0, top) : min(new_height, bottom), max(0, left) : min(new_width, right)]
     new_image = to_channel_dimension_format(new_image, output_data_format, ChannelDimension.FIRST)

-    if not return_numpy:
-        new_image = to_pil_image(new_image)
-
     return new_image
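With return_numpy gone, center_crop always returns an np.ndarray; callers that relied on return_numpy=False can convert explicitly. A small sketch, assuming Pillow is installed (the function requires the vision backend) and using the to_pil_image helper from the same module:

import numpy as np
from transformers.image_transforms import center_crop, to_pil_image

# Dummy channels-first image (3, 300, 400); any real np.ndarray image works the same way.
image = np.random.randint(0, 256, (3, 300, 400), dtype=np.uint8)

cropped = center_crop(image, size=(224, 224))  # now always an np.ndarray
pil_cropped = to_pil_image(cropped)            # old return_numpy=False behaviour, done explicitly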

View File

@@ -228,7 +228,6 @@ from .import_utils import (
     is_torch_sdpa_available,
     is_torch_tensorrt_fx_available,
     is_torch_tf32_available,
-    is_torch_tpu_available,
     is_torch_xla_available,
     is_torch_xpu_available,
     is_torchao_available,

View File

@@ -675,31 +675,6 @@ def is_g2p_en_available():
     return _g2p_en_available


-@lru_cache()
-def is_torch_tpu_available(check_device=True):
-    "Checks if `torch_xla` is installed and potentially if a TPU is in the environment"
-    warnings.warn(
-        "`is_torch_tpu_available` is deprecated and will be removed in 4.41.0. "
-        "Please use the `is_torch_xla_available` instead.",
-        FutureWarning,
-    )
-    if not _torch_available:
-        return False
-    if importlib.util.find_spec("torch_xla") is not None:
-        if check_device:
-            # We need to check if `xla_device` can be found, will raise a RuntimeError if not
-            try:
-                import torch_xla.core.xla_model as xm
-
-                _ = xm.xla_device()
-                return True
-            except RuntimeError:
-                return False
-        return True
-    return False
-
-
 @lru_cache
 def is_torch_xla_available(check_is_tpu=False, check_is_gpu=False):
     """

View File

@@ -682,7 +682,6 @@ class GPTQConfig(QuantizationConfigMixin):
         self.use_exllama = use_exllama
         self.max_input_length = max_input_length
         self.exllama_config = exllama_config
-        self.disable_exllama = kwargs.pop("disable_exllama", None)
         self.cache_block_outputs = cache_block_outputs
         self.modules_in_block_to_quantize = modules_in_block_to_quantize
         self.post_init()
@@ -690,7 +689,6 @@ class GPTQConfig(QuantizationConfigMixin):
     def get_loading_attributes(self):
         attibutes_dict = copy.deepcopy(self.__dict__)
         loading_attibutes = [
-            "disable_exllama",
             "use_exllama",
             "exllama_config",
             "use_cuda_fp16",
@@ -739,20 +737,9 @@ class GPTQConfig(QuantizationConfigMixin):
             self.use_exllama = False

         # auto-gptq specific kernel control logic
-        if self.disable_exllama is None and self.use_exllama is None:
+        if self.use_exllama is None:
             # New default behaviour
             self.use_exllama = True
-        elif self.disable_exllama is not None and self.use_exllama is None:
-            # Follow pattern of old config
-            logger.warning(
-                "Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`."
-                "The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file."
-            )
-            self.use_exllama = not self.disable_exllama
-            self.disable_exllama = None
-        elif self.disable_exllama is not None and self.use_exllama is not None:
-            # Only happens if user explicitly passes in both arguments
-            raise ValueError("Cannot specify both `disable_exllama` and `use_exllama`. Please use just `use_exllama`")

         if self.exllama_config is None:
             self.exllama_config = {"version": ExllamaVersion.ONE}
@@ -809,7 +796,7 @@ class GPTQConfig(QuantizationConfigMixin):
         if "disable_exllama" in config_dict:
             config_dict["use_exllama"] = not config_dict["disable_exllama"]
             # switch to None to not trigger the warning
-            config_dict["disable_exllama"] = None
+            config_dict.pop("disable_exllama")
         config = cls(**config_dict)

         return config
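For GPTQConfig objects built in user code, the migration is to drop disable_exllama and state the intent with use_exllama; dictionaries that still carry the legacy key are rewritten by the loader shown in the last hunk before the constructor runs. A minimal constructor-side sketch (the exact failure mode for the removed kwarg is not verified here):

from transformers import GPTQConfig

# Before: GPTQConfig(bits=4, disable_exllama=True)  # legacy kwarg, no longer popped
# After: express the same intent directly.
quant_config = GPTQConfig(bits=4, use_exllama=False)   # exllama kernels off
default_config = GPTQConfig(bits=4)                    # use_exllama falls back to True in post_init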

View File

@@ -592,7 +592,7 @@ class SeamlessM4Tv2ModelWithSpeechInputTest(ModelTesterMixin, unittest.TestCase)
     # TODO: @ydshieh: refer to #34968
     @unittest.skip(reason="Failing on multi-gpu runner")
     def test_retain_grad_hidden_states_attentions(self):
-        pass
+        self.skipTest(reason="Failing on multi-gpu runner")

     @require_torch
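The last change swaps a bare pass for self.skipTest inside an already-skipped test: with the decorator in place the body never runs, but if the decorator is later removed or the method is invoked directly, the test now reports as skipped instead of silently passing. A toy illustration with a hypothetical test case:

import unittest


class ExampleTest(unittest.TestCase):
    @unittest.skip(reason="Failing on multi-gpu runner")
    def test_retain_grad_hidden_states_attentions(self):
        # Raises unittest.SkipTest, so the runner records a skip rather than a pass.
        self.skipTest(reason="Failing on multi-gpu runner")


if __name__ == "__main__":
    unittest.main()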