mirror of
https://github.com/huggingface/transformers.git
synced 2025-08-02 19:21:31 +06:00
Optimize inference only mode memory if ipex is used (#21083)
* Optimize inference only mode memory if ipex is used Signed-off-by: Wang, Yi A <yi.a.wang@intel.com> * fix code style Signed-off-by: Wang, Yi A <yi.a.wang@intel.com> Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
6767ce71d6
commit
e849e5bb4a
@@ -1309,8 +1309,9 @@ class Trainer:
             if not training:
                 model.eval()
                 dtype = torch.bfloat16 if not self.is_in_train and self.args.bf16_full_eval else dtype
-                model = ipex.optimize(model, dtype=dtype, level="O1", conv_bn_folding=False)
+                # conv_bn_folding is disabled as it fails in symbolic tracing, resulting in ipex warnings
+                model = ipex.optimize(model, dtype=dtype, level="O1", conv_bn_folding=False, inplace=not self.is_in_train)
             else:
                 if not model.training:
                     model.train()

Loading…
Reference in New Issue
Block a user