Add tearDown method to Quark to solve OOM issues (#38234)

fix
2025-07-03 12:50:06 +06:00 · 2025-05-21 14:26:44 +02:00 · 2025-05-21 14:26:44 +02:00 · 9a962dd9ed
commit 9a962dd9ed
parent 101b3fa4ea
1 changed files with 9 additions and 1 deletions
--- a/tests/quantization/quark_integration/test_quark.py
+++ b/tests/quantization/quark_integration/test_quark.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import gc
 import unittest

 from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, QuarkConfig
@@ -77,6 +77,14 @@ class QuarkTest(unittest.TestCase):
            device_map=cls.device_map,
        )

+    def tearDown(self):
+        r"""
+        Runs after each test to free the GPU memory and cache (garbage collection, then emptying the CUDA cache), and
+        to avoid unexpected behaviors. Please see: https://discuss.pytorch.org/t/how-can-we-release-gpu-memory-cache/14530/27
+        """
+        gc.collect()
+        torch.cuda.empty_cache()
+
    def test_memory_footprint(self):
        mem_quantized = self.quantized_model.get_memory_footprint()