Add tearDown method to Quark to solve OOM issues (#38234)

fix
This commit is contained in:
Mohamed Mekkouri 2025-05-21 14:26:44 +02:00 committed by GitHub
parent 101b3fa4ea
commit 9a962dd9ed
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import unittest
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, QuarkConfig
@ -77,6 +77,14 @@ class QuarkTest(unittest.TestCase):
device_map=cls.device_map,
)
def tearDown(self):
    """Release GPU memory and cache once a test has finished.

    Runs after every test so that memory held by one test does not carry
    over into the next one, and to avoid unexpected behaviors. Background:
    https://discuss.pytorch.org/t/how-can-we-release-gpu-memory-cache/14530/27
    """
    # Collect unreachable Python objects first, then ask torch to empty its
    # CUDA cache.
    gc.collect()
    torch.cuda.empty_cache()
def test_memory_footprint(self):
mem_quantized = self.quantized_model.get_memory_footprint()