From f57f014936404f32818cfec0ce977fb805bf259c Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Thu, 2 May 2024 13:59:40 +0200
Subject: [PATCH] Use `contiguous()` in clip checkpoint conversion script
 (#30613)

* fix

* fix

---------

Co-authored-by: ydshieh
---
 .../models/clip/convert_clip_original_pytorch_to_hf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/clip/convert_clip_original_pytorch_to_hf.py b/src/transformers/models/clip/convert_clip_original_pytorch_to_hf.py
index ff716a5b93f..60849c2efb7 100644
--- a/src/transformers/models/clip/convert_clip_original_pytorch_to_hf.py
+++ b/src/transformers/models/clip/convert_clip_original_pytorch_to_hf.py
@@ -82,7 +82,7 @@ def copy_encoder(hf_encoder, pt_model):
 
 def copy_text_model_and_projection(hf_model, pt_model):
     # copy projection
-    hf_model.text_projection.weight.data = pt_model.text_projection.data.T
+    hf_model.text_projection.weight.data = pt_model.text_projection.data.T.contiguous()
 
     # copy text encoder
     copy_encoder(hf_model.text_model, pt_model)
@@ -90,7 +90,7 @@ def copy_text_model_and_projection(hf_model, pt_model):
 
 def copy_vison_model_and_projection(hf_model, pt_model):
     # copy projection
-    hf_model.visual_projection.weight.data = pt_model.visual.proj.data.T
+    hf_model.visual_projection.weight.data = pt_model.visual.proj.data.T.contiguous()
 
     # copy layer norms
     copy_linear(hf_model.vision_model.pre_layrnorm, pt_model.visual.ln_pre)
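
A minimal sketch of the behavior this patch works around, assuming a standard PyTorch environment. In PyTorch, `tensor.T` returns a view that only swaps strides, so the result is not contiguous in memory, and serializing such a tensor later (for example when the converted model is saved in the safetensors format) can fail with a "non contiguous tensor" error. The `proj` name and shape below are placeholders, not values taken from the CLIP checkpoint.

import torch

proj = torch.randn(512, 768)       # placeholder for a projection matrix such as pt_model.text_projection.data
view = proj.T                      # transposed view: strides are swapped, no data is copied
print(view.is_contiguous())        # False

weight = proj.T.contiguous()       # materializes the transpose into contiguous memory, as the patch does
print(weight.is_contiguous())      # True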