diff --git a/src/transformers/models/csm/modeling_csm.py b/src/transformers/models/csm/modeling_csm.py
index 2b133befece..6317218f46a 100644
--- a/src/transformers/models/csm/modeling_csm.py
+++ b/src/transformers/models/csm/modeling_csm.py
@@ -565,7 +565,7 @@ class CsmDepthDecoderForCausalLM(CsmPreTrainedModel, GenerationMixin):
     def get_decoder(self):
         return self.model

-    @check_model_inputs
+    @can_return_tuple
     @auto_docstring
     def forward(
         self,
@@ -693,7 +693,7 @@ class CsmBackboneModel(CsmPreTrainedModel):
     def set_input_embeddings(self, value):
         self.embed_tokens = value

-    @can_return_tuple
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
diff --git a/src/transformers/models/csm/modular_csm.py b/src/transformers/models/csm/modular_csm.py
index 9a6a4942de2..82d56e6b9f9 100644
--- a/src/transformers/models/csm/modular_csm.py
+++ b/src/transformers/models/csm/modular_csm.py
@@ -324,7 +324,7 @@ class CsmDepthDecoderForCausalLM(LlamaForCausalLM, GenerationMixin):

         return model_inputs

-    @check_model_inputs
+    @can_return_tuple
     @auto_docstring
     def forward(
         self,
@@ -413,7 +413,7 @@ class CsmBackboneModel(LlamaModel):
         super().__init__(config)
         self.embed_tokens = CsmBackboneModelEmbeddings(config)

-    @can_return_tuple
+    @check_model_inputs
     @auto_docstring
     def forward(self, **super_kwargs):
         r"""
diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py
index c646c2d5cd6..de934cc515c 100644
--- a/src/transformers/utils/generic.py
+++ b/src/transformers/utils/generic.py
@@ -1023,7 +1023,6 @@ def check_model_inputs(func):
             for k in capture_flags
         }
         recordable_keys["output_cross_attentions"] = recordable_keys.get("output_attentions", None)
-        print(recordable_keys)
         if any(recordable_keys.values()):
             capture_tasks = []
             for key, layer_specs in capture_flags.items():
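
As a rough illustration of the split this diff converges on, the sketch below decorates two toy modules the same way: check_model_inputs on the bare backbone forward (the decorator that, per the generic.py hunk, builds recordable_keys from the output_* flags) and can_return_tuple on the *ForCausalLM forward. It assumes both decorators are importable from transformers.utils.generic, as in the tree this diff targets; the Toy* classes are hypothetical stand-ins, not the actual CSM code, and the comments paraphrase the decorators' roles rather than quote the library docs.

from transformers.utils.generic import can_return_tuple, check_model_inputs


class ToyBackboneModel:
    # Hypothetical stand-in for CsmBackboneModel: the bare model gets
    # @check_model_inputs, which is expected to inspect output_attentions /
    # output_hidden_states style kwargs and record the requested outputs.
    @check_model_inputs
    def forward(self, input_ids=None, **kwargs):
        ...  # body elided; only the decorator placement matters here


class ToyForCausalLM:
    # Hypothetical stand-in for CsmDepthDecoderForCausalLM: the LM head keeps
    # @can_return_tuple, which is expected to hand the ModelOutput back as a
    # plain tuple when a tuple return is requested.
    @can_return_tuple
    def forward(self, input_ids=None, labels=None, **kwargs):
        ...  # body elided; only the decorator placement matters here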