diff --git a/src/transformers/models/d_fine/modeling_d_fine.py b/src/transformers/models/d_fine/modeling_d_fine.py
index 1db385b52d8..573c3f3d29f 100644
--- a/src/transformers/models/d_fine/modeling_d_fine.py
+++ b/src/transformers/models/d_fine/modeling_d_fine.py
@@ -1248,6 +1248,10 @@ class DFineDecoder(DFinePreTrainedModel):
 
             if self.class_embed is not None and (self.training or i == self.eval_idx):
                 scores = self.class_embed[i](hidden_states)
+                # Add initial logits and reference points with pre-bbox head
+                if i == 0:
+                    intermediate_logits += (scores,)
+                    intermediate_reference_points += (new_reference_points,)
                 # Lqe does not affect the performance here.
                 scores = self.lqe_layers[i](scores, pred_corners)
                 intermediate_logits += (scores,)
diff --git a/src/transformers/models/d_fine/modular_d_fine.py b/src/transformers/models/d_fine/modular_d_fine.py
index c252f8224c9..e7c6b5a81c5 100644
--- a/src/transformers/models/d_fine/modular_d_fine.py
+++ b/src/transformers/models/d_fine/modular_d_fine.py
@@ -803,6 +803,10 @@ class DFineDecoder(RTDetrDecoder):
 
             if self.class_embed is not None and (self.training or i == self.eval_idx):
                 scores = self.class_embed[i](hidden_states)
+                # Add initial logits and reference points with pre-bbox head
+                if i == 0:
+                    intermediate_logits += (scores,)
+                    intermediate_reference_points += (new_reference_points,)
                 # Lqe does not affect the performance here.
                 scores = self.lqe_layers[i](scores, pred_corners)
                 intermediate_logits += (scores,)
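
For context, a minimal, self-contained sketch (not the actual `DFineDecoder.forward`; the tensor shapes and the LQE stand-in below are placeholder assumptions) of what the added `if i == 0:` branch changes: the accumulated output tuples now carry one extra entry holding the initial, pre-LQE scores and the pre-bbox-head reference points, alongside the per-layer predictions.

```python
# Sketch only: simulates the decoder accumulation loop to show the extra
# "initial" entry added at layer 0 by this patch.
import torch

num_layers, batch, queries, classes = 3, 2, 5, 4

intermediate_logits = ()
intermediate_reference_points = ()

for i in range(num_layers):
    scores = torch.randn(batch, queries, classes)          # stand-in for class_embed[i](hidden_states)
    new_reference_points = torch.rand(batch, queries, 4)   # stand-in for the pre-bbox-head reference points

    if i == 0:
        # New behaviour: also keep the initial (pre-LQE) predictions.
        intermediate_logits += (scores,)
        intermediate_reference_points += (new_reference_points,)

    scores = scores + 0.0  # stand-in for lqe_layers[i](scores, pred_corners)
    intermediate_logits += (scores,)
    intermediate_reference_points += (new_reference_points,)

# One more entry than before the patch (previously num_layers entries).
assert len(intermediate_logits) == num_layers + 1
assert len(intermediate_reference_points) == num_layers + 1
print(torch.stack(intermediate_logits).shape)  # (num_layers + 1, batch, queries, classes)
```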