Enable d_fine fine-tuning properly (#37962)

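Add pre_output (the initial logits and reference points from the pre-bbox head) at the front of the decoder's intermediate outputs.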

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>
This commit is contained in:
Sangbum Daniel Choi 2025-05-15 00:53:04 +09:00 committed by GitHub
parent e021bf6bf8
commit aa27fa75cd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 8 additions and 0 deletions

View File

@ -1248,6 +1248,10 @@ class DFineDecoder(DFinePreTrainedModel):
if self.class_embed is not None and (self.training or i == self.eval_idx):
scores = self.class_embed[i](hidden_states)
# Add initial logits and reference points with pre-bbox head
if i == 0:
intermediate_logits += (scores,)
intermediate_reference_points += (new_reference_points,)
# Lqe does not affect the performance here.
scores = self.lqe_layers[i](scores, pred_corners)
intermediate_logits += (scores,)

View File

@ -803,6 +803,10 @@ class DFineDecoder(RTDetrDecoder):
if self.class_embed is not None and (self.training or i == self.eval_idx):
scores = self.class_embed[i](hidden_states)
# Add initial logits and reference points with pre-bbox head
if i == 0:
intermediate_logits += (scores,)
intermediate_reference_points += (new_reference_points,)
# Lqe does not affect the performance here.
scores = self.lqe_layers[i](scores, pred_corners)
intermediate_logits += (scores,)