mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 12:50:06 +06:00
Fix incorrect bbox_embed initialization when decoder_bbox_embed_share=False in GroundingDINO (#38238)
* Use a shallow copy in GroundingDINO — fixes #37333 * Remove an empty line in the GroundingDinoForObjectDetection class * Translate comments in the GroundingDinoForObjectDetection class from French to English
This commit is contained in:
parent
d0fccbf7ef
commit
98568d1e25
@@ -2454,16 +2454,25 @@ class GroundingDinoForObjectDetection(GroundingDinoPreTrainedModel):
         _class_embed = GroundingDinoContrastiveEmbedding(config)

         if config.decoder_bbox_embed_share:
-            _bbox_embed = GroundingDinoMLPPredictionHead(
+            # a single shared instance
+            shared_head = GroundingDinoMLPPredictionHead(
                 input_dim=config.d_model, hidden_dim=config.d_model, output_dim=4, num_layers=3
             )
-            self.bbox_embed = nn.ModuleList([_bbox_embed for _ in range(config.decoder_layers)])
+            self.bbox_embed = nn.ModuleList([shared_head] * config.decoder_layers)
         else:
-            for _ in range(config.decoder_layers):
-                _bbox_embed = GroundingDinoMLPPredictionHead(
-                    input_dim=config.d_model, hidden_dim=config.d_model, output_dim=4, num_layers=3
-                )
-            self.bbox_embed = nn.ModuleList([_bbox_embed for _ in range(config.decoder_layers)])
+            # each layer has its own head (implicit deep copy through a new instance)
+            self.bbox_embed = nn.ModuleList(
+                [
+                    GroundingDinoMLPPredictionHead(
+                        input_dim=config.d_model,
+                        hidden_dim=config.d_model,
+                        output_dim=4,
+                        num_layers=3,
+                    )
+                    for _ in range(config.decoder_layers)
+                ]
+            )

         self.class_embed = nn.ModuleList([_class_embed for _ in range(config.decoder_layers)])
         # hack for box-refinement
         self.model.decoder.bbox_embed = self.bbox_embed
Loading…
Reference in New Issue
Block a user