[Mask2Former] Add doc tests (#21232)
* Add doc tests
* Add OneFormer resources
* Fix merge
* Fix style

Co-authored-by: Niels Rogge <nielsrogge@Nielss-MacBook-Pro.local>
This commit is contained in:
parent 99e7905422
commit f83135eb76
@@ -25,6 +25,10 @@ Tips:
- Mask2Former uses the same preprocessing and postprocessing steps as [MaskFormer](maskformer). Use [`Mask2FormerImageProcessor`] or [`AutoImageProcessor`] to prepare images and optional targets for the model.
- To get the final segmentation, depending on the task, you can call [`~Mask2FormerImageProcessor.post_process_semantic_segmentation`], [`~Mask2FormerImageProcessor.post_process_instance_segmentation`], or [`~Mask2FormerImageProcessor.post_process_panoptic_segmentation`]. All three tasks can be solved from the same [`Mask2FormerForUniversalSegmentation`] output; panoptic segmentation accepts an optional `label_ids_to_fuse` argument to fuse instances of the target object(s) (e.g. sky) together, as sketched below.
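The two tips above compress into a single pattern: one checkpoint, one forward pass, and the task is decided purely by the post-processing call. A minimal sketch, assuming the `facebook/mask2former-swin-small-coco-instance` checkpoint used in the examples further below:

```python
import requests
import torch
from PIL import Image
from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation

# prepare images with the image processor, run the model once
image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-coco-instance")
model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-small-coco-instance")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = image_processor(image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# one output, three tasks: pick the matching post-processing call
target_sizes = [image.size[::-1]]  # PIL gives (width, height); the processor expects (height, width)
semantic_map = image_processor.post_process_semantic_segmentation(outputs, target_sizes=target_sizes)[0]
instance = image_processor.post_process_instance_segmentation(outputs, target_sizes=target_sizes)[0]
panoptic = image_processor.post_process_panoptic_segmentation(outputs, target_sizes=target_sizes)[0]
```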

<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/mask2former_architecture.jpg" alt="drawing" width="600"/>

<small> Mask2Former architecture. Taken from the <a href="https://arxiv.org/abs/2112.01527">original paper.</a> </small>

This model was contributed by [Shivalika Singh](https://huggingface.co/shivi) and [Alara Dirik](https://huggingface.co/adirik). The original code can be found [here](https://github.com/facebookresearch/Mask2Former).

## Resources
@@ -37,6 +37,15 @@ The figure below illustrates the architecture of OneFormer. Taken from the [orig

This model was contributed by [Jitesh Jain](https://huggingface.co/praeclarumjj3). The original code can be found [here](https://github.com/SHI-Labs/OneFormer).

## Resources

A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with OneFormer.

- Demo notebooks regarding inference + fine-tuning on custom data can be found [here](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/OneFormer).

If you're interested in submitting a resource to be included here, please feel free to open a Pull Request and we will review it.
The resource should ideally demonstrate something new instead of duplicating an existing resource.

## OneFormer specific outputs

[[autodoc]] models.oneformer.modeling_oneformer.OneFormerModelOutput
@@ -2238,23 +2238,24 @@ class Mask2FormerModel(Mask2FormerPreTrainedModel):
>>> import torch
>>> from PIL import Image
>>> import requests
>>> from transformers import AutoImageProcessor, Mask2FormerModel

- >>> # download texting image
+ >>> # load image
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

- >>> # Load image preprocessor and Mask2FormerModel trained on ADE20K instance segmentation dataset
- >>> image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-ade-instance")
- >>> model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-small-ade-instance")
+ >>> # load image preprocessor and Mask2FormerModel trained on COCO instance segmentation dataset
+ >>> image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-coco-instance")
+ >>> model = Mask2FormerModel.from_pretrained("facebook/mask2former-swin-small-coco-instance")
>>> inputs = image_processor(image, return_tensors="pt")

>>> # forward pass
>>> with torch.no_grad():
...     outputs = model(**inputs)

>>> # model outputs last hidden states of shape (batch_size, num_queries, hidden_size)
>>> print(outputs.transformer_decoder_last_hidden_state.shape)
torch.Size([1, 100, 256])
```
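Beyond the transformer decoder state printed above, the model output also carries the backbone and pixel decoder feature maps; a minimal sketch, assuming the standard `Mask2FormerModelOutput` fields:

```python
>>> # sketch (assumed Mask2FormerModelOutput fields): the backbone and pixel
>>> # decoder states are 4D feature maps of shape (batch, channels, height, width)
>>> encoder_features = outputs.encoder_last_hidden_state
>>> pixel_decoder_features = outputs.pixel_decoder_last_hidden_state
```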
"""

if pixel_values is None:
    raise ValueError("You have to specify pixel_values")

output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
    output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -2387,15 +2388,51 @@ class Mask2FormerForUniversalSegmentation(Mask2FormerPreTrainedModel):
`Mask2FormerUniversalSegmentationOutput`

Examples:

Instance segmentation example:

```python
>>> from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation
>>> from PIL import Image
>>> import requests
>>> import torch

- >>> # Load Mask2Former trained on ADE20K panoptic segmentation dataset
- >>> image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-ade-panoptic")
- >>> model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-small-ade-panoptic")
+ >>> # Load Mask2Former trained on COCO instance segmentation dataset
+ >>> image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-coco-instance")
+ >>> model = Mask2FormerForUniversalSegmentation.from_pretrained(
+ ...     "facebook/mask2former-swin-small-coco-instance"
+ ... )

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = image_processor(image, return_tensors="pt")

>>> with torch.no_grad():
...     outputs = model(**inputs)

>>> # Model predicts class_queries_logits of shape `(batch_size, num_queries, num_classes + 1)`
>>> # and masks_queries_logits of shape `(batch_size, num_queries, height, width)`
>>> class_queries_logits = outputs.class_queries_logits
>>> masks_queries_logits = outputs.masks_queries_logits

>>> # Perform post-processing to get instance segmentation map
>>> pred_instance_map = image_processor.post_process_instance_segmentation(
...     outputs, target_sizes=[image.size[::-1]]
... )[0]["segmentation"]
>>> print(pred_instance_map.shape)
torch.Size([480, 640])
```
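The instance call above keeps only the segmentation map; the same returned dict also exposes per-instance metadata. A minimal sketch, assuming the `segments_info` field and its usual keys:

```python
>>> # sketch: one segments_info entry per detected instance, carrying the id used
>>> # in the map, the predicted class id, and the detection score (assumed keys)
>>> segments_info = image_processor.post_process_instance_segmentation(
...     outputs, target_sizes=[image.size[::-1]]
... )[0]["segments_info"]
>>> labels = [model.config.id2label[info["label_id"]] for info in segments_info]
```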

Semantic segmentation example:
```python
>>> from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation
>>> from PIL import Image
>>> import requests
>>> import torch

>>> # Load Mask2Former trained on ADE20K semantic segmentation dataset
>>> image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-ade-semantic")
>>> model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-small-ade-semantic")

>>> url = (
...     "https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000001.jpg"
@@ -2411,16 +2448,46 @@ class Mask2FormerForUniversalSegmentation(Mask2FormerPreTrainedModel):
>>> class_queries_logits = outputs.class_queries_logits
>>> masks_queries_logits = outputs.masks_queries_logits

- >>> # Perform post-processing to get semantic, instance or panoptic segmentation maps
+ >>> # Perform post-processing to get semantic segmentation map
>>> pred_semantic_map = image_processor.post_process_semantic_segmentation(
...     outputs, target_sizes=[image.size[::-1]]
... )[0]
- >>> pred_instance_map = image_processor.post_process_instance_segmentation(
- ...     outputs, target_sizes=[image.size[::-1]]
- ... )[0]["segmentation"]
+ >>> print(pred_semantic_map.shape)
+ torch.Size([512, 683])
```
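The semantic map is a `(height, width)` tensor of per-pixel class ids; a small sketch, assuming the checkpoint ships the usual `id2label` metadata, for turning those ids into class names:

```python
>>> # sketch: recover human-readable labels from the per-pixel class ids
>>> # (assumes model.config.id2label covers every id present in the map)
>>> present_ids = pred_semantic_map.unique().tolist()
>>> present_labels = [model.config.id2label[int(i)] for i in present_ids]
```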

Panoptic segmentation example:

```python
>>> from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation
>>> from PIL import Image
>>> import requests
>>> import torch

>>> # Load Mask2Former trained on Cityscapes panoptic segmentation dataset
>>> image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-cityscapes-panoptic")
>>> model = Mask2FormerForUniversalSegmentation.from_pretrained(
...     "facebook/mask2former-swin-small-cityscapes-panoptic"
... )

>>> url = "https://cdn-media.huggingface.co/Inference-API/Sample-results-on-the-Cityscapes-dataset-The-above-images-show-how-our-method-can-handle.png"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = image_processor(image, return_tensors="pt")

>>> with torch.no_grad():
...     outputs = model(**inputs)

>>> # Model predicts class_queries_logits of shape `(batch_size, num_queries, num_classes + 1)`
>>> # and masks_queries_logits of shape `(batch_size, num_queries, height, width)`
>>> class_queries_logits = outputs.class_queries_logits
>>> masks_queries_logits = outputs.masks_queries_logits

>>> # Perform post-processing to get panoptic segmentation map
>>> pred_panoptic_map = image_processor.post_process_panoptic_segmentation(
...     outputs, target_sizes=[image.size[::-1]]
... )[0]["segmentation"]
>>> print(pred_panoptic_map.shape)
torch.Size([338, 676])
```
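The optional `label_ids_to_fuse` argument mentioned in the docs can collapse all instances of a class into one panoptic segment; a minimal sketch, assuming a `"sky"` entry exists in this checkpoint's `id2label` mapping:

```python
>>> # sketch: fuse every "sky" instance into a single segment (the presence of a
>>> # "sky" label in this checkpoint's id2label mapping is an assumption)
>>> sky_ids = {i for i, name in model.config.id2label.items() if name == "sky"}
>>> fused = image_processor.post_process_panoptic_segmentation(
...     outputs, label_ids_to_fuse=sky_ids, target_sizes=[image.size[::-1]]
... )[0]
>>> num_segments = len(fused["segments_info"])  # one entry per (possibly fused) segment
```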
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -108,6 +108,8 @@ src/transformers/models/longformer/modeling_tf_longformer.py
src/transformers/models/longt5/modeling_longt5.py
src/transformers/models/marian/modeling_marian.py
src/transformers/models/markuplm/modeling_markuplm.py
+ src/transformers/models/mask2former/configuration_mask2former.py
+ src/transformers/models/mask2former/modeling_mask2former.py
src/transformers/models/maskformer/configuration_maskformer.py
src/transformers/models/maskformer/modeling_maskformer.py
src/transformers/models/mbart/configuration_mbart.py