mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
Add vision models to doc tests (#15905)
* Add vision models to doc tests

* Apply suggestions from code review

* Add more models

Co-authored-by: Niels Rogge <nielsrogge@Nielss-MacBook-Pro.local>
This commit is contained in:
parent
742273a52a
commit
9251427c38
@ -754,6 +754,7 @@ class BeitForMaskedImageModeling(BeitPreTrainedModel):
|
||||
|
||||
```python
|
||||
>>> from transformers import BeitFeatureExtractor, BeitForMaskedImageModeling
|
||||
>>> import torch
|
||||
>>> from PIL import Image
|
||||
>>> import requests
|
||||
|
||||
@ -763,9 +764,15 @@ class BeitForMaskedImageModeling(BeitPreTrainedModel):
|
||||
>>> feature_extractor = BeitFeatureExtractor.from_pretrained("microsoft/beit-base-patch16-224-pt22k")
|
||||
>>> model = BeitForMaskedImageModeling.from_pretrained("microsoft/beit-base-patch16-224-pt22k")
|
||||
|
||||
>>> inputs = feature_extractor(images=image, return_tensors="pt")
|
||||
>>> outputs = model(**inputs)
|
||||
>>> logits = outputs.logits
|
||||
>>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
|
||||
>>> pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
|
||||
>>> # create random boolean mask of shape (batch_size, num_patches)
|
||||
>>> bool_masked_pos = torch.randint(low=0, high=2, size=(1, num_patches)).bool()
|
||||
|
||||
>>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos)
|
||||
>>> loss, logits = outputs.loss, outputs.logits
|
||||
>>> list(logits.shape)
|
||||
[1, 196, 8192]
|
||||
```"""
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
|
@ -587,19 +587,25 @@ class DeiTForMaskedImageModeling(DeiTPreTrainedModel):
|
||||
Examples:
|
||||
```python
|
||||
>>> from transformers import DeiTFeatureExtractor, DeiTForMaskedImageModeling
|
||||
>>> import torch
|
||||
>>> from PIL import Image
|
||||
>>> import requests
|
||||
|
||||
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
|
||||
>>> image = Image.open(requests.get(url, stream=True).raw)
|
||||
|
||||
-        >>> feature_extractor = DeiTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
-        >>> model = DeiTForMaskedImageModeling.from_pretrained("google/vit-base-patch16-224-in21k")
+        >>> feature_extractor = DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
+        >>> model = DeiTForMaskedImageModeling.from_pretrained("facebook/deit-base-distilled-patch16-224")
|
||||
|
||||
>>> inputs = feature_extractor(images=image, return_tensors="pt")
|
||||
>>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
|
||||
>>> pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
|
||||
>>> # create random boolean mask of shape (batch_size, num_patches)
|
||||
>>> bool_masked_pos = torch.randint(low=0, high=2, size=(1, num_patches)).bool()
|
||||
|
||||
>>> outputs = model(**inputs)
|
||||
>>> last_hidden_states = outputs.last_hidden_state
|
||||
>>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos)
|
||||
>>> loss, reconstructed_pixel_values = outputs.loss, outputs.logits
|
||||
>>> list(reconstructed_pixel_values.shape)
|
||||
[1, 3, 224, 224]
|
||||
```"""
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
|
@ -810,6 +810,7 @@ class SwinForMaskedImageModeling(SwinPreTrainedModel):
|
||||
Examples:
|
||||
```python
|
||||
>>> from transformers import AutoFeatureExtractor, SwinForMaskedImageModeling
|
||||
>>> import torch
|
||||
>>> from PIL import Image
|
||||
>>> import requests
|
||||
|
||||
@ -819,10 +820,15 @@ class SwinForMaskedImageModeling(SwinPreTrainedModel):
|
||||
>>> feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/swin-tiny-patch4-window7-224")
|
||||
>>> model = SwinForMaskedImageModeling.from_pretrained("microsoft/swin-tiny-patch4-window7-224")
|
||||
|
||||
>>> inputs = feature_extractor(images=image, return_tensors="pt")
|
||||
>>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
|
||||
>>> pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
|
||||
>>> # create random boolean mask of shape (batch_size, num_patches)
|
||||
>>> bool_masked_pos = torch.randint(low=0, high=2, size=(1, num_patches)).bool()
|
||||
|
||||
>>> outputs = model(**inputs)
|
||||
>>> last_hidden_states = outputs.last_hidden_state
|
||||
>>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos)
|
||||
>>> loss, reconstructed_pixel_values = outputs.loss, outputs.logits
|
||||
>>> list(reconstructed_pixel_values.shape)
|
||||
[1, 3, 224, 224]
|
||||
```"""
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
|
@ -624,6 +624,7 @@ class ViTForMaskedImageModeling(ViTPreTrainedModel):
|
||||
Examples:
|
||||
```python
|
||||
>>> from transformers import ViTFeatureExtractor, ViTForMaskedImageModeling
|
||||
>>> import torch
|
||||
>>> from PIL import Image
|
||||
>>> import requests
|
||||
|
||||
@ -633,10 +634,15 @@ class ViTForMaskedImageModeling(ViTPreTrainedModel):
|
||||
>>> feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
|
||||
>>> model = ViTForMaskedImageModeling.from_pretrained("google/vit-base-patch16-224-in21k")
|
||||
|
||||
>>> inputs = feature_extractor(images=image, return_tensors="pt")
|
||||
>>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
|
||||
>>> pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
|
||||
>>> # create random boolean mask of shape (batch_size, num_patches)
|
||||
>>> bool_masked_pos = torch.randint(low=0, high=2, size=(1, num_patches)).bool()
|
||||
|
||||
>>> outputs = model(**inputs)
|
||||
>>> last_hidden_states = outputs.last_hidden_state
|
||||
>>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos)
|
||||
>>> loss, reconstructed_pixel_values = outputs.loss, outputs.logits
|
||||
>>> list(reconstructed_pixel_values.shape)
|
||||
[1, 3, 224, 224]
|
||||
```"""
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
|
@ -9,7 +9,16 @@ src/transformers/models/sew/modeling_sew.py
|
||||
src/transformers/models/sew_d/modeling_sew_d.py
|
||||
src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py
|
||||
src/transformers/models/speech_to_text/modeling_speech_to_text.py
|
||||
-src/transformers/models/speech_encoder_decoder/modeling_speech_enocder_decoder.py
+src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py
|
||||
src/transformers/models/data2vec/modeling_data2vec_audio.py
|
||||
src/transformers/models/vit/modeling_vit.py
|
||||
src/transformers/models/beit/modeling_beit.py
|
||||
src/transformers/models/deit/modeling_deit.py
|
||||
src/transformers/models/swin/modeling_swin.py
|
||||
src/transformers/models/convnext/modeling_convnext.py
|
||||
src/transformers/models/poolformer/modeling_poolformer.py
|
||||
src/transformers/models/vit_mae/modeling_vit_mae.py
|
||||
src/transformers/models/segformer/modeling_segformer.py
|
||||
src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py
|
||||
docs/source/quicktour.mdx
|
||||
docs/source/task_summary.mdx
|
||||
docs/source/task_summary.mdx
|
Loading…
Reference in New Issue
Block a user