mirror of https://github.com/huggingface/transformers.git
Fix doctest for Blip2ForConditionalGeneration (#26737)
* fix * fix * fix * fix * fix * fix * fix * fix * fix --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
commit 3bc65505fc (parent e1cec43415)
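Several of the examples touched below carry a `# doctest: +IGNORE_RESULT` directive. That is not a standard doctest option; the project's doctest configuration registers it so that an example's return value (for instance from `model.to(device)`) is not compared against expected output. A minimal sketch of that mechanism, not the repository's exact code (the checker class name here is illustrative):

```python
# Minimal sketch: register a custom IGNORE_RESULT doctest option and an output
# checker that accepts any output when the flag is set on an example.
import doctest

IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")


class IgnoreResultOutputChecker(doctest.OutputChecker):
    def check_output(self, want, got, optionflags):
        if optionflags & IGNORE_RESULT:
            return True  # accept any output for examples marked +IGNORE_RESULT
        return super().check_output(want, got, optionflags)


doctest.OutputChecker = IgnoreResultOutputChecker  # install the checker globally
```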
@@ -1272,14 +1272,10 @@ class Blip2Model(Blip2PreTrainedModel):
 >>> import torch
 >>> from transformers import AutoTokenizer, Blip2Model

->>> device = "cuda" if torch.cuda.is_available() else "cpu"
+>>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")

->>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
-
->>> model.to(device)  # doctest: +IGNORE_RESULT
-
 >>> tokenizer = AutoTokenizer.from_pretrained("Salesforce/blip2-opt-2.7b")
->>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt").to(device)
+>>> inputs = tokenizer(["a photo of a cat"], padding=True, return_tensors="pt")
 >>> text_features = model.get_text_features(**inputs)
 ```"""
 output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1333,16 +1329,12 @@ class Blip2Model(Blip2PreTrainedModel):
 >>> import requests
 >>> from transformers import AutoProcessor, Blip2Model

->>> device = "cuda" if torch.cuda.is_available() else "cpu"
+>>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")

->>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
-
->>> model.to(device)  # doctest: +IGNORE_RESULT
-
 >>> processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
 >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 >>> image = Image.open(requests.get(url, stream=True).raw)
->>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
+>>> inputs = processor(images=image, return_tensors="pt")
 >>> image_outputs = model.get_image_features(**inputs)
 ```"""
 output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1381,15 +1373,12 @@ class Blip2Model(Blip2PreTrainedModel):
 >>> import requests
 >>> from transformers import Blip2Processor, Blip2Model

->>> device = "cuda" if torch.cuda.is_available() else "cpu"
-
 >>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
->>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
->>> model.to(device)  # doctest: +IGNORE_RESULT
+>>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")

 >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 >>> image = Image.open(requests.get(url, stream=True).raw)
->>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
+>>> inputs = processor(images=image, return_tensors="pt")
 >>> qformer_outputs = model.get_qformer_features(**inputs)
 ```"""
 output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
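Taken together, the three hunks above drop the fp16/`device` setup from the `Blip2Model` feature-extraction doctests so they run on CPU in default precision. Assembled into one script that mirrors the new example code (variable names like `text_inputs`/`image_inputs` are mine, everything else comes from the diff), the flow is roughly:

```python
# Consolidated sketch of the three updated Blip2Model doctest examples,
# running on CPU in default precision as the new doctests do.
import requests
from PIL import Image
from transformers import AutoProcessor, AutoTokenizer, Blip2Model

model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")
tokenizer = AutoTokenizer.from_pretrained("Salesforce/blip2-opt-2.7b")
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")

# Text branch: features from the language model.
text_inputs = tokenizer(["a photo of a cat"], padding=True, return_tensors="pt")
text_features = model.get_text_features(**text_inputs)

# Vision branch: features from the vision encoder.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
image_inputs = processor(images=image, return_tensors="pt")
image_outputs = model.get_image_features(**image_inputs)

# Q-Former branch: query outputs conditioned on the image.
qformer_outputs = model.get_qformer_features(**image_inputs)
```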
@@ -1654,34 +1643,7 @@ class Blip2ForConditionalGeneration(Blip2PreTrainedModel):

 Examples:

-Image captioning (without providing a text prompt):
+Prepare processor, model and image input

-```python
->>> from PIL import Image
->>> import requests
->>> from transformers import Blip2Processor, Blip2ForConditionalGeneration
->>> import torch
-
->>> device = "cuda" if torch.cuda.is_available() else "cpu"
-
->>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
->>> model = Blip2ForConditionalGeneration.from_pretrained(
-...     "Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16
-... )
->>> model.to(device)  # doctest: +IGNORE_RESULT
-
->>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
->>> image = Image.open(requests.get(url, stream=True).raw)
-
->>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
-
->>> generated_ids = model.generate(**inputs)
->>> generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
->>> print(generated_text)
-two cats laying on a couch
-```
-
-Visual question answering (prompt = question):
-
 ```python
 >>> from PIL import Image
@@ -1698,7 +1660,22 @@ class Blip2ForConditionalGeneration(Blip2PreTrainedModel):

 >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 >>> image = Image.open(requests.get(url, stream=True).raw)
+```

+Image captioning (without providing a text prompt):
+
+```python
+>>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
+
+>>> generated_ids = model.generate(**inputs)
+>>> generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
+>>> print(generated_text)
+two cats laying on a couch
+```
+
+Visual question answering (prompt = question):
+
+```python
 >>> prompt = "Question: how many cats are there? Answer:"
 >>> inputs = processor(images=image, text=prompt, return_tensors="pt").to(device="cuda", dtype=torch.float16)

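After this reorganisation, the `Blip2ForConditionalGeneration` docstring builds the processor, model and image once and reuses them for both tasks. Read end to end, the new example amounts to roughly the following sketch assembled from the diff (the docstring itself hard-codes `"cuda"` in the VQA step, whereas this sketch reuses `device`; float16 weights assume a CUDA device):

```python
# Consolidated sketch: shared setup, then image captioning and visual question
# answering reuse the same processor, model and image.
import requests
import torch
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"

processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16
)
model.to(device)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Image captioning: no text prompt is given.
inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
generated_ids = model.generate(**inputs)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip())

# Visual question answering: the prompt is the question.
prompt = "Question: how many cats are there? Answer:"
inputs = processor(images=image, text=prompt, return_tensors="pt").to(device, torch.float16)
generated_ids = model.generate(**inputs)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip())
```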
@@ -1712,20 +1689,10 @@ class Blip2ForConditionalGeneration(Blip2PreTrainedModel):
 This greatly reduces the amount of memory used by the model while maintaining the same performance.

 ```python
->>> from PIL import Image
->>> import requests
->>> from transformers import Blip2Processor, Blip2ForConditionalGeneration
->>> import torch
-
->>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
 >>> model = Blip2ForConditionalGeneration.from_pretrained(
-...     "Salesforce/blip2-flan-t5-xl", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.bfloat16
+...     "Salesforce/blip2-opt-2.7b", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.bfloat16
 ... )  # doctest: +IGNORE_RESULT

->>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
->>> image = Image.open(requests.get(url, stream=True).raw)
-
->>> prompt = "Question: how many cats are there? Answer:"
 >>> inputs = processor(images=image, text=prompt, return_tensors="pt").to(device="cuda", dtype=torch.bfloat16)

 >>> generated_ids = model.generate(**inputs)
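The slimmed-down 8-bit example now relies on the `processor`, `image` and `prompt` objects defined earlier in the same docstring. A self-contained version would look roughly like this (an assembled sketch, assuming a CUDA GPU with `bitsandbytes` and `accelerate` installed, which `load_in_8bit=True` requires):

```python
# Self-contained sketch of the int8 example, filling in the processor/image/prompt
# setup that the docstring defines earlier.
import requests
import torch
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.bfloat16
)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
prompt = "Question: how many cats are there? Answer:"

inputs = processor(images=image, text=prompt, return_tensors="pt").to(device="cuda", dtype=torch.bfloat16)
generated_ids = model.generate(**inputs)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip())
```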
@@ -1,4 +1,5 @@
 docs/source/en/generation_strategies.md
 docs/source/en/model_doc/ctrl.md
 docs/source/en/task_summary.md
+src/transformers/models/blip_2/modeling_blip_2.py
 src/transformers/models/ctrl/modeling_ctrl.py