Fix red CI: benchmark script (#34351)

* don't trigger always

* fix

* oups

* update

* ??

* ?

* aie
This commit is contained in:
Arthur 2024-10-23 18:33:52 +02:00 committed by GitHub
parent c42b3223db
commit e50bf61dec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 86 additions and 8 deletions

View File

@ -18,21 +18,17 @@ jobs:
name: Benchmark
runs-on:
group: aws-g5-4xlarge-cache
if: |
(github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark') )||
(github.event_name == 'push' && github.ref == 'refs/heads/main')
container:
image: huggingface/transformers-pytorch-gpu
options: --gpus all --privileged --ipc host
steps:
- name: Get repo
if: github.event_name == 'pull_request'
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
- name: Get repo
if: github.event_name == 'push'
uses: actions/checkout@v4
with:
ref: ${{ github.sha }}
ref: ${{ github.event.pull_request.head.sha || github.sha }}
- name: Install libpq-dev & psql
run: |

View File

@ -0,0 +1,82 @@
import torch
from transformers import pipeline, AutoTokenizer, AutoModel, AutoModelForMaskedLM
import time
# Ad-hoc benchmark script: prints fill-mask predictions from BERT and
# DeBERTa-v3 for one sentence, then prints the latency of repeated forward
# passes through a torch.compile'd DeBERTa-v3 and a torch.jit.trace'd DeBERTa.


def _timed(fn, *args, **kwargs):
    """Call ``fn(*args, **kwargs)``, print the elapsed seconds, return the result.

    ``time.perf_counter`` is used rather than ``time.time`` because it is
    monotonic and uses the highest-resolution clock available, so short
    intervals are not distorted by system clock adjustments.
    """
    start = time.perf_counter()
    result = fn(*args, **kwargs)
    print(time.perf_counter() - start)
    return result


test_sentence = 'Do you [MASK] the muffin man?'

# for comparison
bert = pipeline('fill-mask', model='bert-base-uncased')
print('\n'.join([d['sequence'] for d in bert(test_sentence)]))

deberta = pipeline('fill-mask', model='microsoft/deberta-v3-base', model_kwargs={"legacy": False})
print('\n'.join([d['sequence'] for d in deberta(test_sentence)]))

tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
tokenized_dict = tokenizer(
    ["Is this working",], ["Not yet",],
    return_tensors="pt"
)

# Replace the pipeline model's forward with its compiled version and time
# three consecutive calls on the same inputs. torch.compile compiles lazily,
# so the first printed number is expected to include compilation cost.
deberta.model.forward = torch.compile(deberta.model.forward)
for _ in range(3):
    _timed(deberta.model, **tokenized_dict)

# NOTE(review): the inputs come from the 'microsoft/deberta-v3-base' tokenizer
# but the model traced below is 'microsoft/deberta-base' - confirm the two
# checkpoints are meant to be mixed here (their vocabularies may differ).
model = AutoModel.from_pretrained('microsoft/deberta-base')
# Tuple outputs without hidden states, so the model can be traced.
model.config.return_dict = False
model.config.output_hidden_states = False
input_tuple = (tokenized_dict['input_ids'], tokenized_dict['attention_mask'])

# First printed number: time to trace. Next four: traced forward passes.
traced_model = _timed(torch.jit.trace, model, input_tuple)
for _ in range(4):
    _timed(traced_model, *input_tuple)

torch.jit.save(traced_model, "compiled_deberta.pt")
# my_script_module = torch.jit.script(model)