mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 21:00:08 +06:00
Docs: fix code formatting in torchao docs (#38504)
This commit is contained in:
parent
6c5d4b1dd2
commit
1285aec4cc
@ -65,13 +65,14 @@ pip install --upgrade torchao transformers
|
|||||||
</hfoption>
|
</hfoption>
|
||||||
<hfoption id="PyTorch Index">
|
<hfoption id="PyTorch Index">
|
||||||
Stable Release from the PyTorch index
|
Stable Release from the PyTorch index
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install torchao --index-url https://download.pytorch.org/whl/cu126 # options are cpu/cu118/cu126/cu128
|
pip install torchao --index-url https://download.pytorch.org/whl/cu126 # options are cpu/cu118/cu126/cu128
|
||||||
```
|
```
|
||||||
</hfoption>
|
</hfoption>
|
||||||
</hfoptions>
|
</hfoptions>
|
||||||
|
|
||||||
If your torcha version is below 0.10.0, you need to upgrade it, please refer to the [deprecation notice](#deprecation-notice) for more details.
|
If your torchao version is below 0.10.0, you need to upgrade it; please refer to the [deprecation notice](#deprecation-notice) for more details.
|
||||||
|
|
||||||
## Quantization examples
|
## Quantization examples
|
||||||
|
|
||||||
@ -88,6 +89,7 @@ We'll show examples for recommended quantization methods based on hardwares, e.g
|
|||||||
### H100 GPU
|
### H100 GPU
|
||||||
<hfoptions id="examples-H100-GPU">
|
<hfoptions id="examples-H100-GPU">
|
||||||
<hfoption id="float8-dynamic-and-weight-only">
|
<hfoption id="float8-dynamic-and-weight-only">
|
||||||
|
|
||||||
```py
|
```py
|
||||||
import torch
|
import torch
|
||||||
from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer
|
from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer
|
||||||
@ -148,6 +150,7 @@ print(tokenizer.decode(output[0], skip_special_tokens=True))
|
|||||||
### A100 GPU
|
### A100 GPU
|
||||||
<hfoptions id="examples-A100-GPU">
|
<hfoptions id="examples-A100-GPU">
|
||||||
<hfoption id="int8-dynamic-and-weight-only">
|
<hfoption id="int8-dynamic-and-weight-only">
|
||||||
|
|
||||||
```py
|
```py
|
||||||
import torch
|
import torch
|
||||||
from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer
|
from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer
|
||||||
@ -215,6 +218,7 @@ print(tokenizer.decode(output[0], skip_special_tokens=True))
|
|||||||
### CPU
|
### CPU
|
||||||
<hfoptions id="examples-CPU">
|
<hfoptions id="examples-CPU">
|
||||||
<hfoption id="int8-dynamic-and-weight-only">
|
<hfoption id="int8-dynamic-and-weight-only">
|
||||||
|
|
||||||
```py
|
```py
|
||||||
import torch
|
import torch
|
||||||
from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer
|
from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer
|
||||||
@ -385,6 +389,7 @@ To avoid arbitrary user code execution, torchao sets `weights_only=True` in [tor
|
|||||||
|
|
||||||
<hfoptions id="serialization-examples">
|
<hfoptions id="serialization-examples">
|
||||||
<hfoption id="save-locally">
|
<hfoption id="save-locally">
|
||||||
|
|
||||||
```py
|
```py
|
||||||
# don't serialize model with Safetensors
|
# don't serialize model with Safetensors
|
||||||
output_dir = "llama3-8b-int4wo-128"
|
output_dir = "llama3-8b-int4wo-128"
|
||||||
@ -392,6 +397,7 @@ quantized_model.save_pretrained("llama3-8b-int4wo-128", safe_serialization=False
|
|||||||
```
|
```
|
||||||
</hfoption>
|
</hfoption>
|
||||||
<hfoption id="push-to-huggingface-hub">
|
<hfoption id="push-to-huggingface-hub">
|
||||||
|
|
||||||
```py
|
```py
|
||||||
# don't serialize model with Safetensors
|
# don't serialize model with Safetensors
|
||||||
USER_ID = "your_huggingface_user_id"
|
USER_ID = "your_huggingface_user_id"
|
||||||
|
Loading…
Reference in New Issue
Block a user