mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 18:22:34 +06:00
Add training version check for AQLM quantizer. (#29142)
* training version check * warn old aqlm * aqlm 1.0.2 real * docs
This commit is contained in:
parent
ae49b218c3
commit
fc37f38915
@ -39,10 +39,14 @@ Inference support for AQLM is realised in the `aqlm` library. Make sure to insta
|
||||
pip install aqlm[gpu,cpu]
|
||||
```
|
||||
|
||||
The library provides efficient kernels for both GPU and CPU inference.
|
||||
The library provides efficient kernels for both GPU and CPU inference and training.
|
||||
|
||||
The instructions on how to quantize models yourself, as well as all the relevant code can be found in the corresponding GitHub [repository](https://github.com/Vahe1994/AQLM).
|
||||
|
||||
### PEFT
|
||||
|
||||
Starting with version `aqlm 1.0.2`, AQLM supports Parameter-Efficient Fine-Tuning in a form of [LoRA](https://huggingface.co/docs/peft/package_reference/lora) integrated into the [PEFT](https://huggingface.co/blog/peft) library.
|
||||
|
||||
### AQLM configurations
|
||||
|
||||
AQLM quantization setups vary mainly in the number of codebooks used, as well as the codebook sizes in bits. The most popular setups, as well as the inference kernels they support, are:
|
||||
|
@ -11,8 +11,11 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import importlib
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
from packaging import version
|
||||
|
||||
from .base import HfQuantizer
|
||||
|
||||
|
||||
@ -81,7 +84,14 @@ class AqlmHfQuantizer(HfQuantizer):
|
||||
|
||||
@property
def is_trainable(self, model: Optional["PreTrainedModel"] = None) -> bool:
    """Whether AQLM-quantized models can be trained (PEFT/LoRA).

    Training support was introduced in `aqlm` 1.0.2; for older installed
    versions this returns False and emits a warning telling the user how
    to upgrade.

    Args:
        model: Unused; kept for signature compatibility with other quantizers.

    Returns:
        True if the installed `aqlm` version is >= 1.0.2, False otherwise.
    """
    # Query the installed version once and compare against the first
    # release that shipped training kernels.
    installed_version = importlib.metadata.version("aqlm")
    aqlm_supports_training = version.parse(installed_version) >= version.parse("1.0.2")
    if aqlm_supports_training:
        return True
    # `logger.warning` (not the deprecated `logger.warn` alias).
    logger.warning(
        f"Currently installed `aqlm` version ({installed_version}) doesn't support training. If you wish to train a quantized model, please update `aqlm` with `pip install aqlm>=1.0.2`"
    )
    return False
|
||||
|
||||
@property
|
||||
def is_serializable(self):
|
||||
|
Loading…
Reference in New Issue
Block a user