mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-24 14:58:56 +06:00
style
This commit is contained in:
parent
c2b5b29a8d
commit
55cbb2a505
@ -767,12 +767,12 @@ if TYPE_CHECKING:
|
|||||||
EetqConfig,
|
EetqConfig,
|
||||||
FbgemmFp8Config,
|
FbgemmFp8Config,
|
||||||
FineGrainedFP8Config,
|
FineGrainedFP8Config,
|
||||||
|
FPQuantConfig,
|
||||||
GPTQConfig,
|
GPTQConfig,
|
||||||
HiggsConfig,
|
HiggsConfig,
|
||||||
HqqConfig,
|
HqqConfig,
|
||||||
QuantoConfig,
|
QuantoConfig,
|
||||||
QuarkConfig,
|
QuarkConfig,
|
||||||
FPQuantConfig,
|
|
||||||
SpQRConfig,
|
SpQRConfig,
|
||||||
TorchAoConfig,
|
TorchAoConfig,
|
||||||
VptqConfig,
|
VptqConfig,
|
||||||
|
@ -27,6 +27,7 @@ from ..utils.quantization_config import (
|
|||||||
EetqConfig,
|
EetqConfig,
|
||||||
FbgemmFp8Config,
|
FbgemmFp8Config,
|
||||||
FineGrainedFP8Config,
|
FineGrainedFP8Config,
|
||||||
|
FPQuantConfig,
|
||||||
GPTQConfig,
|
GPTQConfig,
|
||||||
HiggsConfig,
|
HiggsConfig,
|
||||||
HqqConfig,
|
HqqConfig,
|
||||||
@ -34,7 +35,6 @@ from ..utils.quantization_config import (
|
|||||||
QuantizationMethod,
|
QuantizationMethod,
|
||||||
QuantoConfig,
|
QuantoConfig,
|
||||||
QuarkConfig,
|
QuarkConfig,
|
||||||
FPQuantConfig,
|
|
||||||
SpQRConfig,
|
SpQRConfig,
|
||||||
TorchAoConfig,
|
TorchAoConfig,
|
||||||
VptqConfig,
|
VptqConfig,
|
||||||
@ -50,12 +50,12 @@ from .quantizer_compressed_tensors import CompressedTensorsHfQuantizer
|
|||||||
from .quantizer_eetq import EetqHfQuantizer
|
from .quantizer_eetq import EetqHfQuantizer
|
||||||
from .quantizer_fbgemm_fp8 import FbgemmFp8HfQuantizer
|
from .quantizer_fbgemm_fp8 import FbgemmFp8HfQuantizer
|
||||||
from .quantizer_finegrained_fp8 import FineGrainedFP8HfQuantizer
|
from .quantizer_finegrained_fp8 import FineGrainedFP8HfQuantizer
|
||||||
|
from .quantizer_fp_quant import FPQuantHfQuantizer
|
||||||
from .quantizer_gptq import GptqHfQuantizer
|
from .quantizer_gptq import GptqHfQuantizer
|
||||||
from .quantizer_higgs import HiggsHfQuantizer
|
from .quantizer_higgs import HiggsHfQuantizer
|
||||||
from .quantizer_hqq import HqqHfQuantizer
|
from .quantizer_hqq import HqqHfQuantizer
|
||||||
from .quantizer_quanto import QuantoHfQuantizer
|
from .quantizer_quanto import QuantoHfQuantizer
|
||||||
from .quantizer_quark import QuarkHfQuantizer
|
from .quantizer_quark import QuarkHfQuantizer
|
||||||
from .quantizer_fp_quant import FPQuantHfQuantizer
|
|
||||||
from .quantizer_spqr import SpQRHfQuantizer
|
from .quantizer_spqr import SpQRHfQuantizer
|
||||||
from .quantizer_torchao import TorchAoHfQuantizer
|
from .quantizer_torchao import TorchAoHfQuantizer
|
||||||
from .quantizer_vptq import VptqHfQuantizer
|
from .quantizer_vptq import VptqHfQuantizer
|
||||||
|
@ -13,7 +13,6 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
||||||
|
|
||||||
from ..utils.logging import tqdm
|
|
||||||
from .base import HfQuantizer
|
from .base import HfQuantizer
|
||||||
from .quantizers_utils import get_module_from_name
|
from .quantizers_utils import get_module_from_name
|
||||||
|
|
||||||
@ -21,7 +20,7 @@ from .quantizers_utils import get_module_from_name
|
|||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from ..modeling_utils import PreTrainedModel
|
from ..modeling_utils import PreTrainedModel
|
||||||
|
|
||||||
from ..utils import is_fp_quant_available, is_fp_quant_available, is_qutlass_available, is_torch_available, logging
|
from ..utils import is_fp_quant_available, is_qutlass_available, is_torch_available, logging
|
||||||
from ..utils.quantization_config import QuantizationConfigMixin
|
from ..utils.quantization_config import QuantizationConfigMixin
|
||||||
|
|
||||||
|
|
||||||
@ -126,7 +125,9 @@ class FPQuantHfQuantizer(HfQuantizer):
|
|||||||
def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
|
def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
|
||||||
from fp_quant import FPQuantLinear
|
from fp_quant import FPQuantLinear
|
||||||
|
|
||||||
fp_quant_modules = {name: module for name, module in model.named_modules() if isinstance(module, FPQuantLinear)}
|
fp_quant_modules = {
|
||||||
|
name: module for name, module in model.named_modules() if isinstance(module, FPQuantLinear)
|
||||||
|
}
|
||||||
for name, module in fp_quant_modules.items():
|
for name, module in fp_quant_modules.items():
|
||||||
if not self.quantization_config.store_master_weights and module.weight is not None:
|
if not self.quantization_config.store_master_weights and module.weight is not None:
|
||||||
module.weight = None
|
module.weight = None
|
||||||
|
@ -155,6 +155,7 @@ from .import_utils import (
|
|||||||
is_flash_attn_greater_or_equal_2_10,
|
is_flash_attn_greater_or_equal_2_10,
|
||||||
is_flax_available,
|
is_flax_available,
|
||||||
is_flute_available,
|
is_flute_available,
|
||||||
|
is_fp_quant_available,
|
||||||
is_fsdp_available,
|
is_fsdp_available,
|
||||||
is_ftfy_available,
|
is_ftfy_available,
|
||||||
is_g2p_en_available,
|
is_g2p_en_available,
|
||||||
@ -199,7 +200,6 @@ from .import_utils import (
|
|||||||
is_pytest_available,
|
is_pytest_available,
|
||||||
is_pytorch_quantization_available,
|
is_pytorch_quantization_available,
|
||||||
is_quark_available,
|
is_quark_available,
|
||||||
is_fp_quant_available,
|
|
||||||
is_qutlass_available,
|
is_qutlass_available,
|
||||||
is_rich_available,
|
is_rich_available,
|
||||||
is_rjieba_available,
|
is_rjieba_available,
|
||||||
|
Loading…
Reference in New Issue
Block a user