Fix ONNX test_quantize unittest (#6716)

Funtowicz Morgan 2020-08-25 19:24:40 +02:00 committed by GitHub
parent 074340339a
commit ac9702c284
4 changed files with 22 additions and 24 deletions


@@ -45,7 +45,7 @@ jobs:
           source .env/bin/activate
           pip install --upgrade pip
           pip install torch!=1.6.0
-          pip install .[sklearn,testing]
+          pip install .[sklearn,testing,onnxruntime]
       - name: Are GPUs recognized by our DL frameworks
         run: |


@@ -42,7 +42,7 @@ jobs:
           source .env/bin/activate
           pip install --upgrade pip
           pip install torch!=1.6.0
-          pip install .[sklearn,testing]
+          pip install .[sklearn,testing,onnxruntime]
       - name: Are GPUs recognized by our DL frameworks
         run: |
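
Both workflow hunks above make the same one-line change: the CI job now installs the package with the new onnxruntime extra, so the imports inside quantize() (see the last hunk below) succeed in CI. As a minimal sanity check, illustrative only and not part of this commit, one could confirm inside the CI virtualenv that the extra resolved to a runtime satisfying the >=1.4.0 pin from setup.py:

    # Illustrative only, not part of this commit: confirm the "onnxruntime"
    # extra installed a runtime satisfying the >=1.4.0 pin from setup.py.
    import onnxruntime

    major, minor = (int(v) for v in onnxruntime.__version__.split(".")[:2])
    assert (major, minor) >= (1, 4), onnxruntime.__version__
    print(f"onnxruntime {onnxruntime.__version__} is available")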


@@ -74,16 +74,17 @@ extras["tf"] = [
     # "onnxconverter-common",
     # "keras2onnx"
     "onnxconverter-common @ git+git://github.com/microsoft/onnxconverter-common.git@f64ca15989b6dc95a1f3507ff6e4c395ba12dff5#egg=onnxconverter-common",
-    "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx"
+    "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx",
 ]
 extras["tf-cpu"] = [
     "tensorflow-cpu",
     # "onnxconverter-common",
     # "keras2onnx"
     "onnxconverter-common @ git+git://github.com/microsoft/onnxconverter-common.git@f64ca15989b6dc95a1f3507ff6e4c395ba12dff5#egg=onnxconverter-common",
-    "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx"
+    "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx",
 ]
 extras["torch"] = ["torch"]
+extras["onnxruntime"] = ["onnxruntime>=1.4.0", "onnxruntime-tools>=1.4.2"]
 extras["serving"] = ["pydantic", "uvicorn", "fastapi", "starlette"]
 extras["all"] = extras["serving"] + ["tensorflow", "torch"]


@@ -364,32 +364,29 @@ def quantize(onnx_model_path: Path) -> Path:
     Returns: The Path generated for the quantized
     """
-    try:
-        import onnx
-        from onnxruntime.quantization import QuantizationMode, quantize
+    import onnx
+    from onnxruntime.quantization import QuantizationMode, quantize
 
-        onnx_model = onnx.load(onnx_model_path.as_posix())
+    onnx_model = onnx.load(onnx_model_path.as_posix())
 
-        # Discussed with @yufenglee from ONNX runtime, this will be address in the next release of onnxruntime
-        print(
-            "As of onnxruntime 1.4.0, models larger than 2GB will fail to quantize due to protobuf constraint.\n"
-            "This limitation will be removed in the next release of onnxruntime."
-        )
+    # Discussed with @yufenglee from ONNX runtime, this will be address in the next release of onnxruntime
+    print(
+        "As of onnxruntime 1.4.0, models larger than 2GB will fail to quantize due to protobuf constraint.\n"
+        "This limitation will be removed in the next release of onnxruntime."
+    )
 
-        quantized_model = quantize(
-            model=onnx_model, quantization_mode=QuantizationMode.IntegerOps, force_fusions=True, symmetric_weight=True,
-        )
+    quantized_model = quantize(
+        model=onnx_model, quantization_mode=QuantizationMode.IntegerOps, force_fusions=True, symmetric_weight=True,
+    )
 
-        # Append "-quantized" at the end of the model's name
-        quantized_model_path = generate_identified_filename(onnx_model_path, "-quantized")
+    # Append "-quantized" at the end of the model's name
+    quantized_model_path = generate_identified_filename(onnx_model_path, "-quantized")
 
-        # Save model
-        print(f"Quantized model has been written at {quantized_model_path}: \N{heavy check mark}")
-        onnx.save_model(quantized_model, quantized_model_path.as_posix())
+    # Save model
+    print(f"Quantized model has been written at {quantized_model_path}: \N{heavy check mark}")
+    onnx.save_model(quantized_model, quantized_model_path.as_posix())
 
-        return quantized_model_path
-    except Exception as ie:
-        print(f"Error while quantizing the model:\n{str(ie)}")
+    return quantized_model_path
 
 
 def verify(path: Path):
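
The quantize() hunk is the actual unittest fix: the old try/except printed and swallowed every error, including the ImportError raised when onnxruntime was missing, so the function fell off the end and returned None to test_quantize. De-indenting the body lets exceptions propagate to the caller. A minimal usage sketch of the patched function (the .onnx path is illustrative, assumed to come from a prior export):

    # Minimal usage sketch; the .onnx path below is an assumption, produced
    # by an earlier export step, not something this commit creates.
    from pathlib import Path

    from transformers.convert_graph_to_onnx import quantize

    quantized_path = quantize(Path("onnx/bert-base-cased.onnx"))
    # generate_identified_filename appends "-quantized" before the suffix:
    print(quantized_path)  # onnx/bert-base-cased-quantized.onnx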