Merge pull request #25644 from DaniAffCH:blockwise-quantization

[GSoC] dnn: Blockwise quantization support #25644

This PR introduces blockwise quantization support in the dnn module, allowing ONNX models quantized in a blockwise fashion to be parsed. In particular, it modifies the `Quantize` and `Dequantize` operations. The related PR opencv/opencv_extra#1181 contains the test data.

Additional notes:
- The original quantization issue has been fixed. Previously, for 1-D scales and zero-points, the operation applied was $y = int8(x/s - z)$ instead of $y = int8(x/s + z)$. Note that the operation was already implemented correctly when the scale and zero-point were scalars. The previous implementation failed the ONNX test cases; now all of them pass. [Reference](https://github.com/onnx/onnx/blob/main/docs/Operators.md#QuantizeLinear)
- The function `block_repeat` broadcasts the scale and zero-point to the input shape. It repeats all the elements of a given axis n times. This generalizes the behavior of `repeat` from the core module, which is defined only for two axes and assumes the `Mat` has 2 dimensions. If appropriate and useful, `block_repeat` could be moved to the core module.
- The scale and zero-point can now be taken as layer inputs. This increases the ONNX layer coverage and enables us to run the previously disabled ONNX test cases while being fully compliant with the ONNX standard. Since they are now supported, I have enabled the test cases `test_dequantizelinear`, `test_dequantizelinear_axis`, `test_dequantizelinear_blocked`, `test_quantizelinear`, `test_quantizelinear_axis`, and `test_quantizelinear_blocked` on the CPU backend only. All of them pass successfully.
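The corrected per-element mapping described above can be sketched outside OpenCV as follows (illustrative helper names, not the PR's code; ONNX specifies round-half-to-even and saturation to the int8 range):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// Illustrative sketch (hypothetical names, not the OpenCV implementation) of
// ONNX QuantizeLinear with the corrected sign: y = saturate(round(x/s) + z).
int8_t quantizeLinear(float x, float scale, int zeroPoint)
{
    int q = (int)std::nearbyint(x / scale) + zeroPoint; // '+', not '-'
    return (int8_t)std::max(-128, std::min(127, q));    // saturate to int8
}

// Inverse mapping used by DequantizeLinear: x = (y - z) * s.
float dequantizeLinear(int8_t y, float scale, int zeroPoint)
{
    return (float)((int)y - zeroPoint) * scale;
}
```

For example, with s = 0.5 and z = 10, x = 2.0 quantizes to 14, and dequantizing 14 recovers 2.0.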
   
### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There are accuracy tests, performance tests, and test data in the opencv_extra repository, if applicable
      The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
Commit 2a333a6c86 (parent 89fff355c8) by Daniele Affinita, 2024-07-30 13:16:08 +02:00, committed by GitHub.
8 changed files with 215 additions and 46 deletions


@ -15,7 +15,10 @@ namespace dnn
static void broadcast1D2TargetMat(Mat& data, const MatShape& targetShape, int axis)
{
// The data is the 1-D scales or zeropoints.
CV_Assert(axis >= 0 && targetShape.size() > axis && data.total() == targetShape[axis]);
CV_CheckGE(axis, 0, "Quantization axis must be non-negative.");
CV_CheckGT((int)targetShape.size(), axis, "Quantization axis must be within the valid range of target shape dimensions.");
CV_CheckEQ((int)data.total(), (int)targetShape[axis], "Data total size must match the size of the specified target dimension.");
std::vector<int> broadcast_axes;
for (int i = 0; i < targetShape.size(); i++)
{
@ -35,29 +38,98 @@ static void broadcast1D2TargetMat(Mat& data, const MatShape& targetShape, int ax
}
}
static void block_repeat(InputArray src, const MatShape& srcShape, int axis, int repetitions, OutputArray dst)
{
CV_Assert(src.getObj() != dst.getObj());
CV_Check(axis, axis >= 0 && axis < src.dims(), "Axis out of range");
CV_CheckGT(repetitions, 1, "More than one repetition expected");
Mat src_mat = src.getMat();
Mat dst_mat;
if (src_mat.depth() != CV_32F)
src_mat.convertTo(src_mat, CV_32F);
MatShape sshape = srcShape;
MatShape dshape = srcShape;
size_t dtype_bytes = src_mat.elemSize();
int chunk_size = dtype_bytes;
int num_chunks = 1;
dshape[axis] *= repetitions;
for (int i = axis+1; i < sshape.size(); ++i)
chunk_size*=sshape[i];
for (int i = 0; i <= axis; ++i)
num_chunks*=sshape[i];
dst.create(dshape.size(), dshape.data(), src_mat.type());
dst_mat = dst.getMat();
CV_Assert(dst_mat.isContinuous());
CV_Assert(src_mat.isContinuous());
for (int i = 0; i < repetitions; ++i) {
size_t src_offset = 0;
size_t dst_offset = i * chunk_size;
for (int j = 0; j < num_chunks; ++j) {
memcpy(dst_mat.data + dst_offset, src_mat.data + src_offset, chunk_size);
src_offset += chunk_size;
dst_offset += chunk_size * repetitions;
}
}
}
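The chunked copy in `block_repeat` above can be sketched element-wise on a plain vector (a hypothetical stand-alone re-implementation for illustration, not the OpenCV code, which works on raw bytes of a `Mat`):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Repeat every element along `axis` of an n-D tensor `repetitions` times,
// mirroring the PR's block_repeat: chunk = elements after `axis`,
// numChunks = elements up to and including `axis`.
std::vector<float> blockRepeat(const std::vector<float>& src,
                               const std::vector<int>& shape,
                               int axis, int repetitions)
{
    size_t chunk = 1;
    for (size_t i = (size_t)axis + 1; i < shape.size(); ++i)
        chunk *= shape[i];
    size_t numChunks = 1;
    for (int i = 0; i <= axis; ++i)
        numChunks *= shape[i];

    std::vector<float> dst(src.size() * repetitions);
    for (int r = 0; r < repetitions; ++r) {
        size_t srcOff = 0, dstOff = r * chunk;
        for (size_t j = 0; j < numChunks; ++j) {
            // Copy one contiguous chunk; destination chunks for the same
            // source chunk are interleaved `repetitions` apart.
            std::copy(src.begin() + srcOff, src.begin() + srcOff + chunk,
                      dst.begin() + dstOff);
            srcOff += chunk;
            dstOff += chunk * repetitions;
        }
    }
    return dst;
}
```

With shape {2,2}, axis 1, and 2 repetitions, [[1,2],[3,4]] becomes [[1,1,2,2],[3,3,4,4]].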
template <typename T>
static void copyVecToMat(Mat& mat, const std::vector<T>& data){
float * matPtr = mat.ptr<float>(0);
const int len = data.size();
for (int i = 0; i < len; i++)
matPtr[i] = (float) data[i];
}
template <typename T>
static void broadcastBlockedMatrix(Mat& mat, const std::vector<T>& data, const MatShape& targetShape, int axis, int block_size){
CV_Check(block_size, targetShape[axis] % block_size == 0 && block_size <= targetShape[axis], "Block size must be a divisor of the target dimension size and not exceed it.");
MatShape subTargetShape(targetShape);
subTargetShape[axis] = static_cast<int>(subTargetShape[axis] / block_size);
block_repeat(data, subTargetShape, axis, block_size, mat);
}
template <typename T>
static void broadcastStandardMatrix(Mat& mat, const std::vector<T>& data, const MatShape& targetShape, int axis)
{
MatShape subTargetShape(targetShape.size(), 1);
subTargetShape[axis] = data.size();
mat.create(subTargetShape.size(), subTargetShape.data(), CV_32FC1);
copyVecToMat(mat,data);
broadcast1D2TargetMat(mat, targetShape, axis);
}
static void broadcastScaleAndZeropoint(Mat& scalesMat, Mat& zeropointsMat, const std::vector<float>& scales,
const std::vector<int>& zeropoints, const MatShape& targetShape, int axis)
const std::vector<int>& zeropoints, const MatShape& targetShape, int axis, int block_size)
{
// Broadcast the scales and zero points to the input shape.
MatShape subTargetShape(targetShape.size(), 1);
subTargetShape[axis] = scales.size();
zeropointsMat.create(subTargetShape.size(), subTargetShape.data(), CV_32FC1);
scalesMat.create(subTargetShape.size(), subTargetShape.data(), CV_32FC1);
const int len = scales.size();
// Deep copy the scales and zeropoint data and prevent the original data from being changed.
float * scalePtr = scalesMat.ptr<float>(0);
for (int i = 0; i < len; i++)
scalePtr[i] = scales[i];
float * zpPtr = zeropointsMat.ptr<float>(0);
for (int i = 0; i < len; i++)
zpPtr[i] = (float )zeropoints[i];
broadcast1D2TargetMat(scalesMat, targetShape, axis);
broadcast1D2TargetMat(zeropointsMat, targetShape, axis);
if (block_size == 0)
{
broadcastStandardMatrix(zeropointsMat, zeropoints, targetShape, axis);
broadcastStandardMatrix(scalesMat, scales, targetShape, axis);
}
else
{
broadcastBlockedMatrix(zeropointsMat, zeropoints, targetShape, axis, block_size);
broadcastBlockedMatrix(scalesMat, scales, targetShape, axis, block_size);
}
}
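As the dispatch above implies, in blockwise mode (`block_size > 0`) the scale and zero-point tensors carry the input shape with the quantized axis divided by `block_size`, and `block_repeat` then repeats each value `block_size` times along that axis. A small illustrative helper (hypothetical, not part of the PR) for the pre-broadcast shape:

```cpp
#include <cassert>
#include <vector>

// Hypothetical helper (not in the PR): shape of the scale / zero-point
// tensor for blockwise quantization -- the input shape with the quantized
// axis divided by block_size, which must evenly divide that dimension.
std::vector<int> blockedScaleShape(const std::vector<int>& inputShape,
                                   int axis, int blockSize)
{
    assert(blockSize > 0 && inputShape[axis] % blockSize == 0);
    std::vector<int> s = inputShape;
    s[axis] /= blockSize;
    return s;
}
```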
// Quantize FP32/FP16 Inputs to INT8
@ -65,13 +137,17 @@ class QuantizeLayerImpl CV_FINAL : public QuantizeLayer
{
public:
int axis;
int block_size;
bool is1D;
Mat scalesMat, zeropointsMat; // Saving the broadcasetd scales data.
Mat scalesMat, zeropointsMat; // Saving the broadcasted scales data.
bool quantParamExternal = true; // Indicates if the quantization parameters (scale and zero point) are provided as inputs to the node.
QuantizeLayerImpl(const LayerParams& params)
{
is1D = params.get<bool>("is1D", false);
axis = params.get<int>("axis", 1);
block_size = params.get<int>("block_size", 0);
if (!is1D)
{
scales.push_back(params.get<float>("scales", 1.0f));
@ -82,7 +158,7 @@ public:
DictValue paramScales = params.get("scales");
int i, n = paramScales.size();
CV_Assert(n > 0);
CV_CheckGT(n, 0, "Scale missing.");
scales.resize(n, 0.);
for (i = 0; i < n; i++)
scales[i] = paramScales.get<float>(i);
@ -108,7 +184,7 @@ public:
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1);
CV_Check(inputs.size(), inputs.size() >= 1 && inputs.size() <= 3, "Number of inputs must be between 1 and 3 inclusive.");
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
return false;
}
@ -124,7 +200,7 @@ public:
if (is1D)
{
MatShape inputShape = shape(inputs[0]);
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis);
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis, block_size);
}
}
@ -146,6 +222,39 @@ public:
return true;
}
#endif
void processInputOutput(std::vector<Mat>& inputs, std::vector<Mat>& outputs)
{
CV_Check(inputs.size(), inputs.size() >= 1 && inputs.size() <= 3, "Number of inputs must be between 1 and 3 inclusive.");
quantParamExternal &= inputs.size() > 1;
// Scale and zeropoint taken as input
if (quantParamExternal)
{
quantParamExternal = false;
scalesMat = inputs[1];
scalesMat.reshape(1, 1).copyTo(scales);
if(scalesMat.total() > 1) is1D = true;
if (inputs.size() > 2)
{
zeropointsMat = inputs[2];
CV_CheckEQ((int)zeropointsMat.total(), (int)scalesMat.total(), "Scale and zero point elements number must match.");
zeropointsMat.reshape(1, 1).copyTo(zeropoints);
}
if (is1D)
{
MatShape inputShape = shape(inputs[0]);
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis, block_size);
}
}
if (outputs[0].depth() != CV_8S)
outputs[0].convertTo(outputs[0], CV_8S);
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
@ -159,14 +268,13 @@ public:
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
if (outputs[0].depth() != CV_8S)
outputs[0].convertTo(outputs[0], CV_8S);
processInputOutput(inputs, outputs);
if (is1D)
{
Mat inputTmp;
divide(inputs[0], scalesMat, inputTmp);
subtract(inputTmp, zeropointsMat, inputTmp);
add(inputTmp, zeropointsMat, inputTmp);
inputTmp.convertTo(outputs[0], CV_8S);
}
@ -190,13 +298,16 @@ class DequantizeLayerImpl CV_FINAL : public DequantizeLayer
{
public:
int axis;
int block_size;
bool is1D;
Mat scalesMat, zeropointsMat; // Saving the broadcasted scales data.
bool quantParamExternal = true;
DequantizeLayerImpl(const LayerParams& params)
{
is1D = params.get<bool>("is1D", false);
axis = params.get<int>("axis", 1);
block_size = params.get<int>("block_size", 0);
if (!is1D)
{
@ -208,7 +319,7 @@ public:
DictValue paramScales = params.get("scales");
int i, n = paramScales.size();
CV_Assert(n > 0);
CV_CheckGT(n, 0, "Scale missing.");
scales.resize(n);
for (i = 0; i < n; i++)
scales[i] = paramScales.get<float>(i);
@ -234,7 +345,7 @@ public:
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1);
CV_Check(inputs.size(), inputs.size() >= 1 && inputs.size() <= 3, "Number of inputs must be between 1 and 3 inclusive.");
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
return false;
}
@ -250,7 +361,7 @@ public:
if (is1D)
{
MatShape inputShape = shape(inputs[0]);
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis);
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis, block_size);
}
}
@ -269,6 +380,39 @@ public:
}
#endif
void processInputOutput(std::vector<Mat>& inputs, std::vector<Mat>& outputs)
{
CV_Check(inputs.size(), inputs.size() >= 1 && inputs.size() <= 3, "Number of inputs must be between 1 and 3 inclusive.");
quantParamExternal &= inputs.size() > 1;
// Scale and zeropoint taken as input
if (quantParamExternal)
{
quantParamExternal = false;
scalesMat = inputs[1];
scalesMat.reshape(1, 1).copyTo(scales);
if(scalesMat.total() > 1) is1D = true;
if (inputs.size() > 2)
{
zeropointsMat = inputs[2];
CV_CheckEQ((int)zeropointsMat.total(), (int)scalesMat.total(), "Scale and zero point elements number must match.");
zeropointsMat.reshape(1, 1).copyTo(zeropoints);
}
if (is1D)
{
MatShape inputShape = shape(inputs[0]);
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis, block_size);
}
}
if (outputs[0].depth() != CV_32F)
outputs[0].convertTo(outputs[0], CV_32F);
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
@ -281,8 +425,7 @@ public:
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
if (outputs[0].depth() != CV_32F)
outputs[0].convertTo(outputs[0], CV_32F);
processInputOutput(inputs, outputs);
if (is1D)
{


@ -3239,6 +3239,17 @@ void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx
// or 1-D tensor (per-channel quantized).
bool is1D = false;
if (layerParams.type == "Quantize")
layerParams.set("depth", CV_8S);
else // Dequantize
layerParams.set("depth", CV_32F);
// If scale is not defined as a constant blob, it is considered an external input.
if(constBlobs.find(node_proto.input(1)) == constBlobs.end()){
addLayer(layerParams, node_proto);
return;
}
Mat scaleMat = getBlob(node_proto, 1);
if(scaleMat.total() > 1) is1D = true;
@ -3280,11 +3291,6 @@ void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx
layerParams.set("zeropoints", zeropoint);
}
if (layerParams.type == "Quantize")
layerParams.set("depth", CV_8S);
else // Dequantize
layerParams.set("depth", CV_32F);
if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) // Variable input.
{
std::vector<Mat> inputs, outputs;


@ -224,6 +224,7 @@ static const TestCase testConformanceConfig[] = {
{"test_depthtospace_example", 1, 1},
{"test_dequantizelinear", 3, 1},
{"test_dequantizelinear_axis", 3, 1},
{"test_dequantizelinear_blocked", 3, 1},
{"test_det_2d", 1, 1},
{"test_det_nd", 1, 1},
{"test_div", 2, 1},
@ -569,6 +570,7 @@ static const TestCase testConformanceConfig[] = {
{"test_qlinearmatmul_3D", 8, 1},
{"test_quantizelinear", 3, 1},
{"test_quantizelinear_axis", 3, 1},
{"test_quantizelinear_blocked", 3, 1},
{"test_range_float_type_positive_delta", 3, 1},
{"test_range_float_type_positive_delta_expanded", 3, 1},
{"test_range_int32_type_negative_delta", 3, 1},


@ -565,9 +565,11 @@ CASE(test_depthtospace_dcr_mode)
CASE(test_depthtospace_example)
// no filter
CASE(test_dequantizelinear)
// no filter
SKIP;
CASE(test_dequantizelinear_axis)
// no filter
SKIP;
CASE(test_dequantizelinear_blocked)
SKIP;
CASE(test_det_2d)
// no filter
CASE(test_det_nd)
@ -1348,9 +1350,11 @@ CASE(test_qlinearmatmul_2D)
CASE(test_qlinearmatmul_3D)
// no filter
CASE(test_quantizelinear)
// no filter
SKIP;
CASE(test_quantizelinear_axis)
// no filter
SKIP;
CASE(test_quantizelinear_blocked)
SKIP;
CASE(test_range_float_type_positive_delta)
// no filter
CASE(test_range_float_type_positive_delta_expanded)


@ -48,6 +48,9 @@
"test_cumsum_2d_axis_1",
"test_cumsum_2d_negative_axis",
"test_concat_1d_axis_negative_1",
"test_dequantizelinear",
"test_dequantizelinear_axis",
"test_dequantizelinear_blocked",
"test_div_uint8",
"test_flatten_axis0",
"test_flatten_axis2",
@ -71,6 +74,9 @@
"test_pow_types_float32_int32", // vulkan backend does not take tensor other than float32 data type
"test_pow_types_float32_int64", // vulkan backend does not take tensor other than float32 data type
"test_pow_types_int", // vulkan backend does not take tensor other than float32 data type
"test_quantizelinear",
"test_quantizelinear_axis",
"test_quantizelinear_blocked",
"test_softmax_default_axis",
"test_sub_bcast",
"test_sub_uint8",


@ -1,4 +1,7 @@
"test_averagepool_3d_default",
"test_dequantizelinear",
"test_dequantizelinear_axis",
"test_dequantizelinear_blocked",
"test_dropout_default_ratio",
"test_globalmaxpool",
"test_globalmaxpool_precomputed",
@ -14,7 +17,10 @@
"test_maxpool_2d_same_upper",
"test_maxpool_2d_strides",
"test_maxpool_3d_default",
"test_pow", // fp16 accuracy issue
"test_pow",
"test_quantizelinear",
"test_quantizelinear_axis",
"test_quantizelinear_blocked",
"test_softmax_large_number",
"test_softmax_large_number_expanded",
"test_split_equal_parts_1d",


@ -1,5 +1,11 @@
"test_averagepool_3d_default",
"test_dequantizelinear",
"test_dequantizelinear_axis",
"test_dequantizelinear_blocked",
"test_maxpool_3d_default",
"test_quantizelinear",
"test_quantizelinear_axis",
"test_quantizelinear_blocked",
"test_scatter_elements_with_axis",
"test_scatter_elements_with_duplicate_indices",
"test_scatter_elements_with_negative_indices",


@ -89,8 +89,6 @@
"test_convtranspose_pad",
"test_convtranspose_pads",
"test_convtranspose_with_kernel",
"test_dequantizelinear",
"test_dequantizelinear_axis",
"test_det_2d",
"test_det_nd",
"test_dropout_default_mask",
@ -290,8 +288,6 @@
"test_qlinearconv",
"test_qlinearmatmul_2D",
"test_qlinearmatmul_3D",
"test_quantizelinear",
"test_quantizelinear_axis",
"test_range_float_type_positive_delta",
"test_range_float_type_positive_delta_expanded",
"test_range_int32_type_negative_delta",