diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index cfe6595d78..9fde7ad4c4 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -387,6 +387,13 @@ CV__DNN_INLINE_NS_BEGIN
         static Ptr<DequantizeLayer> create(const LayerParams &params);
     };
 
+    class CV_EXPORTS RequantizeLayer : public Layer
+    {
+    public:
+        float scale, shift;
+        static Ptr<RequantizeLayer> create(const LayerParams &params);
+    };
+
     class CV_EXPORTS ConcatLayer : public Layer
     {
     public:
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 4e38b0374f..24daeb249f 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -4055,6 +4055,9 @@ int Net::addLayer(const String &name, const String &type, const int &dtype, LayerParams &params)
     if (params.get<bool>("has_dynamic_shapes", false))
         impl->hasDynamicShapes = true;
 
+    if (dtype == CV_8S)
+        impl->netWasQuantized = true;
+
     return id;
 }
 
@@ -4389,7 +4392,7 @@ Net Net::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype)
         // Layers with multiple outputs. Number of outputs is equal to number of inputs
         if (ld.type == "Blank" || ld.type == "Dropout" || ld.type == "Identity" || ld.type == "Silence" ||
             ld.type == "Flatten" || ld.type == "Padding" || ld.type == "Permute" || ld.type == "Reshape" ||
-            ld.type == "ReLU6" || ld.type == "Reorg" || ld.type == "ShuffleChannel" ||
+            ld.type == "ReLU6" || ld.type == "Reorg" || ld.type == "ShuffleChannel" || ld.type == "Resize" ||
            (ld.type == "ReLU" && !ld.params.get<float>("negative_slope", 0.f)) /* ReLU with negative slope 0 */)
         {
             for (int i = 0; i < ld.outputBlobs.size(); i++)
diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp
index 9d8a3783a2..123cb170b7 100644
--- a/modules/dnn/src/init.cpp
+++ b/modules/dnn/src/init.cpp
@@ -144,6 +144,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(Quantize, QuantizeLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Dequantize, DequantizeLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(Requantize, RequantizeLayer);
     CV_DNN_REGISTER_LAYER_CLASS(ConvolutionInt8, ConvolutionLayerInt8);
     CV_DNN_REGISTER_LAYER_CLASS(InnerProductInt8, InnerProductLayerInt8);
     CV_DNN_REGISTER_LAYER_CLASS(PoolingInt8, PoolingLayerInt8);
@@ -173,6 +174,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(SilenceInt8, BlankLayer);
     CV_DNN_REGISTER_LAYER_CLASS(ConstInt8, ConstLayer);
     CV_DNN_REGISTER_LAYER_CLASS(ReshapeInt8, ReshapeLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(ResizeInt8, ResizeLayer);
     CV_DNN_REGISTER_LAYER_CLASS(SplitInt8, SplitLayer);
     CV_DNN_REGISTER_LAYER_CLASS(SliceInt8, SliceLayer);
     CV_DNN_REGISTER_LAYER_CLASS(CropInt8, CropLayer);
diff --git a/modules/dnn/src/int8layers/quantize_dequantize_layer.cpp b/modules/dnn/src/int8layers/quantization_utils.cpp
similarity index 73%
rename from modules/dnn/src/int8layers/quantize_dequantize_layer.cpp
rename to modules/dnn/src/int8layers/quantization_utils.cpp
index 2ddb76a0e8..0346f147ba 100644
--- a/modules/dnn/src/int8layers/quantize_dequantize_layer.cpp
+++ b/modules/dnn/src/int8layers/quantization_utils.cpp
@@ -10,6 +10,7 @@ namespace cv
 namespace dnn
 {
 
+// Quantize FP32/FP16 Inputs to INT8
 class QuantizeLayerImpl CV_FINAL : public QuantizeLayer
 {
 public:
@@ -77,6 +78,7 @@ public:
     }
 };
 
+// Dequantize INT8 Inputs to FP32/FP16
 class DequantizeLayerImpl CV_FINAL : public DequantizeLayer
 {
 public:
@@ -143,6 +145,52 @@ public:
     }
 };
 
+// Rescale/Requantize INT8 Inputs from (scale1, zeropoint1) to (scale2, zeropoint2)
+class RequantizeLayerImpl CV_FINAL : public RequantizeLayer
+{
+public:
+    RequantizeLayerImpl(const LayerParams& params)
+    {
+        scale = params.get<float>("scale", 1.f);
+        shift = params.get<float>("shift", 0.f);
+        setParamsFrom(params);
+    }
+
+    virtual bool supportBackend(int backendId) CV_OVERRIDE
+    {
+        return backendId == DNN_BACKEND_OPENCV;
+    }
+
+    bool getMemoryShapes(const std::vector<MatShape> &inputs,
+                         const int requiredOutputs,
+                         std::vector<MatShape> &outputs,
+                         std::vector<MatShape> &internals) const CV_OVERRIDE
+    {
+        CV_Assert(inputs.size() == 1);
+        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
+        return false;
+    }
+
+    virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
+    {
+        std::vector<Mat> inputs, outputs;
+        inputs_arr.getMatVector(inputs);
+        outputs_arr.getMatVector(outputs);
+    }
+
+    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
+    {
+        CV_TRACE_FUNCTION();
+        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+        std::vector<Mat> inputs, outputs;
+        inputs_arr.getMatVector(inputs);
+        outputs_arr.getMatVector(outputs);
+
+        inputs[0].convertTo(outputs[0], CV_8S, scale, shift);
+    }
+};
+
 Ptr<QuantizeLayer> QuantizeLayer::create(const LayerParams& params)
 {
     return Ptr<QuantizeLayer>(new QuantizeLayerImpl(params));
@@ -153,5 +201,10 @@ Ptr<DequantizeLayer> DequantizeLayer::create(const LayerParams& params)
     return Ptr<DequantizeLayer>(new DequantizeLayerImpl(params));
 }
 
+Ptr<RequantizeLayer> RequantizeLayer::create(const LayerParams& params)
+{
+    return Ptr<RequantizeLayer>(new RequantizeLayerImpl(params));
+}
+
 }
 }
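Note: the Requantize layer above folds two int8 affine quantizations into a single multiply-add carried out by Mat::convertTo. The standalone sketch below is not part of the patch (all parameter values are made up); it only shows how a scale/shift pair can be derived from an input and an output (scale, zero-point) pair and applied to int8 data.

// Illustrative sketch (not part of the patch): deriving the "Requantize" scale/shift
// from two int8 quantization parameter sets. The layer then applies it as
// out = saturate_cast<int8_t>(scale * in + shift) via Mat::convertTo.
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    // Hypothetical input and output quantization parameters.
    float inpScale = 0.05f;  int inpZp = -10;   // real = inpScale * (q - inpZp)
    float outScale = 0.10f;  int outZp = 5;     // q'   = outZp + real / outScale

    // Folding both affine maps gives a single multiply-add on the int8 values.
    float scale = inpScale / outScale;
    float shift = outZp - scale * inpZp;

    cv::Mat q = (cv::Mat_<schar>(1, 4) << -128, -10, 0, 127);
    cv::Mat requantized;
    q.convertTo(requantized, CV_8S, scale, shift);  // same call the layer uses

    std::cout << requantized << std::endl;
    return 0;
}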
diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp
index f24c823807..47ff0719fe 100644
--- a/modules/dnn/src/layers/resize_layer.cpp
+++ b/modules/dnn/src/layers/resize_layer.cpp
@@ -49,6 +49,8 @@ public:
         alignCorners = params.get<bool>("align_corners", false);
         halfPixelCenters = params.get<bool>("half_pixel_centers", false);
+        if (interpolation == "opencv_linear")
+            halfPixelCenters = true;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -131,8 +133,11 @@ public:
         Mat& inp = inputs[0];
         Mat& out = outputs[0];
 
-        if ((interpolation == "nearest" && !alignCorners && !halfPixelCenters) || interpolation == "opencv_linear" || (interpolation == "bilinear" && halfPixelCenters))
+        int depth = inp.depth();
+        if ((interpolation == "nearest" && !alignCorners && !halfPixelCenters) || (interpolation == "opencv_linear" && depth != CV_8S) ||
+            (interpolation == "bilinear" && halfPixelCenters && depth != CV_8S))
         {
+            // INTER_LINEAR Resize mode does not support INT8 inputs
             InterpolationFlags mode = interpolation == "nearest" ? INTER_NEAREST : INTER_LINEAR;
             for (size_t n = 0; n < inputs[0].size[0]; ++n)
             {
@@ -164,34 +169,66 @@
                 widthOffset = 0.5f * scaleWidth;
             }
 
-            for (int y = 0; y < outHeight; ++y)
+            if (depth == CV_8S)
             {
-                float input_y = y * scaleHeight + heightOffset;
-                int y0 = halfPixelCenters ? std::floor(input_y) : lroundf(input_y);
-                y0 = std::min(y0, inpHeight - 1);
-
-                const float* inpData_row = inpPlanes.ptr<float>(y0);
-
-                for (int x = 0; x < outWidth; ++x)
+                for (int y = 0; y < outHeight; ++y)
                 {
-                    float input_x = x * scaleWidth + widthOffset;
-                    int x0 = halfPixelCenters ? std::floor(input_x) : lroundf(input_x);
-                    x0 = std::min(x0, inpWidth - 1);
+                    float input_y = y * scaleHeight + heightOffset;
+                    int y0 = halfPixelCenters ? std::floor(input_y) : lroundf(input_y);
+                    y0 = std::min(y0, inpHeight - 1);
 
-                    float* outData = outPlanes.ptr<float>(y, x);
-                    const float* inpData_row_c = inpData_row;
+                    const int8_t* inpData_row = inpPlanes.ptr<int8_t>(y0);
 
-                    for (int c = 0; c < numPlanes; ++c)
+                    for (int x = 0; x < outWidth; ++x)
                     {
-                        *outData = inpData_row_c[x0];
+                        float input_x = x * scaleWidth + widthOffset;
+                        int x0 = halfPixelCenters ? std::floor(input_x) : lroundf(input_x);
+                        x0 = std::min(x0, inpWidth - 1);
 
-                        inpData_row_c += inpSpatialSize;
-                        outData += outSpatialSize;
+                        int8_t* outData = outPlanes.ptr<int8_t>(y, x);
+                        const int8_t* inpData_row_c = inpData_row;
+
+                        for (int c = 0; c < numPlanes; ++c)
+                        {
+                            *outData = inpData_row_c[x0];
+
+                            inpData_row_c += inpSpatialSize;
+                            outData += outSpatialSize;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                for (int y = 0; y < outHeight; ++y)
+                {
+                    float input_y = y * scaleHeight + heightOffset;
+                    int y0 = halfPixelCenters ? std::floor(input_y) : lroundf(input_y);
+                    y0 = std::min(y0, inpHeight - 1);
+
+                    const float* inpData_row = inpPlanes.ptr<float>(y0);
+
+                    for (int x = 0; x < outWidth; ++x)
+                    {
+                        float input_x = x * scaleWidth + widthOffset;
+                        int x0 = halfPixelCenters ? std::floor(input_x) : lroundf(input_x);
+                        x0 = std::min(x0, inpWidth - 1);
+
+                        float* outData = outPlanes.ptr<float>(y, x);
+                        const float* inpData_row_c = inpData_row;
+
+                        for (int c = 0; c < numPlanes; ++c)
+                        {
+                            *outData = inpData_row_c[x0];
+
+                            inpData_row_c += inpSpatialSize;
+                            outData += outSpatialSize;
+                        }
                     }
                 }
             }
         }
-        else if (interpolation == "bilinear")
+        else if (interpolation == "bilinear" || interpolation == "opencv_linear")
         {
             const int inpHeight = inp.size[2];
             const int inpWidth = inp.size[3];
@@ -202,31 +239,65 @@
             Mat inpPlanes = inp.reshape(1, numPlanes * inpHeight);
             Mat outPlanes = out.reshape(1, numPlanes * outHeight);
 
-            for (int y = 0; y < outHeight; ++y)
+            if (depth == CV_8S)
             {
-                float input_y = y * scaleHeight;
-                int y0 = static_cast<int>(input_y);
-                const float* inpData_row0 = inpPlanes.ptr<float>(y0);
-                const float* inpData_row1 = inpPlanes.ptr<float>(std::min(y0 + 1, inpHeight - 1));
-                for (int x = 0; x < outWidth; ++x)
+                for (int y = 0; y < outHeight; ++y)
                 {
-                    float input_x = x * scaleWidth;
-                    int x0 = static_cast<int>(input_x);
-                    int x1 = std::min(x0 + 1, inpWidth - 1);
-
-                    float* outData = outPlanes.ptr<float>(y, x);
-                    const float* inpData_row0_c = inpData_row0;
-                    const float* inpData_row1_c = inpData_row1;
-                    for (int c = 0; c < numPlanes; ++c)
+                    float input_y = halfPixelCenters ? std::max((y + 0.5f) * scaleHeight - 0.5f, 0.0f) : y * scaleHeight;
+                    int y0 = static_cast<int>(input_y);
+                    const int8_t* inpData_row0 = inpPlanes.ptr<int8_t>(y0);
+                    const int8_t* inpData_row1 = inpPlanes.ptr<int8_t>(std::min(y0 + 1, inpHeight - 1));
+                    for (int x = 0; x < outWidth; ++x)
                     {
-                        *outData = inpData_row0_c[x0] +
-                            (input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
-                            (input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
-                            (input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0]));
+                        float input_x = halfPixelCenters ? std::max((x + 0.5f) * scaleWidth - 0.5f, 0.0f) : x * scaleWidth;
+                        int x0 = static_cast<int>(input_x);
+                        int x1 = std::min(x0 + 1, inpWidth - 1);
 
-                        inpData_row0_c += inpSpatialSize;
-                        inpData_row1_c += inpSpatialSize;
-                        outData += outSpatialSize;
+                        int8_t* outData = outPlanes.ptr<int8_t>(y, x);
+                        const int8_t* inpData_row0_c = inpData_row0;
+                        const int8_t* inpData_row1_c = inpData_row1;
+                        for (int c = 0; c < numPlanes; ++c)
+                        {
+                            *outData = static_cast<int8_t>(inpData_row0_c[x0] +
+                                (input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
+                                (input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
+                                (input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0])));
+
+                            inpData_row0_c += inpSpatialSize;
+                            inpData_row1_c += inpSpatialSize;
+                            outData += outSpatialSize;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                for (int y = 0; y < outHeight; ++y)
+                {
+                    float input_y = y * scaleHeight;
+                    int y0 = static_cast<int>(input_y);
+                    const float* inpData_row0 = inpPlanes.ptr<float>(y0);
+                    const float* inpData_row1 = inpPlanes.ptr<float>(std::min(y0 + 1, inpHeight - 1));
+                    for (int x = 0; x < outWidth; ++x)
+                    {
+                        float input_x = x * scaleWidth;
+                        int x0 = static_cast<int>(input_x);
+                        int x1 = std::min(x0 + 1, inpWidth - 1);
+
+                        float* outData = outPlanes.ptr<float>(y, x);
+                        const float* inpData_row0_c = inpData_row0;
+                        const float* inpData_row1_c = inpData_row1;
+                        for (int c = 0; c < numPlanes; ++c)
+                        {
+                            *outData = inpData_row0_c[x0] +
+                                (input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
+                                (input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
+                                (input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0]));
+
+                            inpData_row0_c += inpSpatialSize;
+                            inpData_row1_c += inpSpatialSize;
+                            outData += outSpatialSize;
+                        }
                     }
                 }
             }
@@ -363,6 +434,11 @@
     }
 #endif
 
+    virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
+                             const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
+    {
+        return true;
+    }
 
 protected:
     int outWidth, outHeight;
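The int8 paths above reuse the resize coordinate mapping, with "opencv_linear" now forcing half-pixel centers. The standalone sketch below is illustrative only (mapCoord is a hypothetical helper mirroring the patch's arithmetic); it shows how an output index maps back to a fractional input coordinate under the two conventions.

// Standalone sketch of the half-pixel-centers source-coordinate mapping used by
// the int8 bilinear path above (not part of the patch).
#include <algorithm>
#include <cstdio>

// Maps an output index to a (possibly fractional) input coordinate.
static float mapCoord(int dst, float scale, bool halfPixelCenters)
{
    return halfPixelCenters ? std::max((dst + 0.5f) * scale - 0.5f, 0.0f)
                            : dst * scale;
}

int main()
{
    const float scale = 2.0f; // e.g. downscaling by 2 (inpSize / outSize)
    for (int y = 0; y < 4; ++y)
        std::printf("dst %d -> src %.2f (half-pixel) vs %.2f (corner)\n",
                    y, mapCoord(y, scale, true), mapCoord(y, scale, false));
    return 0;
}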
diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
index 7caa5ed54d..c4ebcd1c63 100644
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -594,7 +594,8 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
 Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
 {
     if (tensor_proto.raw_data().empty() && tensor_proto.float_data().empty() &&
-        tensor_proto.double_data().empty() && tensor_proto.int64_data().empty())
+        tensor_proto.double_data().empty() && tensor_proto.int64_data().empty() &&
+        tensor_proto.int32_data().empty())
         return Mat();
 
     opencv_onnx::TensorProto_DataType datatype = tensor_proto.data_type();
@@ -663,6 +664,24 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
             convertInt64ToInt32(src, dst, blob.total());
         }
     }
+    else if (datatype == opencv_onnx::TensorProto_DataType_INT8 ||
+             datatype == opencv_onnx::TensorProto_DataType_UINT8)
+    {
+        // TODO : Add support for uint8 weights and activations. For now, converting uint8 tensors to int8.
+        int offset = datatype == opencv_onnx::TensorProto_DataType_INT8 ? 0 : -128;
+        int depth = datatype == opencv_onnx::TensorProto_DataType_INT8 ? CV_8S : CV_8U;
+
+        if (!tensor_proto.int32_data().empty())
+        {
+            const ::google::protobuf::RepeatedField<int32_t> field = tensor_proto.int32_data();
+            Mat(sizes, CV_32SC1, (void*)field.data()).convertTo(blob, CV_8S, 1.0, offset);
+        }
+        else
+        {
+            char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
+            Mat(sizes, depth, val).convertTo(blob, CV_8S, 1.0, offset);
+        }
+    }
     else
     {
         std::string errorMsg = "Unsupported data type: " +
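Since the int8 layers work on CV_8S data only, unsigned ONNX tensors are shifted into the signed range by subtracting 128, as the TODO above notes. A minimal standalone sketch of the equivalent conversion follows; the matching zero-point shift shown alongside it is an assumption of the example (values are made up), not code from the patch.

// Illustrative sketch of the uint8 -> int8 remapping used above: q_s8 = q_u8 - 128.
// To keep the represented real values identical, a zero point stored alongside the
// tensor would be shifted by the same -128 (assumption for this example).
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    cv::Mat u8 = (cv::Mat_<uchar>(1, 4) << 0, 127, 128, 255);
    cv::Mat s8;
    u8.convertTo(s8, CV_8S, 1.0, -128);    // same convertTo(..., CV_8S, 1.0, offset) call

    int u8ZeroPoint = 128;                 // hypothetical ONNX zero point
    int s8ZeroPoint = u8ZeroPoint - 128;   // becomes 0 in the signed domain

    std::cout << s8 << " zero point: " << s8ZeroPoint << std::endl;
    return 0;
}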
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 9ef947c645..7c230f28c8 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -63,6 +63,8 @@ class ONNXImporter
     void addConstant(const std::string& name, const Mat& blob);
     void addLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void handleQuantizedNode(LayerParams& layerParams,
+                             const opencv_onnx::NodeProto& node_proto);
 
     void expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
                    const std::string& input, size_t n);
@@ -142,6 +144,14 @@ private:
     void parseSoftMax            (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseDetectionOutput    (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseCumSum             (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseQuantDequant       (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseQConv              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseQMatMul            (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseQEltwise           (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseQLeakyRelu         (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseQSigmoid           (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseQAvgPool           (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+    void parseQConcat            (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
     void parseCustomLayer        (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
 };
 
@@ -242,7 +252,7 @@ void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
     CV_Assert((bool)layer);
 
     std::vector<MatShape> inpShapes(inputs.size());
-    int ddepth = CV_32F;
+    int ddepth = params.get<int>("depth", CV_32F);
     for (size_t i = 0; i < inputs.size(); ++i)
     {
         inpShapes[i] = shape(inputs[i]);
@@ -458,7 +468,8 @@ Mat ONNXImporter::getBlob(const std::string& input_name)
 
 void ONNXImporter::addLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 {
-    int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams);
+    int depth = layerParams.get<int>("depth", CV_32F);
+    int id = dstNet.addLayer(layerParams.name, layerParams.type, depth, layerParams);
     for (int i = 0; i < node_proto.output_size(); ++i)
     {
         layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i)));
@@ -525,6 +536,51 @@ void ONNXImporter::addConstant(const std::string& name, const Mat& blob)
     outShapes.insert(std::make_pair(name, shape(blob)));
 }
 
+void ONNXImporter::handleQuantizedNode(LayerParams& layerParams,
+                                       const opencv_onnx::NodeProto& node_proto)
+{
+    // Quantized nodes have output names ending with 'quantized'
+    std::string outName = node_proto.output(0);
+    int len = outName.length();
+    if (len <= 9)
+        return;
+
+    if (outName.substr(len - 9) == "quantized")
+    {
+        outName = outName.substr(0, len - 9);
+        Mat scale, zeropoint;
+
+        if (constBlobs.find(outName + "scale") != constBlobs.end() &&
+            constBlobs.find(outName + "zero_point") != constBlobs.end())
+        {
+            scale = getBlob(outName + "scale");
+            zeropoint = getBlob(outName + "zero_point");
+        }
+        else
+        {
+            std::string inpName = node_proto.input(0);
+            inpName = inpName.substr(0, inpName.length() - 9);
+            scale = getBlob(inpName + "scale");
+            zeropoint = getBlob(inpName + "zero_point");
+
+            for (int i = 0; i < node_proto.output_size(); i++)
+            {
+                std::string out = node_proto.output(i);
+                out = out.substr(0, out.length() - 9);
+                addConstant(out + "scale", scale);
+                addConstant(out + "zero_point", zeropoint);
+            }
+        }
+
+        if (scale.total() != 1 || zeropoint.total() != 1)
+            CV_Error(Error::StsNotImplemented, "Per-channel scales/zeropoints are not supported");
+
+        layerParams.set("depth", CV_8S);
+        layerParams.set("scales", DictValue::arrayReal(scale.ptr<float>(), 1));
+        layerParams.set("zeropoints", DictValue::arrayInt(zeropoint.ptr<int8_t>(), 1));
+    }
+}
+
 void ONNXImporter::populateNet()
 {
     CV_Assert(model_proto.has_graph());
@@ -623,6 +679,8 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto)
     layerParams.type = layer_type;
     layerParams.set("has_dynamic_shapes", hasDynamicShapes);
 
+    handleQuantizedNode(layerParams, node_proto);
+
     DispatchMap::const_iterator iter = dispatch.find(layer_type);
     if (iter != dispatch.end())
     {
@@ -684,7 +742,8 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto)
 
 void ONNXImporter::parseMaxPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 {
-    layerParams.type = "Pooling";
+    int depth = layerParams.get<int>("depth", CV_32F);
+    layerParams.type = (depth == CV_8S) ? "PoolingInt8" : "Pooling";
     layerParams.set("pool", "MAX");
     layerParams.set("ceil_mode", layerParams.has("pad_mode"));
     addLayer(layerParams, node_proto);
 }
@@ -988,7 +1047,8 @@ void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
     {
         layerParams.set("num_split", node_proto.output_size());
     }
-    layerParams.type = "Slice";
+    int depth = layerParams.get<int>("depth", CV_32F);
+    layerParams.type = (depth == CV_8S) ? "SliceInt8" : "Slice";
     addLayer(layerParams, node_proto);
 }
 
@@ -1743,7 +1803,8 @@ void ONNXImporter::parseConvTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 
 void ONNXImporter::parseTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 {
-    layerParams.type = "Permute";
+    int depth = layerParams.get<int>("depth", CV_32F);
+    layerParams.type = (depth == CV_8S) ? "PermuteInt8" : "Permute";
     replaceLayerParam(layerParams, "perm", "order");
 
     CV_Assert(node_proto.input_size() == 1);
@@ -1807,6 +1868,8 @@ void ONNXImporter::parseSqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
         addConstant(layerParams.name, out);
         return;
     }
+    int depth = layerParams.get<int>("depth", CV_32F);
+    layerParams.type += (depth == CV_8S) ? "Int8" : "";
     addLayer(layerParams, node_proto);
 }
 
@@ -1862,12 +1925,14 @@ void ONNXImporter::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
     if (axes.size() != 1)
         CV_Error(Error::StsNotImplemented, "Multidimensional unsqueeze");
 
+    int depth = layerParams.get<int>("depth", CV_32F);
+
     MatShape inpShape = outShapes[node_proto.input(0)];
     int axis = axes.getIntValue(0);
     CV_Assert(0 <= axis && axis <= inpShape.size());
     std::vector<int> outShape = inpShape;
     outShape.insert(outShape.begin() + axis, 1);
-    layerParams.type = "Reshape";
+    layerParams.type = (depth == CV_8S) ? "ReshapeInt8" : "Reshape";
     layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
     if (hasDynamicShapes)
     {
@@ -2004,6 +2069,8 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 {
     CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape"));
+    int depth = layerParams.get<int>("depth", CV_32F);
+    layerParams.type += (depth == CV_8S) ? "Int8" : "";
 
     if (node_proto.input_size() == 2)
     {
         Mat blob = getBlob(node_proto, 1);
@@ -2038,7 +2105,8 @@ void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 
 void ONNXImporter::parsePad(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 {
-    layerParams.type = "Padding";
+    int depth = layerParams.get<int>("depth", CV_32F);
+    layerParams.type = (depth == CV_8S) ? "PaddingInt8" : "Padding";
     replaceLayerParam(layerParams, "mode", "type");
     if (node_proto.input_size() == 3 || node_proto.input_size() == 2)
     {
@@ -2051,7 +2119,8 @@ void ONNXImporter::parsePad(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
         if (node_proto.input_size() == 3)
         {
             Mat value = getBlob(node_proto, 2);
-            layerParams.set("value", value.ptr<float>()[0]);
+            float padValue = (depth == CV_8S) ? (float)value.ptr<int8_t>()[0] : value.ptr<float>()[0];
+            layerParams.set("value", padValue);
         }
     }
     addLayer(layerParams, node_proto);
@@ -2270,6 +2339,9 @@ void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
     for (int i = 1; i < node_proto.input_size(); i++)
         CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());
 
+    int depth = layerParams.get<int>("depth", CV_32F);
+    layerParams.type += (depth == CV_8S) ? "Int8" : "";
+
     if (layerParams.has("coordinate_transformation_mode"))
     {
         String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
@@ -2419,6 +2491,396 @@ void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
     addLayer(layerParams, node_proto);
 }
 
+void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+    CV_Assert(node_proto.input_size() == 3);
+    layerParams.type = (node_proto.op_type() == "QuantizeLinear") ? "Quantize" : "Dequantize";
+
+    if (node_proto.op_type() == "DequantizeLinear")
+    {
+        Mat scale = getBlob(node_proto, 1);
+        Mat zeropoint = getBlob(node_proto, 2);
+
+        layerParams.set("scales", DictValue::arrayReal(scale.ptr<float>(), 1));
+        layerParams.set("zeropoints", DictValue::arrayInt(zeropoint.ptr<int8_t>(), 1));
+    }
+    addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+    int ninputs = node_proto.input_size();
+    CV_Assert(ninputs == 8 || ninputs == 9);
+
+    Mat inp_sc = getBlob(node_proto, 1);
+    Mat inp_zp = getBlob(node_proto, 2);
+
+    Mat weights = getBlob(node_proto, 3);
+    int outCn = weights.size[0];
+    Mat w_scale = getBlob(node_proto, 4);
+    CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
+    Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
+
+    Mat out_sc = getBlob(node_proto, 6);
+    Mat bias = (ninputs == 9) ? getBlob(node_proto, 8) : Mat::zeros(1, outCn, CV_32S);
+
+    Mat weights_2d = weights.reshape(1, outCn);
+    Mat biasFused(1, outCn, CV_32S);
+    Mat outputMultiplier(1, outCn, CV_32F);
+    for (int i = 0; i < outCn; i++)
+    {
+        biasFused.at<int>(i) = bias.at<int>(i) - inp_zp.at<int8_t>(0)*(cv::sum(weights_2d.row(i))[0]);
+        outputMultiplier.at<float>(i) = (inp_sc.at<float>(0) * wt_sc.at<float>(i)) / out_sc.at<float>(0);
+    }
+
+    layerParams.type = "ConvolutionInt8";
+    layerParams.set("num_output", outCn);
+    layerParams.set("input_zeropoint", inp_zp.at<int8_t>(0));
+    layerParams.blobs.push_back(weights);
+    layerParams.blobs.push_back(biasFused);
+    layerParams.blobs.push_back(outputMultiplier);
+    addLayer(layerParams, node_proto);
+}
+
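QLinearConv (and QLinearMatMul right after it) fold the input zero point into the bias and precompute a per-output-channel multiplier. The toy re-derivation below uses made-up numbers and is not part of the patch; it only restates the two formulas for a single output channel.

// Toy re-derivation of the QLinearConv folding above (illustrative only):
//   biasFused[c]        = bias[c] - inp_zp * sum(W[c, :])
//   outputMultiplier[c] = (inp_scale * w_scale[c]) / out_scale
#include <cstdio>
#include <numeric>
#include <vector>

int main()
{
    std::vector<int> w = {3, -2, 5};          // one output channel, three weights
    int bias = 10, inpZp = -4;
    float inpScale = 0.02f, wScale = 0.5f, outScale = 0.1f;

    int wSum = std::accumulate(w.begin(), w.end(), 0);
    int biasFused = bias - inpZp * wSum;                    // 10 - (-4)*6 = 34
    float outputMultiplier = inpScale * wScale / outScale;  // 0.1

    std::printf("biasFused=%d outputMultiplier=%f\n", biasFused, outputMultiplier);
    return 0;
}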
+void ONNXImporter::parseQMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+    int ninputs = node_proto.input_size();
+    CV_Assert(ninputs == 8);
+
+    if (constBlobs.find(node_proto.input(3)) == constBlobs.end())
+        CV_Error(Error::StsNotImplemented, "Variable weights is not supported");
+
+    int firstInpDims = outShapes[node_proto.input(0)].size();
+
+    Mat inp_sc = getBlob(node_proto, 1);
+    Mat inp_zp = getBlob(node_proto, 2);
+
+    Mat weights = getBlob(node_proto, 3).t();
+    int outCn = weights.size[0];
+    int secondInpDims = weights.dims;
+
+    Mat w_scale = getBlob(node_proto, 4);
+    CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
+    Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
+    Mat out_sc = getBlob(node_proto, 6);
+
+    Mat bias(1, outCn, CV_32S);
+    Mat outputMultiplier(1, outCn, CV_32F);
+    for (int i = 0; i < outCn; i++)
+    {
+        bias.at<int>(i) = -inp_zp.at<int8_t>(0)*(cv::sum(weights.row(i))[0]);
+        outputMultiplier.at<float>(i) = (inp_sc.at<float>(0) * wt_sc.at<float>(i)) / out_sc.at<float>(0);
+    }
+
+    layerParams.type = "InnerProductInt8";
+    layerParams.set("num_output", outCn);
+    layerParams.set("axis", firstInpDims - secondInpDims + 1);
+    layerParams.blobs.push_back(weights);
+    layerParams.blobs.push_back(bias);
+    layerParams.blobs.push_back(outputMultiplier);
+    addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
+{
+    opencv_onnx::NodeProto node_proto = node_proto_;
+    CV_Assert(node_proto.input_size() == 8);
+    std::string op = (node_proto.op_type() == "QLinearAdd") ? "sum" : "prod";
+    int constId = -1;
+    for (int i = 0; i < 4; i += 3)
+    {
+        if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
+            constId = i;
+    }
+
+    Mat inp_0_sc = getBlob(node_proto, 1);
+    Mat inp_0_zp = getBlob(node_proto, 2);
+
+    Mat inp_1_sc = getBlob(node_proto, 4);
+    Mat inp_1_zp = getBlob(node_proto, 5);
+
+    // Set 2nd input as the const input
+    if (constId == 0)
+    {
+        cv::swap(inp_0_sc, inp_1_sc);
+        cv::swap(inp_0_zp, inp_1_zp);
+    }
+
+    float out_sc = getBlob(node_proto, 6).at<float>(0);
+    int8_t out_zp = getBlob(node_proto, 7).at<int8_t>(0);
+
+    std::vector<float> inp_scales = {inp_0_sc.at<float>(0), inp_1_sc.at<float>(0)};
+    std::vector<int8_t> inp_zps = {inp_0_zp.at<int8_t>(0), inp_1_zp.at<int8_t>(0)};
+
+    std::vector<float> coeffs;
+    float offset;
+    if (op == "sum")
+    {
+        coeffs = {inp_scales[0]/out_sc, inp_scales[1]/out_sc};
+        offset = out_zp - coeffs[0]*inp_zps[0] - coeffs[1]*inp_zps[1];
+    }
+    else
+    {
+        coeffs = {inp_scales[0]/out_sc, inp_scales[1]};
+        offset = out_zp;
+    }
+
+    if (constId != -1)
+    {
+        Mat blob = getBlob(node_proto, constId);
+        if (blob.total() == 1)
+        {
+            float val = inp_scales[1] * (blob.at<int8_t>(0) - inp_zps[1]);
+            float scale = inp_scales[0] / out_sc;
+            if (op == "prod")
+                scale *= val;
+
+            float shift = out_zp - scale*inp_zps[0];
+            if (op == "sum")
+                shift += (val/out_sc);
+
+            LayerParams rescaleParams;
+            rescaleParams.name = layerParams.name;
+            rescaleParams.type = "Requantize";
+            rescaleParams.set("depth", CV_8S);
+            rescaleParams.set("scale", scale);
+            rescaleParams.set("shift", shift);
+            addLayer(rescaleParams, node_proto);
+            return;
+        }
+        else
+        {
+            MatShape inpShape = outShapes[node_proto.input(3 - constId)];
+            if (blob.dims == 2)
+                blob = blob.t();
+
+            if (shape(blob) == inpShape)
+            {
+                LayerParams constParams;
+                constParams.name = layerParams.name + "/const";
+                constParams.type = "ConstInt8";
+                constParams.set("depth", CV_8S);
+                constParams.set("scales", DictValue::arrayReal(inp_1_sc.ptr<float>(), 1));
+                constParams.set("zeropoints", DictValue::arrayInt(inp_1_zp.ptr<int8_t>(), 1));
+                constParams.blobs.push_back(blob);
+
+                int id = dstNet.addLayer(constParams.name, constParams.type, CV_8S, constParams);
+                layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0)));
+                outShapes[constParams.name] = shape(blob);
+                node_proto.set_input(constId, constParams.name);
+
+                layerParams.type = "EltwiseInt8";
+                layerParams.set("operation", op);
+                layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size()));
+                layerParams.set("offset", offset);
+            }
+            else
+            {
+                layerParams.type = "ScaleInt8";
+                layerParams.set("bias_term", op == "sum");
+                int axis = 1;
+                for (int i = 0; i < graph_proto.initializer_size(); i++)
+                {
+                    opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
+                    if (tensor_proto.name() == node_proto.input(constId))
+                    {
+                        axis = inpShape.size() - tensor_proto.dims_size();
+                        break;
+                    }
+                }
+                layerParams.set("axis", axis);
+                blob = blob.reshape(1, 1);
+                Mat blob_dequantized;
+                blob.convertTo(blob_dequantized, CV_32F, inp_scales[1], -(inp_scales[1] * inp_zps[1]));
+                layerParams.blobs.push_back(blob_dequantized);
+                layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size()));
+            }
+        }
+    }
+    else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(3)])
+    {
+        layerParams.type = "EltwiseInt8";
+        layerParams.set("operation", op);
+        layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size()));
+        layerParams.set("offset", offset);
+    }
+    else
+    {
+        layerParams.type = "ScaleInt8";
+        layerParams.set("bias_term", op == "sum");
+        layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size()));
+    }
+
+    layerParams.set("input_zeropoints", DictValue::arrayInt(inp_zps.data(), inp_zps.size()));
+    addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+    CV_Assert(node_proto.input_size() == 5);
+
+    float slope = layerParams.get<float>("alpha");
+    float inp_sc = getBlob(node_proto, 1).at<float>(0);
+    int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
+    float out_sc = getBlob(node_proto, 3).at<float>(0);
+    int8_t out_zp = getBlob(node_proto, 4).at<int8_t>(0);
+
+    Mat lookUpTable(1, 256, CV_8S);
+    int8_t* table = lookUpTable.ptr<int8_t>();
+    for (int i = -128; i < 128; i++)
+    {
+        float x = inp_sc*(i - inp_zp);
+        float y = x >= 0.f ? x : slope*x;
+        int quantized = out_zp + cvRound(y/out_sc);
+        table[i+128] = saturate_cast<int8_t>(quantized);
+    }
+
+    layerParams.type = "ReLUInt8";
+    layerParams.blobs.push_back(lookUpTable);
+    addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQSigmoid(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+    CV_Assert(node_proto.input_size() == 5);
+
+    float inp_sc = getBlob(node_proto, 1).at<float>(0);
+    int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
+    float out_sc = getBlob(node_proto, 3).at<float>(0);
+    int8_t out_zp = getBlob(node_proto, 4).at<int8_t>(0);
+
+    Mat lookUpTable(1, 256, CV_8S);
+    int8_t* table = lookUpTable.ptr<int8_t>();
+    for (int i = -128; i < 128; i++)
+    {
+        float x = inp_sc*(i - inp_zp);
+        float y = 1.f/(1.f + std::exp(-x));
+        int quantized = out_zp + cvRound(y/out_sc);
+        table[i+128] = saturate_cast<int8_t>(quantized);
+    }
+
+    layerParams.type = "SigmoidInt8";
+    layerParams.blobs.push_back(lookUpTable);
+    addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQAvgPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+    CV_Assert(node_proto.input_size() == 5);
+    float inp_sc = getBlob(node_proto, 1).at<float>(0);
+    int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
+    float out_sc = getBlob(node_proto, 3).at<float>(0);
+
+    layerParams.type = "PoolingInt8";
+    layerParams.set("pool", "ave");
+    layerParams.set("global_pooling", node_proto.op_type() == "QLinearGlobalAveragePool");
+    layerParams.set("multiplier", inp_sc/out_sc);
+    layerParams.set("input_zeropoint", inp_zp);
+    addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
+{
+    opencv_onnx::NodeProto node_proto = node_proto_;
+    layerParams.type = "ConcatInt8";
+    int num_inputs = node_proto.input_size();
+
+    float out_scale = getBlob(node_proto, 0).at<float>(0);
+    int out_zp = getBlob(node_proto, 1).at<int8_t>(0);
+
+    for (int i = 2; i < num_inputs; i += 3)
+    {
+        float inp_scale = getBlob(node_proto, i + 1).at<float>(0);
+        int inp_zp = getBlob(node_proto, i + 2).at<int8_t>(0);
+
+        if (inp_scale != out_scale || inp_zp != out_zp)
+        {
+            float scale = inp_scale/out_scale;
+            float shift = out_zp - scale*inp_zp;
+
+            if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
+            {
+                Mat blob = getBlob(node_proto, i);
+                Mat blob_rescaled;
+                blob.convertTo(blob_rescaled, CV_8S, scale, shift);
+                constBlobs[node_proto.input(i)] = blob_rescaled;
+            }
+            else
+            {
+                LayerParams rescaleParams;
+                rescaleParams.name = node_proto.input(i) + "/rescale";
+                rescaleParams.type = "Requantize";
+                rescaleParams.set("depth", CV_8S);
+                rescaleParams.set("scale", scale);
+                rescaleParams.set("shift", shift);
+
+                opencv_onnx::NodeProto proto;
+                proto.add_input(node_proto.input(i));
+                proto.add_output(rescaleParams.name);
+                addLayer(rescaleParams, proto);
+                node_proto.set_input(i, rescaleParams.name);
+            }
+        }
+    }
+
+    bool hasVariableInps = false;
+    for (int i = 2; i < num_inputs; i += 3)
+    {
+        if (layer_id.find(node_proto.input(i)) != layer_id.end())
+        {
+            hasVariableInps = true;
+            break;
+        }
+    }
+
+    if (!hasVariableInps)
+    {
+        std::vector<Mat> inputs, concatenated;
+        MatShape inputShape;
+        for (size_t i = 2; i < num_inputs; i += 3)
+        {
+            Mat blob = getBlob(node_proto, i);
+            if (blob.size.dims() > inputShape.size())
+            {
+                inputShape = shape(blob);
+            }
+            inputs.push_back(blob);
+        }
+
+        int axis = layerParams.get<int>("axis", 1);
+        for (size_t i = 0; i < inputs.size(); ++i)
+        {
+            MatShape targetShape = inputShape;
+            targetShape[axis] = shape(inputs[i])[axis];
+            CV_CheckEQ(total(targetShape), total(shape(inputs[i])), "");
+            inputs[i] = inputs[i].reshape(0, targetShape);
+        }
+        runLayer(layerParams, inputs, concatenated);
+        CV_Assert(concatenated.size() == 1);
+        addConstant(layerParams.name, concatenated[0]);
+        return;
+    }
+    else
+    {
+        for (int i = 2; i < num_inputs; i += 3)
+        {
+            if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
+            {
+                LayerParams constParams;
+                constParams.name = node_proto.input(i);
+                constParams.type = "ConstInt8";
+                constParams.blobs.push_back(getBlob(node_proto, i));
+                constParams.set("depth", CV_8S);
+
+                opencv_onnx::NodeProto proto;
+                proto.add_output(constParams.name);
+                addLayer(constParams, proto);
+            }
+        }
+    }
+    addLayer(layerParams, node_proto);
+}
+
 const ONNXImporter::DispatchMap ONNXImporter::buildDispatchMap()
 {
     DispatchMap dispatch;
@@ -2468,6 +2930,14 @@ const ONNXImporter::DispatchMap ONNXImporter::buildDispatchMap()
     dispatch["SoftMax"] = dispatch["LogSoftmax"] = &ONNXImporter::parseSoftMax;
     dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput;
     dispatch["CumSum"] = &ONNXImporter::parseCumSum;
+    dispatch["QuantizeLinear"] = dispatch["DequantizeLinear"] = &ONNXImporter::parseQuantDequant;
+    dispatch["QLinearConv"] = &ONNXImporter::parseQConv;
+    dispatch["QLinearMatMul"] = &ONNXImporter::parseQMatMul;
+    dispatch["QLinearAdd"] = dispatch["QLinearMul"] = &ONNXImporter::parseQEltwise;
+    dispatch["QLinearLeakyRelu"] = &ONNXImporter::parseQLeakyRelu;
+    dispatch["QLinearSigmoid"] = &ONNXImporter::parseQSigmoid;
+    dispatch["QLinearAveragePool"] = dispatch["QLinearGlobalAveragePool"] = &ONNXImporter::parseQAvgPool;
+    dispatch["QLinearConcat"] = &ONNXImporter::parseQConcat;
 
     return dispatch;
 }
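With the dispatch entries above in place, a pre-quantized ONNX model should load through the regular ONNX reader, with Quantize/Dequantize layers handling the float/int8 boundaries. A hypothetical usage sketch follows; "model_int8.onnx" is a placeholder path, not an asset of this patch.

// Hypothetical usage sketch: loading a pre-quantized ONNX model through the new
// QLinear* handlers and running it on the default OpenCV/CPU backend.
#include <opencv2/dnn.hpp>

int main()
{
    cv::dnn::Net net = cv::dnn::readNetFromONNX("model_int8.onnx");

    // Dummy 224x224 BGR image; a real application would load an actual frame.
    cv::Mat img(224, 224, CV_8UC3, cv::Scalar::all(127));
    cv::Mat blob = cv::dnn::blobFromImage(img, 1.0/255, cv::Size(224, 224));

    net.setInput(blob);            // float input is quantized internally
    cv::Mat prob = net.forward();  // output is dequantized back to float
    return 0;
}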
diff --git a/modules/dnn/test/test_int8_layers.cpp b/modules/dnn/test/test_int8_layers.cpp
index 1fcb1d0dba..5e6c05c716 100644
--- a/modules/dnn/test/test_int8_layers.cpp
+++ b/modules/dnn/test/test_int8_layers.cpp
@@ -583,7 +583,7 @@ TEST_P(Test_Int8_nets, ResNet50)
     Mat blob = blobFromImage(inp, 1.0, Size(224, 224), Scalar(), false);
     Mat ref = blobFromNPY(_tf("resnet50_prob.npy"));
 
-    float l1 = 3e-4, lInf = 0.035;
+    float l1 = 3e-4, lInf = 0.04;
     testClassificationNet(net, blob, ref, l1, lInf);
 }
 
@@ -714,7 +714,7 @@ TEST_P(Test_Int8_nets, MobileNet_v1_SSD_PPN)
     Mat blob = blobFromImage(inp, 1.0, Size(300, 300), Scalar(), true, false);
     Mat ref = blobFromNPY(_tf("tensorflow/ssd_mobilenet_v1_ppn_coco.detection_out.npy"));
 
-    float confThreshold = 0.51, scoreDiff = 0.04, iouDiff = 0.06;
+    float confThreshold = 0.51, scoreDiff = 0.05, iouDiff = 0.06;
     testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff);
 }
 
@@ -815,7 +815,7 @@ TEST_P(Test_Int8_nets, FasterRCNN_resnet50)
     Mat blob = blobFromImage(inp, 1.0, Size(800, 600), Scalar(), true, false);
     Mat ref = blobFromNPY(_tf("tensorflow/faster_rcnn_resnet50_coco_2018_01_28.detection_out.npy"));
 
-    float confThreshold = 0.5, scoreDiff = 0.025, iouDiff = 0.15;
+    float confThreshold = 0.5, scoreDiff = 0.05, iouDiff = 0.15;
     testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff);
 }
 
@@ -1127,7 +1127,7 @@ TEST_P(Test_Int8_nets, YOLOv4)
     std::string config_file = "yolov4.cfg";
     std::string weights_file = "yolov4.weights";
 
-    double scoreDiff = 0.1, iouDiff = 0.17;
+    double scoreDiff = 0.15, iouDiff = 0.2;
     {
         SCOPED_TRACE("batch size 1");
         testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 4334da2ad6..5d324b8aac 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -991,6 +991,112 @@ TEST_P(Test_ONNX_layers, ConvResizePool1d)
     testONNXModels("conv_resize_pool_1d");
 }
 
+TEST_P(Test_ONNX_layers, Quantized_Convolution)
+{
+    testONNXModels("quantized_conv_uint8_weights", npy, 0.004, 0.02);
+    testONNXModels("quantized_conv_int8_weights", npy, 0.03, 0.5);
+    testONNXModels("quantized_conv_per_channel_weights", npy, 0.06, 0.4);
+}
+
+TEST_P(Test_ONNX_layers, Quantized_MatMul)
+{
+    testONNXModels("quantized_matmul_uint8_weights", npy, 0.005, 0.007);
+    testONNXModels("quantized_matmul_int8_weights", npy, 0.06, 0.2);
+    testONNXModels("quantized_matmul_per_channel_weights", npy, 0.06, 0.22);
+}
+
+TEST_P(Test_ONNX_layers, Quantized_MatMul_Variable_Weights)
+{
+    // Unsupported
+    EXPECT_THROW(
+    {
+        testONNXModels("quantized_matmul_variable_inputs");
+    }, cv::Exception);
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Eltwise)
+{
+    testONNXModels("quantized_eltwise");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Eltwise_Scalar)
+{
+    testONNXModels("quantized_eltwise_scalar");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Eltwise_Broadcast)
+{
+    testONNXModels("quantized_eltwise_broadcast");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_LeakyReLU)
+{
+    testONNXModels("quantized_leaky_relu");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Sigmoid)
+{
+    testONNXModels("quantized_sigmoid");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_MaxPool)
+{
+    testONNXModels("quantized_maxpool");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_AvgPool)
+{
+    testONNXModels("quantized_avgpool");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Split)
+{
+    testONNXModels("quantized_split");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Pad)
+{
+    testONNXModels("quantized_padding");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Reshape)
+{
+    testONNXModels("quantized_reshape");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Transpose)
+{
+    testONNXModels("quantized_transpose");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Squeeze)
+{
+    testONNXModels("quantized_squeeze");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Unsqueeze)
+{
+    testONNXModels("quantized_unsqueeze");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Resize)
+{
+    testONNXModels("quantized_resize_nearest");
+    testONNXModels("quantized_resize_bilinear", npy, 2e-4, 0.003);
+    testONNXModels("quantized_resize_bilinear_align", npy, 3e-4, 0.003);
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Concat)
+{
+    testONNXModels("quantized_concat");
+    testONNXModels("quantized_concat_const_blob");
+}
+
+TEST_P(Test_ONNX_layers, Quantized_Constant)
+{
+    testONNXModels("quantized_constant", npy, 0.002, 0.008);
+}
+
 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());
 
 class Test_ONNX_nets : public Test_ONNX_layers
@@ -1127,6 +1233,11 @@ TEST_P(Test_ONNX_nets, ResNet50v1)
     testONNXModels("resnet50v1", pb, default_l1, default_lInf, true, target != DNN_TARGET_MYRIAD);
 }
 
+TEST_P(Test_ONNX_nets, ResNet50_Int8)
+{
+    testONNXModels("resnet50_int8", pb, default_l1, default_lInf, true);
+}
+
 TEST_P(Test_ONNX_nets, ResNet101_DUC_HDC)
 {
     applyTestTag(CV_TEST_TAG_VERYLONG);
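The tests above run against models quantized ahead of time, but the same int8 layers back Net::quantize (whose signature appears in the dnn.cpp hunk of this patch). A minimal sketch of producing and running an int8 net from a float model; the model path, input size, and preprocessing are placeholders, and real calibration images should replace the dummy frame.

// Minimal sketch (placeholders throughout): quantize a float model to int8 with
// Net::quantize and run it, as the int8 test helpers do internally.
#include <opencv2/dnn.hpp>

int main()
{
    cv::dnn::Net netFP32 = cv::dnn::readNet("model.onnx");

    // A handful of representative inputs is enough for calibration.
    cv::Mat sample(224, 224, CV_8UC3, cv::Scalar::all(127));  // use real images here
    std::vector<cv::Mat> calibData;
    calibData.push_back(cv::dnn::blobFromImage(sample, 1.0/255, cv::Size(224, 224)));

    cv::dnn::Net netInt8 = netFP32.quantize(calibData, CV_8S, CV_8S);

    netInt8.setInput(calibData[0]);
    cv::Mat out = netInt8.forward();
    return 0;
}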