// This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // Copyright (C) 2018, Intel Corporation, all rights reserved. // Third party copyrights are property of their respective owners. #include "../precomp.hpp" #include #include #include #include #undef CV_LOG_STRIP_LEVEL #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1 #include #include #ifdef HAVE_PROTOBUF #include #include #include #include #include #if defined _MSC_VER && _MSC_VER < 1910/*MSVS 2017*/ #pragma warning(push) #pragma warning(disable: 4503) // decorated name length exceeded, name was truncated #endif #if defined(__GNUC__) && __GNUC__ >= 5 #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsuggest-override" #endif #include "opencv-onnx.pb.h" #if defined(__GNUC__) && __GNUC__ >= 5 #pragma GCC diagnostic pop #endif #include "onnx_graph_simplifier.hpp" namespace cv { namespace dnn { CV__DNN_INLINE_NS_BEGIN extern bool DNN_DIAGNOSTICS_RUN; class ONNXLayerHandler; template static T getScalarFromMat(Mat m) { CV_Assert(m.total() == 1); return m.at(0); } class ONNXImporter { FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; opencv_onnx::ModelProto model_proto; struct LayerInfo { int layerId; int outputId; int depth; LayerInfo(int _layerId = 0, int _outputId = 0, int _depth = CV_32F) :layerId(_layerId), outputId(_outputId), depth(_depth) {} }; struct TensorInfo { int real_ndims; TensorInfo(int _real_ndims = 0) : real_ndims(_real_ndims) {} }; std::map getGraphTensors( const opencv_onnx::GraphProto& graph_proto); Mat getBlob(const opencv_onnx::NodeProto& node_proto, int index); Mat getBlob(const std::string& input_name); TensorInfo getBlobExtraInfo(const opencv_onnx::NodeProto& node_proto, int index); TensorInfo getBlobExtraInfo(const std::string& input_name); LayerParams getLayerParams(const opencv_onnx::NodeProto& node_proto); void addConstant(const 
std::string& name, const Mat& blob); void addLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void setParamsDtype(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto, const std::string& input, size_t n); void lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size); void lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n); std::string lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n); std::string lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name, const int index); void lstm_add_transform(int num_directions, int batch_size, int hidden_size, int index, const std::string& input_name, const std::string& output_name); public: ONNXImporter(Net& net, const char *onnxFile); ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer); void populateNet(); protected: std::unique_ptr layerHandler; Net& dstNet; opencv_onnx::GraphProto graph_proto; std::string framework_name; std::map constBlobs; std::map constBlobsExtraInfo; std::map outShapes; // List of internal blobs shapes. 
bool hasDynamicShapes; // Whether the model has inputs with dynamic shapes typedef std::map::iterator IterShape_t; std::map layer_id; typedef std::map::iterator IterLayerId_t; typedef std::map::const_iterator ConstIterLayerId_t; void handleNode(const opencv_onnx::NodeProto& node_proto); private: friend class ONNXLayerHandler; typedef void (ONNXImporter::*ONNXImporterNodeParser)(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); typedef std::map DispatchMap; typedef std::map DomainDispatchMap; DomainDispatchMap domain_dispatch_map; std::string getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto); const DispatchMap& getDispatchMap(const opencv_onnx::NodeProto& node_proto); void buildDispatchMap_ONNX_AI(int opset_version); void buildDispatchMap_COM_MICROSOFT(int opset_version); // Domain: 'ai.onnx' (default) // URL: https://github.com/onnx/onnx/blob/master/docs/Operators.md void parseArg (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseMaxUnpool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseMaxPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseAveragePool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseGlobalPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseReduce (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseSlice (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseSplit (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseNeg (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseConstant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseLSTM (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseGRU (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseImageScaler (LayerParams& 
layerParams, const opencv_onnx::NodeProto& node_proto); void parseClip (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseLeakyRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseElu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseTanh (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseAbs (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parsePRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseLRN (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseBatchNormalization (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseGemm (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseMatMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseConv (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseConvTranspose (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseTranspose (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseSqueeze (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseFlatten (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseUnsqueeze (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseExpand (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseReshape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parsePad (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseShape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void 
parseCast (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseConstantFill (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseGather (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseResize (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseUpsample (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseSoftMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseDetectionOutput (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseCumSum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseElementWise (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseDepthToSpace (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseRange (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseScatter (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseTile (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseSimpleLayers (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); // Domain: com.microsoft // URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md void parseQuantDequant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseQConv (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseQMatMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseQEltwise (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseQLeakyRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseQSigmoid (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseQAvgPool 
(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseQConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseQGemm (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); // '???' domain or '???' layer type void parseCustomLayer (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); int onnx_opset; // OperatorSetIdProto for 'onnx' domain std::map onnx_opset_map; // map from OperatorSetIdProto void parseOperatorSet(); const std::string str_domain_ai_onnx = "ai.onnx"; bool useLegacyNames; bool getParamUseLegacyNames() { bool param = utils::getConfigurationParameterBool("OPENCV_DNN_ONNX_USE_LEGACY_NAMES", false); return param; } std::string extractNodeName(const opencv_onnx::NodeProto& node_proto); }; class ONNXLayerHandler : public detail::LayerHandler { public: explicit ONNXLayerHandler(ONNXImporter* importer_); void fillRegistry(const opencv_onnx::GraphProto& net); protected: ONNXImporter* importer; }; ONNXLayerHandler::ONNXLayerHandler(ONNXImporter* importer_) : importer(importer_){} void ONNXLayerHandler::fillRegistry(const opencv_onnx::GraphProto &net) { int layersSize = net.node_size(); for (int li = 0; li < layersSize; li++) { const opencv_onnx::NodeProto &node_proto = net.node(li); const std::string& name = node_proto.output(0); const std::string& type = node_proto.op_type(); const std::string& layer_type_domain = importer->getLayerTypeDomain(node_proto); const auto& dispatch = importer->getDispatchMap(node_proto); if (dispatch.find(type) == dispatch.end()) { addMissing(name, cv::format("%s.%s", layer_type_domain.c_str(), type.c_str())); } } printMissing(); } ONNXImporter::ONNXImporter(Net& net, const char *onnxFile) : layerHandler(DNN_DIAGNOSTICS_RUN ? 
new ONNXLayerHandler(this) : nullptr) , dstNet(net) , onnx_opset(0) , useLegacyNames(getParamUseLegacyNames()) { hasDynamicShapes = false; CV_Assert(onnxFile); CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile); std::fstream input(onnxFile, std::ios::in | std::ios::binary); if (!input) { CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", onnxFile)); } if (!model_proto.ParseFromIstream(&input)) { CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX model: %s", onnxFile)); } populateNet(); } ONNXImporter::ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer) : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr) , dstNet(net) , onnx_opset(0) , useLegacyNames(getParamUseLegacyNames()) { hasDynamicShapes = false; CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)"); struct _Buf : public std::streambuf { _Buf(const char* buffer, size_t sizeBuffer) { char* p = const_cast(buffer); setg(p, p, p + sizeBuffer); } }; _Buf buf(buffer, sizeBuffer); std::istream input(&buf); if (!model_proto.ParseFromIstream(&input)) CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model from in-memory byte array."); populateNet(); } inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, const String& newKey) { if (layerParams.has(oldKey)) { layerParams.set(newKey, layerParams.get(oldKey)); layerParams.erase(oldKey); } } static void dumpValueInfoProto(int i, const opencv_onnx::ValueInfoProto& valueInfoProto, const std::string& prefix) { CV_Assert(valueInfoProto.has_name()); CV_Assert(valueInfoProto.has_type()); const opencv_onnx::TypeProto& typeProto = valueInfoProto.type(); CV_Assert(typeProto.has_tensor_type()); const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type(); CV_Assert(tensor.has_shape()); const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape(); int dim_size = tensorShape.dim_size(); 
CV_CheckGE(dim_size, 0, ""); MatShape shape(dim_size); for (int j = 0; j < dim_size; ++j) { const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j); if (dimension.has_dim_param()) { CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)"); } // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition if (dimension.has_denotation()) { CV_LOG_INFO(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'"); } shape[j] = dimension.dim_value(); } CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << " as '" << valueInfoProto.name() << "'] shape=" << toString(shape)); } static void dumpTensorProto(int i, const opencv_onnx::TensorProto& tensorProto, const std::string& prefix) { if (utils::logging::getLogLevel() < utils::logging::LOG_LEVEL_VERBOSE) return; int dim_size = tensorProto.dims_size(); CV_CheckGE(dim_size, 0, ""); MatShape shape(dim_size); for (int j = 0; j < dim_size; ++j) { int sz = static_cast(tensorProto.dims(j)); shape[j] = sz; } CV_LOG_VERBOSE(NULL, 0, "DNN/ONNX: " << prefix << "[" << i << " as '" << tensorProto.name() << "'] shape=" << toString(shape) << " data_type=" << (int)tensorProto.data_type()); } void releaseONNXTensor(opencv_onnx::TensorProto& tensor_proto) { if (!tensor_proto.raw_data().empty()) { delete tensor_proto.release_raw_data(); } } void runLayer(LayerParams& params, const std::vector& inputs, std::vector& outputs) { Ptr layer = LayerFactory::createLayerInstance(params.type, params); CV_Assert((bool)layer); std::vector inpShapes(inputs.size()); int ddepth = params.get("depth", CV_32F); for (size_t i = 0; i < inputs.size(); ++i) { inpShapes[i] = shape(inputs[i]); if (i > 0 && ddepth != inputs[i].depth()) CV_Error(Error::StsNotImplemented, "Mixed input data types."); // Quantize and Dequantize layer have different output type than input. 
if (params.type != "Quantize" && params.type != "Dequantize") ddepth = inputs[i].depth(); } std::vector outShapes, internalShapes; layer->getMemoryShapes(inpShapes, 0, outShapes, internalShapes); std::vector internals(internalShapes.size()); outputs.resize(outShapes.size()); for (size_t i = 0; i < outShapes.size(); ++i) outputs[i].create(outShapes[i], ddepth); for (size_t i = 0; i < internalShapes.size(); ++i) internals[i].create(internalShapes[i], ddepth); layer->finalize(inputs, outputs); layer->forward(inputs, outputs, internals); } std::map ONNXImporter::getGraphTensors( const opencv_onnx::GraphProto& graph_proto) { std::map layers_weights; for (int i = 0; i < graph_proto.initializer_size(); i++) { const opencv_onnx::TensorProto& tensor_proto = graph_proto.initializer(i); dumpTensorProto(i, tensor_proto, "initializer"); Mat mat = getMatFromTensor(tensor_proto); releaseONNXTensor(const_cast(tensor_proto)); // drop already loaded data if (DNN_DIAGNOSTICS_RUN && mat.empty()) continue; layers_weights.insert(std::make_pair(tensor_proto.name(), mat)); constBlobsExtraInfo.insert(std::make_pair(tensor_proto.name(), TensorInfo(tensor_proto.dims_size()))); } return layers_weights; } static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) { std::vector dst(src.size()); convertInt64ToInt32(src, dst, src.size()); return DictValue::arrayInt(&dst[0], src.size()); } static DictValue parseStr(const ::google::protobuf::RepeatedPtrField< ::std::string>& src) { return DictValue::arrayString(src.begin(), static_cast(src.size())); } LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto) { LayerParams lp; for(int i = 0; i < node_proto.attribute_size(); i++) { opencv_onnx::AttributeProto attribute_proto = node_proto.attribute(i); std::string attribute_name = attribute_proto.name(); try { if(attribute_name == "kernel_shape") { CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || 
attribute_proto.ints_size() == 3); lp.set("kernel_size", parse(attribute_proto.ints())); } else if(attribute_name == "strides") { CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); lp.set("stride", parse(attribute_proto.ints())); } else if(attribute_name == "pads") { if (node_proto.op_type() == "Pad") { // Padding layer. // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN. // We need to shuffle it to begin0, end0, begin1, end1, ... CV_Assert(attribute_proto.ints_size() % 2 == 0); const int dims = attribute_proto.ints_size() / 2; std::vector paddings; paddings.reserve(attribute_proto.ints_size()); for (int i = 0; i < dims; ++i) { paddings.push_back(attribute_proto.ints(i)); paddings.push_back(attribute_proto.ints(dims + i)); } lp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size())); } else { // Convolution or pooling. CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6); lp.set("pad", parse(attribute_proto.ints())); } } else if(attribute_name == "auto_pad") { if (attribute_proto.s() == "SAME_UPPER" || attribute_proto.s() == "SAME_LOWER") { lp.set("pad_mode", "SAME"); } else if (attribute_proto.s() == "VALID") { lp.set("pad_mode", "VALID"); } } else if(attribute_name == "dilations") { CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); lp.set("dilation", parse(attribute_proto.ints())); } else if(attribute_name == "activations" && node_proto.op_type() == "LSTM") { lp.set(attribute_name, parseStr(attribute_proto.strings())); } else if (attribute_proto.has_i()) { ::google::protobuf::int64 src = attribute_proto.i(); if (src < std::numeric_limits::min() || src > std::numeric_limits::max()) CV_Error(Error::StsOutOfRange, "Input is out of OpenCV 32S range"); else lp.set(attribute_name, saturate_cast(src)); } else if (attribute_proto.has_f()) { 
lp.set(attribute_name, attribute_proto.f()); } else if (attribute_proto.has_s()) { lp.set(attribute_name, attribute_proto.s()); } else if (attribute_proto.floats_size() > 0) { lp.set(attribute_name, DictValue::arrayReal( attribute_proto.floats().data(), attribute_proto.floats_size())); } else if (attribute_proto.ints_size() > 0) { lp.set(attribute_name, parse(attribute_proto.ints())); } else if (attribute_proto.has_t()) { opencv_onnx::TensorProto tensor = attribute_proto.t(); Mat blob = getMatFromTensor(tensor); lp.blobs.push_back(blob); lp.set("original_dims_of_mat", tensor.dims_size()); } else if (attribute_proto.has_g()) { CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: 'Graph' is not supported", attribute_name.c_str())); } else if (attribute_proto.graphs_size() > 0) { CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: 'Graphs' (%d) in attributes is not supported", attribute_name.c_str(), attribute_proto.graphs_size()) ); } else if (attribute_proto.strings_size() > 0) { std::string msg = cv::format("DNN/ONNX/Attribute[%s]: 'Strings' (%d) are not supported", attribute_name.c_str(), attribute_proto.strings_size()); CV_LOG_ERROR(NULL, msg); for (int i = 0; i < attribute_proto.strings_size(); i++) { CV_LOG_ERROR(NULL, " Attribute[" << attribute_name << "].string(" << i << ") = '" << attribute_proto.strings(i) << "'"); } CV_Error(Error::StsNotImplemented, msg); } else if (attribute_proto.tensors_size() > 0) { CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: 'Tensors' (%d) in attributes are not supported", attribute_name.c_str(), attribute_proto.tensors_size()) ); } else { CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: unsupported attribute format", attribute_name.c_str())); } } catch (const cv::Exception& e) { CV_UNUSED(e); if (DNN_DIAGNOSTICS_RUN) { CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem with processing attributes for node " << node_proto.name() << " Attribute " << 
attribute_name.c_str() ); continue; } throw; } } return lp; } Mat ONNXImporter::getBlob(const opencv_onnx::NodeProto& node_proto, int index) { CV_Assert(index < node_proto.input_size()); const std::string& input_name = node_proto.input(index); return getBlob(input_name); } Mat ONNXImporter::getBlob(const std::string& input_name) { std::map::const_iterator constBlob = constBlobs.find(input_name); if (constBlob == constBlobs.end()) { CV_Error(Error::StsBadArg, std::string("Blob ") + input_name + " not found in const blobs"); } return constBlob->second; } ONNXImporter::TensorInfo ONNXImporter::getBlobExtraInfo(const opencv_onnx::NodeProto &node_proto, int index) { CV_Assert(index < node_proto.input_size()); const std::string& input_name = node_proto.input(index); return getBlobExtraInfo(input_name); } ONNXImporter::TensorInfo ONNXImporter::getBlobExtraInfo(const std::string& input_name) { std::map::const_iterator constBlobExtraInfo = constBlobsExtraInfo.find(input_name); if (constBlobExtraInfo == constBlobsExtraInfo.end()) { CV_Error(Error::StsBadArg, std::string("Blob ") + input_name + " not found in const blobs of extra info"); } return constBlobExtraInfo->second; } void ONNXImporter::addLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { int depth = layerParams.get("depth", CV_32F); int id = dstNet.addLayer(layerParams.name, layerParams.type, depth, layerParams); for (int i = 0; i < node_proto.output_size(); ++i) { const std::string& output_name = node_proto.output(i); if (!output_name.empty()) { layer_id.insert(std::make_pair(output_name, LayerInfo(id, i, depth))); } } std::vector layerInpShapes, layerOutShapes, layerInternalShapes; int inpNum = 0; for (int j = 0; j < node_proto.input_size(); j++) { const std::string& input_name = node_proto.input(j); IterLayerId_t layerId = layer_id.find(input_name); if (layerId != layer_id.end()) { dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum); ++inpNum; // Collect input 
shapes. IterShape_t shapeIt = outShapes.find(input_name); CV_Assert(shapeIt != outShapes.end()); layerInpShapes.push_back(shapeIt->second); } } // Compute shape of output blob for this layer. Ptr layer = dstNet.getLayer(id); // FIXIT: avoid instantiation of layers during the import stage layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes); for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i) { const std::string& output_name = node_proto.output(i); if (!output_name.empty()) { outShapes[node_proto.output(i)] = layerOutShapes[i]; } } } /** @brief Make N copies of input layer and set them as input to node_proto. * @param prefix prefix of new layers' names * @param node_proto node which will contain all copies as inputs * @param input name of the node to copy * @param n number of copies */ void ONNXImporter::expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto, const std::string& input, size_t n) { std::vector input_names; input_names.reserve(n); for (size_t j = 0; j < n; j++) { LayerParams copyLP; copyLP.name = format("%s/copy_%zu", prefix.c_str(), j); copyLP.type = "Identity"; CV_Assert((layer_id.find(copyLP.name) == layer_id.end()) && "Couldn't copy the node: generated name already exists in the graph."); input_names.push_back(copyLP.name); node_proto.set_input(0, input); node_proto.set_output(0, copyLP.name); addLayer(copyLP, node_proto); } node_proto.clear_input(); for (size_t i = 0; i < input_names.size(); i++) { node_proto.add_input(input_names[i]); } } void ONNXImporter::addConstant(const std::string& name, const Mat& blob) { CV_LOG_DEBUG(NULL, "DNN/ONNX: add constant '" << name << "' shape=" << toString(shape(blob)) << ": " << toString(blob)); constBlobs.insert(std::make_pair(name, blob)); outShapes.insert(std::make_pair(name, shape(blob))); } void ONNXImporter::parseOperatorSet() { int ir_version = model_proto.has_ir_version() ? 
static_cast(model_proto.ir_version()) : -1; if (ir_version < 3) return; int opset_size = model_proto.opset_import_size(); if (opset_size <= 0) { CV_LOG_INFO(NULL, "DNN/ONNX: missing opset information") return; } for (int i = 0; i < opset_size; ++i) { const ::opencv_onnx::OperatorSetIdProto& opset_entry = model_proto.opset_import(i); const std::string& domain = opset_entry.has_domain() ? opset_entry.domain() : std::string(); int version = opset_entry.has_version() ? opset_entry.version() : -1; if (domain.empty() || domain == str_domain_ai_onnx) { // ONNX opset covered by specification: https://github.com/onnx/onnx/blob/master/docs/Operators.md onnx_opset = std::max(onnx_opset, version); onnx_opset_map[str_domain_ai_onnx] = onnx_opset; } else { CV_LOG_DEBUG(NULL, "DNN/ONNX: using non-standard ONNX opset[" << i << "]: domain='" << domain << "' version=" << version); onnx_opset_map[domain] = onnx_opset; } } CV_LOG_INFO(NULL, "DNN/ONNX: ONNX opset version = " << onnx_opset); buildDispatchMap_ONNX_AI(onnx_opset); for (const auto& pair : onnx_opset_map) { if (pair.first == str_domain_ai_onnx) { continue; // done above } else if (pair.first == "com.microsoft") { buildDispatchMap_COM_MICROSOFT(pair.second); } else { CV_LOG_INFO(NULL, "DNN/ONNX: unknown domain='" << pair.first << "' version=" << pair.second << ". No dispatch map, you may need to register 'custom' layers."); } } } static bool ifInt8Output(const String& layerType) { // Contains all node types whose output should be int8 when it get int8 input. 
// ai.onnx opset 15 static std::vector input8output8List = { "QuantizeLinear", "QLinearAdd", "QLinearMul", "QLinearAveragePool", "QLinearGlobalAveragePool", "QLinearLeakyRelu", "QLinearSigmoid", "QLinearConcat", "QGemm", "QLinearConv", "QLinearMatMul", "MaxPool", "ReduceMax", "ReduceMin", "Split", "Clip", "Abs", "Transpose", "Squeeze", "Flatten", "Unsqueeze", "Expand", "Reshape", "Pad", "Gather", "Concat", "Resize", "SpaceToDepth", "DepthToSpace", "Pow", "Add", "Sub", "Mul", "Div" }; auto layerIt = std::find(input8output8List.begin(), input8output8List.end(), layerType); return layerIt != input8output8List.end(); } void ONNXImporter::setParamsDtype(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { // If the current layer should output the same data type as the input, and it's input type is int8, we think the current // layer should also output int8. // Check if the layer has int8 input. const std::string& layer_type = node_proto.op_type(); for (int i = 0; i < node_proto.input_size(); ++i) { if (layer_id.find(node_proto.input(i)) != layer_id.end()) { LayerInfo layerInfo = layer_id.find(node_proto.input(i))->second; if (layerInfo.depth == CV_8S && ifInt8Output(layer_type)) { layerParams.set("depth", CV_8S); return; } } } layerParams.set("depth", CV_32F); } void ONNXImporter::populateNet() { CV_Assert(model_proto.has_graph()); graph_proto = model_proto.graph(); std::string framework_version; if (model_proto.has_producer_name()) framework_name = model_proto.producer_name(); if (model_proto.has_producer_version()) framework_version = model_proto.producer_version(); CV_LOG_INFO(NULL, "DNN/ONNX: loading ONNX" << (model_proto.has_ir_version() ? cv::format(" v%d", (int)model_proto.ir_version()) : cv::String()) << " model produced by '" << framework_name << "'" << (framework_version.empty() ? cv::String() : cv::format(":%s", framework_version.c_str())) << ". 
Number of nodes = " << graph_proto.node_size() << ", initializers = " << graph_proto.initializer_size() << ", inputs = " << graph_proto.input_size() << ", outputs = " << graph_proto.output_size() ); parseOperatorSet(); simplifySubgraphs(graph_proto); const int layersSize = graph_proto.node_size(); CV_LOG_DEBUG(NULL, "DNN/ONNX: graph simplified to " << layersSize << " nodes"); constBlobs = getGraphTensors(graph_proto); // scan GraphProto.initializer std::vector netInputs; // map with network inputs (without const blobs) // Add all the inputs shapes. It includes as constant blobs as network's inputs shapes. for (int i = 0; i < graph_proto.input_size(); ++i) { const opencv_onnx::ValueInfoProto& valueInfoProto = graph_proto.input(i); CV_Assert(valueInfoProto.has_name()); const std::string& name = valueInfoProto.name(); CV_Assert(valueInfoProto.has_type()); const opencv_onnx::TypeProto& typeProto = valueInfoProto.type(); CV_Assert(typeProto.has_tensor_type()); const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type(); CV_Assert(tensor.has_shape()); const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape(); int dim_size = tensorShape.dim_size(); CV_CheckGE(dim_size, 0, ""); // some inputs are scalars (dims=0), e.g. 
in Test_ONNX_nets.Resnet34_kinetics test MatShape inpShape(dim_size); for (int j = 0; j < dim_size; ++j) { const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j); if (dimension.has_dim_param()) { CV_LOG_DEBUG(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)"); } // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition if (dimension.has_denotation()) { CV_LOG_INFO(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'"); } inpShape[j] = dimension.dim_value(); // NHW, NCHW(NHWC), NCDHW(NDHWC); do not set this flag if only N is dynamic if (dimension.has_dim_param() && !(j == 0 && inpShape.size() >= 3)) { hasDynamicShapes = true; } } bool isInitialized = ((constBlobs.find(name) != constBlobs.end())); CV_LOG_IF_DEBUG(NULL, !isInitialized, "DNN/ONNX: input[" << i << " as '" << name << "'] shape=" << toString(inpShape)); CV_LOG_IF_VERBOSE(NULL, 0, isInitialized, "DNN/ONNX: pre-initialized input[" << i << " as '" << name << "'] shape=" << toString(inpShape)); if (dim_size > 0 && !hasDynamicShapes) // FIXIT result is not reliable for models with multiple inputs { inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size } outShapes[valueInfoProto.name()] = inpShape; // fill map: push layer name, layer id and output id if (!isInitialized) { netInputs.push_back(name); layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1))); } } dstNet.setInputsNames(netInputs); // dump outputs for (int i = 0; i < graph_proto.output_size(); ++i) { dumpValueInfoProto(i, graph_proto.output(i), "output"); } if (DNN_DIAGNOSTICS_RUN) { CV_LOG_INFO(NULL, "DNN/ONNX: start diagnostic run!"); layerHandler->fillRegistry(graph_proto); } for(int li = 0; li < layersSize; li++) { const opencv_onnx::NodeProto& node_proto = graph_proto.node(li); handleNode(node_proto); } // register outputs for (int i = 0; i < 
graph_proto.output_size(); ++i) { const std::string& output_name = graph_proto.output(i).name(); if (output_name.empty()) { CV_LOG_ERROR(NULL, "DNN/ONNX: can't register output without name: " << i); continue; } ConstIterLayerId_t layerIt = layer_id.find(output_name); if (layerIt == layer_id.end()) { CV_LOG_ERROR(NULL, "DNN/ONNX: can't find layer for output name: '" << output_name << "'. Does model imported properly?"); continue; } const LayerInfo& li = layerIt->second; int outputId = dstNet.registerOutput(output_name, li.layerId, li.outputId); CV_UNUSED(outputId); // no need to duplicate message from engine: CV_LOG_DEBUG(NULL, "DNN/ONNX: registered output='" << output_name << "' with id=" << outputId); } CV_LOG_DEBUG(NULL, (DNN_DIAGNOSTICS_RUN ? "DNN/ONNX: diagnostic run completed!" : "DNN/ONNX: import completed!")); } std::string ONNXImporter::getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto) { if (!node_proto.has_domain()) return str_domain_ai_onnx; const std::string& domain = node_proto.domain(); if (domain.empty()) return str_domain_ai_onnx; return domain; } const ONNXImporter::DispatchMap& ONNXImporter::getDispatchMap(const opencv_onnx::NodeProto& node_proto) { static DispatchMap empty_map; const std::string& layer_type_domain = getLayerTypeDomain(node_proto); auto it = domain_dispatch_map.find(layer_type_domain); if (it == domain_dispatch_map.end()) { return empty_map; } return it->second; } std::string ONNXImporter::extractNodeName(const opencv_onnx::NodeProto& node_proto) { // We need to rework DNN outputs API, this is a workaround for #21698 if (node_proto.has_name() && !node_proto.name().empty()) { if (useLegacyNames) return node_proto.name(); return cv::format("onnx_node!%s", node_proto.name().c_str()); } for (int i = 0; i < node_proto.output_size(); ++i) { const std::string& name = node_proto.output(i); // There are two ways to leave an optional input or output unspecified: // the first, available only for trailing inputs and outputs, is to 
simply not provide that input; // the second method is to use an empty string in place of an input or output name. if (!name.empty()) { if (useLegacyNames) return name.c_str(); return cv::format("onnx_node_output_%d!%s", i, name.c_str()); } } CV_Error(Error::StsAssert, "Couldn't deduce Node name."); } void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto) { CV_Assert(node_proto.output_size() >= 1); const std::string& name = extractNodeName(node_proto); const std::string& layer_type = node_proto.op_type(); const std::string& layer_type_domain = getLayerTypeDomain(node_proto); const auto& dispatch = getDispatchMap(node_proto); CV_LOG_INFO(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) << cv::format(" from %sdomain='", onnx_opset_map.count(layer_type_domain) == 1 ? "" : "undeclared ") << layer_type_domain << "'" ); if (dispatch.empty()) { CV_LOG_WARNING(NULL, "DNN/ONNX: missing dispatch map for domain='" << layer_type_domain << "'"); } LayerParams layerParams; try { // FIXIT not all cases can be repacked into "LayerParams". 
Importer should handle such cases directly for each "layer_type" layerParams = getLayerParams(node_proto); layerParams.name = name; layerParams.type = layer_type; layerParams.set("has_dynamic_shapes", hasDynamicShapes); setParamsDtype(layerParams, node_proto); DispatchMap::const_iterator iter = dispatch.find(layer_type); if (iter != dispatch.end()) { CALL_MEMBER_FN(*this, iter->second)(layerParams, node_proto); } else { parseCustomLayer(layerParams, node_proto); } } catch (const cv::Exception& e) { if (DNN_DIAGNOSTICS_RUN) { CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) << " from domain='" << layer_type_domain << "'" << "\n" << e.msg ); cv::AutoLock lock(getLayerFactoryMutex()); auto registeredLayers = getLayerFactoryImpl(); if (registeredLayers.find(layerParams.type) != registeredLayers.end()) { try { Ptr layer = LayerFactory::createLayerInstance(layerParams.type, layerParams); } catch (const std::exception& e) { CV_LOG_ERROR(NULL, "DNN/ONNX: Layer of type " << layerParams.type << "(" << layer_type << ") cannot be created with parameters " << layerParams << ". 
Error: " << e.what() ); } } } else { CV_LOG_ERROR(NULL, "DNN/ONNX: ERROR during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) << " from domain='" << layer_type_domain << "'" ); } for (int i = 0; i < node_proto.input_size(); i++) { CV_LOG_INFO(NULL, " Input[" << i << "] = '" << node_proto.input(i) << "'"); } for (int i = 0; i < node_proto.output_size(); i++) { CV_LOG_INFO(NULL, " Output[" << i << "] = '" << node_proto.output(i) << "'"); } if (DNN_DIAGNOSTICS_RUN) { for (int i = 0; i < node_proto.output_size(); ++i) { layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(0, i))); outShapes[node_proto.output(i)] = outShapes[node_proto.input(0)]; } } else CV_Error(Error::StsError, cv::format("Node [%s@%s]:(%s) parse error: %s", layer_type.c_str(), layer_type_domain.c_str(), name.c_str(), e.what())); } } void ONNXImporter::parseArg(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { const std::string& layer_type = node_proto.op_type(); layerParams.type = "Arg"; layerParams.set("op", layer_type == "ArgMax" ? 
"max" : "min"); addLayer(layerParams, node_proto); } void setCeilMode(LayerParams& layerParams) { // auto_pad attribute is deprecated and uses ceil if (layerParams.has("pad_mode")) { layerParams.set("ceil_mode", true); } else if (!layerParams.has("ceil_mode")) { layerParams.set("ceil_mode", false); } } void ONNXImporter::parseMaxUnpool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { layerParams.type = "MaxUnpool"; DictValue kernel_shape = layerParams.get("kernel_size"); CV_Assert(kernel_shape.size() == 2); layerParams.set("pool_k_w", kernel_shape.get(0)); layerParams.set("pool_k_h", kernel_shape.get(1)); int pool_pad_w = 0, pool_pad_h = 0; if (layerParams.has("pad")) { DictValue pads = layerParams.get("pad"); CV_CheckEQ(pads.size(), 2, ""); pool_pad_w = pads.get(0); pool_pad_h = pads.get(1); } layerParams.set("pool_pad_w", pool_pad_w); layerParams.set("pool_pad_h", pool_pad_h); int pool_stride_w = 1, pool_stride_h = 1; if (layerParams.has("stride")) { DictValue strides = layerParams.get("stride"); CV_CheckEQ(strides.size(), 2, ""); pool_stride_w = strides.get(0); pool_stride_h = strides.get(1); } layerParams.set("pool_stride_w", pool_stride_w); layerParams.set("pool_stride_h", pool_stride_h); addLayer(layerParams, node_proto); } void ONNXImporter::parseMaxPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { int depth = layerParams.get("depth", CV_32F); layerParams.type = (depth == CV_8S) ? 
"PoolingInt8" : "Pooling"; layerParams.set("pool", "MAX"); setCeilMode(layerParams); addLayer(layerParams, node_proto); } void ONNXImporter::parseAveragePool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { layerParams.type = "Pooling"; layerParams.set("pool", "AVE"); setCeilMode(layerParams); layerParams.set("ave_pool_padded_area", framework_name == "pytorch"); addLayer(layerParams, node_proto); } void ONNXImporter::parseGlobalPool(LayerParams &layerParams, const opencv_onnx::NodeProto &node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; const std::string& layer_type = node_proto.op_type(); const std::string output_name = node_proto.output(0); CV_Assert(node_proto.input_size() == 1); layerParams.type = "Pooling"; String pool; if (layer_type == "GlobalMaxPool") pool = "MAX"; else if (layer_type == "GlobalAveragePool") pool = "AVE"; else CV_Error(Error::StsNotImplemented, "Unsupported Pooling type of " + layer_type + " operation."); CV_Assert(!layerParams.has("axes")); layerParams.set("global_pooling", true); layerParams.set("pool", pool); addLayer(layerParams, node_proto); } void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; const std::string& layer_type = node_proto.op_type(); const std::string output_name = node_proto.output(0); int depth = layerParams.get("depth", CV_32F); CV_Assert(node_proto.input_size() <= 2); String reduceType; if (layer_type == "ReduceMax") reduceType = "MAX"; else if (layer_type == "ReduceMin") reduceType = "MIN"; else if (layer_type == "ReduceSum") reduceType = "SUM"; else if (layer_type == "ReduceSumSquare") reduceType = "SUM_SQUARE"; else if (layer_type == "ReduceProd") reduceType = "PROD"; else if (layer_type == "ReduceL1") reduceType = "L1"; else if (layer_type == "ReduceL2") reduceType = "L2"; else if (layer_type == "ReduceLogSum") reduceType = "LOG_SUM"; else if (layer_type == "ReduceLogSumExp") 
reduceType = "LOG_SUM_EXP"; else if (layer_type == "ReduceMean") reduceType = "AVE"; else CV_Error(Error::StsNotImplemented, "Unsupported Pooling type of " + layer_type + " operation."); // The ReduceInt8 can only support "MAX" and "MIN". if (depth == CV_8S) { CV_Assert(reduceType == "MAX" || reduceType == "MIN"); } layerParams.type = (depth == CV_8S) ? "ReduceInt8" : "Reduce"; layerParams.set("reduce", reduceType); bool keepdims = layerParams.get("keepdims", 1) == 1; MatShape inpShape = outShapes[node_proto.input(0)]; std::vector shouldDelete(inpShape.size(), false); if (layer_type == "ReduceSum" && node_proto.input_size() == 2) { if (constBlobs.find(node_proto.input(1)) != constBlobs.end()) { Mat axesMat = getBlob(node_proto, 1); int axesNum = axesMat.total(); for (int i = 0; i < axesNum; i++) { int axis = normalize_axis(axesMat.at(i), inpShape.size()); shouldDelete[axis] = true; } } else // in opset 13, the ReduceSum has two input, it takes axes as input instead of attribute // details:https://github.com/onnx/onnx/issues/3420#issuecomment-844295687 CV_Error(Error::StsNotImplemented, "Non-constant axis values in ReduceSum are not supported."); } else { if (layerParams.has("axes")) { DictValue axes = layerParams.get("axes"); for (int i = 0; i < axes.size(); i++) { int axis = normalize_axis(axes.get(i), inpShape.size()); shouldDelete[axis] = true; } } else { for (int i = 0; i < inpShape.size(); i++) { shouldDelete[i] = true; } } } std::vector targetShape; for (int i = 0; i < inpShape.size(); ++i) { if (!shouldDelete[i]) { targetShape.push_back(inpShape[i]); } else if (keepdims) { targetShape.push_back(1); } } if (targetShape.empty()) targetShape.push_back(1); // Using PermuteLayer to move the deleted axis to the last. std::vector perm(inpShape.size(), 0); for (int i = 0; i < inpShape.size(); i++) perm[i] = i; bool needPermuet = false; for (int i = 0; i < inpShape.size(); i++) { if (shouldDelete[i]) { // find the first not deleted element. 
std::vector::iterator iter = std::find(shouldDelete.begin() + i, shouldDelete.end(), false); if (iter != shouldDelete.end()) { int index = iter - shouldDelete.begin(); bool temp = shouldDelete[index]; shouldDelete[index] = shouldDelete[i]; shouldDelete[i] = temp; std::swap(perm[index], perm[i]); std::swap(inpShape[index], inpShape[i]); needPermuet = true; } else break; } } auto inputString= node_proto.input(0); if (needPermuet) { LayerParams permuteLp; permuteLp.name = layerParams.name + "/permute"; permuteLp.type = (depth == CV_8S) ? "PermuteInt8" : "Permute"; permuteLp.set("order", DictValue::arrayInt(perm.data(), perm.size())); opencv_onnx::NodeProto protoPermute; protoPermute.add_input(inputString); protoPermute.add_output(permuteLp.name); addLayer(permuteLp, protoPermute); inputString = permuteLp.name; } std::vector deletedDims; for (int axis_i = 0; axis_i < inpShape.size(); ++axis_i) { if (shouldDelete[axis_i]) { deletedDims.push_back(inpShape[axis_i]); } } layerParams.set("deleted_dims", DictValue::arrayInt(&deletedDims[0], deletedDims.size())); layerParams.set("target_dims", DictValue::arrayInt(&targetShape[0], targetShape.size())); node_proto.set_input(0, inputString); node_proto.set_output(0, output_name); addLayer(layerParams, node_proto); } void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { MatShape inpShape = outShapes[node_proto.input(0)]; int dims = inpShape.size(); std::vector begin(dims, 0); std::vector end(dims, INT_MAX); std::vector steps; int inp_size = node_proto.input_size(); int axis = 0; bool has_axes = false; DictValue starts_, ends_, axes_, steps_; // opset = 1 if (inp_size == 1) { starts_ = layerParams.get("starts"); ends_ = layerParams.get("ends"); CV_Assert(starts_.size() == ends_.size()); if (layerParams.has("axes")) { axes_ = layerParams.get("axes"); CV_Assert(axes_.size() == starts_.size()); axis = axes_.getIntValue(0) < 0 ? 
axes_.getIntValue(0) + dims : axes_.getIntValue(0); has_axes = true; } } // opset > 1 else { CV_Assert(inp_size >= 3); for (int i = 1; i < inp_size; ++i) { CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end()); } Mat start_blob = getBlob(node_proto, 1); Mat end_blob = getBlob(node_proto, 2); CV_Assert(start_blob.total() == end_blob.total()); starts_ = DictValue::arrayInt(start_blob.begin(), start_blob.total()); ends_ = DictValue::arrayInt(end_blob.begin(), end_blob.total()); if (inp_size > 3) { Mat axes_blob = getBlob(node_proto, 3); CV_Assert(axes_blob.total() == start_blob.total()); axes_ = DictValue::arrayInt(axes_blob.begin(), axes_blob.total()); axis = axes_.getIntValue(0) < 0 ? axes_.getIntValue(0) + dims : axes_.getIntValue(0); has_axes = true; } if (inp_size == 5) { Mat step_blob = getBlob(node_proto, 4); CV_Assert(step_blob.total() == start_blob.total()); steps_ = DictValue::arrayInt(step_blob.begin(), step_blob.total()); steps.resize(dims, 1); // Very strange application for Slice op with tensor reversing. // We just workaround it for 2d constants. if (constBlobs.find(node_proto.input(0)) != constBlobs.end() && axis == 0 && start_blob.at(0) == -1 && step_blob.at(0) == -1 && end_blob.at(0) == std::numeric_limits::min()) { Mat inp = getBlob(node_proto, 0); if (inp.dims == 2) { Mat flipped; flip(inp, flipped, 0); addConstant(node_proto.output(0), flipped); return; } } } } if (!has_axes) { // make a default axes [0, 1, 2...] Mat axes_tmp(1, starts_.size(), CV_32S); std::iota(axes_tmp.begin(), axes_tmp.end(), 0); axes_ = DictValue::arrayInt(axes_tmp.begin(), axes_tmp.total()); } int cur_axe; std::vector flag(dims, false); Mat axes(1, starts_.size(), CV_32S); auto axes_ptr = axes.ptr(); // resize begin and end for (int i = 0; i < axes_.size(); ++i) { // dims should be added to the negative axes cur_axe = axes_.getIntValue(i) < 0 ? 
axes_.getIntValue(i) + dims : axes_.getIntValue(i); CV_CheckGE(cur_axe, 0, "Axes should be grater or equal to '-dims'."); CV_CheckLT(cur_axe, dims, "Axes should be less than 'dim'."); CV_CheckEQ(flag[cur_axe], false, "Axes shouldn't have duplicated values."); flag[cur_axe] = true; // change axis to the minimum axe if (cur_axe < axis) axis = cur_axe; axes_ptr[i] = cur_axe; begin[cur_axe] = starts_.getIntValue(i); end[cur_axe] = ends_.getIntValue(i); } layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size())); layerParams.set("end", DictValue::arrayInt(&end[0], end.size())); layerParams.set("axis", axis); if (!steps.empty()) { for (int i = 0; i < axes.total(); ++i) steps[axes_ptr[i]] = steps_.getIntValue(i); layerParams.set("steps", DictValue::arrayInt(&steps[0], steps.size())); } if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) { Mat inp = getBlob(node_proto, 0); std::vector inputs, sliced; inputs.push_back(inp); runLayer(layerParams, inputs, sliced); CV_Assert(sliced.size() == 1); addConstant(node_proto.output(0), sliced[0]); return; } addLayer(layerParams, node_proto); } void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { if (layerParams.has("split")) { DictValue splits = layerParams.get("split"); const int numSplits = splits.size(); CV_Assert(numSplits > 1); std::vector slicePoints(numSplits - 1, splits.get(0)); for (int i = 1; i < splits.size() - 1; ++i) { slicePoints[i] = slicePoints[i - 1] + splits.get(i); } layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); } else { layerParams.set("num_split", node_proto.output_size()); } int depth = layerParams.get("depth", CV_32F); layerParams.type = (depth == CV_8S) ? 
"SliceInt8" : "Slice"; layerParams.set("axis", layerParams.get("axis", 0)); addLayer(layerParams, node_proto); } void ONNXImporter::parseNeg(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { layerParams.type = "Power"; layerParams.set("scale", -1); addLayer(layerParams, node_proto); } void ONNXImporter::parseConstant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { CV_Assert(node_proto.input_size() == 0); CV_Assert(layerParams.blobs.size() == 1); addConstant(node_proto.output(0), layerParams.blobs[0]); // add constant for constBlobsExtraInfo if (layerParams.has("original_dims_of_mat")) { int original_dims_of_mat = layerParams.get("original_dims_of_mat"); constBlobsExtraInfo.insert(std::make_pair(node_proto.output(0), TensorInfo(original_dims_of_mat))); } } void transformBlobs(std::vector& blobs) { Mat Wx = blobs[0]; Mat Wh = blobs[1]; Mat b = blobs[2]; std::vector cudaWorkaround; cudaWorkaround.push_back(Wx.clone()); cudaWorkaround.push_back(Wh.clone()); cudaWorkaround.push_back(b.clone()); const int numHidden = Wh.size[2]; Mat h0 = blobs[3]; h0 = h0.reshape(1, h0.size[0] * h0.size[1]); Mat c0 = blobs[4]; c0 = c0.reshape(1, c0.size[0] * c0.size[1]); b = b.reshape(1, b.size[0]); Mat bx = b.colRange(0, b.cols / 2); Mat bh = b.colRange(b.cols / 2, b.cols); b = bx + bh; auto toIFOC = [] (Mat& in) { int first = in.size[0]; int rest = in.total() / first / 4; // every weight blob contains weights for Input, Output, Forget and Cell gates Mat m = in.reshape(1, {first, 4, rest}); Mat outputGate = m.col(1); Mat forgetGate = m.col(2); std::swap_ranges(outputGate.begin(), outputGate.end(), forgetGate.begin()); }; toIFOC(Wx); toIFOC(Wh); toIFOC(b); Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); blobs[0] = Wh; blobs[1] = Wx; blobs[2] = b.reshape(1, 1); blobs[3] = h0; blobs[4] = c0; if (blobs.size() == 5) { // so that future patch removing copies can leave all indexing as is 
blobs.insert(blobs.begin(), cudaWorkaround.begin(), cudaWorkaround.end()); return; } Mat P = blobs[5]; blobs[5] = P.colRange(0, numHidden); blobs[5] = blobs[5].clone().reshape(1, blobs[5].total()); // Single column. blobs[5] = Mat::diag(blobs[5]); blobs.push_back(P.colRange(numHidden, 2 * numHidden)); blobs[6] = blobs[6].clone().reshape(1, blobs[6].total()); // Single column. blobs[6] = Mat::diag(blobs[6]); blobs.push_back(P.colRange(2 * numHidden, 3 * numHidden)); blobs[7] = blobs[7].clone().reshape(1, blobs[7].total()); // Single column. blobs[7] = Mat::diag(blobs[7]); // so that future patch removing copies can leave all indexing as is blobs.insert(blobs.begin(), cudaWorkaround.begin(), cudaWorkaround.end()); } void ONNXImporter::lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size) { MatShape blobShape(blobShape_, blobShape_ + size); Mat blob; if (idx < lstm_proto.input_size() && !lstm_proto.input(idx).empty()) { blob = getBlob(lstm_proto, idx); CV_Assert(shape(blob) == blobShape); } else { blob = Mat(blobShape, CV_32FC1, 0.); } layerParams.blobs.push_back(blob); }; void ONNXImporter::lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n) { LayerParams reshapeLp; reshapeLp.name = cv::format("%s/reshape", input_name.c_str()); reshapeLp.type = "Reshape"; CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); reshapeLp.set("dim", DictValue::arrayInt(layerShape, n)); opencv_onnx::NodeProto reshape_proto; reshape_proto.add_input(input_name); reshape_proto.add_output(output_name); addLayer(reshapeLp, reshape_proto); }; std::string ONNXImporter::lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n) { LayerParams sliceLP; sliceLP.name = cv::format("%s/slice_%d", input_name.c_str(), index); sliceLP.type = "Slice"; CV_Assert(layer_id.find(sliceLP.name) == layer_id.end()); sliceLP.set("begin", 
DictValue::arrayInt(begin, n)); sliceLP.set("end", DictValue::arrayInt(end, n)); sliceLP.set("axis", 0); opencv_onnx::NodeProto slice_proto; slice_proto.add_input(input_name); slice_proto.add_output(sliceLP.name); addLayer(sliceLP, slice_proto); return slice_proto.output(0); }; std::string ONNXImporter::lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name, const int index) { std::string reshape_output = cv::format("%s/reshape_%d", layerParams.name.c_str(), index); // reshape from Seq, Batch, Dirs*Hidden to Seq, Batch, Dirs, Hidden // to not confuse reshape with dynamic first dimension, zero means 'leave unchanged' int layerShape[] = {0, batch_size, num_directions, hidden_size}; lstm_add_reshape(lstm_proto.output(index), reshape_output, layerShape, sizeof(layerShape) / sizeof(layerShape[0])); // permute from Seq, Batch, Dirs, Hidden to Seq, Dirs, Batch, Hidden LayerParams permuteLP; permuteLP.name = reshape_output + "/permute"; permuteLP.type = "Permute"; CV_Assert(layer_id.find(permuteLP.name) == layer_id.end()); int order[] = {0, 2, 1, 3}; permuteLP.set("order", DictValue::arrayInt(order, 4)); opencv_onnx::NodeProto permute_proto; permute_proto.add_input(reshape_output); permute_proto.add_output((need_y && index == 0) ? 
y_name : static_cast(permuteLP.name)); addLayer(permuteLP, permute_proto); return permute_proto.output(0); }; void ONNXImporter::lstm_add_transform(int num_directions, int batch_size, int hidden_size, int index, const std::string& input_name, const std::string& output_name) { if (num_directions == 1) { // Slice: Yh = Y[-1, :, :, :] int begin[] = {-1}, end[] = {INT_MAX}; std::string slice_output = lstm_add_slice(index, input_name, begin, end, sizeof(begin) / sizeof(begin[0])); // Reshape: 1x1xBxH -> 1xBxH int layerShape[] = {1, batch_size, hidden_size}; lstm_add_reshape(slice_output, output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0])); } else { // Slice: SxDxBxH -> last sequence, first direction int begin0[] = {-1, 0}, end0[] = {INT_MAX, 1}; std::string slice_0 = lstm_add_slice(0, input_name, begin0, end0, sizeof(begin0) / sizeof(begin0[0])); // Slice: SxDxBxH -> first sequence, last direction int begin1[] = {0, -1}, end1[] = {1, INT_MAX}; std::string slice_1 = lstm_add_slice(1, input_name, begin1, end1, sizeof(begin1) / sizeof(begin1[0])); LayerParams concatLP; concatLP.name = cv::format("%s/concat", input_name.c_str()); concatLP.type = "Concat"; CV_Assert(layer_id.find(concatLP.name) == layer_id.end()); concatLP.set("axis", 1); // 1x1xBxH -> 1x2xBxH opencv_onnx::NodeProto concat_proto; concat_proto.add_input(slice_0); concat_proto.add_input(slice_1); concat_proto.add_output(concatLP.name); addLayer(concatLP, concat_proto); // Reshape: 1x2xBxH -> 2xBxH int layerShape[] = {2, batch_size, hidden_size}; lstm_add_reshape(concat_proto.output(0), output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0])); } }; void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) { opencv_onnx::NodeProto lstm_proto = node_proto_; layerParams.name += "/lstm"; // https://github.com/onnx/onnx/blob/main/docs/Operators.md#LSTM CV_Assert(lstm_proto.input_size() >= 3); for (size_t i = 1; i < 3; ++i) { const std::string& name 
= lstm_proto.input(i); CV_Assert(!name.empty() && constBlobs.count(name) == 1); } IterShape_t shapeIt = outShapes.find(lstm_proto.input(0)); CV_Assert(shapeIt != outShapes.end()); const MatShape x_shape = shapeIt->second; const int seq_length = x_shape[0]; const int batch_size = x_shape[1]; const int input_size = x_shape[2]; const int hidden_size = layerParams.get("hidden_size"); const int num_directions = constBlobs[lstm_proto.input(1)].size[0]; int w_size[] = {num_directions, 4*hidden_size, input_size}; lstm_extractConsts(layerParams, lstm_proto, 1, w_size, sizeof(w_size) / sizeof(w_size[0])); // W int r_size[] = {num_directions, 4*hidden_size, hidden_size}; lstm_extractConsts(layerParams, lstm_proto, 2, r_size, sizeof(r_size) / sizeof(r_size[0])); // R int b_size[] = {num_directions, 8*hidden_size}; lstm_extractConsts(layerParams, lstm_proto, 3, b_size, sizeof(b_size) / sizeof(b_size[0])); // B if (4 < lstm_proto.input_size() && !lstm_proto.input(4).empty()) { Mat blob = getBlob(lstm_proto, 4); CV_Assert(blob.total() == batch_size); for (MatIterator_ it = blob.begin(); it != blob.end(); ++it) { CV_Assert(*it == seq_length); } } int h_size[] = {num_directions, batch_size, hidden_size}; lstm_extractConsts(layerParams, lstm_proto, 5, h_size, sizeof(h_size) / sizeof(h_size[0])); // initial_h int c_size[] = {num_directions, batch_size, hidden_size}; lstm_extractConsts(layerParams, lstm_proto, 6, c_size, sizeof(c_size) / sizeof(c_size[0])); // initial_c if (lstm_proto.input_size() > 7 && !lstm_proto.input(7).empty()) { layerParams.set("use_peephole", true); int p_size[] = {num_directions, 3 * hidden_size}; lstm_extractConsts(layerParams, lstm_proto, 7, p_size, sizeof(p_size) / sizeof(p_size[0])); // P } transformBlobs(layerParams.blobs); layerParams.set("is_onnx", true); layerParams.set("reverse", layerParams.get("direction", "") == "reverse"); layerParams.set("bidirectional", layerParams.get("direction", "") == "bidirectional"); bool need_yc = 
lstm_proto.output_size() > 2 && !lstm_proto.output(2).empty(); bool need_yh = lstm_proto.output_size() > 1 && !lstm_proto.output(1).empty(); bool need_y = lstm_proto.output_size() > 0 && !lstm_proto.output(0).empty(); const std::string y_name = need_y ? lstm_proto.output(0) : ""; const std::string yh_name = need_yh ? lstm_proto.output(1) : ""; const std::string yc_name = need_yc ? lstm_proto.output(2) : ""; layerParams.set("produce_cell_output", need_yc); lstm_proto.clear_output(); if (need_y || need_yh) { // give random names to LSTMLayer's outputs because every output needs postprocessing lstm_proto.add_output(cv::format("%s_y", layerParams.name.c_str())); } if (need_yc) { lstm_proto.add_output(yc_name); } addLayer(layerParams, lstm_proto); std::string y_output = lstm_fix_dims(layerParams, lstm_proto, batch_size, num_directions, hidden_size, need_y, y_name, 0); if (need_yh) { lstm_add_transform(num_directions, batch_size, hidden_size, 0, y_output, yh_name); } } void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; const std::string output_name = node_proto.output(0); LayerParams gruParams = layerParams; gruParams.name += "/gru"; // https://pytorch.org/docs/stable/generated/torch.nn.GRU.html?highlight=gru# CV_Assert(node_proto.input_size() == 6); Mat Wx = getBlob(node_proto, 1); Mat Wh = getBlob(node_proto, 2); Mat b = getBlob(node_proto, 3); Mat h0 = getBlob(node_proto, 5); Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); h0 = h0.reshape(1, h0.size[0] * h0.size[1]); b = b.reshape(1, b.size[0]); gruParams.blobs.resize(4); gruParams.blobs[0] = Wh; gruParams.blobs[1] = Wx; gruParams.blobs[2] = b; gruParams.blobs[3] = h0; gruParams.set("bidirectional", gruParams.get("direction", "") == "bidirectional"); node_proto.set_output(0, gruParams.name); // set different name so output shapes will be registered on that name addLayer(gruParams, 
node_proto); MatShape gruShape = outShapes[node_proto.output(0)]; // Add fake 1 as it is done in ONNX gruShape.insert(gruShape.begin() + 1, 1); layerParams.type = "Reshape"; layerParams.set("dim", DictValue::arrayInt(&gruShape[0], gruShape.size())); node_proto.set_input(0, gruParams.name); // redirect input to GRU node_proto.set_output(0, output_name); // keep origin GRU's name addLayer(layerParams, node_proto); } void ONNXImporter::parseImageScaler(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { const float scale = layerParams.has("scale") ? layerParams.get("scale") : 1.0f; layerParams.erase("scale"); if (layerParams.has("bias")) { layerParams.type = "Scale"; layerParams.blobs.push_back( Mat(Size(1, layerParams.get("bias").size()), CV_32FC1, scale)); layerParams.set("bias_term", true); Mat bias(1, layerParams.get("bias").size(), CV_32FC1); for (int j = 0; j < bias.total(); j++) { bias.at(0, j) = layerParams.get("bias").getRealValue(j); } layerParams.blobs.push_back(bias); layerParams.erase("bias"); } else { layerParams.set("scale", scale); layerParams.type = "Power"; } addLayer(layerParams, node_proto); } void ONNXImporter::parseClip(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { layerParams.type = "ReLU6"; float min_value = -FLT_MAX, max_value = FLT_MAX; int input_size = node_proto.input_size(); CV_Check(input_size, 1 <= input_size && input_size <= 3, ""); if (input_size >= 2 && !node_proto.input(1).empty()) { if (constBlobs.find(node_proto.input(1)) != constBlobs.end()) min_value = getBlob(node_proto, 1).at(0); else CV_Error(Error::StsNotImplemented, "Non-constant min values in Clip are not supported"); } if (input_size == 3 && !node_proto.input(2).empty()) { if (constBlobs.find(node_proto.input(2)) != constBlobs.end()) max_value = getBlob(node_proto, 2).at(0); else CV_Error(Error::StsNotImplemented, "Non-constant max values in Clip are not supported"); } layerParams.set("min_value", layerParams.get("min", 
min_value)); layerParams.set("max_value", layerParams.get("max", max_value)); addLayer(layerParams, node_proto); } void ONNXImporter::parseLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { layerParams.type = "ReLU"; layerParams.set("negative_slope", layerParams.get("alpha", 0.01)); addLayer(layerParams, node_proto); } void ONNXImporter::parseRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { layerParams.type = "ReLU"; addLayer(layerParams, node_proto); } void ONNXImporter::parseElu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { layerParams.type = "ELU"; addLayer(layerParams, node_proto); } void ONNXImporter::parseTanh(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { layerParams.type = "TanH"; addLayer(layerParams, node_proto); } void ONNXImporter::parseAbs(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { layerParams.type = "AbsVal"; addLayer(layerParams, node_proto); } void ONNXImporter::parsePRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { layerParams.type = "PReLU"; layerParams.blobs.push_back(getBlob(node_proto, 1)); addLayer(layerParams, node_proto); } void ONNXImporter::parseLRN(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { replaceLayerParam(layerParams, "size", "local_size"); addLayer(layerParams, node_proto); } void ONNXImporter::parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; if (node_proto.input_size() != 3) CV_Error(Error::StsNotImplemented, "Expected input, scale, bias"); layerParams.blobs.resize(4); layerParams.blobs[2] = getBlob(node_proto, 1); // weightData layerParams.blobs[3] = getBlob(node_proto, 2); // biasData layerParams.set("has_bias", true); layerParams.set("has_weight", true); // Get number of channels in input int size = layerParams.blobs[2].total(); layerParams.blobs[0] = 
Mat::zeros(size, 1, CV_32F); // mean layerParams.blobs[1] = Mat::ones(size, 1, CV_32F); // std LayerParams mvnParams; mvnParams.name = layerParams.name + "/MVN"; mvnParams.type = "MVN"; mvnParams.set("eps", layerParams.get("epsilon")); layerParams.erase("epsilon"); //Create MVN layer int id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); //Connect to input IterLayerId_t layerId = layer_id.find(node_proto.input(0)); CV_Assert(layerId != layer_id.end()); dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); //Add shape layer_id.insert(std::make_pair(mvnParams.name, LayerInfo(id, 0))); outShapes[mvnParams.name] = outShapes[node_proto.input(0)]; //Replace Batch Norm's input to MVN node_proto.set_input(0, mvnParams.name); layerParams.type = "BatchNorm"; addLayer(layerParams, node_proto); } void ONNXImporter::parseBatchNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { if (node_proto.input_size() != 5) CV_Error(Error::StsNotImplemented, "Expected input, scale, bias, mean and var"); layerParams.type = "BatchNorm"; replaceLayerParam(layerParams, "epsilon", "eps"); replaceLayerParam(layerParams, "spatial", "use_global_stats"); Mat meanData = getBlob(node_proto, 3); Mat stdData = getBlob(node_proto, 4); layerParams.blobs.push_back(meanData); layerParams.blobs.push_back(stdData); if (!node_proto.input(1).empty()) { layerParams.set("has_weight", true); layerParams.blobs.push_back(getBlob(node_proto, 1)); // weightData } else { layerParams.set("has_weight", false); } if (!node_proto.input(2).empty()) { layerParams.set("has_bias", true); layerParams.blobs.push_back(getBlob(node_proto, 2)); // biasData } else { layerParams.set("has_bias", false); } addLayer(layerParams, node_proto); } // A * B + C = Y, we require that the dimension of A is [m, k], and the dimension of B is [n, k]. 
// And the dim of output Y is [m, n]
// Gemm: mapped to InnerProduct. When B (input 1) is constant and transA==0 the
// weights are folded into the layer blob (transposed so the layer sees [n, k]);
// otherwise the constant operands are materialized as Const layers.
void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert(node_proto.input_size() >= 2);
    layerParams.type = "InnerProduct";

    int transA = layerParams.get("transA", 0);
    layerParams.set("transA", transA == 1);
    // Constant A is carried into the graph as an explicit Const node.
    if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
    {
        Mat inputBuf = getBlob(node_proto, 0);

        LayerParams constParams;
        constParams.name = node_proto.input(0);
        constParams.type = "Const";
        constParams.blobs.push_back(inputBuf);

        opencv_onnx::NodeProto proto;
        proto.add_output(constParams.name);
        addLayer(constParams, proto);
    }

    int transB = layerParams.get("transB", 0);
    if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
    {
        Mat weights = getBlob(node_proto, 1);

        if (transA == 0) // optimized branch, for now, we can only optimize the Gemm when transA = 0.
        {
            if (transB == 0)
            {
                // InnerProduct expects [n, k] weights; pre-transpose once at import time.
                transpose(weights, weights);
            }
            layerParams.set("transB", false);
            layerParams.blobs.push_back(weights);
            layerParams.set("num_output", layerParams.blobs[0].size[0]);
        }
        else // no optimized branch, TODO! optimize when the transA==1.
        {
            LayerParams constParams;
            constParams.name = node_proto.input(1);
            constParams.type = "Const";
            constParams.blobs.push_back(weights);

            opencv_onnx::NodeProto proto;
            proto.add_output(constParams.name);
            addLayer(constParams, proto);
            layerParams.set("transB", transB == 1);
        }
    }
    else
        layerParams.set("transB", transB == 1);

    // Optional C (input 2) becomes the bias blob.
    if (node_proto.input_size() == 3)
    {
        Mat bias = getBlob(node_proto, 2);
        layerParams.blobs.push_back(bias);
    }

    layerParams.set("bias_term", node_proto.input_size() == 3);
    layerParams.set("is_matmul", true);
    addLayer(layerParams, node_proto);
}

// MatMul: mapped to InnerProduct without bias. A constant second operand is folded
// into the layer blob after transposing its last two dimensions.
void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
{
    opencv_onnx::NodeProto node_proto = node_proto_;
    CV_CheckEQ(node_proto.input_size(), 2, "");
    layerParams.type = "InnerProduct";
    layerParams.set("bias_term", false);

    int firstInpDims, secondInpDims;
    if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
    {
        Mat blob = getBlob(node_proto, 0);
        firstInpDims = blob.dims;
        LayerParams constParams;
        constParams.name = layerParams.name + "/const_0";
        constParams.type = "Const";
        constParams.blobs.push_back(blob);
        opencv_onnx::NodeProto tmpProto;
        tmpProto.add_output(constParams.name);
        addLayer(constParams, tmpProto);
        node_proto.set_input(0, constParams.name);
    }
    else
        firstInpDims = outShapes[node_proto.input(0)].size();
    if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
    {
        Mat blob = getBlob(node_proto, 1);
        Mat transBlob;
        secondInpDims = blob.dims;
        // create order transposing last 2 dimensions
        std::vector order(secondInpDims);
        std::iota(order.begin(), order.end(), 0);
        std::swap(order[secondInpDims - 2], order[secondInpDims - 1]);
        transposeND(blob, order, transBlob);
        layerParams.blobs.push_back(transBlob);
        int numOutput = layerParams.blobs[0].total(0, secondInpDims - 1);
        layerParams.set("num_output", numOutput);
        layerParams.set("is_matmul", true);
    }
    else
        secondInpDims = outShapes[node_proto.input(1)].size();
    layerParams.set("axis", firstInpDims - 1);
    addLayer(layerParams, node_proto);
}

// Conv: constant weight/bias inputs (index >= 1) become layer blobs; num_output is
// taken from the weight blob (or from the weight's known shape when non-constant).
void ONNXImporter::parseConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
{
    opencv_onnx::NodeProto node_proto = node_proto_;
    CV_Assert(node_proto.input_size() >= 2);
    layerParams.type = "Convolution";
    for (int j = 1; j < node_proto.input_size(); j++) {
        if (constBlobs.find(node_proto.input(j)) != constBlobs.end())
        {
            layerParams.blobs.push_back(getBlob(node_proto, j));
        }
    }
    int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0];
    layerParams.set("num_output", outCn);
    addLayer(layerParams, node_proto);
}

// ConvTranspose: mapped to Deconvolution. When an explicit "output_shape" is given,
// per-axis adjustment pads are derived from it; continued in the next chunk.
void ONNXImporter::parseConvTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert(node_proto.input_size() >= 2);
    layerParams.type = "Deconvolution";
    for (int j = 1; j < node_proto.input_size(); j++) {
        layerParams.blobs.push_back(getBlob(node_proto, j));
    }
    // Deconvolution weights are [inCn, outCn/group, ...]: output channels come from size[1].
    layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get("group", 1));
    layerParams.set("bias_term", node_proto.input_size() == 3);

    if (!layerParams.has("kernel_size"))
        CV_Error(Error::StsNotImplemented,
                 "Required attribute 'kernel_size' is not present.");

    if (layerParams.has("output_shape"))
    {
        const DictValue& outShape = layerParams.get("output_shape");
        DictValue strides = layerParams.get("stride");
        DictValue kernel = layerParams.get("kernel_size");

        String padMode;
        std::vector adjust_pads;
        if (layerParams.has("pad_mode"))
        {
            padMode = toUpperCase(layerParams.get("pad_mode"));
            if (padMode != "SAME" && padMode != "VALID")
                CV_Error(Error::StsError, "Unsupported padding mode " + padMode);

            for (int i = 0; i < strides.size(); i++)
            {
                // Spatial sizes start at attribute index 2 (after N and C).
                int sz = outShape.get(2 + i);
                int stride = strides.get(i);
                adjust_pads.push_back(padMode == "SAME"?
// (continuation: completes the SAME/VALID adjustment-pad expression started above)
                                      (sz - 1) % stride :
                                      (sz - kernel.get(i)) % stride);
            }
            layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], adjust_pads.size()));
        }
    }
    else if (layerParams.has("output_padding"))
    {
        replaceLayerParam(layerParams, "output_padding", "adj");
    }
    addLayer(layerParams, node_proto);
}

// Transpose: mapped to Permute (PermuteInt8 for int8 graphs). A missing "perm"
// attribute means "reverse all axes" per the ONNX default; a constant input is
// folded immediately by running the layer at import time.
void ONNXImporter::parseTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    int depth = layerParams.get("depth", CV_32F);
    layerParams.type = (depth == CV_8S) ? "PermuteInt8" : "Permute";
    replaceLayerParam(layerParams, "perm", "order");
    if (!layerParams.has("order")) {
        MatShape inpShape = outShapes[node_proto.input(0)];
        size_t dims = inpShape.size();
        // ONNX default: reverse the dimension order.
        std::vector perm(dims);
        for (size_t d = 0; d < dims; ++d)
        {
            perm[d] = static_cast(dims - 1 - d);
        }
        layerParams.set("order", DictValue::arrayInt(perm.data(), perm.size()));
    }
    CV_Assert(node_proto.input_size() == 1);

    if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
    {
        // Constant folding: run Permute now and store the result.
        std::vector inputs(1, getBlob(node_proto, 0)), transposed;
        runLayer(layerParams, inputs, transposed);
        CV_Assert(transposed.size() == 1);
        addConstant(node_proto.output(0), transposed[0]);
        return;
    }
    addLayer(layerParams, node_proto);
}

// Squeeze: axes come either from the "axes" attribute or from a constant second
// input; only dimensions that are actually 1 get removed. Implemented as Reshape
// (or Identity if nothing is squeezed).
void ONNXImporter::parseSqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert(node_proto.input_size() <= 2);
    MatShape inpShape = outShapes[node_proto.input(0)];
    std::vector maskedAxes(inpShape.size(), false);
    if (layerParams.has("axes"))
    {
        DictValue axes_dict = layerParams.get("axes");
        for (int i = 0; i < axes_dict.size(); ++i)
        {
            int axis = axes_dict.getIntValue(i);
            CV_CheckLE(axis, static_cast(inpShape.size()), "Squeeze axis");
            // Only squeeze a requested axis when its extent really is 1.
            maskedAxes[axis] = inpShape[axis] == 1;
        }
    }
    else if (node_proto.input_size() == 2)
    {
        if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
        {
            Mat axesMat = getBlob(node_proto, 1);
            if (axesMat.depth() == CV_32F)
                axesMat.convertTo(axesMat, CV_32S);
            size_t axesLen = axesMat.total();
            for (int i = 0; i < axesLen; i++)
            {
                int axis = axesMat.at(i);
                CV_CheckLE(axis, static_cast(inpShape.size()), "Squeeze axis");
                maskedAxes[axis] = inpShape[axis] == 1;
            }
        }
        else
            CV_Error(Error::StsNotImplemented, cv::format("ONNX/Squeeze: doesn't support non-constant 'axes' input"));
    }
    MatShape outShape;
    for (int i = 0; i < inpShape.size(); ++i)
    {
        if (!maskedAxes[i])
            outShape.push_back(inpShape[i]);
    }
    if (outShape.size() != inpShape.size())
    {
        layerParams.type = "Reshape";
        layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
        if (hasDynamicShapes)
        {
            // Map every kept input axis to its position in the output shape.
            std::vector dynamicAxes;
            std::vector inputIndices;
            for (int index = 0; index < inpShape.size(); ++index)
            {
                if (!maskedAxes[index])
                    inputIndices.push_back(index);
            }
            for (int index = 0; index < outShape.size(); ++index)
                dynamicAxes.push_back(index);
            layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
            layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
        }
    }
    else
        layerParams.type = "Identity";

    if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
    {
        // Constant folding: a squeeze of a constant is just a reshape of its data.
        Mat inp = getBlob(node_proto, 0);
        Mat out = inp.reshape(1, outShape);
        out.dims = outShape.size(); // to workaround dims == 1
        addConstant(node_proto.output(0), out);
        return;
    }
    int depth = layerParams.get("depth", CV_32F);
    layerParams.type += (depth == CV_8S) ?
"Int8" : ""; addLayer(layerParams, node_proto); } void ONNXImporter::parseFlatten(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; CV_CheckEQ(node_proto.input_size(), 1, ""); int axis_ = layerParams.get("axis", 1); if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) { Mat input = getBlob(node_proto, 0); if (constBlobsExtraInfo.find(node_proto.input(0)) != constBlobsExtraInfo.end()) { constBlobsExtraInfo.insert(std::make_pair(node_proto.output(0), getBlobExtraInfo(node_proto, 0))); } int axis = normalize_axis(axis_, input.dims); int out_size[2] = {1, 1}; for (int i = 0; i < axis; ++i) { out_size[0] *= input.size[i]; } for (int i = axis; i < input.dims; ++i) { out_size[1] *= input.size[i]; } Mat output = input.reshape(1, 2, out_size); addConstant(node_proto.output(0), output); return; } IterShape_t shapeIt = outShapes.find(node_proto.input(0)); CV_Assert(shapeIt != outShapes.end()); MatShape inpShape = shapeIt->second; int axis = normalize_axis(axis_, inpShape.size()); if (axis == 0 || axis == inpShape.size()) { LayerParams reshapeLp; reshapeLp.name = layerParams.name + "/reshape"; reshapeLp.type = "Reshape"; CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); inpShape.insert(axis == 0 ? 
inpShape.begin() : inpShape.end(), 1); reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size())); opencv_onnx::NodeProto proto; proto.add_input(node_proto.input(0)); proto.add_output(reshapeLp.name); addLayer(reshapeLp, proto); node_proto.set_input(0, reshapeLp.name); axis += 1; } LayerParams first_pass; first_pass.name = layerParams.name + "/flatten"; CV_Assert(layer_id.find(first_pass.name) == layer_id.end()); first_pass.type = "Flatten"; first_pass.set("axis", 0); first_pass.set("end_axis", axis - 1); opencv_onnx::NodeProto proto; proto.add_input(node_proto.input(0)); proto.add_output(first_pass.name); addLayer(first_pass, proto); layerParams.set("axis", 1); node_proto.set_input(0, first_pass.name); addLayer(layerParams, node_proto); } void ONNXImporter::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { CV_Assert(node_proto.input_size() == 1 || node_proto.input_size() == 2); DictValue axes; if (node_proto.input_size() == 2) { Mat blob = getBlob(node_proto, 1); axes = DictValue::arrayInt(blob.ptr(), blob.total()); } else axes = layerParams.get("axes"); if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) { // Constant input. Mat input = getBlob(node_proto, 0); int input_dims = input.dims; if (constBlobsExtraInfo.find(node_proto.input(0)) != constBlobsExtraInfo.end()) if (getBlobExtraInfo(node_proto, 0).real_ndims == 1) input_dims = 1; std::vector dims; for (int j = 0; j < input_dims; j++) { dims.push_back(input.size[j]); } // CV_Assert(axes.getIntValue(axes.size()-1) <= dims.size()); for (int j = 0; j < axes.size(); j++) { int idx = axes.getIntValue(j); idx = idx < 0 ? idx + input_dims + 1 : idx; CV_Assert(0 <= idx && idx <= dims.size()); dims.insert(dims.begin() + idx, 1); } Mat out = input.reshape(0, dims); addConstant(node_proto.output(0), out); return; } // Variable input. 
if (axes.size() != 1) CV_Error(Error::StsNotImplemented, "Multidimensional unsqueeze"); int depth = layerParams.get("depth", CV_32F); MatShape inpShape = outShapes[node_proto.input(0)]; int axis = axes.getIntValue(0); axis = axis < 0 ? axis + (int)inpShape.size() + 1 : axis; CV_Assert(0 <= axis && axis <= inpShape.size()); std::vector outShape = inpShape; outShape.insert(outShape.begin() + axis, 1); layerParams.type = (depth == CV_8S) ? "ReshapeInt8" : "Reshape"; layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); if (hasDynamicShapes) { std::vector dynamicAxes; std::vector inputIndices; for (int index = 0; index < outShape.size(); ++index) { if (index != axis) dynamicAxes.push_back(index); } for (int index = 0; index < inpShape.size(); ++index) inputIndices.push_back(index); layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); } addLayer(layerParams, node_proto); } void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; CV_CheckEQ(node_proto.input_size(), 2, ""); const std::string& input0 = node_proto.input(0); const std::string& input1 = node_proto.input(1); const std::string output_name = node_proto.output(0); Mat newShapeMat = getBlob(input1); MatShape targetShape(newShapeMat.ptr(), newShapeMat.ptr() + newShapeMat.total()); MatShape inpShape; bool haveVariables = constBlobs.find(input0) == constBlobs.end(); if (haveVariables) { IterShape_t shapeIt = outShapes.find(input0); CV_Assert(shapeIt != outShapes.end()); inpShape = shapeIt->second; } else { inpShape = shape(getBlob(input0)); } String srcName = input0; // Unsqueeze and repeat along new axis if (targetShape.size() == inpShape.size() + 1) { inpShape.insert(inpShape.begin(), targetShape.size() - inpShape.size(), 1); for (int i = 0; i < targetShape.size(); 
// (continuation: completes parseExpand's loop that resolves size-1 target entries)
             i++)
        {
            if (abs(targetShape[i]) == 1)
                targetShape[i] = inpShape[i];
        }
        if (haveVariables)
        {
            // Materialize the rank-raising unsqueeze as an explicit Reshape layer.
            LayerParams reshapeLp;
            reshapeLp.name = layerParams.name + "/reshape";
            reshapeLp.type = "Reshape";
            CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
            reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
            opencv_onnx::NodeProto proto;
            proto.add_input(node_proto.input(0));
            proto.add_output(reshapeLp.name);
            addLayer(reshapeLp, proto);
            srcName = reshapeLp.name;
        }
    }
    CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims");

    std::vector broadcast_axes;
    // shapes aren't right-aligned here because targetShape.size() == inpShape.size()
    for (int i = 0; i < targetShape.size(); i++)
    {
        if (targetShape[i] != inpShape[i])
        {
            if (inpShape[i] == 1)
            {
                broadcast_axes.push_back(i);
            }
            else if (targetShape[i] != 1)
            {
                CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i));
            }
        }
    }

    if (!haveVariables)
    {
        // Constant folding: tile the blob along each broadcast axis with cv::repeat.
        if (broadcast_axes.empty())
        {
            addConstant(output_name, getBlob(node_proto, 0));
            return;
        }

        Mat input = getBlob(node_proto, 0);
        MatShape subTargetShape = inpShape;
        for (auto broadcast_axis : broadcast_axes)
        {
            subTargetShape[broadcast_axis] = targetShape[broadcast_axis];
            input = input.reshape(0, total(inpShape, 0, broadcast_axis));
            Mat output = cv::repeat(input, 1, subTargetShape[broadcast_axis]);
            input = output.reshape(0, subTargetShape);
        }
        addConstant(output_name, input);
        return;
    }

    if (broadcast_axes.size() == 2 && broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1)
    {
        // Two trailing adjacent broadcast axes: express the expand as a Scale of a
        // ones tensor shaped like the target (the input supplies the scale factors).
        LayerParams constParams;
        constParams.name = layerParams.name + "/const";
        CV_Assert(layer_id.find(constParams.name) == layer_id.end());
        constParams.type = "Const";

        Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr(), CV_32F);
        constParams.blobs.push_back(inp);

        opencv_onnx::NodeProto proto;
        proto.add_output(constParams.name);
        addLayer(constParams, proto);

        layerParams.type = "Scale";
        layerParams.set("bias_term", false);
        node_proto.set_input(0, constParams.name);
        node_proto.set_input(1, srcName);
    }
    else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1)
    {
        // Single leading broadcast axis: duplicate the input n times and Concat.
        expandMid(layerParams.name, node_proto, srcName, targetShape[broadcast_axes[0]]);
        layerParams.set("axis", broadcast_axes[0]);
        layerParams.type = "Concat";
        node_proto.set_output(0, output_name);
    }
    else if (broadcast_axes.empty())
    {
        layerParams.type = "Identity";
    }
    else
        CV_Error(Error::StsNotImplemented, "Unsupported Expand op");
    addLayer(layerParams, node_proto);
}

// Reshape: target shape comes from constant input 1 (opset >= 5) or the "shape"
// attribute; constant data inputs are folded at import time.
void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape"));
    int depth = layerParams.get("depth", CV_32F);
    layerParams.type += (depth == CV_8S) ? "Int8" : "";

    if (node_proto.input_size() == 2) {
        Mat blob = getBlob(node_proto, 1);
        CV_Assert(blob.type() == CV_32SC1);

        layerParams.set("dim", DictValue::arrayInt(blob.ptr(), blob.total()));

        if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
            // Constant folding: run the Reshape layer immediately.
            std::vector inputs(1, getBlob(node_proto, 0)), outputs;
            runLayer(layerParams, inputs, outputs);
            addConstant(node_proto.output(0), outputs[0]);
            if (constBlobsExtraInfo.find(node_proto.input(0)) != constBlobsExtraInfo.end())
            {
                const int real_ndims_input0 = getBlobExtraInfo(node_proto, 0).real_ndims;
                if (real_ndims_input0 == 1 && blob.total() == 1 && blob.at() == -1)
                    // 1D tensor as input0 (data), and shape is -1
                    constBlobsExtraInfo.insert(std::make_pair(node_proto.output(0), TensorInfo(1)));
            }
            return;
        }
    }
    else {
        DictValue shape = layerParams.get("shape");
        std::vector dim;
        for (int j = 0; j < shape.size(); j++) {
            dim.push_back(shape.getIntValue(j));
        }

        if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
            Mat input = getBlob(node_proto, 0);
            Mat out = input.reshape(0, dim);
            addConstant(node_proto.output(0), out);
            return;
        }
        replaceLayerParam(layerParams, "shape", "dim");
    }
    addLayer(layerParams, node_proto);
}

// Pad: pads may come from input 1 (reordered from ONNX's begin.../end... layout to
// OpenCV's interleaved begin/end pairs) and the fill value from input 2.
void ONNXImporter::parsePad(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    int depth = layerParams.get("depth", CV_32F);
    layerParams.type = (depth == CV_8S) ? "PaddingInt8" : "Padding";
    replaceLayerParam(layerParams, "mode", "type");
    if (node_proto.input_size() == 3 || node_proto.input_size() == 2)
    {
        // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
        // We need to shuffle it to begin0, end0, begin1, end1, ...
        Mat paddings = getBlob(node_proto, 1).reshape(1, 2);
        paddings = paddings.t();
        layerParams.set("paddings", DictValue::arrayInt(paddings.ptr(), paddings.total()));

        if (node_proto.input_size() == 3)
        {
            Mat value = getBlob(node_proto, 2);
            float padValue = (depth == CV_8S) ? (float)value.ptr()[0] : value.ptr()[0];
            layerParams.set("value", padValue);
        }
    }
    addLayer(layerParams, node_proto);
}

// Shape: emits the statically known input shape as a constant CV_32S tensor.
// Zero-valued (dynamic) dimensions are rejected.
void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert(node_proto.input_size() == 1);
    IterShape_t shapeIt = outShapes.find(node_proto.input(0));
    CV_Assert(shapeIt != outShapes.end());
    const MatShape& inpShape = shapeIt->second;

    // Respect the logical 1-D bookkeeping of constant-folded tensors.
    bool isInput1D = false;
    if (constBlobsExtraInfo.find(node_proto.input(0)) != constBlobsExtraInfo.end())
        if (getBlobExtraInfo(node_proto, 0).real_ndims == 1)
            isInput1D = true;

    int dims = static_cast(inpShape.size());
    if (isInput1D)
        dims = 1;

    Mat shapeMat(dims, 1, CV_32S);
    bool isDynamicShape = false;
    for (int j = 0; j < dims; ++j)
    {
        int sz = inpShape[j];
        isDynamicShape |= (sz == 0);
        shapeMat.at(j) = sz;
    }
    shapeMat.dims = 1;  // FIXIT Mat 1D

    if (isDynamicShape)
    {
        CV_LOG_ERROR(NULL, "DNN/ONNX(Shape): dynamic 'zero' shapes are not supported, input " << toString(inpShape, node_proto.input(0)));
        CV_Assert(!isDynamicShape);  // not supported
    }
    addConstant(node_proto.output(0), shapeMat);
}

// Cast: constant inputs are converted in place; variable inputs become Identity
// (the real element type is handled elsewhere). Continued in the next chunk.
void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
    {
        Mat blob = getBlob(node_proto, 0);
        if
// (continuation: completes parseCast's constant-input branch)
           (constBlobsExtraInfo.find(node_proto.input(0)) != constBlobsExtraInfo.end())
        {
            constBlobsExtraInfo.insert(std::make_pair(node_proto.output(0), getBlobExtraInfo(node_proto, 0)));
        }
        int type;
        // Map the ONNX "to" data type onto an OpenCV depth. Note: FLOAT16 is
        // stored as CV_16S and all integer types collapse to CV_32S here.
        switch (layerParams.get("to"))
        {
            case opencv_onnx::TensorProto_DataType_FLOAT:   type = CV_32F; break;
            case opencv_onnx::TensorProto_DataType_UINT8:   type = CV_8U; break;
            case opencv_onnx::TensorProto_DataType_UINT16:  type = CV_16U; break;
            case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16S; break;
            case opencv_onnx::TensorProto_DataType_INT8:
            case opencv_onnx::TensorProto_DataType_INT16:
            case opencv_onnx::TensorProto_DataType_INT32:
            case opencv_onnx::TensorProto_DataType_INT64:   type = CV_32S; break;
            default: type = blob.type();
        }
        Mat dst;
        blob.convertTo(dst, type);
        dst.dims = blob.dims;
        addConstant(node_proto.output(0), dst);
        return;
    }
    else
        layerParams.type = "Identity";
    addLayer(layerParams, node_proto);
}

// ConstantFill / ConstantOfShape: produces a constant tensor of the shape given by
// input 0, filled with the blob value (or the "value" attribute, default 0).
void ONNXImporter::parseConstantFill(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    int depth = CV_32F;
    float fill_value;
    if (!layerParams.blobs.empty())
    {
        CV_Assert(!layerParams.has("value"));
        depth = layerParams.blobs[0].depth();
        Mat floats;
        layerParams.blobs[0].convertTo(floats, CV_32F);
        fill_value = floats.at(0, 0);
    }
    else
        fill_value = layerParams.get("value", 0);

    MatShape inpShape = getBlob(node_proto, 0);
    for (int i = 0; i < inpShape.size(); i++)
        CV_CheckGT(inpShape[i], 0, "");
    Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value));
    addConstant(node_proto.output(0), tensor);
}

// Gather: when both data and indices are constant the layer is run immediately
// (indices converted to float because the layer infrastructure requires it);
// otherwise constant operands are materialized as Const layers.
void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_CheckEQ(node_proto.input_size(), 2, "");
    // TODO: get rid of the type conversions and 1-d/0-d special-casing when the time comes
    if (layer_id.find(node_proto.input(1)) == layer_id.end()) {
        int real_ndims = getBlobExtraInfo(node_proto.input(1)).real_ndims;
        layerParams.set("real_ndims", real_ndims);
        if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
            std::vector inputs, output;

            Mat input = getBlob(node_proto, 0);
            int input_real_ndims = input.dims;
            int type = input.type();
            input.convertTo(input, CV_32FC1);
            inputs.push_back(input);

            Mat indices = getBlob(node_proto, 1);
            indices.convertTo(indices, CV_32FC1);
            inputs.push_back(indices);

            runLayer(layerParams, inputs, output);
            // restore original type and the logical rank after the gather
            output.back().convertTo(output.back(), type);
            output.back().dims = std::max(input_real_ndims - real_ndims, 1);
            addConstant(node_proto.output(0), output.back());
            return;
        }
    }

    for (int i = 0; i < node_proto.input_size(); ++i)
    {
        if (layer_id.find(node_proto.input(i)) == layer_id.end()) {
            LayerParams constParams;
            constParams.name = node_proto.input(i);
            constParams.type = "Const";
            Mat blob = getBlob(node_proto, i);
            if (i == 1) {
                // indices must be float for the runtime layer
                blob.convertTo(blob, CV_32FC1);
            }
            constParams.blobs.push_back(blob);

            opencv_onnx::NodeProto proto;
            proto.add_output(constParams.name);
            addLayer(constParams, proto);
        }
    }
    addLayer(layerParams, node_proto);
}

// Concat: all-constant inputs are concatenated at import time (after aligning
// ranks differing due to constant folding); otherwise constant inputs become
// Const layers feeding the runtime Concat.
void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    bool hasVariableInps = false;
    for (int i = 0; i < node_proto.input_size(); ++i)
    {
        if (layer_id.find(node_proto.input(i)) != layer_id.end())
        {
            hasVariableInps = true;
            break;
        }
    }
    if (constBlobsExtraInfo.find(node_proto.input(0)) != constBlobsExtraInfo.end())
    {
        constBlobsExtraInfo.insert(std::make_pair(node_proto.output(0), getBlobExtraInfo(node_proto, 0)));
    }

    if (!hasVariableInps)
    {
        std::vector inputs(node_proto.input_size()), concatenated;
        // Due constant folding we can get inputs with different number of dimensions
        // Insert the missing dimension to inputs
        MatShape inputShape;
        for (size_t i = 0; i < inputs.size(); ++i)
        {
            inputs[i] = getBlob(node_proto, i);
            if (inputs[i].size.dims() > inputShape.size())
            {
                inputShape = shape(inputs[i]);
            }
        }

        // Concat-1 has default value for axis is 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1
        int axis = layerParams.get("axis", 1);
        for (size_t i = 0; i < inputs.size(); ++i)
        {
            MatShape targetShape = inputShape;
            targetShape[axis] = shape(inputs[i])[axis];
            CV_CheckEQ(total(targetShape), total(shape(inputs[i])), "");
            inputs[i] = inputs[i].reshape(0, targetShape);
        }
        runLayer(layerParams, inputs, concatenated);
        CV_Assert(concatenated.size() == 1);
        addConstant(node_proto.output(0), concatenated[0]);
        return;
    }
    else
    {
        for (int i = 0; i < node_proto.input_size(); ++i)
        {
            if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
            {
                LayerParams constParams;
                constParams.name = node_proto.input(i);
                constParams.type = "Const";
                constParams.blobs.push_back(getBlob(node_proto, i));

                opencv_onnx::NodeProto proto;
                proto.add_output(constParams.name);
                addLayer(constParams, proto);
            }
        }
    }
    addLayer(layerParams, node_proto);
}

// https://github.com/onnx/onnx/blob/master/docs/Operators.md#Resize
// Resize: supports scales or explicit sizes (opset 10/11/13 input layouts);
// continued in the next chunk.
void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    for (int i = 1; i < node_proto.input_size(); i++)
        CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());
    int depth = layerParams.get("depth", CV_32F);
    layerParams.type += (depth == CV_8S) ? "Int8" : "";

    if (layerParams.has("coordinate_transformation_mode"))
    {
        String interp_mode = layerParams.get("coordinate_transformation_mode");
        CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");
        layerParams.set("align_corners", interp_mode == "align_corners");
        if (layerParams.get("mode") == "linear")
        {
            layerParams.set("mode", interp_mode == "pytorch_half_pixel" || interp_mode == "half_pixel" ?
                                    "opencv_linear" : "bilinear");
        }
    }
    if (layerParams.get("mode") == "linear" && framework_name == "pytorch")
        layerParams.set("mode", "opencv_linear");

    // opset-10: input = [X, scales]
    // opset-11: input = [X, roi, scales] or [x, roi, scales, sizes]
    // opset-13: may have empty input, [X, "", "", sizes] or [x, "", scales]
    int scalesInputId = node_proto.input_size() == 2 ?
// (continuation: completes parseResize's scales-input index selection)
                        1 : 2;
    const std::string& scale_name = node_proto.input(scalesInputId);
    Mat scales;
    if(!scale_name.empty())
        scales = getBlob(node_proto, scalesInputId);

    if (!scales.empty())
    {
        CV_CheckEQ(scales.total(), (size_t)4, "HCHW layout is expected");
        layerParams.set("zoom_factor_y", scales.at(2));
        layerParams.set("zoom_factor_x", scales.at(3));
    }
    else if (node_proto.input_size() >= 4)  // opset-11 [x, roi, scales, sizes] or opset-13: input = [X, "", "", sizes]
    {
        const std::string& inputSizes = node_proto.input(3);
        if (constBlobs.find(inputSizes) != constBlobs.end())
        {
            // Explicit destination sizes (NCHW): H is index 2, W is index 3.
            Mat shapes = getBlob(inputSizes);
            CV_CheckEQ(shapes.total(), (size_t)4, "HCHW layout is expected");
            CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, "");
            if (shapes.depth() == CV_32F)
                shapes.convertTo(shapes, CV_32S);
            layerParams.set("width", shapes.at(3));
            layerParams.set("height", shapes.at(2));
        }
        else
        {
            CV_Error(Error::StsNotImplemented, cv::format("ONNX/Resize: doesn't support dynamic non-constant 'sizes' input: %s", inputSizes.c_str()));
        }
    }
    else
    {
        CV_Error(Error::StsNotImplemented, "ONNX/Resize: can't find neither 'scale' nor destination sizes parameters");
    }
    replaceLayerParam(layerParams, "mode", "interpolation");
    addLayer(layerParams, node_proto);
}

// Upsample: legacy/fused op mapped onto the Resize layer. Scale factors may come
// from the "scales" attribute (PyTorch), height/width_scale (Caffe2), or input 1.
void ONNXImporter::parseUpsample(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    //fused from Resize Subgraph
    if (layerParams.has("coordinate_transformation_mode"))
    {
        String interp_mode = layerParams.get("coordinate_transformation_mode");
        CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");
        layerParams.set("align_corners", interp_mode == "align_corners");
        if (layerParams.get("mode") == "linear")
        {
            layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?
                                    "opencv_linear" : "bilinear");
        }
    }
    if (layerParams.get("mode") == "linear" && framework_name == "pytorch")
        layerParams.set("mode", "opencv_linear");
    layerParams.type = "Resize";

    if (layerParams.has("scales"))
    {
        // Pytorch layer
        DictValue scales = layerParams.get("scales");
        CV_Assert(scales.size() == 4);
        layerParams.set("zoom_factor_y", scales.getIntValue(2));
        layerParams.set("zoom_factor_x", scales.getIntValue(3));
    }
    else if (layerParams.has("height_scale") && layerParams.has("width_scale"))
    {
        // Caffe2 layer
        replaceLayerParam(layerParams, "height_scale", "zoom_factor_y");
        replaceLayerParam(layerParams, "width_scale", "zoom_factor_x");
    }
    else
    {
        // scales as input
        const std::string& input1 = node_proto.input(1);
        if (constBlobs.find(input1) != constBlobs.end())
        {
            Mat scales = getBlob(input1);
            CV_Assert(scales.total() == 4);
            layerParams.set("zoom_factor_y", scales.at(2));
            layerParams.set("zoom_factor_x", scales.at(3));
        }
    }
    replaceLayerParam(layerParams, "mode", "interpolation");
    addLayer(layerParams, node_proto);
}

// Softmax / LogSoftmax: one layer type, distinguished by the log_softmax flag.
void ONNXImporter::parseSoftMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    const std::string& layer_type = node_proto.op_type();
    layerParams.type = "Softmax";
    layerParams.set("log_softmax", layer_type == "LogSoftmax");
    addLayer(layerParams, node_proto);
}

// DetectionOutput: a constant priors input (index 2) is materialized as a Const layer.
void ONNXImporter::parseDetectionOutput(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
{
    opencv_onnx::NodeProto node_proto = node_proto_;
    CV_CheckEQ(node_proto.input_size(), 3, "");
    if (constBlobs.find(node_proto.input(2)) != constBlobs.end())
    {
        Mat priors = getBlob(node_proto, 2);

        LayerParams constParams;
        constParams.name = layerParams.name + "/priors";
        constParams.type = "Const";
        constParams.blobs.push_back(priors);

        opencv_onnx::NodeProto priorsProto;
        priorsProto.add_output(constParams.name);
        addLayer(constParams, priorsProto);

        node_proto.set_input(2, constParams.name);
    }
    addLayer(layerParams, node_proto);
}

// CumSum: cumulative sum along the constant axis in input 1; continued below.
void ONNXImporter::parseCumSum(LayerParams&
// (continuation: completes parseCumSum's signature and body)
                               layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "CumSum";

    // Get axis.
    const std::string& input1 = node_proto.input(1);
    if (constBlobs.find(input1) != constBlobs.end())
    {
        Mat axis_blob = getBlob(input1);
        CV_Assert(axis_blob.total() == 1u);
        layerParams.set("axis", axis_blob.at(0));
    }
    addLayer(layerParams, node_proto);
}

// "Equal" "Greater" "Less" "Pow" "Add" "Sub" "Mul" "Div" "Sum" "Min" "Max" "GreaterOrEqual" "LessOrEqual"
// Shared handler for all binary/variadic element-wise ops, mapped to NaryEltwise
// with "operation" set to the lowercased op name. All-constant nodes are folded;
// mixed nodes get Const layers for the constant operands.
void ONNXImporter::parseElementWise(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
{
    opencv_onnx::NodeProto node_proto = node_proto_;
    String op_type = toLowerCase(node_proto.op_type());

    layerParams.type = "NaryEltwise";
    layerParams.set("operation", toLowerCase(node_proto.op_type()));

    // element-wise layers that can have >=1 inputs but actually have one input
    if (node_proto.input_size() == 1 && (op_type == "max" || op_type == "min" || op_type == "mean" || op_type == "sum"))
    {
        layerParams.type = "Identity";
        addLayer(layerParams, node_proto);
        return;
    }

    auto pre_broadcast_transform = [](Mat& t, int t_real_ndims) {
        // A logically 1-D tensor loaded as (n, 1) must become (1, n) to broadcast correctly.
        if (t.dims == 2 && t_real_ndims == 1 && t.size[1] == 1)
            transpose(t, t);
    };

    size_t consts = 0;
    for (size_t i = 0; i < node_proto.input_size(); ++i)
    {
        if (layer_id.find(node_proto.input(i)) == layer_id.end())
        {
            ++consts;
        }
    }

    if (consts == node_proto.input_size())
    {
        // Constant folding: run the op now and store the result.
        std::vector inputs, output;
        for (size_t i = 0; i < node_proto.input_size(); ++i)
        {
            inputs.push_back(getBlob(node_proto, i));
        }
        runLayer(layerParams, inputs, output);
        CV_Assert(output.size() == 1);
        addConstant(node_proto.output(0), output[0]);
        return;
    }
    else if (consts > 0)
    {
        for (size_t i = 0; i < node_proto.input_size(); ++i)
        {
            if (layer_id.find(node_proto.input(i)) == layer_id.end())
            {
                Mat inp = getBlob(node_proto, i);
                // for cases like a tensor of shape (2,), it will be loaded as shape (2, 1) in OpenCV Mat,
                // but for correct broadcast, we need to make it of shape (1, 2)
                if (constBlobsExtraInfo.find(node_proto.input(i)) != constBlobsExtraInfo.end())
                    pre_broadcast_transform(inp, getBlobExtraInfo(node_proto, i).real_ndims);

                // carry the constant by adding a Const node
                LayerParams constParams;
                constParams.name = node_proto.input(i);
                constParams.type = "Const";
                // Non-constant propagated layers cannot output 1-d or 0-d tensors.
                inp.dims = std::max(inp.dims, 2);
                constParams.blobs.push_back(inp);

                opencv_onnx::NodeProto proto;
                proto.add_output(constParams.name);
                addLayer(constParams, proto);
            }
        }
    }

    // add element-wise layer
    addLayer(layerParams, node_proto);
}

// DepthToSpace / SpaceToDepth: lowered to Reshape -> Permute -> Reshape; the
// shapes/permutation are selected in the next chunk (DCR vs CRD modes).
void ONNXImporter::parseDepthToSpace(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
{
    // We parse "DepthToSpace" and "SpaceToDepth" in this function.
    opencv_onnx::NodeProto node_proto = node_proto_;
    const std::string& layer_type = node_proto.op_type();
    CV_Assert(layer_type == "DepthToSpace" || layer_type == "SpaceToDepth");

    // Get blocksize
    CV_Assert(layerParams.has("blocksize"));
    int blocksize = layerParams.get("blocksize");
    CV_Assert(blocksize > 0);

    // Get mode, only for "DepthToSpace"
    std::string modeType = layerParams.get("mode", "DCR");

    MatShape inpShape = outShapes[node_proto.input(0)];
    CV_Assert(inpShape.size() == 4);
    int N = inpShape[0], C = inpShape[1], H = inpShape[2], W = inpShape[3];

    // Implement DepthToSpace and SpaceToDepth by the Reshape and Permute layer.
std::array shape0, perm; std::array shape1; if (layer_type == "DepthToSpace") { if (modeType == "DCR") { shape0 = {N, blocksize, blocksize, C/(blocksize * blocksize), H, W}; perm = {0, 3, 4, 1, 5, 2}; shape1 = {N, C/(blocksize * blocksize), H * blocksize, W * blocksize}; } else if (modeType == "CRD") { shape0 = {N, C/(blocksize * blocksize), blocksize, blocksize, H, W}; perm = {0, 1, 4, 2, 5, 3}; shape1 = {N, C/(blocksize * blocksize), H * blocksize, W * blocksize}; } else CV_Error(Error::StsNotImplemented, "The mode of " + modeType + " in " + layer_type + " Layer is not supported"); } else // SpaceToDepth { shape0 = {N, C, H/blocksize, blocksize, W/blocksize, blocksize}; perm = {0, 3, 5, 1, 2, 4}; shape1 = {N, C * blocksize * blocksize, H/blocksize, W/blocksize}; } // Step1: Reshape LayerParams reshapeLp; reshapeLp.name = layerParams.name + "/reshape"; reshapeLp.type = "Reshape"; CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); reshapeLp.set("dim", DictValue::arrayInt(shape0.data(), shape0.size())); opencv_onnx::NodeProto protoReshape; protoReshape.add_input(node_proto.input(0)); protoReshape.add_output(reshapeLp.name); addLayer(reshapeLp, protoReshape); // Step2: Transpose LayerParams permuteLp; permuteLp.name = layerParams.name + "/permute"; permuteLp.type = "Permute"; CV_Assert(layer_id.find(permuteLp.name) == layer_id.end()); permuteLp.set("order", DictValue::arrayInt(perm.data(), perm.size())); opencv_onnx::NodeProto protoPermute; protoPermute.add_input(reshapeLp.name); protoPermute.add_output(permuteLp.name); addLayer(permuteLp, protoPermute); // Step3: Reshape layerParams.type = "Reshape"; layerParams.set("dim", DictValue::arrayInt(shape1.data(), shape1.size())); node_proto.set_input(0, permuteLp.name); addLayer(layerParams, node_proto); } // Currently we only support range with all constant inputs void ONNXImporter::parseRange(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { CV_Assert(node_proto.input_size() == 3); // 0 - 
start, 1 - limit, 2 - delta layerParams.type = "Range"; std::vector const_id; for (int i = 0; i < node_proto.input_size(); i++) if (layer_id.find(node_proto.input(i)) == layer_id.end()) const_id.push_back(i); // only supports the case which all inputs are constant CV_Assert(const_id.size() == 3); Mat startMat = getBlob(node_proto, 0); CV_Assert(startMat.type() == CV_32SC1); int start = startMat.at(0); Mat limitMat = getBlob(node_proto, 1); CV_Assert(limitMat.type() == CV_32SC1); int limit = limitMat.at(0); Mat deltaMat = getBlob(node_proto, 2); CV_Assert(deltaMat.type() == CV_32SC1); int delta = deltaMat.at(0); int number_of_elements = std::max(int(std::ceil((limit - start) / delta)), 0); Mat r(number_of_elements, 1, CV_32SC1); for (int i = 0; i < number_of_elements; i++) { r.at(i) = start + (i * delta); } addConstant(node_proto.output(0), r); constBlobsExtraInfo.insert(std::make_pair(node_proto.output(0), TensorInfo(1))); } void ONNXImporter::parseScatter(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { CV_CheckEQ(node_proto.input_size(), 3, "Scatter: three inputs are required."); layerParams.type = "Scatter"; if (node_proto.op_type() == "ScatterND") layerParams.type = "ScatterND"; size_t consts = 0; for (size_t i = 0; i < node_proto.input_size(); ++i) if (layer_id.find(node_proto.input(i)) == layer_id.end()) ++consts; if (consts == node_proto.input_size()) { std::vector inputs, output; for (size_t i = 0; i < node_proto.input_size(); i++) { Mat blob = getBlob(node_proto, i); if (i == 1) // indices blob.convertTo(blob, CV_32F); inputs.push_back(blob); } runLayer(layerParams, inputs, output); CV_Assert(output.size() == 1); addConstant(node_proto.output(0), output[0]); return; } else if (consts > 0) { for (size_t i = 0; i < node_proto.input_size(); i++) { if (layer_id.find(node_proto.input(i)) == layer_id.end()) { Mat blob = getBlob(node_proto, i); if (i == 1) // indices, from int32/int64 to float32 blob.convertTo(blob, CV_32F); LayerParams 
constParams; constParams.name = node_proto.input(i); constParams.type = "Const"; constParams.blobs.push_back(blob); opencv_onnx::NodeProto proto; proto.add_output(constParams.name); addLayer(constParams, proto); } } } addLayer(layerParams, node_proto); } void ONNXImporter::parseTile(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { // for Tile>1, only the case of 'repeats' being constant is supported. // 'repeats' is treated as a parameter instead of an input to determine shape in pre-run. CV_Assert(node_proto.input_size() == 2 || node_proto.input_size() == 3); // tile-1: 3 inputs, tile>1: 2 inputs bool is_opset_1 = node_proto.input_size() == 3; std::vector const_input_idx; for (size_t i = 0; i < node_proto.input_size(); ++i) if (layer_id.find(node_proto.input(i)) == layer_id.end()) const_input_idx.push_back(i); bool all_const = false; if (const_input_idx.size() == node_proto.input_size()) // all inputs are constant { all_const = true; } else if ((const_input_idx.size() == 1 && const_input_idx[0] == 1) || // tile>1 (const_input_idx.size() == 2 && const_input_idx[0] == 1 && const_input_idx[1] == 2)) // tile-1 { all_const = false; } else { if (!is_opset_1) CV_Error(Error::StsNotImplemented, "ONNX/Tile: repeats being non-constant is not supported."); else CV_Error(Error::StsNotImplemented, "ONNX/Tile: tiles or axis being non-constant are not supported."); } int input0_dims = 1; if (all_const) input0_dims = getBlob(node_proto, 0).dims; else input0_dims = outShapes[node_proto.input(0)].size(); // repeats, treated as paramenter std::vector repeats_vec(input0_dims, 1); Mat input1_blob = getBlob(node_proto, 1); if (is_opset_1) { // input1 in tile-1: tiles, 1d tensor of shape [1] CV_CheckEQ(input1_blob.total(), 1ull, "ONNX/Tile: tiles must be a 0D tensor or 1D tensor of shape [1]."); int tiles = input1_blob.at(0); // input2 in tile-1: axis, 1d tensor of shape [1] Mat input2_blob = getBlob(node_proto, 2); CV_CheckEQ(input2_blob.total(), 1ull, "ONNX/Tile: 
axis must be a 0D tensor or 1D tensor of shape [1]."); int axis = input2_blob.at(0); repeats_vec[axis] = tiles; } else { // input1 in tile>1: repeats CV_CheckEQ(input1_blob.dims, 2, "ONNX/Tile: repeats must be a 1D tensor."); // 1D tensor is represented as a 2D Mat for (int i = 0; i < input1_blob.total(); i++) repeats_vec[i] = input1_blob.at(i); } layerParams.set("repeats", DictValue::arrayInt(repeats_vec.data(), repeats_vec.size())); if (all_const) { std::vector inputs, output; Mat input0_blob = getBlob(node_proto, 0); inputs.push_back(input0_blob); runLayer(layerParams, inputs, output); CV_Assert(output.size() == 1); addConstant(node_proto.output(0), output[0]); return; } else { addLayer(layerParams, node_proto); } } void ONNXImporter::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { bool is_all_input_const = true; for (int i = 0; i < node_proto.input_size(); i++) { if (layer_id.find(node_proto.input(i)) != layer_id.end()) { is_all_input_const = false; break; } } if (is_all_input_const && node_proto.output_size() == 1) { std::vector input, output; for (int i = 0; i < node_proto.input_size(); i++) input.push_back(getBlob(node_proto, i)); runLayer(layerParams, input, output); addConstant(node_proto.output(0), output[0]); return; } for (int j = 0; j < node_proto.input_size(); j++) { if (layer_id.find(node_proto.input(j)) == layer_id.end()) layerParams.blobs.push_back(getBlob(node_proto, j)); } addLayer(layerParams, node_proto); } void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { const std::string& name = layerParams.name; std::string& layer_type = layerParams.type; const std::string& layer_type_domain = node_proto.has_domain() ? 
node_proto.domain() : std::string(); if (!layer_type_domain.empty() && layer_type_domain != str_domain_ai_onnx) { // append ONNX domain name static bool DNN_CUSTOM_ONNX_TYPE_INCLUDE_DOMAIN_NAME = utils::getConfigurationParameterBool("OPENCV_DNN_CUSTOM_ONNX_TYPE_INCLUDE_DOMAIN_NAME", true); if (DNN_CUSTOM_ONNX_TYPE_INCLUDE_DOMAIN_NAME) { layer_type = layer_type_domain + "." + layer_type; } } CV_LOG_IF_INFO(NULL, !LayerFactory::isLayerRegistered(layer_type), "DNN/ONNX: unknown node type, try using custom handler for node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) ); parseSimpleLayers(layerParams, node_proto); } void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { CV_Assert(node_proto.input_size() == 2 || node_proto.input_size() == 3); layerParams.type = (node_proto.op_type() == "QuantizeLinear") ? "Quantize" : "Dequantize"; int axis = layerParams.get("axis", 1); // For QuantizeLinear and DequantizeLinear, the scale and zeropoint can be a Scalar (per-tensor quantized) // or 1-D tensor (per-channel quantized). bool is1D = false; Mat scaleMat = getBlob(node_proto, 1); if(scaleMat.total() > 1) is1D = true; Mat zpMat; if (node_proto.input_size() == 3) { zpMat = getBlob(node_proto, 2); CV_Assert(zpMat.total() == scaleMat.total()); // zero point should has the same shape as scale. } if (is1D) { const int num = scaleMat.total(); std::vector zeropoints(num, 0); std::vector scales(num, 0); for (int i = 0; i < num; i++) { scales[i] = scaleMat.at(i); if (!zpMat.empty()) zeropoints[i] = zpMat.depth() == CV_32S ? zpMat.at(i) : (int)zpMat.at(i); } layerParams.set("is1D", true); layerParams.set("axis", axis); layerParams.set("scales", DictValue::arrayReal(scales.data(), scales.size())); layerParams.set("zeropoints", DictValue::arrayInt(zeropoints.data(), zeropoints.size())); } else { int zeropoint = zpMat.empty() ? 
0 : zpMat.depth() == CV_32S ? getScalarFromMat(zpMat) : (int)getScalarFromMat(zpMat); float scale = getScalarFromMat(scaleMat); layerParams.set("is1D", false); layerParams.set("scales", scale); layerParams.set("zeropoints", zeropoint); } if (layerParams.type == "Quantize") layerParams.set("depth", CV_8S); else // Dequantize layerParams.set("depth", CV_32F); if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) // Variable input. { std::vector inputs, outputs; inputs.push_back(getBlob(node_proto, 0)); runLayer(layerParams, inputs, outputs); addConstant(node_proto.output(0), outputs[0]); } else addLayer(layerParams, node_proto); } void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; int ninputs = node_proto.input_size(); CV_Assert(ninputs == 8 || ninputs == 9); float inp_sc = getScalarFromMat(getBlob(node_proto, 1)); int inp_zp = (int)getScalarFromMat(getBlob(node_proto, 2)); if (layerParams.has("pad")) { bool asymmetricPadding = false; DictValue pads = layerParams.get("pad"); const int dims = pads.size() / 2; for (int i = 0; i < dims; ++i) { if (pads.get(i) != pads.get(i + dims)) { asymmetricPadding = true; break; } } if (asymmetricPadding && pads.size() == 4) { layerParams.erase("pad"); std::vector paddings(4, 0); for (int i = 0; i < dims; ++i) { paddings.push_back(pads.get(i)); paddings.push_back(pads.get(dims + i)); } LayerParams padLp; padLp.name = layerParams.name + "/pad"; padLp.type = "PaddingInt8"; padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size())); padLp.set("depth", CV_8S); padLp.set("value", inp_zp); opencv_onnx::NodeProto proto; proto.add_input(node_proto.input(0)); proto.add_output(padLp.name); addLayer(padLp, proto); node_proto.set_input(0, padLp.name); } } Mat weights = getBlob(node_proto, 3); int outCn = weights.size[0]; Mat w_scale = getBlob(node_proto, 4); CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn); bool 
per_channel = w_scale.total() == outCn; Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at(0))); float out_sc = getScalarFromMat(getBlob(node_proto, 6)); int8_t out_zp = getScalarFromMat(getBlob(node_proto, 7)); Mat bias = (ninputs == 9) ? getBlob(node_proto, 8) : Mat::zeros(1, outCn, CV_32S); Mat weights_2d = weights.reshape(1, outCn); Mat biasFused(1, outCn, CV_32S); Mat outputMultiplier(1, outCn, CV_32F); for (int i = 0; i < outCn; i++) { biasFused.at(i) = bias.at(i) - inp_zp*(cv::sum(weights_2d.row(i))[0]); outputMultiplier.at(i) = (inp_sc * wt_sc.at(i)) / out_sc; } layerParams.type = "ConvolutionInt8"; layerParams.set("num_output", outCn); layerParams.set("input_zeropoint", inp_zp); layerParams.set("input_scale",inp_sc); layerParams.set("zeropoints", out_zp); layerParams.set("scales", out_sc); layerParams.set("per_channel", per_channel); layerParams.blobs.push_back(weights); layerParams.blobs.push_back(biasFused); layerParams.blobs.push_back(outputMultiplier); addLayer(layerParams, node_proto); } void ONNXImporter::parseQMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { int ninputs = node_proto.input_size(); CV_Assert(ninputs == 8); if (constBlobs.find(node_proto.input(3)) == constBlobs.end()) CV_Error(Error::StsNotImplemented, "Variable weights is not supported"); int firstInpDims = outShapes[node_proto.input(0)].size(); float inp_sc = getScalarFromMat(getBlob(node_proto, 1)); int8_t inp_zp = getScalarFromMat(getBlob(node_proto, 2)); Mat weights = getBlob(node_proto, 3).t(); int outCn = weights.size[0]; int secondInpDims = weights.dims; Mat w_scale = getBlob(node_proto, 4); CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn); bool per_channel = w_scale.total() == outCn ? true : false; Mat wt_sc = (w_scale.total() == outCn) ? 
w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at(0))); float out_sc = getScalarFromMat(getBlob(node_proto, 6)); int8_t out_zp = getScalarFromMat(getBlob(node_proto, 7)); Mat bias(1, outCn, CV_32S); Mat outputMultiplier(1, outCn, CV_32F); for (int i = 0; i < outCn; i++) { bias.at(i) = -inp_zp*(cv::sum(weights.row(i))[0]); outputMultiplier.at(i) = (inp_sc * wt_sc.at(i)) / out_sc; } layerParams.type = "InnerProductInt8"; layerParams.set("num_output", outCn); layerParams.set("axis", firstInpDims - secondInpDims + 1); layerParams.set("input_scale", inp_sc); layerParams.set("input_zeropoint", inp_zp); layerParams.set("zeropoints", out_zp); layerParams.set("scales", out_sc); layerParams.set("per_channel", per_channel); layerParams.blobs.push_back(weights); layerParams.blobs.push_back(bias); layerParams.blobs.push_back(outputMultiplier); addLayer(layerParams, node_proto); } // A * B + C = Y, we require that the dimension of A is [m, k], and the dimension of B is [n, k]. // And the dim of output Y is [m, n] void ONNXImporter::parseQGemm(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { int ninputs = node_proto.input_size(); CV_Assert(ninputs == 8 || ninputs == 9); layerParams.type = "InnerProductInt8"; if (constBlobs.find(node_proto.input(3)) == constBlobs.end()) CV_Error(Error::StsNotImplemented, "Variable weights is not supported"); Mat weights = getBlob(node_proto, 3); if (!layerParams.get("transB", 0)) { transpose(weights, weights); } CV_Assert(layerParams.get("alpha", 1) == 1.0f); CV_Assert(layerParams.get("transA", 0) == 0); int firstInpDims = outShapes[node_proto.input(0)].size(); float inp_sc = getScalarFromMat(getBlob(node_proto, 1)); int8_t inp_zp = getScalarFromMat(getBlob(node_proto, 2)); int outCn = weights.size[0]; int secondInpDims = weights.dims; Mat w_scale = getBlob(node_proto, 4); CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn); bool per_channel = w_scale.total() == outCn; Mat wt_sc = (w_scale.total() == outCn) ? 
w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at(0))); Mat w_zp = getBlob(node_proto, 5); int8_t* ptrZp = w_zp.ptr(0); for (int i = 0; i < w_zp.total(); i++) { if (ptrZp[i] != (int8_t)0) CV_Error(Error::StsUnsupportedFormat, "The zero-point non-zero case of W is not supported!"); } float out_sc = getScalarFromMat(getBlob(node_proto, 7)); int8_t out_zp = ninputs == 9 ? getScalarFromMat(getBlob(node_proto, 8)) : 0; Mat bias; if (constBlobs.find(node_proto.input(6)) != constBlobs.end()) bias = getBlob(node_proto, 6); else bias = Mat::zeros(1, outCn, CV_32S); Mat biasFused(1, outCn, CV_32S); Mat outputMultiplier(1, outCn, CV_32F); for (int i = 0; i < outCn; i++) { biasFused.at(i) = bias.at(i) - inp_zp*(cv::sum(weights.row(i))[0]); outputMultiplier.at(i) = (inp_sc * wt_sc.at(i)) / out_sc; } layerParams.type = "InnerProductInt8"; layerParams.set("num_output", outCn); layerParams.set("axis", firstInpDims - secondInpDims + 1); layerParams.set("input_scale", inp_sc); layerParams.set("input_zeropoint", inp_zp); layerParams.set("scales", out_sc); layerParams.set("zeropoints", out_zp); layerParams.set("per_channel", per_channel); layerParams.blobs.push_back(weights); layerParams.blobs.push_back(biasFused); layerParams.blobs.push_back(outputMultiplier); addLayer(layerParams, node_proto); } void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; CV_Assert(node_proto.input_size() == 7 || node_proto.input_size() == 8); std::string op = (node_proto.op_type() == "QLinearAdd") ? 
"sum" : "prod"; int constId = -1; for (int i = 0; i < 4; i += 3) { if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) constId = i; } float inp_0_sc = getScalarFromMat(getBlob(node_proto, 1)); int8_t inp_0_zp = getScalarFromMat(getBlob(node_proto, 2)); float inp_1_sc = getScalarFromMat(getBlob(node_proto, 4)); int8_t inp_1_zp = getScalarFromMat(getBlob(node_proto, 5)); // Set 2nd input as the const input if (constId == 0) { cv::swap(inp_0_sc, inp_1_sc); cv::swap(inp_0_zp, inp_1_zp); } float out_sc = getScalarFromMat(getBlob(node_proto, 6)); int8_t out_zp = 0; if (node_proto.input_size() == 8) out_zp = getScalarFromMat(getBlob(node_proto, 7)); std::vector inp_scales = {inp_0_sc, inp_1_sc}; std::vector inp_zps = {inp_0_zp, inp_1_zp}; std::vector coeffs; float offset; if (op == "sum") { coeffs = {inp_scales[0]/out_sc, inp_scales[1]/out_sc}; offset = out_zp - coeffs[0]*inp_zps[0] - coeffs[1]*inp_zps[1]; } else { coeffs = {inp_scales[0]/out_sc, inp_scales[1]}; offset = out_zp; } if (constId != -1) { Mat blob = getBlob(node_proto, constId); if (blob.total() == 1) { float val = inp_scales[1] * (blob.at(0) - inp_zps[1]); float scale = inp_scales[0] / out_sc; if (op == "prod") scale *= val; float shift = out_zp - scale*inp_zps[0]; if (op == "sum") shift += (val/out_sc); LayerParams rescaleParams; rescaleParams.name = layerParams.name; rescaleParams.type = "Requantize"; rescaleParams.set("depth", CV_8S); rescaleParams.set("scale", scale); rescaleParams.set("shift", shift); rescaleParams.set("isEltwise", true); addLayer(rescaleParams, node_proto); return; } else { MatShape inpShape = outShapes[node_proto.input(3 - constId)]; if (blob.dims == 2) blob = blob.t(); if (shape(blob) == inpShape) { LayerParams constParams; constParams.name = layerParams.name + "/const"; constParams.type = "ConstInt8"; constParams.set("depth", CV_8S); constParams.set("scales", inp_1_sc); constParams.set("zeropoints", inp_1_zp); constParams.blobs.push_back(blob); int id = 
dstNet.addLayer(constParams.name, constParams.type, CV_8S, constParams); layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0, CV_8S))); outShapes[constParams.name] = shape(blob); node_proto.set_input(constId, constParams.name); layerParams.type = "EltwiseInt8"; layerParams.set("operation", op); layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size())); layerParams.set("offset", offset); } else { layerParams.type = "ScaleInt8"; layerParams.set("bias_term", op == "sum"); int axis = 1; for (int i = 0; i < graph_proto.initializer_size(); i++) { opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i); if (tensor_proto.name() == node_proto.input(constId)) { axis = inpShape.size() - tensor_proto.dims_size(); break; } } layerParams.set("axis", axis); blob = blob.reshape(1, 1); Mat blob_dequantized; blob.convertTo(blob_dequantized, CV_32F, inp_scales[1], -(inp_scales[1] * inp_zps[1])); layerParams.blobs.push_back(blob_dequantized); } } } else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(3)]) { layerParams.type = "EltwiseInt8"; layerParams.set("operation", op); layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size())); layerParams.set("offset", offset); } else { layerParams.type = "ScaleInt8"; layerParams.set("bias_term", op == "sum"); } layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size())); layerParams.set("input_zeropoints", DictValue::arrayInt(inp_zps.data(), inp_zps.size())); layerParams.set("scales", out_sc); layerParams.set("zeropoints", out_zp); addLayer(layerParams, node_proto); } void ONNXImporter::parseQLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5); float slope = layerParams.get("alpha"); float inp_sc = getScalarFromMat(getBlob(node_proto, 1)); int8_t inp_zp = getScalarFromMat(getBlob(node_proto, 2)); float out_sc = 
getScalarFromMat(getBlob(node_proto, 3)); int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat(getBlob(node_proto, 4)); Mat lookUpTable(1, 256, CV_8S); int8_t* table = lookUpTable.ptr(); for (int i = -128; i < 128; i++) { float x = inp_sc*(i - inp_zp); float y = x >= 0.f ? x : slope*x; int quantized = out_zp + cvRound(y/out_sc); table[i+128] = saturate_cast(quantized); } layerParams.type = "ReLUInt8"; layerParams.set("input_scale", inp_sc); layerParams.set("input_zeropoint", inp_zp); layerParams.set("scales", out_sc); layerParams.set("zeropoints", out_zp); layerParams.set("slope", slope); layerParams.blobs.push_back(lookUpTable); addLayer(layerParams, node_proto); } void ONNXImporter::parseQSigmoid(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5); float inp_sc = getScalarFromMat(getBlob(node_proto, 1)); int8_t inp_zp = getScalarFromMat(getBlob(node_proto, 2)); float out_sc = getScalarFromMat(getBlob(node_proto, 3)); int8_t out_zp = node_proto.input_size() == 4 ? 
0 : getScalarFromMat(getBlob(node_proto, 4)); Mat lookUpTable(1, 256, CV_8S); int8_t* table = lookUpTable.ptr(); for (int i = -128; i < 128; i++) { float x = inp_sc*(i - inp_zp); float y = 1.f/(1.f + std::exp(-x)); int quantized = out_zp + cvRound(y/out_sc); table[i+128] = saturate_cast(quantized); } layerParams.type = "SigmoidInt8"; layerParams.set("input_scale", inp_sc); layerParams.set("input_zeropoint", inp_zp); layerParams.set("scales", out_sc); layerParams.set("zeropoints", out_zp); layerParams.blobs.push_back(lookUpTable); addLayer(layerParams, node_proto); } void ONNXImporter::parseQAvgPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5); float inp_sc = getScalarFromMat(getBlob(node_proto, 1)); int8_t inp_zp = getScalarFromMat(getBlob(node_proto, 2)); float out_sc = getScalarFromMat(getBlob(node_proto, 3)); int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat(getBlob(node_proto, 4)); layerParams.type = "PoolingInt8"; layerParams.set("pool", "ave"); layerParams.set("global_pooling", node_proto.op_type() == "QLinearGlobalAveragePool"); layerParams.set("multiplier", inp_sc/out_sc); layerParams.set("input_scale", inp_sc); layerParams.set("input_zeropoint", inp_zp); layerParams.set("scales", out_sc); layerParams.set("zeropoints", out_zp); addLayer(layerParams, node_proto); } void ONNXImporter::parseQConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; layerParams.type = "ConcatInt8"; int num_inputs = node_proto.input_size(); float out_scale = getScalarFromMat(getBlob(node_proto, 0)); int8_t out_zp = getScalarFromMat(getBlob(node_proto, 1)); for (int i = 2; i < num_inputs; i += 3) { float inp_scale = getScalarFromMat(getBlob(node_proto, i + 1)); int8_t inp_zp = getScalarFromMat(getBlob(node_proto, i + 2)); if (inp_scale != out_scale || inp_zp != out_zp) { float scale = 
inp_scale/out_scale; float shift = out_zp - scale*inp_zp; if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) { Mat blob = getBlob(node_proto, i); Mat blob_rescaled; blob.convertTo(blob_rescaled, CV_8S, scale, shift); constBlobs[node_proto.input(i)] = blob_rescaled; } else { LayerParams rescaleParams; rescaleParams.name = node_proto.input(i) + "/rescale"; rescaleParams.type = "Requantize"; rescaleParams.set("depth", CV_8S); rescaleParams.set("scale", scale); rescaleParams.set("shift", shift); rescaleParams.set("isEltwise", false); opencv_onnx::NodeProto proto; proto.add_input(node_proto.input(i)); proto.add_output(rescaleParams.name); addLayer(rescaleParams, proto); node_proto.set_input(i, rescaleParams.name); } } } bool hasVariableInps = false; for (int i = 2; i < num_inputs; i += 3) { if (layer_id.find(node_proto.input(i)) != layer_id.end()) { hasVariableInps = true; break; } } if (!hasVariableInps) { std::vector inputs, concatenated; MatShape inputShape; for (size_t i = 2; i < num_inputs; i += 3) { Mat blob = getBlob(node_proto, i); if (blob.size.dims() > inputShape.size()) { inputShape = shape(blob); } inputs.push_back(blob); } int axis = layerParams.get("axis", 1); for (size_t i = 0; i < inputs.size(); ++i) { MatShape targetShape = inputShape; targetShape[axis] = shape(inputs[i])[axis]; CV_CheckEQ(total(targetShape), total(shape(inputs[i])), ""); inputs[i] = inputs[i].reshape(0, targetShape); } runLayer(layerParams, inputs, concatenated); CV_Assert(concatenated.size() == 1); addConstant(layerParams.name, concatenated[0]); return; } else { for (int i = 2; i < num_inputs; i += 3) { if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) { LayerParams constParams; constParams.name = node_proto.input(i); constParams.type = "ConstInt8"; constParams.blobs.push_back(getBlob(node_proto, i)); constParams.set("depth", CV_8S); opencv_onnx::NodeProto proto; proto.add_output(constParams.name); addLayer(constParams, proto); } } } layerParams.set("scales", 
out_scale); layerParams.set("zeropoints", out_zp); addLayer(layerParams, node_proto); } // Domain: ai.onnx (default) // URL: https://github.com/onnx/onnx/blob/master/docs/Operators.md void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version) { CV_UNUSED(opset_version); DispatchMap dispatch; dispatch["ArgMax"] = dispatch["ArgMin"] = &ONNXImporter::parseArg; dispatch["MaxUnpool"] = &ONNXImporter::parseMaxUnpool; dispatch["MaxPool"] = &ONNXImporter::parseMaxPool; dispatch["AveragePool"] = &ONNXImporter::parseAveragePool; dispatch["GlobalAveragePool"] = dispatch["GlobalMaxPool"] = &ONNXImporter::parseGlobalPool; dispatch["ReduceMax"] = dispatch["ReduceMin"] = dispatch["ReduceMean"] = dispatch["ReduceSum"] = dispatch["ReduceSumSquare"] = dispatch["ReduceProd"] = dispatch["ReduceL1"] = dispatch["ReduceL2"] = dispatch["ReduceLogSum"] = dispatch["ReduceLogSumExp"] = &ONNXImporter::parseReduce; dispatch["Slice"] = &ONNXImporter::parseSlice; dispatch["Split"] = &ONNXImporter::parseSplit; dispatch["Neg"] = &ONNXImporter::parseNeg; dispatch["Constant"] = &ONNXImporter::parseConstant; dispatch["LSTM"] = &ONNXImporter::parseLSTM; dispatch["GRU"] = &ONNXImporter::parseGRU; dispatch["ImageScaler"] = &ONNXImporter::parseImageScaler; dispatch["Clip"] = &ONNXImporter::parseClip; dispatch["LeakyRelu"] = &ONNXImporter::parseLeakyRelu; dispatch["Relu"] = &ONNXImporter::parseRelu; dispatch["Elu"] = &ONNXImporter::parseElu; dispatch["Tanh"] = &ONNXImporter::parseTanh; dispatch["Abs"] = &ONNXImporter::parseAbs; dispatch["PRelu"] = &ONNXImporter::parsePRelu; dispatch["LRN"] = &ONNXImporter::parseLRN; dispatch["InstanceNormalization"] = &ONNXImporter::parseInstanceNormalization; dispatch["BatchNormalization"] = &ONNXImporter::parseBatchNormalization; dispatch["Gemm"] = &ONNXImporter::parseGemm; dispatch["MatMul"] = &ONNXImporter::parseMatMul; dispatch["Conv"] = &ONNXImporter::parseConv; dispatch["ConvTranspose"] = &ONNXImporter::parseConvTranspose; dispatch["Transpose"] = 
&ONNXImporter::parseTranspose; dispatch["Squeeze"] = &ONNXImporter::parseSqueeze; dispatch["Flatten"] = &ONNXImporter::parseFlatten; dispatch["Unsqueeze"] = &ONNXImporter::parseUnsqueeze; dispatch["Expand"] = &ONNXImporter::parseExpand; dispatch["Reshape"] = &ONNXImporter::parseReshape; dispatch["Pad"] = &ONNXImporter::parsePad; dispatch["Shape"] = &ONNXImporter::parseShape; dispatch["Cast"] = &ONNXImporter::parseCast; dispatch["ConstantFill"] = dispatch["ConstantOfShape"] = &ONNXImporter::parseConstantFill; dispatch["Gather"] = &ONNXImporter::parseGather; dispatch["Concat"] = &ONNXImporter::parseConcat; dispatch["Resize"] = &ONNXImporter::parseResize; dispatch["Upsample"] = &ONNXImporter::parseUpsample; dispatch["SoftMax"] = dispatch["LogSoftmax"] = &ONNXImporter::parseSoftMax; dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput; dispatch["CumSum"] = &ONNXImporter::parseCumSum; dispatch["SpaceToDepth"] = dispatch["DepthToSpace"] = &ONNXImporter::parseDepthToSpace; dispatch["ScatterElements"] = dispatch["Scatter"] = dispatch["ScatterND"] = &ONNXImporter::parseScatter; dispatch["Tile"] = &ONNXImporter::parseTile; dispatch["Equal"] = dispatch["Greater"] = dispatch["Less"] = dispatch["Pow"] = dispatch["Add"] = dispatch["Sub"] = dispatch["Mul"] = dispatch["Div"] = dispatch["GreaterOrEqual"] = dispatch["LessOrEqual"] = &ONNXImporter::parseElementWise; dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseElementWise; dispatch["Range"] = &ONNXImporter::parseRange; std::vector simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos", "Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish", "Identity", "Log", "Round", "Reciprocal", "Selu", "Sign", "Sigmoid", "Sin", "Sinh", "Softmax", "Softplus", "Softsign", "Shrink", "Sqrt", "Tan", "ThresholdedRelu"}; for (const auto& name : simpleLayers) { dispatch[name] = &ONNXImporter::parseSimpleLayers; } // ai.onnx: opset 10+ dispatch["QuantizeLinear"] = 
dispatch["DequantizeLinear"] = &ONNXImporter::parseQuantDequant; dispatch["QLinearConv"] = &ONNXImporter::parseQConv; dispatch["QLinearMatMul"] = &ONNXImporter::parseQMatMul; domain_dispatch_map[str_domain_ai_onnx] = dispatch; } // Domain: com.microsoft // URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md void ONNXImporter::buildDispatchMap_COM_MICROSOFT(int opset_version) { CV_UNUSED(opset_version); DispatchMap dispatch; dispatch["QLinearAdd"] = dispatch["QLinearMul"] = &ONNXImporter::parseQEltwise; dispatch["QLinearAveragePool"] = dispatch["QLinearGlobalAveragePool"] = &ONNXImporter::parseQAvgPool; dispatch["QLinearLeakyRelu"] = &ONNXImporter::parseQLeakyRelu; dispatch["QLinearSigmoid"] = &ONNXImporter::parseQSigmoid; dispatch["QLinearConcat"] = &ONNXImporter::parseQConcat; dispatch["QGemm"] = &ONNXImporter::parseQGemm; domain_dispatch_map["com.microsoft"] = dispatch; } Net readNetFromONNX(const String& onnxFile) { return detail::readNetDiagnostic(onnxFile.c_str()); } Net readNetFromONNX(const char* buffer, size_t sizeBuffer) { return detail::readNetDiagnostic(buffer, sizeBuffer); } Net readNetFromONNX(const std::vector& buffer) { return readNetFromONNX(reinterpret_cast(buffer.data()), buffer.size()); } Mat readTensorFromONNX(const String& path) { std::fstream input(path.c_str(), std::ios::in | std::ios::binary); if (!input) { CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", path.c_str())); } opencv_onnx::TensorProto tensor_proto = opencv_onnx::TensorProto(); if (!tensor_proto.ParseFromIstream(&input)) { CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX data: %s", path.c_str())); } Mat mat = getMatFromTensor(tensor_proto); releaseONNXTensor(tensor_proto); return mat; } CV__DNN_INLINE_NS_END }} // namespace #endif