// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#include "net_impl.hpp"

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN

static int g_networkId = 0;

detail::NetImplBase::NetImplBase()
    : networkId(CV_XADD(&g_networkId, 1))
    , networkDumpCounter(0)
    , dumpLevel(getParam_DNN_NETWORK_DUMP())
{
    // nothing
}

std::string detail::NetImplBase::getDumpFileNameBase() const
{
    std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++);
    return dumpFileNameBase;
}

Net::Impl::~Impl()
{
    // nothing
}

Net::Impl::Impl()
{
    // allocate fake net input layer
    netInputLayer = Ptr<DataLayer>(new DataLayer());
    LayerData& inpl = layers.insert(make_pair(0, LayerData())).first->second;
    inpl.id = 0;
    netInputLayer->name = inpl.name = "_input";
    inpl.type = "__NetInputLayer__";
    inpl.layerInstance = netInputLayer;
    layerNameToId.insert(std::make_pair(inpl.name, inpl.id));

    lastLayerId = 0;
    netWasAllocated = false;
    netWasQuantized = false;
    fusion = true;
    isAsync = false;
    preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT();
    preferableTarget = DNN_TARGET_CPU;
    hasDynamicShapes = false;
}

bool Net::Impl::empty() const
{
    return layers.size() <= 1;  // first layer is default Data layer
}

void Net::Impl::clear()
{
    CV_TRACE_FUNCTION();

    MapIdToLayerData::iterator it;
    for (it = layers.begin(); it != layers.end(); it++)
    {
        if (it->second.id != 0)
        {
            it->second.inputBlobs.clear();
            it->second.outputBlobs.clear();
            it->second.internals.clear();
        }
        it->second.skip = false;
        // it->second.consumers.clear();
        Ptr<Layer> currLayer = it->second.layerInstance;

        if (currLayer.empty())
            continue;

        currLayer->unsetAttached();
    }
    netWasAllocated = false;
    layersTimings.clear();
}

void Net::Impl::validateBackendAndTarget()
{
    CV_TRACE_FUNCTION();

    CV_Assert(preferableBackend != DNN_BACKEND_OPENCV ||
              preferableTarget == DNN_TARGET_CPU ||
              preferableTarget == DNN_TARGET_OPENCL ||
              preferableTarget == DNN_TARGET_OPENCL_FP16);
    CV_Assert(preferableBackend != DNN_BACKEND_HALIDE ||
              preferableTarget == DNN_TARGET_CPU ||
              preferableTarget == DNN_TARGET_OPENCL);
#ifdef HAVE_WEBNN
    if (preferableBackend == DNN_BACKEND_WEBNN)
    {
        CV_Assert(preferableTarget == DNN_TARGET_CPU ||
                  preferableTarget == DNN_TARGET_OPENCL);
    }
#endif
    CV_Assert(preferableBackend != DNN_BACKEND_VKCOM ||
              preferableTarget == DNN_TARGET_VULKAN);
    CV_Assert(preferableBackend != DNN_BACKEND_CUDA ||
              IS_DNN_CUDA_TARGET(preferableTarget));
    CV_Assert(preferableBackend != DNN_BACKEND_TIMVX ||
              preferableTarget == DNN_TARGET_NPU);
    CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && "Inheritance internal error");
}
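// Illustrative sketch (not compiled): how the backend/target pairs validated
// above are selected from user code. The model file name is hypothetical.
#if 0
#include <opencv2/dnn.hpp>

void backendTargetExample()
{
    cv::dnn::Net net = cv::dnn::readNet("model.onnx");  // hypothetical model file
    // Each backend accepts only the targets asserted in validateBackendAndTarget(),
    // e.g. the OpenCV backend supports the CPU and OpenCL/OpenCL_FP16 targets:
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
    // Mismatched pairs (e.g. DNN_BACKEND_VKCOM with DNN_TARGET_CPU) fail the
    // CV_Assert checks when setUpNet() runs.
}
#endif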
void Net::Impl::setUpNet(const std::vector<LayerPin>& blobsToKeep_)
{
    CV_TRACE_FUNCTION();

    if (dumpLevel && networkDumpCounter == 0)
    {
        dumpNetworkToFile();
    }

    validateBackendAndTarget();

    if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
    {
        if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
#ifndef HAVE_OPENCL
        {
            CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
            preferableTarget = DNN_TARGET_CPU;
        }
#else
        {
            if (!getParam_DNN_OPENCL_ALLOW_ALL_DEVICES())
            {
                // Current implementation is only valid for GPU (#11494)
                if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU)
                {
                    CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU.");
                    preferableTarget = DNN_TARGET_CPU;
                }
                else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
                {
                    CV_LOG_WARNING(NULL,
                            "DNN: OpenCL target with fp16 precision is not supported "
                            "with current OpenCL device (tested with Intel GPUs only), "
                            "switching to OpenCL with fp32 precision.");
                    preferableTarget = DNN_TARGET_OPENCL;
                }
            }
        }
#endif
        if (preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan())
        {
            preferableBackend = DNN_BACKEND_OPENCV;
            preferableTarget = DNN_TARGET_CPU;
        }

        if (preferableBackend == DNN_BACKEND_CUDA && !haveCUDA())
        {
#ifdef HAVE_CUDA
            CV_LOG_WARNING(NULL, "unable to use CUDA backend; switching to CPU");
#else
            CV_LOG_WARNING(NULL, "DNN module was not built with CUDA backend; switching to CPU");
#endif
            preferableBackend = DNN_BACKEND_OPENCV;
            preferableTarget = DNN_TARGET_CPU;
        }

        if (preferableBackend == DNN_BACKEND_TIMVX && !haveTimVX())
        {
            preferableBackend = DNN_BACKEND_OPENCV;
            preferableTarget = DNN_TARGET_CPU;
        }

        clear();

        if (hasDynamicShapes)
        {
            updateLayersShapes();
        }

        this->blobsToKeep = blobsToKeep_;

        allocateLayers(blobsToKeep_);

        MapIdToLayerData::iterator it = layers.find(0);
        CV_Assert(it != layers.end());
        it->second.skip = netInputLayer->skip;

        initBackend(blobsToKeep_);

        if (!netWasAllocated)
        {
#ifdef HAVE_HALIDE
            if (preferableBackend == DNN_BACKEND_HALIDE)
                compileHalide();
#else
            CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
#endif
        }

        netWasAllocated = true;

        if (dumpLevel)
        {
            dumpNetworkToFile();
        }
    }
}

Ptr<Layer> Net::Impl::getLayer(int layerId) const
{
    LayerData& ld = getLayerData(layerId);
    return getLayerInstance(ld);
}

Ptr<Layer> Net::Impl::getLayer(const LayerId& layerId) const
{
    LayerData& ld = getLayerData(layerId);
    return getLayerInstance(ld);
}

int Net::Impl::getLayerId(const String& layerName) const
{
    std::map<String, int>::const_iterator it = layerNameToId.find(layerName);
    return (it != layerNameToId.end()) ? it->second : -1;
}

int Net::Impl::getLayerId(int id) const
{
    MapIdToLayerData::const_iterator it = layers.find(id);
    return (it != layers.end()) ? id : -1;
}

int Net::Impl::getLayerId(DictValue& layerDesc) const
{
    if (layerDesc.isInt())
        return getLayerId(layerDesc.get<int>());
    else if (layerDesc.isString())
        return getLayerId(layerDesc.get<String>());

    CV_Assert(layerDesc.isInt() || layerDesc.isString());
    return -1;
}

String Net::Impl::getLayerName(int id) const
{
    MapIdToLayerData::const_iterator it = layers.find(id);
    return (it != layers.end()) ? it->second.name : "(unknown layer)";
}
LayerData& Net::Impl::getLayerData(int id) const
{
    MapIdToLayerData::const_iterator it = layers.find(id);

    if (it == layers.end())
        CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));

    return const_cast<LayerData&>(it->second);
}

LayerData& Net::Impl::getLayerData(const String& layerName) const
{
    int id = getLayerId(layerName);

    if (id < 0)
        CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found");

    return getLayerData(id);
}

LayerData& Net::Impl::getLayerData(const DictValue& layerDesc) const
{
    CV_Assert(layerDesc.isInt() || layerDesc.isString());
    if (layerDesc.isInt())
        return getLayerData(layerDesc.get<int>());
    else /*if (layerDesc.isString())*/
        return getLayerData(layerDesc.get<String>());
}

/*static*/
void Net::Impl::addLayerInput(LayerData& ld, int inNum, LayerPin from)
{
    if ((int)ld.inputBlobsId.size() <= inNum)
    {
        ld.inputBlobsId.resize(inNum + 1);
    }
    else
    {
        LayerPin storedFrom = ld.inputBlobsId[inNum];
        if (storedFrom.valid() && !storedFrom.equal(from))
            CV_Error(Error::StsError, format("Input #%d of layer \"%s\" was already connected",
                                             inNum, ld.name.c_str()));
    }

    ld.inputBlobsId[inNum] = from;
}

int Net::Impl::resolvePinOutputName(LayerData& ld, const String& outName) const
{
    if (outName.empty())
        return 0;
    return getLayerInstance(ld)->outputNameToIndex(outName);
}

LayerPin Net::Impl::getPinByAlias(const String& layerName) const
{
    LayerPin pin;
    pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);

    if (pin.lid >= 0)
        pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);

    return pin;
}

std::vector<LayerPin> Net::Impl::getLayerOutPins(const String& layerName) const
{
    int lid = (layerName.empty()) ? 0 : getLayerId(layerName);

    MapIdToLayerData::const_iterator it = layers.find(lid);
    if (it == layers.end())
        CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid", lid));
    const size_t nOutputs = it->second.outputBlobs.size();

    std::vector<LayerPin> pins;
    for (int i = 0; i < (int)nOutputs; i++)
    {
        pins.push_back(LayerPin(lid, i));
    }

    return pins;
}

// FIXIT remove dtype
int Net::Impl::addLayer(const String& name, const String& type, const int& dtype, LayerParams& params)
{
    int id = getLayerId(name);
    if (id >= 0)
    {
        if (!DNN_DIAGNOSTICS_RUN || type != "NotImplemented")
        {
            CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already exists in the net");
            return -1;
        }
        else
        {
            LayerData& ld = layers.find(id)->second;
            ld.type = type;
            ld.params = params;
            return -1;
        }
    }

    id = ++lastLayerId;
    layerNameToId.insert(std::make_pair(name, id));
    layers.insert(std::make_pair(id, LayerData(id, name, type, dtype, params)));
    if (params.get<bool>("has_dynamic_shapes", false))
        hasDynamicShapes = true;

    if (dtype == CV_8S)
        netWasQuantized = true;

    return id;
}

int Net::Impl::addLayerToPrev(const String& name, const String& type, const int& dtype, LayerParams& params)
{
    int prvLid = lastLayerId;
    int newLid = addLayer(name, type, dtype, params);
    connect(prvLid, 0, newLid, 0);
    return newLid;
}

void Net::Impl::connect(int outLayerId, int outNum, int inLayerId, int inNum)
{
    CV_Assert(outLayerId < inLayerId);
    LayerData& ldOut = getLayerData(outLayerId);
    LayerData& ldInp = getLayerData(inLayerId);

    addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum));
    ldOut.requiredOutputs.insert(outNum);
    ldOut.consumers.push_back(LayerPin(inLayerId, outNum));

    CV_LOG_VERBOSE(NULL, 0, "DNN: connect(" << outLayerId << ":" << outNum << " ==> " << inLayerId << ":" << inNum << ")");
}
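// Illustrative sketch (not compiled): building a tiny net through the public
// Net API, which routes to addLayer()/addLayerToPrev()/connect() above. The
// layer names and (empty) parameters are hypothetical placeholders.
#if 0
#include <opencv2/dnn.hpp>

void buildNetExample()
{
    cv::dnn::Net net;
    cv::dnn::LayerParams lp;  // parameters would be filled in per layer type
    int idA = net.addLayer("convA", "Convolution", lp);
    // addLayerToPrev() connects output #0 of the previously added layer to
    // input #0 of the new layer, i.e. it performs connect(idA, 0, idB, 0):
    int idB = net.addLayerToPrev("reluB", "ReLU", lp);
    // Explicit wiring is also possible; connect() requires the producer to be
    // added before the consumer (outLayerId < inLayerId). Reconnecting the
    // same pin is a no-op; a *different* source would raise an error:
    net.connect(idA, 0, idB, 0);
}
#endif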
int Net::Impl::registerOutput(const std::string& outputName, int layerId, int outputPort)
{
    int checkLayerId = getLayerId(outputName);
    if (checkLayerId >= 0)
    {
        if (checkLayerId == layerId)
        {
            if (outputPort == 0)
            {
                // layer name correlates with its output name
                CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "': reuse layer with the same name and id=" << layerId << " to be linked");
                outputNameToId.insert(std::make_pair(outputName, layerId));
                return checkLayerId;
            }
        }
        CV_Error_(Error::StsBadArg, ("Layer with name='%s' already exists id=%d (to be linked with %d:%d)", outputName.c_str(), checkLayerId, layerId, outputPort));
    }
#if 0  // TODO
    if (outputPort == 0)
        // make alias only, need to adopt getUnconnectedOutLayers() call
#endif
    LayerParams outputLayerParams;
    outputLayerParams.name = outputName;
    outputLayerParams.type = "Identity";
    int dtype = CV_32F;  // FIXIT remove
    int outputLayerId = addLayer(outputLayerParams.name, outputLayerParams.type, dtype, outputLayerParams);
    connect(layerId, outputPort, outputLayerId, 0);
    CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "' id=" << outputLayerId << " defined as " << layerId << ":" << outputPort);
    outputNameToId.insert(std::make_pair(outputName, outputLayerId));
    return outputLayerId;
}
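// Illustrative sketch (not compiled): registerOutput() is internal and is
// typically invoked by model importers to expose a tensor name as a network
// output. When the name does not match an existing layer, an "Identity"
// layer is appended and wired to the requested producer pin, as implemented
// above. The ids and names below are hypothetical.
#if 0
void registerOutputExample(cv::dnn::Net::Impl& impl)
{
    int convId = 5;  // hypothetical producer layer id
    // Exposes output port 1 of layer #5 under the tensor name "features":
    int outId = impl.registerOutput("features", convId, 1);
    // "features" now resolves through outputNameToId and is reported by
    // getUnconnectedOutLayers().
    (void)outId;
}
#endif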
void Net::Impl::allocateLayer(int lid, const LayersShapesMap& layersShapes)
{
    CV_TRACE_FUNCTION();

    LayerData& ld = layers[lid];

    // already allocated
    if (ld.flag)
        return;

    size_t ninputs = ld.inputBlobsId.size();
#if 0
    printf("layer %s:", ld.name.c_str());
    for (size_t i = 0; i < ninputs; i++)
    {
        int inp_lid = ld.inputBlobsId[i].lid;
        LayerData& inp_ld = layers[inp_lid];
        int inp_outputs = (int)inp_ld.outputBlobs.size();
        std::cout << " " << inp_ld.name << "(" << inp_outputs;

        for (int j = 0; j < inp_outputs; j++)
        {
            std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
        }
        std::cout << ")";
    }
    printf("\n");
#endif

    // determine parent layers
    for (size_t i = 0; i < ninputs; i++)
        ld.inputLayersId.insert(ld.inputBlobsId[i].lid);

    // allocate parents
    for (std::set<int>::const_iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
        allocateLayer(*i, layersShapes);

    // bind inputs
    if (ld.id == 0)  // DataLayer
    {
        ninputs = netInputLayer->inputsData.size();
        ld.inputBlobsWrappers.resize(ninputs);
        for (size_t i = 0; i < ninputs; i++)
            ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
    }
    else
    {
        ld.inputBlobs.resize(ninputs);
        ld.inputBlobsWrappers.resize(ninputs);
        for (size_t i = 0; i < ninputs; i++)
        {
            LayerPin from = ld.inputBlobsId[i];
            CV_Assert(from.valid());
            CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
            ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
            ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
        }
    }

    LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);

    CV_Assert(layerShapesIt != layersShapes.end());

    if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16 && ld.dtype == CV_32F)
        ld.dtype = CV_16S;

    std::vector<LayerPin> pinsForInternalBlobs;
    blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
    ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
    for (int i = 0; i < ld.outputBlobs.size(); ++i)
        ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);

    /* CUDA backend has its own system for internal blobs; we don't need these */
    ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA || preferableBackend == DNN_BACKEND_TIMVX) ? 0 : ld.internals.size());
    for (int i = 0; i < ld.internalBlobsWrappers.size(); ++i)
        ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);

    Ptr<Layer> layerPtr = getLayerInstance(ld);
    {
        std::vector<Mat> inps(ld.inputBlobs.size());
        for (int i = 0; i < ld.inputBlobs.size(); ++i)
        {
            inps[i] = *ld.inputBlobs[i];
        }
        layerPtr->finalize(inps, ld.outputBlobs);
        layerPtr->preferableTarget = preferableTarget;
#if 0
        std::cout << "\toutputs:";
        size_t noutputs = ld.outputBlobs.size();
        for (size_t j = 0; j < noutputs; j++)
        {
            std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
        }
        std::cout << "\n";
#endif
    }

    // After allocation of the layer, we decrease counters of its input blobs.
    blobManager.releaseReferences(ld.inputBlobsId);
    blobManager.releaseReferences(pinsForInternalBlobs);

    ld.flag = 1;
}

void Net::Impl::allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
{
    CV_TRACE_FUNCTION();

    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
        it->second.flag = 0;

    CV_Assert(!layers[0].outputBlobs.empty());
    ShapesVec inputShapes;
    for (int i = 0; i < layers[0].outputBlobs.size(); i++)
    {
        Mat& inp = layers[0].outputBlobs[i];
        CV_Assert(inp.total());
        if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16 && layers[0].dtype == CV_32F)
        {
            layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
        }
        inputShapes.push_back(shape(inp));
    }

    LayersShapesMap layersShapes;
    getLayersShapes(inputShapes, layersShapes);

    blobManager.reset();
    backendWrappers.clear();

    for (auto& layer : layers)
    {
        auto& ld = layer.second;
        ld.inputBlobsWrappers.clear();
        ld.outputBlobsWrappers.clear();
        ld.internalBlobsWrappers.clear();
    }

    // Fake references to input blobs.
    for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
        blobManager.addReference(LayerPin(0, i));
    for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it)
    {
        const LayerData& ld = it->second;
        blobManager.addReferences(ld.inputBlobsId);
    }

    for (int i = 0; i < blobsToKeep_.size(); i++)
    {
        blobManager.addReference(blobsToKeep_[i]);
    }

    for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
    {
        int lid = it->first;
        allocateLayer(lid, layersShapes);
    }

    layersTimings.resize(lastLayerId + 1, 0);

    fuseLayers(blobsToKeep_);
}
void Net::Impl::forwardLayer(LayerData& ld)
{
    CV_TRACE_FUNCTION();

    Ptr<Layer> layer = ld.layerInstance;

    if (!ld.skip)
    {
        TickMeter tm;
        tm.start();

#ifndef HAVE_VULKAN
        std::map<int, Ptr<BackendNode>>::const_iterator it = ld.backendNodes.find(preferableBackend);
#else
        std::map<int, Ptr<BackendNode>>::iterator it = ld.backendNodes.find(preferableBackend);
#endif
        if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
        {
            if (isAsync)
                CV_Error(Error::StsNotImplemented, "Default implementation fallback is not supported in asynchronous mode");

            if (!layer->supportBackend(DNN_BACKEND_OPENCV))
                CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" is unsupported on OpenCV backend",
                                                   ld.name.c_str(), ld.type.c_str()));
#ifdef HAVE_OPENCL
            if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
            {
                std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
                std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
                layer->forward(umat_inputBlobs, umat_outputBlobs, umat_internalBlobs);
                if (getParam_DNN_CHECK_NAN_INF())
                {
                    bool fail = false;
                    for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
                    {
                        UMat& u = umat_outputBlobs[i];
                        Mat m;
                        if (u.depth() == CV_16S)  // FP16
                            convertFp16(u, m);
                        else
                            m = u.getMat(ACCESS_READ);
                        if (!checkRange(m))
                        {
                            std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                            fail = true;
                        }
                        else if (!checkRange(m, true, NULL, -1e6, 1e6))
                        {
                            std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                            fail = true;
                        }
                    }
                    if (fail)
                    {
                        for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
                        {
                            UMat& u = umat_inputBlobs[i];
                            Mat m;
                            if (u.depth() == CV_16S)  // FP16
                                convertFp16(u, m);
                            else
                                m = u.getMat(ACCESS_READ);
                            std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
                            if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
                        {
                            UMat& u = umat_outputBlobs[i];
                            Mat m;
                            if (u.depth() == CV_16S)  // FP16
                                convertFp16(u, m);
                            else
                                m = u.getMat(ACCESS_READ);
                            std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
                            if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
                        {
                            UMat& u = umat_internalBlobs[i];
                            Mat m;
                            if (u.depth() == CV_16S)  // FP16
                                convertFp16(u, m);
                            else
                                m = u.getMat(ACCESS_READ);
                            std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
                            if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
                        }
                        if (getParam_DNN_CHECK_NAN_INF_RAISE_ERROR())
                            CV_Assert(!fail);
                    }
                }
                OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
            }
            else
#endif
            {
                for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
                {
                    if (!ld.inputBlobsWrappers[i].empty())
                        ld.inputBlobsWrappers[i]->copyToHost();
                }

                std::vector<Mat> inps(ld.inputBlobs.size());
                for (int i = 0; i < ld.inputBlobs.size(); ++i)
                {
                    inps[i] = *ld.inputBlobs[i];
                }
                layer->forward(inps, ld.outputBlobs, ld.internals);

                if (getParam_DNN_CHECK_NAN_INF())
                {
                    bool fail = false;
                    for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
                    {
                        const Mat& m = ld.outputBlobs[i];
                        if (!checkRange(m))
                        {
                            std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                            fail = true;
                        }
                        else if (!checkRange(m, true, NULL, -1e6, 1e6))
                        {
                            std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                            fail = true;
                        }
                    }
                    if (fail)
                    {
                        for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
                        {
                            const Mat* pM = ld.inputBlobs[i];
                            if (!pM)
                            {
                                std::cout << "INPUT " << i << " is NULL" << std::endl;
                                continue;
                            }
                            const Mat& m = *pM;
                            std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                            if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
                        {
                            const Mat& m = ld.outputBlobs[i];
                            std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                            if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        for (size_t i = 0; i < ld.internals.size(); ++i)
                        {
                            const Mat& m = ld.internals[i];
                            std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                            if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl;
                        }
                        if (getParam_DNN_CHECK_NAN_INF_RAISE_ERROR())
                            CV_Assert(!fail);
                    }
                }

                for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
                {
                    if (!ld.outputBlobsWrappers[i].empty())
                        ld.outputBlobsWrappers[i]->setHostDirty();
                }
            }
        }
        else
        {
            Ptr<BackendNode> node = it->second;
            CV_Assert(!node.empty());
            if (preferableBackend == DNN_BACKEND_CUDA)
            {
                CV_Assert(haveCUDA());

#ifdef HAVE_CUDA
                Ptr<CUDABackendNode> cudaNode = node.dynamicCast<CUDABackendNode>();
                CV_Assert(!cudaNode.empty());

                cudaNode->forward(ld.inputBlobsWrappers, ld.outputBlobsWrappers, cudaInfo->workspace);

                for (auto id : ld.cudaD2HBackgroundTransfers)
                {
                    auto wrapper = ld.outputBlobsWrappers[id].dynamicCast<CUDABackendWrapper>();
                    wrapper->copyToHostInBackground();
                }
#endif
            }
            else if (preferableBackend == DNN_BACKEND_HALIDE)
            {
                forwardHalide(ld.outputBlobsWrappers, node);
            }
            else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
            {
                CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && "Inheritance internal error");
            }
            else if (preferableBackend == DNN_BACKEND_WEBNN)
            {
                forwardWebnn(ld.outputBlobsWrappers, node, isAsync);
            }
            else if (preferableBackend == DNN_BACKEND_TIMVX)
            {
                forwardTimVX(ld.outputBlobsWrappers, node);
            }
#ifdef HAVE_VULKAN
            else if (preferableBackend == DNN_BACKEND_VKCOM)
            {
                try
                {
                    forwardVkCom(ld.outputBlobsWrappers, node);
                }
                catch (const cv::Exception& e)
                {
                    CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what());
                    it->second = Ptr<BackendNode>();
                    forwardLayer(ld);
                }
            }
#endif
            else
            {
                CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend));
            }
        }

        tm.stop();
        int64 t = tm.getTimeTicks();
        layersTimings[ld.id] = (t > 0) ? t : t + 1;  // zero for skipped layers only
    }
    else
    {
        layersTimings[ld.id] = 0;
    }

    ld.flag = 1;
}
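// Note: the NaN/Inf checks above are driven by OpenCV configuration
// parameters, typically set as environment variables before the process
// starts (a usage sketch; the variable names follow the getParam_* getters
// used above):
//
//   OPENCV_DNN_CHECK_NAN_INF=1              enable per-layer output checks
//   OPENCV_DNN_CHECK_NAN_INF_DUMP=1         also dump blob contents on failure
//   OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR=1  turn the warning into an assertion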
void Net::Impl::forwardToLayer(LayerData& ld, bool clearFlags)
{
    CV_TRACE_FUNCTION();

    if (clearFlags)
    {
        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
            it->second.flag = 0;
    }

    // already was forwarded
    if (ld.flag)
        return;

    // forward parents
    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it)
    {
        LayerData& ld = it->second;
        if (ld.flag)
            continue;
        forwardLayer(ld);
    }

    // forward itself
    forwardLayer(ld);

#ifdef HAVE_CUDA
    if (preferableBackend == DNN_BACKEND_CUDA)
        cudaInfo->context.stream.synchronize();
#endif
}

Mat Net::Impl::forward(const String& outputName)
{
    CV_Assert(!empty());
    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;

    String layerName = outputName;

    if (layerName.empty())
    {
        std::vector<String> layerNames = getLayerNames();
        CV_Assert(!layerNames.empty());
        layerName = layerNames.back();
    }

    std::vector<LayerPin> pins(1, getPinByAlias(layerName));
    setUpNet(pins);
    forwardToLayer(getLayerData(layerName));

    return getBlob(layerName);
}
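// Illustrative sketch (not compiled): the typical inference flow that ends up
// in Impl::forward() above -- set an input blob, then request an output by
// name. The model file, image file, and blob parameters are hypothetical.
#if 0
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

void inferenceExample()
{
    cv::dnn::Net net = cv::dnn::readNet("model.onnx");  // hypothetical model
    cv::Mat img = cv::imread("input.jpg");              // hypothetical image
    cv::Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(224, 224));
    net.setInput(blob);
    // An empty name selects the last layer; a specific name resolves through
    // getPinByAlias() and triggers setUpNet()/forwardToLayer() above:
    cv::Mat out = net.forward("prob");
}
#endif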
AsyncArray Net::Impl::forwardAsync(const String& outputName)
{
    CV_Assert(!empty());
    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;

#ifdef CV_CXX11
    String layerName = outputName;

    if (layerName.empty())
    {
        std::vector<String> layerNames = getLayerNames();
        CV_Assert(!layerNames.empty());
        layerName = layerNames.back();
    }

    std::vector<LayerPin> pins(1, getPinByAlias(layerName));
    setUpNet(pins);

    if (preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
        CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backend only");

    isAsync = true;
    forwardToLayer(getLayerData(layerName));
    isAsync = false;

    return getBlobAsync(layerName);
#else
    CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires build with enabled C++11");
#endif  // CV_CXX11
}

void Net::Impl::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
{
    CV_Assert(!empty());
    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;

    String layerName = outputName;

    if (layerName.empty())
    {
        std::vector<String> layerNames = getLayerNames();
        CV_Assert(!layerNames.empty());
        layerName = layerNames.back();
    }

    std::vector<LayerPin> pins(1, getPinByAlias(layerName));
    setUpNet(pins);
    forwardToLayer(getLayerData(layerName));

    LayerPin pin = getPinByAlias(layerName);
    LayerData& ld = layers[pin.lid];

    if (outputBlobs.isUMat())
    {
        getBlob(layerName).copyTo(outputBlobs);
    }
    else if (outputBlobs.isMat())
    {
        outputBlobs.assign(getBlob(layerName));
    }
    else if (outputBlobs.isMatVector())
    {
        if (preferableTarget != DNN_TARGET_CPU)
        {
            for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
            {
                CV_Assert(!ld.outputBlobsWrappers[i].empty());
                ld.outputBlobsWrappers[i]->copyToHost();
            }
        }
        if (ld.outputBlobs[0].depth() == CV_16S)
        {
            std::vector<Mat>& outputvec = *(std::vector<Mat>*)outputBlobs.getObj();
            outputvec.resize(ld.outputBlobs.size());
            for (int i = 0; i < outputvec.size(); i++)
                convertFp16(ld.outputBlobs[i], outputvec[i]);
        }
        else
        {
            // Output depth can be CV_32F or CV_8S
            std::vector<Mat>& outputvec = *(std::vector<Mat>*)outputBlobs.getObj();
            outputvec = ld.outputBlobs;
        }
    }
    else if (outputBlobs.isUMatVector())
    {
        std::vector<UMat>& outputvec = *(std::vector<UMat>*)outputBlobs.getObj();

#ifdef HAVE_OPENCL
        if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
        {
            if (preferableTarget == DNN_TARGET_OPENCL)
                outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
            else if (preferableTarget == DNN_TARGET_OPENCL_FP16)
            {
                std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                outputvec.resize(out_vec.size());
                for (int i = 0; i < out_vec.size(); i++)
                    convertFp16(out_vec[i], outputvec[i]);
            }
        }
        else
#endif
        {
            outputvec.resize(ld.outputBlobs.size());
            for (int i = 0; i < outputvec.size(); ++i)
                ld.outputBlobs[i].copyTo(outputvec[i]);
        }
    }
}

void Net::Impl::forward(OutputArrayOfArrays outputBlobs,
        const std::vector<String>& outBlobNames)
{
    CV_Assert(!empty());
    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;

    std::vector<LayerPin> pins;
    for (int i = 0; i < outBlobNames.size(); i++)
    {
        pins.push_back(getPinByAlias(outBlobNames[i]));
    }

    setUpNet(pins);

    LayerPin out = getLatestLayerPin(pins);

    forwardToLayer(getLayerData(out.lid));

    std::vector<Mat> matvec;
    for (int i = 0; i < pins.size(); i++)
    {
        matvec.push_back(getBlob(pins[i]));
    }

    outputBlobs.create((int)matvec.size(), 1, CV_32F/*FIXIT*/, -1);  // allocate vector
    outputBlobs.assign(matvec);
}

void Net::Impl::forward(std::vector<std::vector<Mat>>& outputBlobs,
        const std::vector<String>& outBlobNames)
{
    CV_Assert(!empty());
    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;

    std::vector<LayerPin> pins;
    for (int i = 0; i < outBlobNames.size(); i++)
    {
        pins.push_back(getPinByAlias(outBlobNames[i]));
    }

    setUpNet(pins);

    LayerPin out = getLatestLayerPin(pins);

    forwardToLayer(getLayerData(out.lid));

    outputBlobs.resize(outBlobNames.size());
    for (int i = 0; i < outBlobNames.size(); i++)
    {
        std::vector<LayerPin> lp = getLayerOutPins(outBlobNames[i]);
        outputBlobs[i].resize(lp.size());
        for (int j = 0; j < lp.size(); j++)
        {
            outputBlobs[i][j] = getBlob(lp[j]);
        }
    }
}
void Net::Impl::getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
{
    CV_CheckGE(id, 0, "");
    CV_CheckLT(id, (int)layers.size(), "");
    LayerData& layerData = layers[id];
    std::vector<LayerPin>& inputLayerIds = layerData.inputBlobsId;
    LayerShapes& layerShapes = inOutShapes[id];

    if (id == 0 && layerShapes.in[0].empty())
    {
        if (!layerData.outputBlobs.empty())
        {
            ShapesVec shapes;
            for (int i = 0; i < layerData.outputBlobs.size(); i++)
            {
                Mat& inp = layerData.outputBlobs[i];
                CV_Assert(!inp.empty());
                shapes.push_back(shape(inp));
            }
            layerShapes.in = shapes;
        }
        else
        {
            const std::vector<MatShape>& inputShapes = netInputLayer->shapes;
            bool none = true;
            for (size_t i = 0; i < inputShapes.size(); i++)
            {
                if (!inputShapes[i].empty())
                {
                    none = false;
                    break;
                }
            }
            if (none)
            {
                layerShapes.out.clear();
                return;
            }
            else
            {
                layerShapes.in = inputShapes;
            }
        }
    }

    if (layerShapes.in.empty())
    {
        for (int i = 0; i < inputLayerIds.size(); i++)
        {
            int layerId = inputLayerIds[i].lid;
            LayersShapesMap::const_iterator it = inOutShapes.find(layerId);
            if (it == inOutShapes.end() || it->second.out.empty())
            {
                getLayerShapesRecursively(layerId, inOutShapes);
                it = inOutShapes.find(layerId);
                CV_Assert(it != inOutShapes.end());
            }
            const int out_port = inputLayerIds[i].oid;
            CV_CheckLT(out_port, (int)it->second.out.size(), "");
            const MatShape& shape = it->second.out[out_port];
            layerShapes.in.push_back(shape);
        }
    }
    const ShapesVec& is = layerShapes.in;
    ShapesVec& os = layerShapes.out;
    ShapesVec& ints = layerShapes.internal;
    int requiredOutputs = layerData.requiredOutputs.size();
    const Ptr<Layer>& l = getLayerInstance(layerData);
    CV_Assert(l);
    bool layerSupportInPlace = false;
    try
    {
        layerSupportInPlace = l->getMemoryShapes(is, requiredOutputs, os, ints);
    }
    catch (const cv::Exception& e)
    {
        CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() throws exception." <<
                " inputs=" << is.size() <<
                " outputs=" << os.size() << "/" << requiredOutputs <<
                " blobs=" << l->blobs.size());
        for (size_t i = 0; i < is.size(); ++i)
        {
            CV_LOG_ERROR(NULL, "    input[" << i << "] = " << toString(is[i]));
        }
        for (size_t i = 0; i < os.size(); ++i)
        {
            CV_LOG_ERROR(NULL, "    output[" << i << "] = " << toString(os[i]));
        }
        for (size_t i = 0; i < l->blobs.size(); ++i)
        {
            CV_LOG_ERROR(NULL, "    blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i])));
        }
        CV_LOG_ERROR(NULL, "Exception message: " << e.what());
        throw;
    }
    layerShapes.supportInPlace = layerSupportInPlace;

    try
    {
        for (int i = 0; i < ints.size(); i++)
            CV_CheckGT(total(ints[i]), 0, "");

        for (int i = 0; i < os.size(); i++)
            CV_CheckGT(total(os[i]), 0, "");
    }
    catch (const cv::Exception& e)
    {
        CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() post validation failed." <<
                " inputs=" << is.size() <<
                " outputs=" << os.size() << "/" << requiredOutputs <<
                " blobs=" << l->blobs.size() <<
                " inplace=" << layerSupportInPlace);
        for (size_t i = 0; i < is.size(); ++i)
        {
            CV_LOG_ERROR(NULL, "    input[" << i << "] = " << toString(is[i]));
        }
        for (size_t i = 0; i < os.size(); ++i)
        {
            CV_LOG_ERROR(NULL, "    output[" << i << "] = " << toString(os[i]));
        }
        for (size_t i = 0; i < l->blobs.size(); ++i)
        {
            CV_LOG_ERROR(NULL, "    blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i])));
        }
        CV_LOG_ERROR(NULL, "Exception message: " << e.what());
        throw;
    }
}
void Net::Impl::getLayersShapes(
        const ShapesVec& netInputShapes,
        std::vector<int>& layersIds,
        std::vector<ShapesVec>& inLayersShapes,
        std::vector<ShapesVec>& outLayersShapes) /*const*/
{
    layersIds.clear();
    inLayersShapes.clear();
    outLayersShapes.clear();

    Impl::LayersShapesMap inOutShapes;
    getLayersShapes(netInputShapes, inOutShapes);

    for (Impl::LayersShapesMap::const_iterator it = inOutShapes.begin();
            it != inOutShapes.end(); it++)
    {
        layersIds.push_back(it->first);
        inLayersShapes.push_back(it->second.in);
        outLayersShapes.push_back(it->second.out);
    }
}

void Net::Impl::getLayersShapes(const ShapesVec& netInputShapes,
        LayersShapesMap& inOutShapes)
{
    inOutShapes.clear();

    inOutShapes[0].in = netInputShapes;  // insert shape for first input layer
    for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
    {
        getLayerShapesRecursively(it->first, inOutShapes);
    }
}

void Net::Impl::getLayerShapes(const ShapesVec& netInputShapes,
        const int layerId,
        LayerShapes& shapes)
{
    LayersShapesMap inOutShapes;
    inOutShapes[0].in = netInputShapes;  // insert shape for first input layer
    getLayerShapesRecursively(layerId, inOutShapes);
    shapes = inOutShapes[layerId];
}
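// Illustrative sketch (not compiled): querying layer shapes without running
// the network, through the public Net::getLayersShapes() wrapper around the
// functions above. The 4-D input shape is a hypothetical NCHW blob.
#if 0
#include <opencv2/dnn.hpp>

void shapeQueryExample(cv::dnn::Net& net)
{
    cv::dnn::MatShape inputShape = {1, 3, 224, 224};  // hypothetical N,C,H,W
    std::vector<int> layersIds;
    std::vector<std::vector<cv::dnn::MatShape>> inShapes, outShapes;
    net.getLayersShapes(inputShape, layersIds, inShapes, outShapes);
    for (size_t i = 0; i < layersIds.size(); i++)
    {
        // inShapes[i]/outShapes[i] hold the inferred blob shapes of layer i
    }
}
#endif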
void Net::Impl::updateLayersShapes()
{
    CV_LOG_DEBUG(NULL, "updateLayersShapes() with layers.size=" << layers.size());
    CV_Assert(netInputLayer);
    DataLayer& inputLayer = *netInputLayer;
    LayerData& inputLayerData = layers[0];
    CV_Assert(inputLayerData.layerInstance.get() == &inputLayer);
    CV_Assert(!inputLayerData.outputBlobs.empty());
    ShapesVec inputShapes;
    for (int i = 0; i < inputLayerData.outputBlobs.size(); i++)
    {
        Mat& inp = inputLayerData.outputBlobs[i];
        CV_Assert(!inp.empty());
        if (preferableBackend == DNN_BACKEND_OPENCV &&  // FIXIT: wrong place for output allocation
            preferableTarget == DNN_TARGET_OPENCL_FP16 &&
            inputLayerData.dtype == CV_32F)
        {
            inp.create(inp.dims, inp.size, CV_16S);
        }
        inputShapes.push_back(shape(inp));
    }
    CV_LOG_DEBUG(NULL, toString(inputShapes, "Network input shapes"));
    LayersShapesMap layersShapes;
    layersShapes[0].in = inputShapes;
    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
    {
        int layerId = it->first;
        LayerData& layerData = it->second;
        const std::vector<LayerPin>& inputLayerIds = layerData.inputBlobsId;
        LayerShapes& layerShapes = layersShapes[layerId];
        CV_LOG_DEBUG(NULL, "layer " << layerId << ": [" << layerData.type << "]:(" << layerData.name << ") with inputs.size=" << inputLayerIds.size());
        if (layerShapes.in.empty())
        {
            for (int i = 0; i < inputLayerIds.size(); i++)
            {
                const LayerPin& inputPin = inputLayerIds[i];
                int inputLayerId = inputPin.lid;
                CV_LOG_DEBUG(NULL, "    input[" << i << "] " << inputLayerId << ":" << inputPin.oid << " as [" << layers[inputLayerId].type << "]:(" << layers[inputLayerId].name << ")");
                LayersShapesMap::const_iterator inputIt = layersShapes.find(inputLayerId);
                if (inputIt == layersShapes.end() || inputIt->second.out.empty())
                {
                    getLayerShapesRecursively(inputLayerId, layersShapes);
                }
                const MatShape& shape = layersShapes[inputLayerId].out[inputPin.oid];
                layerShapes.in.push_back(shape);
            }
            getLayerInstance(layerData)->updateMemoryShapes(layerShapes.in);
        }
        CV_LOG_DEBUG(NULL, "Layer " << layerId << ": " << toString(layerShapes.in, "input shapes"));
        CV_LOG_IF_DEBUG(NULL, !layerShapes.out.empty(), "Layer " << layerId << ": " << toString(layerShapes.out, "output shapes"));
        CV_LOG_IF_DEBUG(NULL, !layerShapes.internal.empty(), "Layer " << layerId << ": " << toString(layerShapes.internal, "internal shapes"));
    }
    CV_LOG_DEBUG(NULL, "updateLayersShapes() - DONE");
}

LayerPin Net::Impl::getLatestLayerPin(const std::vector<LayerPin>& pins) const
{
    return *std::max_element(pins.begin(), pins.end());
}

Mat Net::Impl::getBlob(const LayerPin& pin) const
{
    CV_TRACE_FUNCTION();

    if (!pin.valid())
        CV_Error(Error::StsObjectNotFound, "Requested blob not found");

    MapIdToLayerData::const_iterator it = layers.find(pin.lid);
    if (it == layers.end())
        CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid (output #%d requested)", pin.lid, pin.oid));

    const LayerData& ld = it->second;
    if ((size_t)pin.oid >= ld.outputBlobs.size())
    {
        CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produces only %zu outputs, "
                                              "the #%d was requested",
                                       ld.name.c_str(), ld.outputBlobs.size(), pin.oid));
    }
    if (preferableTarget != DNN_TARGET_CPU)
    {
        CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
        // Transfer data to CPU if it's required.
        ld.outputBlobsWrappers[pin.oid]->copyToHost();
    }

    if (ld.outputBlobs[pin.oid].depth() == CV_16S)
    {
        Mat output_blob;
        convertFp16(ld.outputBlobs[pin.oid], output_blob);
        return output_blob;
    }
    else
        return ld.outputBlobs[pin.oid];
}

Mat Net::Impl::getBlob(String outputName) const
{
    return getBlob(getPinByAlias(outputName));
}

AsyncArray Net::Impl::getBlobAsync(const LayerPin& pin)
{
    CV_TRACE_FUNCTION();
    CV_Error(Error::StsNotImplemented, "DNN: OpenVINO/nGraph backend is required");
}

AsyncArray Net::Impl::getBlobAsync(String outputName)
{
    return getBlobAsync(getPinByAlias(outputName));
}

void Net::Impl::setInputsNames(const std::vector<String>& inputBlobNames)
{
    CV_Assert(netInputLayer);
    netInputLayer->setNames(inputBlobNames);
}

void Net::Impl::setInputShape(const String& inputName, const MatShape& shape)
{
    CV_Assert(netInputLayer);
    netInputLayer->setInputShape(inputName, shape);
}
void Net::Impl::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
{
    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;

    LayerPin pin;
    pin.lid = 0;
    pin.oid = resolvePinOutputName(getLayerData(pin.lid), name);

    if (!pin.valid())
        CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");

    Mat blob_ = blob.getMat();  // can't use InputArray directly due to MatExpr stuff
    MatShape blobShape = shape(blob_);

    if (pin.lid == 0)
    {
        CV_Assert(!netInputLayer.empty());
        const DataLayer& netInputLayer = *(this->netInputLayer);
        if (!netInputLayer.shapes.empty())
        {
            CV_CheckLT(pin.oid, (int)netInputLayer.shapes.size(), "");
            const MatShape& inputShapeLimitation = netInputLayer.shapes[pin.oid];
            if (!inputShapeLimitation.empty())
            {
                CV_CheckEQ(inputShapeLimitation.size(), blobShape.size(), "");
#if 0  // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0
                const size_t dims = inputShapeLimitation.size();
                for (size_t dim = 0; dim < dims; dim++)
                {
                    if (dims >= 3 && dim == 0 && inputShapeLimitation[0] == 1)
                        continue;  // don't limit batch
                    CV_CheckEQ(inputShapeLimitation[dim], blobShape[dim], "");
                }
#endif
            }
        }
    }

    LayerData& ld = layers[pin.lid];
    const int numInputs = std::max(pin.oid + 1, (int)ld.requiredOutputs.size());
    ld.outputBlobs.resize(numInputs);
    ld.outputBlobsWrappers.resize(numInputs);
    netInputLayer->inputsData.resize(numInputs);
    netInputLayer->scaleFactors.resize(numInputs);
    netInputLayer->means.resize(numInputs);

    MatShape prevShape = shape(netInputLayer->inputsData[pin.oid]);
    bool oldShape = prevShape == blobShape;

    blob_.copyTo(netInputLayer->inputsData[pin.oid]);
    if (!oldShape)
        ld.outputBlobs[pin.oid] = netInputLayer->inputsData[pin.oid];

    if (!ld.outputBlobsWrappers[pin.oid].empty())
    {
        ld.outputBlobsWrappers[pin.oid]->setHostDirty();
    }
    netInputLayer->scaleFactors[pin.oid] = scalefactor;
    netInputLayer->means[pin.oid] = mean;
    netWasAllocated = netWasAllocated && oldShape;
}
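// Illustrative sketch (not compiled): the scalefactor/mean passed to
// setInput() are stored per input pin in netInputLayer->scaleFactors/means
// (see above) and applied when the DataLayer runs, as
// scalefactor * (blob - mean). The values below are hypothetical
// normalization parameters.
#if 0
#include <opencv2/dnn.hpp>

void setInputExample(cv::dnn::Net& net, const cv::Mat& blob)
{
    net.setInput(blob, "data", 1.0 / 127.5, cv::Scalar(127.5, 127.5, 127.5));
}
#endif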
Mat Net::Impl::getParam(int layer, int numParam) const
{
    LayerData& ld = getLayerData(layer);
    std::vector<Mat>& layerBlobs = getLayerInstance(ld)->blobs;
    CV_Assert(numParam < (int)layerBlobs.size());
    return layerBlobs[numParam];
}

void Net::Impl::setParam(int layer, int numParam, const Mat& blob)
{
    LayerData& ld = getLayerData(layer);

    // FIXIT we should not modify "execution" instance
    std::vector<Mat>& layerBlobs = getLayerInstance(ld)->blobs;
    CV_Assert(numParam < (int)layerBlobs.size());
    // we don't make strong checks, use this function carefully
    layerBlobs[numParam] = blob;
}

static string dumpLayerParameterSize(const string& name, const LayerParams& lp)
{
    std::ostringstream out(name, std::ios::ate);
    DictValue param = lp.get(name);
    switch (param.size())
    {
    case 1: out << " : "; break;
    case 2: out << " (HxW): "; break;
    case 3: out << " (DxHxW): "; break;
    default:
        CV_LOG_INFO(NULL, format("DNN/dumpLayerParameterSize(): Unsupported '%s' size = %d", name.c_str(), param.size()));
        out << ": ";
    }
    for (size_t i = 0; i < param.size(); i++)
    {
        if (i > 0)
            out << " x ";
        out << param.get<int>(i);
    }
    return out.str();
}

string Net::Impl::dump(bool forceAllocation) const
{
    bool hasInput = !netInputLayer->inputsData.empty();
    if (forceAllocation)
    {
        if (!netWasAllocated)
            const_cast<Net::Impl*>(this)->setUpNet();
    }

    std::ostringstream out;
    const std::map<int, LayerData>& map = layers;

    Backend prefBackend = (Backend)preferableBackend;
    std::vector<std::vector<int>> skippedLayers;
    std::vector<int> skipId;
    std::vector<int> allLayers(map.size(), -1);
    int idPrev = -1;
    Ptr<BackendNode> prevNode;
    for (std::map<int, LayerData>::const_reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit)
    {
        std::map<int, Ptr<BackendNode>>::const_iterator itBackend = rit->second.backendNodes.find(prefBackend);
        if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() || itBackend->second.empty())
        {
            if (rit->second.skip)
                skipId.push_back(rit->first);
            else if (!skipId.empty())
            {
                if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty())
                    skipId.push_back(rit->first);
                else if (idPrev != -1)
                    skipId.push_back(idPrev);

                std::sort(skipId.begin(), skipId.end());
                for (int i = 0; i < skipId.size(); i++)
                {
                    allLayers[skipId[i]] = skippedLayers.size();
                }
                skippedLayers.push_back(skipId);
                skipId.clear();
            }
        }
        else
        {
            if (itBackend->second == prevNode)
                skipId.push_back(idPrev);
            else if (!skipId.empty())
            {
                skipId.push_back(idPrev);
                std::sort(skipId.begin(), skipId.end());
                for (int i = 0; i < skipId.size(); i++)
                {
                    allLayers[skipId[i]] = skippedLayers.size();
                }
                skippedLayers.push_back(skipId);
                skipId.clear();
            }
            idPrev = rit->first;
            prevNode = itBackend->second;
        }
    }
    std::vector<string> colors = { "#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151", "#b266ff", "#b266ff", "#3cb371" };
    string backend;
    switch (prefBackend)
    {
    case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break;
    case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break;
    case DNN_BACKEND_INFERENCE_ENGINE:  // fallthru
    case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019:  // fallthru
    case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "OpenVINO/"; break;
    case DNN_BACKEND_OPENCV: backend = "OCV/"; break;
    case DNN_BACKEND_VKCOM: backend = "VULKAN/"; break;
    case DNN_BACKEND_CUDA: backend = "CUDA/"; break;
    case DNN_BACKEND_WEBNN: backend = "WEBNN/"; break;
    case DNN_BACKEND_TIMVX: backend = "TIMVX/"; break;
    // don't use default:
    }
    out << "digraph G {\n";
    // Add nodes
    for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
    {
        const LayerData& ld = it->second;
        string name = ld.params.name;
        std::vector<int> clusterIds(1, it->first);
        if (allLayers[it->first] == -1 && !name.empty())
        {
            out << "\t\"" << name << "\" [label=\"";
        }
        else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0])
        {
            continue;
        }
        else  // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0]
        {
            int cluster = allLayers[it->first];
            out << "\t\"" << "cluster_" << cluster << "\" [label=\"{";
            clusterIds = skippedLayers[allLayers[it->first]];  // vertices in current cluster
        }
        for (int i = 0; i < clusterIds.size(); i++)
        {
            CV_DbgAssert(map.find(clusterIds[i]) != map.end());
            const LayerParams& lp = map.find(clusterIds[i])->second.params;
            if (!lp.name.empty())
            {
                if (i > 0)
                {
                    out << " | ";
                }
                out << lp.name << "\\n"
                    << lp.type << "\\n";  // align center
                if (lp.has("kernel_size"))
                {
                    string kernel = dumpLayerParameterSize("kernel_size", lp);
                    out << kernel;
                    out << "\\l";  // align left
                }
                else if (lp.has("kernel_h") && lp.has("kernel_w"))
                {
                    DictValue h = lp.get("kernel_h");
                    DictValue w = lp.get("kernel_w");
                    out << "kernel (HxW): " << h << " x " << w;
                    out << "\\l";  // align left
                }
                if (lp.has("stride"))
                {
                    string stride = dumpLayerParameterSize("stride", lp);
                    out << stride;
                    out << "\\l";  // align left
                }
                else if (lp.has("stride_h") && lp.has("stride_w"))
                {
                    DictValue h = lp.get("stride_h");
                    DictValue w = lp.get("stride_w");
                    out << "stride (HxW): " << h << " x " << w;
                    out << "\\l";  // align left
                }
                if (lp.has("dilation"))
                {
                    string dilation = dumpLayerParameterSize("dilation", lp);
                    out << dilation;
                    out << "\\l";  // align left
                }
                else if (lp.has("dilation_h") && lp.has("dilation_w"))
                {
                    DictValue h = lp.get("dilation_h");
                    DictValue w = lp.get("dilation_w");
                    out << "dilation (HxW): " << h << " x " << w;
                    out << "\\l";  // align left
                }
                if (lp.has("pad"))
                {
                    DictValue pad = lp.get("pad");
                    out << "pad ";
                    switch (pad.size())
                    {
                    case 1: out << ": " << pad; break;
                    case 2:
                        out << "(HxW): (" << pad.get<int>(0) << " x " << pad.get<int>(1) << ")";
                        break;
                    case 4:
                        out << "(HxW): (" << pad.get<int>(0) << ", " << pad.get<int>(2) << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(3) << ")";
                        break;
                    case 6:
                        out << "(DxHxW): (" << pad.get<int>(0) << ", " << pad.get<int>(3) << ") x (" << pad.get<int>(1) << ", " << pad.get<int>(4) << ") x (" << pad.get<int>(2) << ", " << pad.get<int>(5) << ")";
                        break;
                    default: CV_Error(Error::StsNotImplemented, format("Unsupported pad size = %d", pad.size()));
                    }
                    out << "\\l";  // align left
                }
                else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b"))
                {
                    DictValue l = lp.get("pad_l");
                    DictValue t = lp.get("pad_t");
                    DictValue r = lp.get("pad_r");
                    DictValue b = lp.get("pad_b");
                    out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")";
                    out << "\\l";  // align left
                }
                else if (lp.has("pooled_w") || lp.has("pooled_h"))
                {
                    DictValue h = lp.get("pooled_h");
                    DictValue w = lp.get("pooled_w");
                    out << "pad pooled (HxW): " << h << " x " << w;
                    out << "\\l";  // align left
                }
                if (lp.has("pool"))
                {
                    out << "pool: " << lp.get("pool");
                    out << "\\l";  // align left
                }
                if (lp.has("global_pooling"))
                {
                    out << "global_pooling: " << lp.get("global_pooling");
                    out << "\\l";  // align left
                }
                if (lp.has("group"))
                {
                    out << "group: " << lp.get("group");
                    out << "\\l";  // align left
                }
            }
        }
        if (!ld.outputBlobs.empty())
        {
            out << "output: " << ld.outputBlobs[0].size;
            out << "\\l";  // align left
        }

        Ptr<BackendNode> layerBackend;
        std::map<int, Ptr<BackendNode>>::const_iterator ibn = ld.backendNodes.find(prefBackend);
        if (ibn != ld.backendNodes.end())
            layerBackend = ibn->second;
        out << (!layerBackend.empty() ? backend : "OCV/");
        int colorId = 0;
        const Target target = ld.layerInstance.empty()
                ? DNN_TARGET_CPU
                : (Target)(ld.layerInstance->preferableTarget);  // TODO fix preferableTarget type
        switch (target)
        {
        case DNN_TARGET_CPU: out << "CPU"; colorId = layerBackend.empty() ? 0 : 5; break;
        case DNN_TARGET_OPENCL: out << "OCL"; colorId = 1; break;
        case DNN_TARGET_OPENCL_FP16: out << "OCL_FP16"; colorId = 2; break;
        case DNN_TARGET_MYRIAD: out << "MYRIAD"; colorId = 3; break;
        case DNN_TARGET_HDDL: out << "HDDL"; colorId = 8; break;
        case DNN_TARGET_VULKAN: out << "VULKAN"; colorId = 7; break;
        case DNN_TARGET_FPGA: out << "FPGA"; colorId = 4; break;
        case DNN_TARGET_CUDA: out << "CUDA"; colorId = 5; break;
        case DNN_TARGET_CUDA_FP16: out << "CUDA_FP16"; colorId = 6; break;
        case DNN_TARGET_NPU: out << "NPU"; colorId = 9; break;
        // don't use default:
        }
        CV_Assert(colorId < colors.size());
        out << "\\n";  // align center
        out << ((clusterIds.size() == 1) ? "\" " : " }\" ");
        out << "fillcolor=\"" << colors[colorId] << "\" ";
        out << "style=filled ";
        out << "shape=" << ((clusterIds.size() == 1) ? "box" : "record") << "]\n";
    }
    out << '\n';
    // Add edges
    int inputsSize = hasInput ? netInputLayer->outNames.size() : 0;
    for (std::map<int, LayerData>::const_iterator it = map.begin(); it != map.end(); ++it)
    {
        const LayerData& ld = it->second;
        if (allLayers[it->first] == -1)  // node
        {
            for (int i = 0; i < ld.consumers.size(); i++)
            {
                int outId = ld.consumers[i].lid;
                if (it == map.begin() && inputsSize > 1)
                    out << "\t\"" << ld.name << "_" << i << "\"" << " -> ";
                else
                    out << "\t\"" << ld.name << "\"" << " -> ";
                if (allLayers[outId] == -1)  // node
                {
                    CV_DbgAssert(map.find(outId) != map.end());
                    out << "\"" << map.find(outId)->second.name << "\"\n";
                }
                else  // cluster
                {
                    out << "\"" << "cluster_" << allLayers[outId] << "\"\n";
                }
            }
        }
        else if (it->first == skippedLayers[allLayers[it->first]].back())  // edges from last layer in cluster
        {
            for (int i = 0; i < ld.consumers.size(); i++)
            {
                int outId = ld.consumers[i].lid;
                if (allLayers[outId] == -1)  // node
                {
                    CV_DbgAssert(map.find(outId) != map.end());
                    out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
                    out << "\"" << map.find(outId)->second.name << "\"\n";
                }
                else if (allLayers[outId] != allLayers[it->first])
                {  // another cluster
                    out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> ";
                    out << "\"" << "cluster_" << allLayers[outId] << "\"\n";
                }
            }
        }
    }
    out << "}\n";
    return out.str();
}

void Net::Impl::dumpNetworkToFile() const
{
#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
    string dumpFileNameBase = getDumpFileNameBase();
    string dumpFileName = dumpFileNameBase + ".dot";
    try
    {
        string dumpStr = dump();
        std::ofstream out(dumpFileName.c_str(), std::ios::out | std::ios::binary);
        out << dumpStr;
    }
    catch (const std::exception& e)
    {
        std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
        out << "Exception: " << e.what() << std::endl;
    }
    catch (...)
    {
        std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out);
        out << "Can't dump: unknown exception" << std::endl;
    }
#endif
}
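// Illustrative sketch (not compiled): dump() produces a Graphviz "dot" graph.
// It can be requested explicitly via the public Net::dump()/dumpToFile(), and
// the resulting file rendered with the Graphviz CLI. File names are
// hypothetical.
#if 0
#include <opencv2/dnn.hpp>

void dumpExample(cv::dnn::Net& net)
{
    net.dumpToFile("net.dot");
    // Then, outside the program:
    //   dot -Tpng net.dot -o net.png
}
#endif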
std::vector<Ptr<Layer>> Net::Impl::getLayerInputs(int layerId) const
{
    LayerData& ld = getLayerData(layerId);

    std::vector<Ptr<Layer>> inputLayers;
    inputLayers.reserve(ld.inputBlobsId.size());
    for (int i = 0; i < ld.inputBlobsId.size(); ++i)
    {
        inputLayers.push_back(getLayer(ld.inputBlobsId[i].lid));
    }
    return inputLayers;
}

std::vector<String> Net::Impl::getLayerNames() const
{
    std::vector<String> res;
    res.reserve(layers.size());

    Impl::MapIdToLayerData::const_iterator it;
    for (it = layers.begin(); it != layers.end(); it++)
    {
        if (it->second.id)  // skip Data layer
            res.push_back(it->second.name);
    }

    return res;
}

// FIXIT drop "unconnected" API
std::vector<int> Net::Impl::getUnconnectedOutLayers() const
{
    std::vector<int> layersIds;

    // registerOutput() flow
    if (!outputNameToId.empty())
    {
        for (std::map<std::string, int>::const_iterator it = outputNameToId.begin(); it != outputNameToId.end(); ++it)
        {
            layersIds.push_back(it->second);
        }
        return layersIds;
    }

    Impl::MapIdToLayerData::const_iterator it;
    for (it = layers.begin(); it != layers.end(); it++)
    {
        int lid = it->first;
        const LayerData& ld = it->second;

        if (ld.requiredOutputs.size() == 0)
            layersIds.push_back(lid);
    }

    return layersIds;
}

// FIXIT drop "unconnected" API
std::vector<String> Net::Impl::getUnconnectedOutLayersNames() /*const*/
{
    std::vector<int> ids = getUnconnectedOutLayers();
    const size_t n = ids.size();
    std::vector<String> names(n);
    for (size_t i = 0; i < n; ++i)
    {
        names[i] = layers[ids[i]].name;
    }
    return names;
}

int64 Net::Impl::getFLOPS(const std::vector<MatShape>& netInputShapes) /*const*/
{
    int64 flops = 0;
    std::vector<int> ids;
    std::vector<std::vector<MatShape>> inShapes, outShapes;
    getLayersShapes(netInputShapes, ids, inShapes, outShapes);
    CV_Assert(inShapes.size() == outShapes.size());
    CV_Assert(inShapes.size() == ids.size());

    for (int i = 0; i < ids.size(); i++)
    {
        flops += getLayerInstance(layers[ids[i]])->getFLOPS(inShapes[i], outShapes[i]);
    }

    return flops;
}

int64 Net::Impl::getFLOPS(
        const int layerId,
        const std::vector<MatShape>& netInputShapes) /*const*/
{
    Impl::MapIdToLayerData::const_iterator layer = layers.find(layerId);
    CV_Assert(layer != layers.end());

    LayerShapes shapes;
    getLayerShapes(netInputShapes, layerId, shapes);

    return getLayerInstance(const_cast<LayerData&>(layer->second))->getFLOPS(shapes.in, shapes.out);
}
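// Illustrative sketch (not compiled): estimating cost through the public
// Net::getFLOPS()/getMemoryConsumption() wrappers around the functions in
// this section. The input shape is a hypothetical NCHW blob.
#if 0
#include <opencv2/dnn.hpp>

void complexityExample(cv::dnn::Net& net)
{
    cv::dnn::MatShape inputShape = {1, 3, 224, 224};  // hypothetical N,C,H,W
    int64_t flops = net.getFLOPS(inputShape);
    size_t weights = 0, blobs = 0;
    net.getMemoryConsumption(inputShape, weights, blobs);
    // flops: per-layer FLOP estimates summed over the whole net;
    // weights/blobs: bytes for parameters and intermediate outputs.
    (void)flops;
}
#endif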
void Net::Impl::getMemoryConsumption(
        const int layerId,
        const std::vector<MatShape>& netInputShapes,
        size_t& weights, size_t& blobs) /*const*/
{
    Impl::MapIdToLayerData::const_iterator layer = layers.find(layerId);
    CV_Assert(layer != layers.end());

    weights = blobs = 0;

    for (int i = 0; i < layer->second.params.blobs.size(); i++)
    {
        const Mat& weightsBlob = layer->second.params.blobs[i];
        weights += weightsBlob.total() * weightsBlob.elemSize();
    }

    LayerShapes shapes;
    getLayerShapes(netInputShapes, layerId, shapes);
    const ShapesVec& outLayerShapes = shapes.out;

    // FIXIT netWasQuantized check is not enough - per layer check should be done
    size_t elemSize = netWasQuantized ? sizeof(char) : sizeof(float);
    for (int i = 0; i < outLayerShapes.size(); i++)
    {
        blobs += total(outLayerShapes[i]) * elemSize;
    }
}

void Net::Impl::getMemoryConsumption(
        const std::vector<MatShape>& netInputShapes,
        size_t& weights, size_t& blobs) /*const*/
{
    std::vector<int> layerIds;
    std::vector<size_t> w, b;
    getMemoryConsumption(netInputShapes, layerIds, w, b);

    weights = blobs = 0;
    for (int i = 0; i < layerIds.size(); i++)
    {
        weights += w[i];
        blobs += b[i];
    }
}

int64 Net::Impl::getPerfProfile(std::vector<double>& timings) const
{
    timings = std::vector<double>(layersTimings.begin() + 1, layersTimings.end());
    int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0);
    return total;
}

void Net::Impl::getMemoryConsumption(
        const std::vector<MatShape>& netInputShapes,
        std::vector<int>& layerIds, std::vector<size_t>& weights,
        std::vector<size_t>& blobs) /*const*/
{
    layerIds.clear();
    weights.clear();
    blobs.clear();

    std::vector<std::vector<MatShape>> inLayerShapes, outLayerShapes;

    getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes);
    // FIXIT netWasQuantized check is not enough - per layer check should be done
    size_t elemSize = netWasQuantized ? sizeof(char) : sizeof(float);
    for (int i = 0; i < layerIds.size(); i++)
    {
        int w = 0, b = 0;
        Impl::MapIdToLayerData::const_iterator layer = layers.find(layerIds[i]);
        CV_Assert(layer != layers.end());

        for (int j = 0; j < layer->second.params.blobs.size(); j++)
        {
            const Mat& weightsBlob = layer->second.params.blobs[j];
            w += weightsBlob.total() * weightsBlob.elemSize();
        }

        for (int j = 0; j < outLayerShapes[i].size(); j++)
        {
            b += total(outLayerShapes[i][j]) * elemSize;
        }

        weights.push_back(w);
        blobs.push_back(b);
    }
}

// TODO drop?
void Net::Impl::getLayerTypes(std::vector<String>& layersTypes) const
{
    layersTypes.clear();

    std::map<String, int> layers_type_map;
    for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
    {
        if (layers_type_map.find(it->second.type) == layers_type_map.end())
            layers_type_map[it->second.type] = 0;
        layers_type_map[it->second.type]++;
    }

    for (std::map<String, int>::const_iterator it = layers_type_map.begin(); it != layers_type_map.end(); it++)
    {
        layersTypes.push_back(it->first);
    }
}

// TODO drop?
int Net::Impl::getLayersCount(const String& layerType) const
{
    int count = 0;
    for (Impl::MapIdToLayerData::const_iterator it = layers.begin();
            it != layers.end(); it++)
    {
        if (it->second.type == layerType)
            count++;
    }
    return count;
}

CV__DNN_INLINE_NS_END
}}  // namespace cv::dnn