/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" #include "op_halide.hpp" #include "op_inf_engine.hpp" #include "halide_scheduler.hpp" #include #include #include #include #include #include #include #include #include #include #include namespace cv { namespace dnn { CV__DNN_EXPERIMENTAL_NS_BEGIN // this option is useful to run valgrind memory errors detection static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false); #ifdef HAVE_OPENCL static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false); #endif static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT", #ifdef HAVE_INF_ENGINE (size_t)DNN_BACKEND_INFERENCE_ENGINE #else (size_t)DNN_BACKEND_OPENCV #endif ); // Additional checks (slowdowns execution!) 
static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false); static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false); static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false); using std::vector; using std::map; using std::make_pair; using std::set; //================================================================================================== class BackendRegistry { public: typedef std::vector< std::pair > BackendsList; const BackendsList & getBackends() const { return backends; } static BackendRegistry & getRegistry() { static BackendRegistry impl; return impl; } private: BackendRegistry() { #ifdef HAVE_HALIDE backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU)); # ifdef HAVE_OPENCL if (cv::ocl::useOpenCL()) backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL)); # endif #endif // HAVE_HALIDE #ifdef HAVE_INF_ENGINE if (checkIETarget(DNN_TARGET_CPU)) backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU)); if (checkIETarget(DNN_TARGET_MYRIAD)) backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD)); if (checkIETarget(DNN_TARGET_FPGA)) backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_FPGA)); # ifdef HAVE_OPENCL if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel()) { if (checkIETarget(DNN_TARGET_OPENCL)) backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL)); if (checkIETarget(DNN_TARGET_OPENCL_FP16)) backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16)); } # endif #endif // HAVE_INF_ENGINE #ifdef HAVE_OPENCL if (cv::ocl::useOpenCL()) { backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)); backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)); } #endif 
backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)); } static inline bool checkIETarget(int target) { #ifndef HAVE_INF_ENGINE return false; #else cv::dnn::Net net; cv::dnn::LayerParams lp; lp.set("kernel_size", 1); lp.set("num_output", 1); lp.set("bias_term", false); lp.type = "Convolution"; lp.name = "testLayer"; lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1))); net.addLayerToPrev(lp.name, lp.type, lp); net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE); net.setPreferableTarget(target); static int inpDims[] = {1, 2, 3, 4}; net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0))); try { net.forward(); } catch(...) { return false; } return true; #endif } BackendsList backends; }; std::vector< std::pair > getAvailableBackends() { return BackendRegistry::getRegistry().getBackends(); } std::vector getAvailableTargets(Backend be) { if (be == DNN_BACKEND_DEFAULT) be = (Backend)PARAM_DNN_BACKEND_DEFAULT; std::vector result; const BackendRegistry::BackendsList all_backends = getAvailableBackends(); for(BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i ) { if (i->first == be) result.push_back(i->second); } return result; } //================================================================================================== namespace { typedef std::vector ShapesVec; struct LayerShapes { ShapesVec in, out, internal; // No guarantees that layer which support in-place computations // will be computed in-place (input.data_ptr == output.data_ptr). // If layer said that it could work in-place and layers after it // no longer use input blob, we'll set output = input. 
bool supportInPlace; LayerShapes() {supportInPlace = false;} }; } Mat blobFromImage(InputArray image, double scalefactor, const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth) { CV_TRACE_FUNCTION(); Mat blob; blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth); return blob; } void blobFromImage(InputArray image, OutputArray blob, double scalefactor, const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth) { CV_TRACE_FUNCTION(); std::vector images(1, image.getMat()); blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth); } Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size, const Scalar& mean, bool swapRB, bool crop, int ddepth) { CV_TRACE_FUNCTION(); Mat blob; blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth); return blob; } void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor, Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth) { CV_TRACE_FUNCTION(); CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U"); if (ddepth == CV_8U) { CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth"); CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth"); } std::vector images; images_.getMatVector(images); CV_Assert(!images.empty()); for (size_t i = 0; i < images.size(); i++) { Size imgSize = images[i].size(); if (size == Size()) size = imgSize; if (size != imgSize) { if(crop) { float resizeFactor = std::max(size.width / (float)imgSize.width, size.height / (float)imgSize.height); resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR); Rect crop(Point(0.5 * (images[i].cols - size.width), 0.5 * (images[i].rows - size.height)), size); images[i] = images[i](crop); } else resize(images[i], images[i], size, 0, 0, INTER_LINEAR); } if(images[i].depth() == CV_8U && ddepth == CV_32F) 
images[i].convertTo(images[i], CV_32F); Scalar mean = mean_; if (swapRB) std::swap(mean[0], mean[2]); images[i] -= mean; images[i] *= scalefactor; } size_t nimages = images.size(); Mat image0 = images[0]; int nch = image0.channels(); CV_Assert(image0.dims == 2); if (nch == 3 || nch == 4) { int sz[] = { (int)nimages, nch, image0.rows, image0.cols }; blob_.create(4, sz, ddepth); Mat blob = blob_.getMat(); Mat ch[4]; for(size_t i = 0; i < nimages; i++ ) { const Mat& image = images[i]; CV_Assert(image.depth() == blob_.depth()); nch = image.channels(); CV_Assert(image.dims == 2 && (nch == 3 || nch == 4)); CV_Assert(image.size() == image0.size()); for( int j = 0; j < nch; j++ ) ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j)); if(swapRB) std::swap(ch[0], ch[2]); split(image, ch); } } else { CV_Assert(nch == 1); int sz[] = { (int)nimages, 1, image0.rows, image0.cols }; blob_.create(4, sz, ddepth); Mat blob = blob_.getMat(); for(size_t i = 0; i < nimages; i++ ) { const Mat& image = images[i]; CV_Assert(image.depth() == blob_.depth()); nch = image.channels(); CV_Assert(image.dims == 2 && (nch == 1)); CV_Assert(image.size() == image0.size()); image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0))); } } } void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_) { CV_TRACE_FUNCTION(); //A blob is a 4 dimensional matrix in floating point precision //blob_[0] = batchSize = nbOfImages //blob_[1] = nbOfChannels //blob_[2] = height //blob_[3] = width CV_Assert(blob_.depth() == CV_32F); CV_Assert(blob_.dims == 4); images_.create(cv::Size(1, blob_.size[0]), blob_.depth()); std::vector vectorOfChannels(blob_.size[1]); for (int n = 0; n < blob_.size[0]; ++n) { for (int c = 0; c < blob_.size[1]; ++c) { vectorOfChannels[c] = getPlane(blob_, n, c); } cv::merge(vectorOfChannels, images_.getMatRef(n)); } } class OpenCLBackendWrapper : public BackendWrapper { public: OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, 
DNN_TARGET_OPENCL) { m.copyTo(umat); host = &m; hostDirty = false; } OpenCLBackendWrapper(const Ptr& baseBuffer, Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL) { Ptr base = baseBuffer.dynamicCast(); CV_Assert(!base.empty()); host = &m; int shape[] = {1, (int)base->umat.total()}; umat = base->umat.reshape(1, 2, &shape[0]) .colRange(0, host->total()) .reshape(1, host->dims, &host->size[0]); hostDirty = false; } static Ptr create(Mat& m) { return Ptr(new OpenCLBackendWrapper(m)); } static Ptr create(const Ptr& baseBuffer, Mat& m) { return Ptr(new OpenCLBackendWrapper(baseBuffer, m)); } static std::vector getUMatVector(const std::vector >& wrappers) { const int numWrappers = wrappers.size(); std::vector mats(wrappers.size()); for (int i = 0; i < numWrappers; ++i) { Ptr umatWrapper = wrappers[i].dynamicCast(); CV_Assert(!umatWrapper.empty()); umatWrapper->copyToDevice(); mats[i] = umatWrapper->umat; } return mats; } // Replaces all umats in wrappers to specific ones. static void update(const std::vector >& wrappers, const std::vector& umats) { CV_Assert(wrappers.size() == umats.size()); for (int i = 0, n = umats.size(); i < n; ++i) { Ptr umatWrapper = wrappers[i].dynamicCast(); CV_Assert(!umatWrapper.empty()); umatWrapper->umat = umats[i]; } } ~OpenCLBackendWrapper() {} // Copies data from device to a host memory. 
virtual void copyToHost() CV_OVERRIDE { umat.copyTo(*host); } virtual void setHostDirty() CV_OVERRIDE { hostDirty = true; }; void copyToDevice() { if (hostDirty) { host->copyTo(umat); hostDirty = false; } } private: UMat umat; Mat* host; bool hostDirty; }; struct LayerPin { int lid; int oid; LayerPin(int layerId = -1, int outputId = -1) : lid(layerId), oid(outputId) {} bool valid() const { return (lid >= 0 && oid >= 0); } bool equal(const LayerPin &r) const { return (lid == r.lid && oid == r.oid); } bool operator<(const LayerPin &r) const { return lid < r.lid || (lid == r.lid && oid < r.oid); } bool operator ==(const LayerPin &r) const { return lid == r.lid && oid == r.oid; } }; struct LayerData { LayerData() : id(-1), skip(false), flag(0) {} LayerData(int _id, const String &_name, const String &_type, LayerParams &_params) : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0) { CV_TRACE_FUNCTION(); //add logging info params.name = name; params.type = type; } int id; String name; String type; LayerParams params; std::vector inputBlobsId; std::set inputLayersId; std::set requiredOutputs; std::vector consumers; std::vector > outputBlobsWrappers; std::vector > inputBlobsWrappers; std::vector > internalBlobsWrappers; Ptr layerInstance; std::vector outputBlobs; std::vector inputBlobs; std::vector internals; // Computation nodes of implemented backends (except DEFAULT). std::map > backendNodes; // Flag for skip layer computation for specific backend. 
bool skip; int flag; Ptr getLayerInstance() { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(type, "type", type.c_str()); if (layerInstance) return layerInstance; layerInstance = LayerFactory::createLayerInstance(type, params); if (!layerInstance) { CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\""); } return layerInstance; } }; //fake layer containing network input blobs struct DataLayer : public Layer { DataLayer() : Layer() { skip = false; } virtual bool supportBackend(int backendId) CV_OVERRIDE { return backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_INFERENCE_ENGINE && inputsData.size() == 1); } void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (outputs_arr.depth() == CV_16S) { forward_fallback(inputs_arr, outputs_arr, internals_arr); return; } std::vector outputs, internals; outputs_arr.getMatVector(outputs); internals_arr.getMatVector(internals); // Supported modes: // | Input type | Output type | // | fp32 | fp32 | // | uint8 | fp32 | for (int i = 0; i < inputsData.size(); ++i) { double scale = scaleFactors[i]; Scalar& mean = means[i]; CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4); CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, ""); bool singleMean = true; for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j) { singleMean = mean[j] == mean[j - 1]; } if (singleMean) { inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale); } else { for (int n = 0; n < inputsData[i].size[0]; ++n) for (int c = 0; c < inputsData[i].size[1]; ++c) { Mat inp = getPlane(inputsData[i], n, c); Mat out = getPlane(outputs[i], n, c); inp.convertTo(out, CV_32F, scale, -mean[c] * scale); } } } } #ifdef HAVE_OPENCL std::vector tmp_expressions; bool 
forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) { // Supported modes: // | Input type | Output type | // | fp32 | fp32 | // | fp32 | fp16 | // | uint8 | fp32 | std::vector outputs; outputs_.getUMatVector(outputs); tmp_expressions.clear(); for (int i = 0; i < inputsData.size(); ++i) { Mat inputData = inputsData[i]; double scale = scaleFactors[i]; Scalar& mean = means[i]; CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4); bool singleMean = true; for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j) { singleMean = mean[j] == mean[j - 1]; } if (outputs_.depth() == CV_16S) { if (singleMean) { tmp_expressions.push_back(Mat(scale * (inputsData[i] - mean[0]))); convertFp16(tmp_expressions.back(), outputs[i]); } else { for (int n = 0; n < inputsData[i].size[0]; ++n) for (int c = 0; c < inputsData[i].size[1]; ++c) { Mat inp = getPlane(inputsData[i], n, c); std::vector plane(4, Range::all()); plane[0] = Range(n, n + 1); plane[1] = Range(c, c + 1); UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size); tmp_expressions.push_back(scale * (inp - mean[c])); convertFp16(tmp_expressions.back(), out); } } } else { CV_Assert(outputs_.depth() == CV_32F); if (singleMean) { inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale); } else { for (int n = 0; n < inputsData[i].size[0]; ++n) for (int c = 0; c < inputsData[i].size[1]; ++c) { Mat inp = getPlane(inputsData[i], n, c); std::vector plane(4, Range::all()); plane[0] = Range(n, n + 1); plane[1] = Range(c, c + 1); UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size); inp.convertTo(out, CV_32F, scale, -mean[c] * scale); } } } } return true; } #endif int outputNameToIndex(const String& tgtName) CV_OVERRIDE { int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin()); return (idx < (int)outNames.size()) ? 
idx : -1; } void setNames(const std::vector &names) { outNames.assign(names.begin(), names.end()); } bool getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, std::vector &internals) const CV_OVERRIDE { CV_Assert(inputs.size() == requiredOutputs); outputs.assign(inputs.begin(), inputs.end()); return false; } virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE { std::vector outputs; outputs_arr.getMatVector(outputs); CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(), inputsData.size() == outputs.size()); skip = true; for (int i = 0; skip && i < inputsData.size(); ++i) { if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar()) skip = false; } } virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE { #ifdef HAVE_INF_ENGINE CV_CheckEQ(inputsData.size(), (size_t)1, ""); CV_CheckEQ(inputsData[0].dims, 4, ""); const size_t numChannels = inputsData[0].size[1]; CV_Assert(numChannels <= 4); // Scale InferenceEngine::TensorDesc td(InferenceEngine::Precision::FP32, {numChannels}, InferenceEngine::Layout::C); auto weights = InferenceEngine::make_shared_blob(td); weights->allocate(); float* weight_buf = weights->buffer().as(); std::fill(weight_buf, weight_buf + numChannels, scaleFactors[0]); // Mean subtraction auto biases = InferenceEngine::make_shared_blob(td); biases->allocate(); float* bias_buf = biases->buffer().as(); for (int i = 0; i < numChannels; ++i) { bias_buf[i] = -means[0][i] * scaleFactors[0]; } InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name); addConstantData("weights", weights, ieLayer); addConstantData("biases", biases, ieLayer); return Ptr(new InfEngineBackendNode(ieLayer)); #endif // HAVE_INF_ENGINE return Ptr(); } std::vector outNames; // Preprocessing parameters for each network's input. 
std::vector scaleFactors; std::vector means; std::vector inputsData; bool skip; }; struct BlobManager { public: // Increase references counter to layer output. void addReference(const LayerPin& lp) { std::map::iterator it = refCounter.find(lp); if (it == refCounter.end()) refCounter[lp] = 1; else it->second += 1; } void addReferences(const std::vector& pins) { for (int i = 0; i < pins.size(); i++) { addReference(pins[i]); } } // Returns number of references to allocated memory that used in specific // layer blob. int numReferences(const LayerPin& lp) { std::map::iterator mapIt = reuseMap.find(lp); CV_Assert(mapIt != reuseMap.end()); LayerPin memHost = mapIt->second; std::map::iterator refIt = refCounter.find(memHost); CV_Assert(refIt != refCounter.end()); return refIt->second; } // Reuse data allocated in inside the blob. void reuse(const LayerPin& host, const LayerPin& user) { CV_Assert(reuseMap.find(user) == reuseMap.end()); CV_Assert(reuseMap.find(host) != reuseMap.end()); LayerPin memHost = reuseMap[host]; reuseMap[user] = memHost; if (refCounter.find(memHost) != refCounter.end()) { std::map::iterator userRefIt = refCounter.find(user); if (userRefIt != refCounter.end()) { refCounter[memHost] += userRefIt->second; refCounter.erase(userRefIt); } else refCounter[memHost] += 1; } } // Decrease references counter to allocated memory inside specific blob. 
void releaseReference(const LayerPin& lp) { std::map::iterator mapIt = reuseMap.find(lp); CV_Assert(mapIt != reuseMap.end()); std::map::iterator refIt = refCounter.find(mapIt->second); CV_Assert(refIt != refCounter.end()); CV_Assert(refIt->second > 0); refIt->second -= 1; } void releaseReferences(const std::vector& pins) { for (int i = 0; i < pins.size(); i++) { releaseReference(pins[i]); } } void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half) { if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS) { Mat bestBlob; LayerPin bestBlobPin; std::map::iterator hostIt; std::map::iterator refIt; const int targetTotal = total(shape); int bestBlobTotal = INT_MAX; for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt) { refIt = refCounter.find(hostIt->first); // Use only blobs that had references before because if not, // it might be used as output. if (refIt != refCounter.end() && refIt->second == 0) { Mat& unusedBlob = hostIt->second; if (unusedBlob.total() >= targetTotal && unusedBlob.total() < bestBlobTotal) { bestBlobPin = hostIt->first; bestBlob = unusedBlob; bestBlobTotal = unusedBlob.total(); } } } if (!bestBlob.empty()) { reuse(bestBlobPin, lp); dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape); return; } } { // if dst already has been allocated with total(shape) elements, // it won't be recreated and pointer of dst.data remains the same. dst.create(shape, use_half ? 
CV_16S : CV_32F); addHost(lp, dst); } } void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes, std::vector& pinsForInternalBlobs, bool use_half = false) { CV_TRACE_FUNCTION(); pinsForInternalBlobs.clear(); std::vector& outputBlobs = ld.outputBlobs, &internalBlobs = ld.internals; const ShapesVec& outShapes = layerShapes.out, internalShapes = layerShapes.internal; outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob internalBlobs.resize(internalShapes.size()); CV_Assert(ld.requiredOutputs.size() <= outShapes.size()); // Check that layer could work in-place. bool inPlace = false; if (layerShapes.supportInPlace) { if (ld.inputBlobs.size() == 1) { // Get number of references to the input memory. int numRef = numReferences(ld.inputBlobsId[0]); // If current layer is one and only customer of this blob. inPlace = numRef == 1; } } ShapesVec shapes(outShapes); shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end()); std::vector blobs; for(int i = 0; i < outputBlobs.size(); i++) { blobs.push_back(&outputBlobs[i]); } for(int i = 0; i < internalBlobs.size(); i++) { blobs.push_back(&internalBlobs[i]); if (total(internalShapes[i])) { pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i)); } } addReferences(pinsForInternalBlobs); std::map > idxSizes; for(int i = 0; i < shapes.size(); i++) { idxSizes[total(shapes[i])].push_back(i); } std::map >::reverse_iterator it; for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++) { for(int j = 0; j < it->second.size(); j++) { int index = it->second[j]; if (total(shapes[index])) { LayerPin blobPin(ld.id, index); if (index < outShapes.size() && inPlace) { CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index])); ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]); reuse(ld.inputBlobsId[0], blobPin); } else reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half); } } } } // Clear internal state. 
Calls before an every reallocation. void reset() { CV_TRACE_FUNCTION(); refCounter.clear(); reuseMap.clear(); memHosts.clear(); } private: // Register allocated memory. void addHost(const LayerPin& lp, const Mat& mat) { CV_Assert(memHosts.find(lp) == memHosts.end()); reuseMap[lp] = lp; memHosts[lp] = mat; } std::map refCounter; // Maps pin to origin blob (for whom memory was allocated firstly). // For origin blobs key == value. std::map reuseMap; std::map memHosts; }; static Ptr wrapMat(int backendId, int targetId, cv::Mat& m) { if (backendId == DNN_BACKEND_OPENCV) { if (targetId == DNN_TARGET_CPU) return Ptr(); else if (IS_DNN_OPENCL_TARGET(targetId)) return OpenCLBackendWrapper::create(m); else CV_Error(Error::StsNotImplemented, "Unknown target identifier"); } else if (backendId == DNN_BACKEND_HALIDE) { CV_Assert(haveHalide()); #ifdef HAVE_HALIDE return Ptr(new HalideBackendWrapper(targetId, m)); #endif // HAVE_HALIDE } else if (backendId == DNN_BACKEND_INFERENCE_ENGINE) { CV_Assert(haveInfEngine()); #ifdef HAVE_INF_ENGINE return Ptr(new InfEngineBackendWrapper(targetId, m)); #endif // HAVE_INF_ENGINE } else CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); return Ptr(); } struct Net::Impl { typedef std::map LayersShapesMap; typedef std::map MapIdToLayerData; Impl() { //allocate fake net input layer netInputLayer = Ptr(new DataLayer()); LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second; inpl.id = 0; netInputLayer->name = inpl.name = "_input"; inpl.type = "__NetInputLayer__"; inpl.layerInstance = netInputLayer; layerNameToId.insert(std::make_pair(inpl.name, inpl.id)); lastLayerId = 0; netWasAllocated = false; fusion = true; isAsync = false; preferableBackend = DNN_BACKEND_DEFAULT; preferableTarget = DNN_TARGET_CPU; skipInfEngineInit = false; } Ptr netInputLayer; std::vector blobsToKeep; MapIdToLayerData layers; std::map layerNameToId; BlobManager blobManager; int preferableBackend; int preferableTarget; String 
halideConfigFile; bool skipInfEngineInit; // Map host data to backend specific wrapper. std::map > backendWrappers; int lastLayerId; bool netWasAllocated; bool fusion; bool isAsync; std::vector layersTimings; Mat output_blob; Ptr wrap(Mat& host) { if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU) return Ptr(); MatShape shape(host.dims); for (int i = 0; i < host.dims; ++i) shape[i] = host.size[i]; void* data = host.data; if (backendWrappers.find(data) != backendWrappers.end()) { Ptr baseBuffer = backendWrappers[data]; if (preferableBackend == DNN_BACKEND_OPENCV) { CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget)); return OpenCLBackendWrapper::create(baseBuffer, host); } else if (preferableBackend == DNN_BACKEND_HALIDE) { CV_Assert(haveHalide()); #ifdef HAVE_HALIDE return Ptr(new HalideBackendWrapper(baseBuffer, shape)); #endif // HAVE_HALIDE } else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) { return wrapMat(preferableBackend, preferableTarget, host); } else CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); } Ptr wrapper = wrapMat(preferableBackend, preferableTarget, host); backendWrappers[data] = wrapper; return wrapper; } #ifdef HAVE_HALIDE void compileHalide() { CV_TRACE_FUNCTION(); CV_Assert(preferableBackend == DNN_BACKEND_HALIDE); HalideScheduler scheduler(halideConfigFile); std::vector< std::reference_wrapper > compileList; compileList.reserve(64); for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) { LayerData &ld = it->second; Ptr layer = ld.layerInstance; if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip) { CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty()); bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]); if (!scheduled) { // Use automatic scheduling provided by layer. 
layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE], ld.inputBlobs, ld.outputBlobs, preferableTarget); } compileList.emplace_back(ld); } } std::atomic progress(0); auto fn = ([&] () -> void { for (;;) { int id = progress.fetch_add(1); if ((size_t)id >= compileList.size()) return; const LayerData& ld = compileList[id].get(); Ptr node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second; dnn::compileHalide(ld.outputBlobs, node, preferableTarget); } }); size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency()); num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads)); std::vector threads(num_threads - 1); for (auto& t: threads) t = std::thread(fn); fn(); // process own tasks for (auto& t: threads) t.join(); } #endif void clear() { CV_TRACE_FUNCTION(); MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end(); it++) { if (it->second.id != 0) { it->second.inputBlobs.clear(); it->second.outputBlobs.clear(); it->second.internals.clear(); } it->second.skip = false; //it->second.consumers.clear(); Ptr currLayer = it->second.layerInstance; if( currLayer.empty() ) continue; currLayer->unsetAttached(); } layersTimings.clear(); } void setUpNet(const std::vector& blobsToKeep_ = std::vector()) { CV_TRACE_FUNCTION(); if (preferableBackend == DNN_BACKEND_DEFAULT) preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT; CV_Assert(preferableBackend != DNN_BACKEND_OPENCV || preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16); CV_Assert(preferableBackend != DNN_BACKEND_HALIDE || preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL); CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE || preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_FPGA); if (!netWasAllocated || 
this->blobsToKeep != blobsToKeep_) { if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) #ifndef HAVE_OPENCL { CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU."); preferableTarget = DNN_TARGET_CPU; } #else { if (!DNN_OPENCL_ALLOW_ALL_DEVICES) { // Current implementation is only valid for GPU (#11494) if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU) { CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU."); preferableTarget = DNN_TARGET_CPU; } else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel()) { CV_LOG_WARNING(NULL, "DNN: OpenCL target with fp16 precision is not supported " "with current OpenCL device (tested with Intel GPUs only), " "switching to OpenCL with fp32 precision."); preferableTarget = DNN_TARGET_OPENCL; } } } #endif clear(); allocateLayers(blobsToKeep_); MapIdToLayerData::iterator it = layers.find(0); CV_Assert(it != layers.end()); it->second.skip = netInputLayer->skip; initBackend(); if (!netWasAllocated ) { #ifdef HAVE_HALIDE if (preferableBackend == DNN_BACKEND_HALIDE) compileHalide(); #else CV_Assert(preferableBackend != DNN_BACKEND_HALIDE); #endif } netWasAllocated = true; this->blobsToKeep = blobsToKeep_; } } int getLayerId(const String &layerName) { std::map::iterator it = layerNameToId.find(layerName); return (it != layerNameToId.end()) ? it->second : -1; } int getLayerId(int id) { MapIdToLayerData::iterator it = layers.find(id); return (it != layers.end()) ? id : -1; } int getLayerId(DictValue &layerDesc) { if (layerDesc.isInt()) return getLayerId(layerDesc.get()); else if (layerDesc.isString()) return getLayerId(layerDesc.get()); CV_Assert(layerDesc.isInt() || layerDesc.isString()); return -1; } String getLayerName(int id) { MapIdToLayerData::iterator it = layers.find(id); return (it != layers.end()) ? 
it->second.name : "(unknown layer)"; } LayerData& getLayerData(int id) { MapIdToLayerData::iterator it = layers.find(id); if (it == layers.end()) CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id)); return it->second; } LayerData& getLayerData(const String &layerName) { int id = getLayerId(layerName); if (id < 0) CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found"); return getLayerData(id); } LayerData& getLayerData(const DictValue &layerDesc) { CV_Assert(layerDesc.isInt() || layerDesc.isString()); if (layerDesc.isInt()) return getLayerData(layerDesc.get()); else /*if (layerDesc.isString())*/ return getLayerData(layerDesc.get()); } static void addLayerInput(LayerData &ld, int inNum, LayerPin from) { if ((int)ld.inputBlobsId.size() <= inNum) { ld.inputBlobsId.resize(inNum + 1); } else { LayerPin storedFrom = ld.inputBlobsId[inNum]; if (storedFrom.valid() && !storedFrom.equal(from)) CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected", inNum, ld.name.c_str())); } ld.inputBlobsId[inNum] = from; } int resolvePinOutputName(LayerData &ld, const String &outName) { if (outName.empty()) return 0; return ld.getLayerInstance()->outputNameToIndex(outName); } LayerPin getPinByAlias(const String &layerName) { LayerPin pin; pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName); if (pin.lid >= 0) pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName); return pin; } std::vector getLayerOutPins(const String &layerName) { int lid = (layerName.empty()) ? 
0 : getLayerId(layerName); std::vector pins; for (int i = 0; i < layers[lid].outputBlobs.size(); i++) { pins.push_back(LayerPin(lid, i)); } return pins; } void connect(int outLayerId, int outNum, int inLayerId, int inNum) { CV_Assert(outLayerId < inLayerId); LayerData &ldOut = getLayerData(outLayerId); LayerData &ldInp = getLayerData(inLayerId); addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum)); ldOut.requiredOutputs.insert(outNum); ldOut.consumers.push_back(LayerPin(inLayerId, outNum)); } void initBackend() { CV_TRACE_FUNCTION(); if (preferableBackend == DNN_BACKEND_OPENCV) CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget)); else if (preferableBackend == DNN_BACKEND_HALIDE) initHalideBackend(); else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) initInfEngineBackend(); else CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); } void initHalideBackend() { CV_TRACE_FUNCTION(); CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide()); // Iterator to current layer. MapIdToLayerData::iterator it = layers.begin(); // Iterator to base layer for fusion. In example, in case of conv+bn+relu // it'll be a conv layer. MapIdToLayerData::iterator baseIt = layers.begin(); for (; it != layers.end(); it++) { LayerData &ldTop = it->second; Ptr layerTop = ldTop.layerInstance; if (!layerTop->supportBackend(preferableBackend)) { // Move base iterator to layer that don't support preferable // backend to prevent fusion over layer of different backend. baseIt = it; continue; } // Try to do layers fusion. LayerData &ldBot = baseIt->second; Ptr layerBot = ldBot.layerInstance; // 1. Check that bottom and top from the same backends. if (it != layers.begin() && layerBot->supportBackend(preferableBackend)) { // 2. Check that current layer works in-place. bool inPlace = ldTop.inputBlobs.size() == 1 && ldBot.outputBlobs.size() == 1 && ldTop.inputBlobs[0]->data == ldBot.outputBlobs[0].data; if (inPlace) { // 3. 
Try to attach node. CV_Assert(!ldBot.backendNodes[preferableBackend].empty()); Ptr fusedNode = layerTop->tryAttach(ldBot.backendNodes[preferableBackend]); if (!fusedNode.empty()) { ldTop.skip = true; ldBot.backendNodes[preferableBackend] = fusedNode; ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers; continue; } } } // No layers fusion. ldTop.skip = false; ldTop.backendNodes[DNN_BACKEND_HALIDE] = layerTop->initHalide(ldTop.inputBlobsWrappers); baseIt = it; } } #ifdef HAVE_INF_ENGINE // Before launching Inference Engine graph we need to specify output blobs. // This function requests output blobs based on inputs references of // layers from default backend or layers from different graphs. void addInfEngineNetOutputs(LayerData &ld) { Ptr layerNet; if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end()) { Ptr node = ld.backendNodes[preferableBackend]; if (!node.empty()) { Ptr ieNode = node.dynamicCast(); CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty()); layerNet = ieNode->net; } } // For an every input reference we check that it belongs to one of // the Inference Engine backend graphs. Request an output blob if it is. // Do nothing if layer's input is from the same graph. for (int i = 0; i < ld.inputBlobsId.size(); ++i) { LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; Ptr inpNode = inpLd.backendNodes[preferableBackend]; if (!inpNode.empty()) { Ptr ieInpNode = inpNode.dynamicCast(); CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty()); if (layerNet != ieInpNode->net) { // layerNet is empty or nodes are from different graphs. 
ieInpNode->net->addOutput(ieInpNode->layer.getName());
                }
            }
        }
    }
#endif  // HAVE_INF_ENGINE

    // Prepares the Inference Engine backend for the current layers.
    // Asserts that the preferable backend is DNN_BACKEND_INFERENCE_ENGINE and that
    // haveInfEngine() holds; the rest of the body is compiled only with HAVE_INF_ENGINE.
    // NOTE(review): template arguments look stripped throughout this function
    // (e.g. "Ptr net", "dynamicCast()", "std::map >") - confirm against upstream.
    void initInfEngineBackend()
    {
        CV_TRACE_FUNCTION();
        CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE, haveInfEngine());
#ifdef HAVE_INF_ENGINE
        MapIdToLayerData::iterator it;
        Ptr net;

        // Name the data node behind every output wrapper. Layer 0 uses
        // netInputLayer->outNames when that list is not empty; every other
        // layer uses its own name.
        for (it = layers.begin(); it != layers.end(); ++it)
        {
            LayerData &ld = it->second;
            if (ld.id == 0)
            {
                CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
                          (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                {
                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
                    dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
#else
                    dataPtr->setName(netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]);
#endif
                }
            }
            else
            {
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                {
                    InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
                    dataPtr->name = ld.name;
#else
                    dataPtr->setName(ld.name);
#endif
                }
            }
        }

        // With skipInfEngineInit set, the node already stored on layers[lastLayerId]
        // supplies the network: the blobs of every layer are added to it, all layers
        // except lastLayerId are marked as skipped, the network is initialized and
        // the function returns without building anything else.
        if (skipInfEngineInit)
        {
            Ptr node = layers[lastLayerId].backendNodes[preferableBackend];
            CV_Assert(!node.empty());

            Ptr ieNode = node.dynamicCast();
            CV_Assert(!ieNode.empty());

            for (it = layers.begin(); it != layers.end(); ++it)
            {
                LayerData &ld = it->second;
                if (ld.id == 0)
                {
                    // NOTE(review): outNames[i] is indexed without the emptiness
                    // check used in the loop above - confirm it cannot be empty here.
                    for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
                    {
                        InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
                        dataPtr->name = netInputLayer->outNames[i];
#else
                        dataPtr->setName(netInputLayer->outNames[i]);
#endif
                    }
                }
                else
                {
                    for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                    {
                        InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000)
                        dataPtr->name = ld.name;
#else
                        dataPtr->setName(ld.name);
#endif
                    }
                }
                ieNode->net->addBlobs(ld.inputBlobsWrappers);
                ieNode->net->addBlobs(ld.outputBlobsWrappers);
                ld.skip = true;
            }
            layers[lastLayerId].skip = false;
            ieNode->net->init(preferableTarget);
            return;
        }

        // Build Inference Engine networks from sets of layers that support this
        // backend. Split a whole model on several Inference Engine networks if
        // some of layers are not implemented.

        // Set of all input and output blobs wrappers for current network.
        std::map > netBlobsWrappers;
        for (it = layers.begin(); it != layers.end(); ++it)
        {
            LayerData &ld = it->second;
            if (ld.id == 0 && ld.skip)
                continue;
            // A layer that is already marked as skipped is treated as fused.
            bool fused = ld.skip;

            Ptr layer = ld.layerInstance;
            if (!fused && !layer->supportBackend(preferableBackend))
            {
                // Decide whether the unsupported layer may still be wrapped into
                // a backend node (see the InfEngineBackendNode construction below).
                bool customizable = ld.id != 0 && ld.outputBlobs.size() == 1;
                // TODO: there is a bug in Myriad plugin with custom layers shape infer.
                if (preferableTarget == DNN_TARGET_MYRIAD)
                {
                    for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
                    {
                        customizable = ld.inputBlobs[i]->size[0] == 1;
                    }
                }

                // TODO: fix these workarounds
                if (preferableTarget == DNN_TARGET_MYRIAD ||
                    preferableTarget == DNN_TARGET_OPENCL ||
                    preferableTarget == DNN_TARGET_OPENCL_FP16)
                    customizable &= ld.type != "Concat";

                if (preferableTarget == DNN_TARGET_OPENCL ||
                    preferableTarget == DNN_TARGET_OPENCL_FP16)
                    customizable &= ld.type != "Power";

                if (preferableTarget == DNN_TARGET_OPENCL)
                    customizable &= ld.type != "Eltwise";

                if (!customizable)
                {
                    // The layer stays outside of any Inference Engine network:
                    // request outputs for its inputs, drop the current network
                    // and switch the layer instance to the CPU target.
                    addInfEngineNetOutputs(ld);
                    net = Ptr();
                    netBlobsWrappers.clear();  // Is not used for R5 release but we don't wrap it to #ifdef.
                    layer->preferableTarget = DNN_TARGET_CPU;
                    continue;
                }
            }
            ld.skip = true;  // Initially skip all Inference Engine supported layers.

            // Create a new network if one of inputs from different Inference Engine graph.
            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
            {
                LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
                Ptr inpNode = inpLd.backendNodes[preferableBackend];
                if (!inpNode.empty())
                {
                    Ptr ieInpNode = inpNode.dynamicCast();
                    CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty());
                    if (ieInpNode->net != net)
                    {
                        net = Ptr();
                        netBlobsWrappers.clear();  // Is not used for R5 release but we don't wrap it to #ifdef.
                        break;
                    }
                }
            }

            Ptr node;
            if (!net.empty())
            {
                if (fused)
                {
                    // A fused layer must work in-place; it reuses the backend node
                    // and the input wrappers of the layer that produces its input.
                    bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
                                   ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
                    CV_Assert(inPlace);
                    node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
                    ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
                }
            }
            else
                net = Ptr(new InfEngineBackendNet());

            if (!fused)
            {
                if (layer->supportBackend(preferableBackend))
                    node = layer->initInfEngine(ld.inputBlobsWrappers);
                else
                {
                    node = Ptr(new InfEngineBackendNode(
                        ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
                }
            }
            else if (node.empty())
                continue;

            CV_Assert(!node.empty());
            ld.backendNodes[preferableBackend] = node;

            Ptr ieNode = node.dynamicCast();
            CV_Assert(!ieNode.empty());
            ieNode->net = net;

            // Convert weights in FP16 for specific targets.
            if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
                 preferableTarget == DNN_TARGET_MYRIAD ||
                 preferableTarget == DNN_TARGET_FPGA) && !fused)
            {
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
                for (const std::string& name : {"weights", "biases"})
                {
                    auto it = ieNode->layer.getParameters().find(name);
                    if (it != ieNode->layer.getParameters().end())
                    {
                        InferenceEngine::Blob::Ptr bp = it->second.as();
                        it->second = convertFp16(std::const_pointer_cast(bp));
                    }
                }
#else
                auto& blobs = ieNode->layer.getConstantData();
                if (blobs.empty())
                {
                    // In case of non weightable layer we have to specify
                    // it's precision adding dummy blob.
auto blob = InferenceEngine::make_shared_blob(
                                    InferenceEngine::Precision::FP16,
                                    InferenceEngine::Layout::C, {1});
                    blob->allocate();
                    blobs[""] = blob;
                }
                else
                {
                    for (auto& it : blobs)
                        it.second = convertFp16(std::const_pointer_cast(it.second));
                }
#endif
            }

            // A fused layer reuses the node of its producer, so only non-fused
            // layers are added to the network as new layers.
            if (!fused)
                net->addLayer(ieNode->layer);

            net->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers, ieNode->layer.getName());
            net->addBlobs(ld.inputBlobsWrappers);
            net->addBlobs(ld.outputBlobsWrappers);
            addInfEngineNetOutputs(ld);
        }

        // Initialize all networks.
        // Layers are visited in reverse id order; the layer on which a network
        // gets initialized is the one that is un-skipped for that network.
        for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
        {
            LayerData &ld = it->second;
            if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end())
                continue;

            Ptr node = ld.backendNodes[preferableBackend];
            if (node.empty())
                continue;

            Ptr ieNode = node.dynamicCast();
            if (ieNode.empty())
                continue;

            CV_Assert(!ieNode->net.empty());

            if (!ieNode->net->isInitialized())
            {
                ieNode->net->init(preferableTarget);
                ld.skip = false;
            }
        }
#endif  // HAVE_INF_ENGINE
    }

    // Allocates the blobs and backend wrappers of layer `lid`, after recursively
    // doing the same for the layers that produce its inputs, and then calls
    // finalize() on the layer instance.
    // `layersShapes` must contain an entry for `lid` (asserted below).
    // ld.flag marks a layer that has already been processed.
    // NOTE(review): template arguments look stripped here as well
    // ("set::iterator", "std::vector pinsForInternalBlobs", "Ptr layerPtr").
    void allocateLayer(int lid, const LayersShapesMap& layersShapes)
    {
        CV_TRACE_FUNCTION();

        LayerData &ld = layers[lid];

        //already allocated
        if (ld.flag)
            return;

        size_t ninputs = ld.inputBlobsId.size();
#if 0
        printf("layer %s:", ld.name.c_str());
        for (size_t i = 0; i < ninputs; i++)
        {
            int inp_lid = ld.inputBlobsId[i].lid;
            LayerData &inp_ld = layers[inp_lid];
            int inp_outputs = (int)inp_ld.outputBlobs.size();
            std::cout << " " << inp_ld.name << "(" << inp_outputs;

            for( int j = 0; j < inp_outputs; j++ )
            {
                std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size;
            }
            std::cout << ")";
        }
        printf("\n");
#endif

        //determine parent layers
        for (size_t i = 0; i < ninputs; i++)
            ld.inputLayersId.insert(ld.inputBlobsId[i].lid);

        //allocate parents
        for (set::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
            allocateLayer(*i, layersShapes);

        //bind inputs
        if (ld.id == 0)  // DataLayer
        {
            // Layer 0 wraps the user-provided inputs held by netInputLayer.
            ninputs = netInputLayer->inputsData.size();
            ld.inputBlobsWrappers.resize(ninputs);
            for (size_t i = 0; i < ninputs; i++)
            {
                ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]);
            }
        }
        else
        {
            // Every other layer points at the output blobs (and wrappers) of
            // the layers it is connected to.
            ld.inputBlobs.resize(ninputs);
            ld.inputBlobsWrappers.resize(ninputs);
            for (size_t i = 0; i < ninputs; i++)
            {
                LayerPin from = ld.inputBlobsId[i];
                CV_Assert(from.valid());
                CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
                ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
                ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
            }
        }

        LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);

        CV_Assert(layerShapesIt != layersShapes.end());

        // The last argument is true only for the OpenCV backend with the
        // OpenCL FP16 target.
        std::vector pinsForInternalBlobs;
        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
                                          preferableBackend == DNN_BACKEND_OPENCV &&
                                          preferableTarget == DNN_TARGET_OPENCL_FP16);
        ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
        for (int i = 0; i < ld.outputBlobs.size(); ++i)
        {
            ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
        }
        ld.internalBlobsWrappers.resize(ld.internals.size());
        for (int i = 0; i < ld.internals.size(); ++i)
        {
            ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
        }

        Ptr layerPtr = ld.getLayerInstance();
        {
            // finalize() receives copies of the input Mat headers.
            std::vector inps(ld.inputBlobs.size());
            for (int i = 0; i < ld.inputBlobs.size(); ++i)
            {
                inps[i] = *ld.inputBlobs[i];
            }
            layerPtr->finalize(inps, ld.outputBlobs);
            layerPtr->preferableTarget = preferableTarget;
#if 0
            std::cout << "\toutputs:";
            size_t noutputs = ld.outputBlobs.size();
            for (size_t j = 0; j < noutputs; j++)
            {
                std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size;
            }
            std::cout << "\n";
#endif
        }

        // After allocation of layer, we decrease counters to it's input blobs.
        blobManager.releaseReferences(ld.inputBlobsId);
        blobManager.releaseReferences(pinsForInternalBlobs);

        ld.flag = 1;
    }

// Debug tracing for fuseLayers(): change "#if 0" to "#if 1" to turn printf_(())
// calls into real printf calls; otherwise they expand to nothing.
#if 0
#define printf_(args) printf args
#else
#define printf_(args)
#endif

    // Tries to merge layers into the layer that precedes them; a merged layer is
    // marked with `skip`. Returns immediately unless `fusion` is enabled and the
    // backend is DNN_BACKEND_OPENCV or DNN_BACKEND_INFERENCE_ENGINE.
    // The pins in `blobsToKeep_` are collected into pinsToKeep, which the fusion
    // conditions consult before fusing an output away.
    void fuseLayers(const std::vector& blobsToKeep_)
    {
        if( !fusion || (preferableBackend != DNN_BACKEND_OPENCV &&
                        preferableBackend != DNN_BACKEND_INFERENCE_ENGINE))
            return;

        CV_TRACE_FUNCTION();

        // scan through all the layers. If there is convolution layer followed by the activation layer,
        // we try to embed this activation into the convolution and disable separate execution of the activation
        std::set pinsToKeep(blobsToKeep_.begin(),
                            blobsToKeep_.end());
        MapIdToLayerData::iterator it;
        for (it = layers.begin(); it != layers.end(); it++)
        {
            int lid = it->first;
            LayerData& ld = layers[lid];
            if( ld.skip )
            {
                printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
                continue;
            }
            printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));

            // the optimization #1. try to fuse batch norm, scaling and/or activation layers
            // with the current layer if they follow it. Normally, the are fused with the convolution layer,
            // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
            // some other layers.
Ptr& currLayer = ld.layerInstance;
            // Fusion is attempted only when the layer has exactly one consumer
            // and its output #0 is not among the pins to keep.
            if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
            {
                LayerData* nextData = &layers[ld.consumers[0].lid];
                LayerPin lpNext(ld.consumers[0].lid, 0);
                // Walk down the chain of single consumers for as long as
                // tryFuse() accepts the next layer.
                while (nextData)
                {
                    Ptr nextLayer = nextData->layerInstance;
                    if (currLayer->tryFuse(nextLayer))
                    {
                        printf_(("\tfused with %s\n", nextLayer->name.c_str()));
                        nextData->skip = true;
                        // The current layer takes over the outputs of the fused one.
                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                        ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
                        if (nextData->consumers.size() == 1)
                        {
                            int nextLayerId = nextData->consumers[0].lid;
                            nextData = &layers[nextLayerId];
                            lpNext = LayerPin(nextLayerId, 0);
                        }
                        else
                        {
                            nextData = 0;
                            break;
                        }
                    }
                    else
                        break;
                }

                if (preferableBackend != DNN_BACKEND_OPENCV)
                    continue;  // Go to the next layer.

                // TODO: OpenCL target support more fusion styles.
                if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
                     (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
                     ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
                     ld.layerInstance->type != "Concat")) )
                    continue;

                // Second pass over the remaining chain: embed activation layers
                // through setActivation().
                while (nextData)
                {
                    // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
                    if (IS_DNN_OPENCL_TARGET(preferableTarget) &&
                        nextData->type != "ReLU" &&
                        nextData->type != "ChannelsPReLU" &&
                        nextData->type != "ReLU6" &&
                        nextData->type != "TanH" &&
                        nextData->type != "Power")
                        break;

                    Ptr nextActivLayer = nextData->layerInstance.dynamicCast();
                    if (nextActivLayer.empty())
                        break;

                    if (currLayer->setActivation(nextActivLayer))
                    {
                        printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                        nextData->skip = true;
                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                        ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
                        if (nextData->consumers.size() == 1)
                        {
                            int nextLayerId = nextData->consumers[0].lid;
                            nextData = &layers[nextLayerId];
                            lpNext = LayerPin(nextLayerId, 0);
                        }
                        else
                        {
                            nextData = 0;
                            break;
                        }
                    }
                    else
                        break;
                }

                // fuse convolution layer followed by eltwise + relu
                if ( IS_DNN_OPENCL_TARGET(preferableTarget) && ld.layerInstance->type == "Convolution" )
                {
                    Ptr nextEltwiseLayer;
                    if( nextData )
                        nextEltwiseLayer = nextData->layerInstance.dynamicCast();

                    if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
                        nextData && nextData->inputBlobsId.size() == 2 )
                    {
                        LayerData *eltwiseData = nextData;

                        // Eltwise layer has two inputs. We need to determine which
                        // is a base convolution layer and which could be used as it's bias.
                        LayerData* biasLayerData = 0;
                        for (int i = 0; i < 2; ++i)
                        {
                            LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
                            CV_Assert(downLayerData);
                            // Follow skipped single-input layers back to the
                            // layer that actually produces this input.
                            while (downLayerData->skip)
                            {
                                if (downLayerData->inputBlobsId.size() == 1)
                                    downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
                                else
                                {
                                    downLayerData = 0;
                                    break;
                                }
                            }
                            if (downLayerData && ld.id == downLayerData->id)
                            {
                                // Input i comes from the current layer, so the
                                // other input is the bias candidate.
                                biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
                                break;
                            }
                        }
                        CV_Assert(biasLayerData);
                        {
                            if( eltwiseData->consumers.size() == 1 )
                            {
                                // fuse eltwise + activation layer
                                if (biasLayerData->id < ld.id)
                                {
                                    nextData = &layers[eltwiseData->consumers[0].lid];
                                    lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
                                    Ptr nextActivLayer;
                                    if( nextData )
                                        nextActivLayer = nextData->layerInstance.dynamicCast();

                                    if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
                                            (!nextData->type.compare("ReLU") ||
                                             !nextData->type.compare("ChannelsPReLU") ||
                                             !nextData->type.compare("Power")) &&
                                            currLayer->setActivation(nextActivLayer) )
                                    {
                                        CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
                                        ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
                                        printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
                                        printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                                        eltwiseData->skip = true;
                                        nextData->skip = true;
                                        // This optimization for cases like
                                        // some_layer   conv
                                        //   |             |
                                        //   +-- eltwise --+
                                        //          |
                                        //        activ
                                        // This way all the element-wise computations
                                        // (i.e. some_layer+conv or some_layer*conv)
                                        // would be done at [conv] layer. So we need to
                                        // replace [conv]'s output blob to [eltwise]'s one
                                        // considering that [activ] is an in-place layer.
                                        // Also we need to move all the consumers' references.
                                        // To prevent memory collisions (i.e. when input of
                                        // [conv] and output of [eltwise] is the same blob)
                                        // we allocate a new blob.
                                        CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
                                        ld.outputBlobs[0] = ld.outputBlobs[0].clone();
                                        ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);

                                        eltwiseData->outputBlobs = ld.outputBlobs;
                                        nextData->outputBlobs = ld.outputBlobs;
                                        eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
                                        nextData->outputBlobsWrappers = ld.outputBlobsWrappers;

                                        // Move references of [activ] layer consumers to the newly allocated blob.
                                        for (int i = 0; i < nextData->consumers.size(); ++i)
                                        {
                                            LayerData& consumer = layers[nextData->consumers[i].lid];
                                            for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
                                            {
                                                if (consumer.inputBlobsId[j].lid == lpNext.lid)
                                                {
                                                    consumer.inputBlobs[j] = &ld.outputBlobs[0];
                                                    consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
                                                    break;
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }

            if (preferableBackend != DNN_BACKEND_OPENCV)
                continue;  // Go to the next layer.

            // the optimization #2. if there is concat layer that concatenates channels
            // from the inputs together (i.e. axis == 1) then we make the inputs of
            // the concat layer to write to the concatenation output buffer
            // (and so we eliminate the concatenation layer, because the channels
            // are concatenated implicitly).
            Ptr concatLayer = ld.layerInstance.dynamicCast();
            if( !concatLayer.empty() && concatLayer->axis == 1 && !concatLayer->padding &&
                ld.outputBlobs.size() == 1 )
            {
                Mat& output = ld.outputBlobs[0];
                UMat umat_output;
                if (!ld.outputBlobsWrappers.empty() &&
                    (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
                {
                    // On OpenCL targets the optimization is applied only when
                    // every real input is produced by a "Convolution" layer.
                    size_t i, ninputs = ld.inputBlobsId.size();
                    bool conv_layer = true;
                    for( i = 0; i < ninputs; i++ )
                    {
                        LayerPin pin = ld.inputBlobsId[i];
                        LayerData* inp_i_data = &layers[pin.lid];
                        while(inp_i_data->skip &&
                              inp_i_data->inputBlobsId.size() == 1 &&
                              inp_i_data->consumers.size() == 1)
                        {
                            pin = inp_i_data->inputBlobsId[0];
                            inp_i_data = &layers[pin.lid];
                        }
                        conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
                    }
                    if (!conv_layer)
                        continue;
                    std::vector umat_outputBlobs;
                    umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                    umat_output = umat_outputBlobs[0];
                }

                // TODO: in general, this optimization can always be done, but
                // many layers currently check that the input/output blobs are
                // continuous arrays. Unfortunately, this is not true when
                // the concatenation optimization is applied with batch_size > 1.
                // so, for now, we only apply this optimization in the most popular
                // case batch_size == 1.
if( output.dims == 4 && output.size[0] == 1 )
                {
                    size_t i, ninputs = ld.inputBlobsId.size();
                    std::vector realinputs(ninputs);
                    // Resolve each concat input to the pin that really produces it,
                    // looking through skipped single-input/single-consumer layers.
                    // The loop stops early (i < ninputs) if a producer is skipped
                    // or has more than one consumer.
                    for( i = 0; i < ninputs; i++ )
                    {
                        LayerPin pin = ld.inputBlobsId[i];
                        LayerData* inp_i_data = &layers[pin.lid];
                        while(inp_i_data->skip &&
                              inp_i_data->inputBlobsId.size() == 1 &&
                              inp_i_data->consumers.size() == 1)
                        {
                            pin = inp_i_data->inputBlobsId[0];
                            inp_i_data = &layers[pin.lid];
                        }
                        printf_(("\treal input for %s is %s\n",
                               layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
                               inp_i_data->getLayerInstance()->name.c_str()));

                        if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
                            break;
                        realinputs[i] = pin;
                    }

                    // Only when every input was resolved is the concat eliminated.
                    if( i >= ninputs )
                    {
                        // Allocate new memory to prevent collisions during memory
                        // reusing (see https://github.com/opencv/opencv/pull/10456).
                        output = output.clone();
                        if (preferableBackend == DNN_BACKEND_OPENCV &&
                            IS_DNN_OPENCL_TARGET(preferableTarget))
                        {
                            std::vector umats(1);
                            umat_output = umat_output.clone();
                            umats[0] = umat_output;
                            OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
                        }
                        Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
                        int ofs = 0;
                        // Redirect the output of every real input to its own
                        // channel range [ofs, ofs + channels_i) of the concat output.
                        for( i = 0; i < ninputs; i++ )
                        {
                            LayerPin pin = realinputs[i];
                            LayerData* inp_i_data = &layers[pin.lid];
                            int channels_i = ld.inputBlobs[i]->size[1];
                            chrange[1] = Range(ofs, ofs + channels_i);
                            printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
                                   pin.oid, ofs, ofs + channels_i));
                            ofs += channels_i;
                            Mat output_slice = output(chrange);
                            Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
                            CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
                            Mat* oldPtr = &curr_output;
                            curr_output = output_slice;
                            if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
                            {
                                std::vector umats(inp_i_data->outputBlobsWrappers.size());
                                umats[pin.oid] = umat_output(chrange);
                                OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
                            }
                            // Layers that refer old input Mat will refer to the
                            // new data but the same Mat object.
                            CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output);
                        }
                        ld.skip = true;
                        printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
                    }
                }
            }
        }
    }

    // Allocates every layer of the network and then runs fuseLayers().
    // Steps, in order: clear all layer flags, derive the input shapes from the
    // output blobs of layer 0, compute all layer shapes, reset the blob manager
    // and the backend wrappers, register blob references (network inputs, every
    // layer input, and the pins in `blobsToKeep_`), allocate each layer, and
    // resize the timing table to lastLayerId + 1 entries.
    // NOTE(review): template arguments look stripped ("const std::vector&").
    void allocateLayers(const std::vector& blobsToKeep_)
    {
        CV_TRACE_FUNCTION();

        MapIdToLayerData::iterator it;
        for (it = layers.begin(); it != layers.end(); it++)
            it->second.flag = 0;

        CV_Assert(!layers[0].outputBlobs.empty());
        ShapesVec inputShapes;
        for(int i = 0; i < layers[0].outputBlobs.size(); i++)
        {
            Mat& inp = layers[0].outputBlobs[i];
            CV_Assert(inp.total());
            // For the OpenCV backend with the OpenCL FP16 target the input blob
            // is re-created with depth CV_16S.
            if (preferableBackend == DNN_BACKEND_OPENCV &&
                preferableTarget == DNN_TARGET_OPENCL_FP16)
            {
                layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
            }
            inputShapes.push_back(shape(inp));
        }
        LayersShapesMap layersShapes;
        getLayersShapes(inputShapes, layersShapes);

        blobManager.reset();
        backendWrappers.clear();
        // Fake references to input blobs.
        for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
            blobManager.addReference(LayerPin(0, i));
        for (it = layers.begin(); it != layers.end(); ++it)
        {
            const LayerData& ld = it->second;
            blobManager.addReferences(ld.inputBlobsId);
        }

        for (int i = 0; i < blobsToKeep_.size(); i++)
        {
            blobManager.addReference(blobsToKeep_[i]);
        }

        for (it = layers.begin(); it != layers.end(); it++)
        {
            int lid = it->first;
            allocateLayer(lid, layersShapes);
        }

        layersTimings.resize(lastLayerId + 1, 0);
        fuseLayers(blobsToKeep_);
    }

    // Runs a single layer and records its duration in layersTimings[ld.id].
    // A skipped layer is not executed and its timer is reset before being stored.
    // The layer's own forward() is used when the backend is OpenCV or when the
    // layer has no node for the preferable backend; otherwise the stored backend
    // node is run. Sets ld.flag to 1 on exit.
    // NOTE(review): template arguments look stripped ("Ptr layer", "std::map >",
    // "std::vector umat_inputBlobs").
    void forwardLayer(LayerData &ld)
    {
        CV_TRACE_FUNCTION();

        Ptr layer = ld.layerInstance;

        TickMeter tm;
        tm.start();

        if( !ld.skip )
        {
            std::map >::iterator it = ld.backendNodes.find(preferableBackend);
            if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty())
            {
                if (isAsync)
                    CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode");

                if (!layer->supportBackend(DNN_BACKEND_OPENCV))
                    CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" unsupported on OpenCV backend",
                                                       ld.name.c_str(), ld.type.c_str()));

                if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
                {
                    // OpenCL path: the layer works on the UMats held by the wrappers.
                    std::vector umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
                    std::vector umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                    std::vector umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
                    layer->forward(umat_inputBlobs,
                                   umat_outputBlobs,
                                   umat_internalBlobs);
                    // Optional diagnostics: check outputs with checkRange() and,
                    // on failure, print the inputs, outputs and internal blobs.
                    if (DNN_CHECK_NAN_INF)
                    {
                        bool fail = false;
                        for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
                        {
                            UMat& u = umat_outputBlobs[i];
                            Mat m;
                            if (u.depth() == CV_16S) // FP16
                                convertFp16(u, m);
                            else
                                m = u.getMat(ACCESS_READ);
                            if (!checkRange(m))
                            {
                                std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                                std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                                fail = true;
                            }
                            else if (!checkRange(m, true, NULL, -1e6, 1e6))
                            {
                                std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                                std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                                fail = true;
                            }
                        }
                        if (fail)
                        {
                            for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
                            {
                                UMat& u = umat_inputBlobs[i];
                                Mat m;
                                if (u.depth() == CV_16S) // FP16
                                    convertFp16(u, m);
                                else
                                    m = u.getMat(ACCESS_READ);
                                std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
                                if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                            }
                            for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
                            {
                                UMat& u = umat_outputBlobs[i];
                                Mat m;
                                if (u.depth() == CV_16S) // FP16
                                    convertFp16(u, m);
                                else
                                    m = u.getMat(ACCESS_READ);
                                std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
                                if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                            }
                            for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
                            {
                                UMat& u = umat_internalBlobs[i];
                                Mat m;
                                if (u.depth() == CV_16S) // FP16
                                    convertFp16(u, m);
                                else
                                    m = u.getMat(ACCESS_READ);
                                // NOTE(review): unlike INPUT/OUTPUT above, the type
                                // string is printed only in the dump branch here.
                                std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
                                if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
                            }
                            if (DNN_CHECK_NAN_INF_RAISE_ERROR)
                                CV_Assert(!fail);
                        }
                    }
                    OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
                }
                else
                {
                    // Host path: bring input data back to the host first.
                    for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
                    {
                        if (!ld.inputBlobsWrappers[i].empty())
                            ld.inputBlobsWrappers[i]->copyToHost();
                    }

                    std::vector inps(ld.inputBlobs.size());
                    for (int i = 0; i < ld.inputBlobs.size(); ++i)
                    {
                        inps[i] = *ld.inputBlobs[i];
                    }
                    layer->forward(inps, ld.outputBlobs, ld.internals);

                    if (DNN_CHECK_NAN_INF)
                    {
                        bool fail = false;
                        for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
                        {
                            const Mat& m = ld.outputBlobs[i];
                            if (!checkRange(m))
                            {
                                std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                                std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                                fail = true;
                            }
                            else if (!checkRange(m, true, NULL, -1e6, 1e6))
                            {
                                std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
                                std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
                                fail = true;
                            }
                        }
                        if (fail)
                        {
                            for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
                            {
                                const Mat* pM = ld.inputBlobs[i];
                                if (!pM)
                                {
                                    std::cout << "INPUT " << i << " is NULL" << std::endl;
                                    continue;
                                }
                                const Mat& m = *pM;
                                std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                                if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                            }
                            for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
                            {
                                const Mat& m = ld.outputBlobs[i];
                                std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
                                if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
                            }
                            for (size_t i = 0; i < ld.internals.size(); ++i)
                            {
                                const Mat& m = ld.internals[i];
std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; } if (DNN_CHECK_NAN_INF_RAISE_ERROR) CV_Assert(!fail); } } for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i) { if (!ld.outputBlobsWrappers[i].empty()) ld.outputBlobsWrappers[i]->setHostDirty(); } } } else { Ptr node = it->second; CV_Assert(!node.empty()); if (preferableBackend == DNN_BACKEND_HALIDE) { forwardHalide(ld.outputBlobsWrappers, node); } else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) { forwardInfEngine(ld.outputBlobsWrappers, node, isAsync); } else { CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); } } } else tm.reset(); tm.stop(); layersTimings[ld.id] = tm.getTimeTicks(); ld.flag = 1; } void forwardToLayer(LayerData &ld, bool clearFlags = true) { CV_TRACE_FUNCTION(); if (clearFlags) { MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end(); it++) it->second.flag = 0; } //already was forwarded if (ld.flag) return; //forward parents MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it) { LayerData &ld = it->second; if (ld.flag) continue; forwardLayer(ld); } //forward itself forwardLayer(ld); } void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes) { std::vector& inputLayerIds = layers[id].inputBlobsId; if (inOutShapes[id].in.empty()) { for(int i = 0; i < inputLayerIds.size(); i++) { int layerId = inputLayerIds[i].lid; LayersShapesMap::iterator it = inOutShapes.find(layerId); if(it == inOutShapes.end() || it->second.out.empty()) { getLayerShapesRecursively(layerId, inOutShapes); } const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid]; inOutShapes[id].in.push_back(shape); } } const ShapesVec& is = inOutShapes[id].in; ShapesVec& os = inOutShapes[id].out; ShapesVec& ints = inOutShapes[id].internal; int requiredOutputs = 
layers[id].requiredOutputs.size(); inOutShapes[id].supportInPlace = layers[id].getLayerInstance()->getMemoryShapes(is, requiredOutputs, os, ints); } void getLayersShapes(const ShapesVec& netInputShapes, LayersShapesMap& inOutShapes) { inOutShapes.clear(); inOutShapes[0].in = netInputShapes; //insert shape for first input layer for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) { getLayerShapesRecursively(it->first, inOutShapes); } } void getLayerShapes(const ShapesVec& netInputShapes, const int layerId, LayerShapes& shapes) { LayersShapesMap inOutShapes; inOutShapes[0].in = netInputShapes; //insert shape for first input layer getLayerShapesRecursively(layerId, inOutShapes); shapes = inOutShapes[layerId]; } LayerPin getLatestLayerPin(const std::vector& pins) { return *std::max_element(pins.begin(), pins.end()); } Mat getBlob(const LayerPin& pin) { CV_TRACE_FUNCTION(); if (!pin.valid()) CV_Error(Error::StsObjectNotFound, "Requested blob not found"); LayerData &ld = layers[pin.lid]; if ((size_t)pin.oid >= ld.outputBlobs.size()) { CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, " "the #%d was requested", ld.name.c_str(), ld.outputBlobs.size(), pin.oid)); } if (preferableTarget != DNN_TARGET_CPU) { CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty()); // Transfer data to CPU if it's require. 
ld.outputBlobsWrappers[pin.oid]->copyToHost(); } if (ld.outputBlobs[pin.oid].depth() == CV_16S) { convertFp16(ld.outputBlobs[pin.oid], output_blob); return output_blob; } else return ld.outputBlobs[pin.oid]; } Mat getBlob(String outputName) { return getBlob(getPinByAlias(outputName)); } #ifdef CV_CXX11 AsyncArray getBlobAsync(const LayerPin& pin) { CV_TRACE_FUNCTION(); #ifdef HAVE_INF_ENGINE if (!pin.valid()) CV_Error(Error::StsObjectNotFound, "Requested blob not found"); LayerData &ld = layers[pin.lid]; if ((size_t)pin.oid >= ld.outputBlobs.size()) { CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, " "the #%d was requested", ld.name.c_str(), ld.outputBlobs.size(), pin.oid)); } if (preferableTarget != DNN_TARGET_CPU) { CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty()); // Transfer data to CPU if it's require. ld.outputBlobsWrappers[pin.oid]->copyToHost(); } CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE); Ptr wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast(); return std::move(wrapper->futureMat); #else CV_Error(Error::StsNotImplemented, "DNN_BACKEND_INFERENCE_ENGINE backend is required"); #endif } AsyncArray getBlobAsync(String outputName) { return getBlobAsync(getPinByAlias(outputName)); } #endif // CV_CXX11 }; Net::Net() : impl(new Net::Impl) { } Net Net::readFromModelOptimizer(const String& xml, const String& bin) { #ifndef HAVE_INF_ENGINE CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); #else InferenceEngine::CNNNetReader reader; reader.ReadNetwork(xml); reader.ReadWeights(bin); InferenceEngine::CNNNetwork ieNet = reader.getNetwork(); std::vector inputsNames; for (auto& it : ieNet.getInputsInfo()) { inputsNames.push_back(it.first); } Net cvNet; cvNet.setInputsNames(inputsNames); Ptr backendNode(new InfEngineBackendNode(InferenceEngine::Builder::Layer(""))); backendNode->net = Ptr(new InfEngineBackendNet(ieNet)); for 
(auto& it : ieNet.getOutputsInfo()) { Ptr cvLayer(new InfEngineBackendLayer(ieNet)); InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str()); CV_Assert(ieLayer); LayerParams lp; int lid = cvNet.addLayer(it.first, "", lp); LayerData& ld = cvNet.impl->layers[lid]; cvLayer->name = it.first; cvLayer->type = ieLayer->type; ld.layerInstance = cvLayer; ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode; for (int i = 0; i < inputsNames.size(); ++i) cvNet.connect(0, i, lid, i); } cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE); cvNet.impl->skipInfEngineInit = true; return cvNet; #endif // HAVE_INF_ENGINE } Net::~Net() { } int Net::addLayer(const String &name, const String &type, LayerParams ¶ms) { CV_TRACE_FUNCTION(); if (impl->getLayerId(name) >= 0) { CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net"); return -1; } int id = ++impl->lastLayerId; impl->layerNameToId.insert(std::make_pair(name, id)); impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params))); return id; } int Net::addLayerToPrev(const String &name, const String &type, LayerParams ¶ms) { CV_TRACE_FUNCTION(); int prvLid = impl->lastLayerId; int newLid = this->addLayer(name, type, params); this->connect(prvLid, 0, newLid, 0); return newLid; } void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum) { CV_TRACE_FUNCTION(); impl->connect(outLayerId, outNum, inpLayerId, inpNum); } void Net::connect(String _outPin, String _inPin) { CV_TRACE_FUNCTION(); LayerPin outPin = impl->getPinByAlias(_outPin); LayerPin inpPin = impl->getPinByAlias(_inPin); CV_Assert(outPin.valid() && inpPin.valid()); impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid); } Mat Net::forward(const String& outputName) { CV_TRACE_FUNCTION(); String layerName = outputName; if (layerName.empty()) layerName = getLayerNames().back(); std::vector pins(1, impl->getPinByAlias(layerName)); impl->setUpNet(pins); 
impl->forwardToLayer(impl->getLayerData(layerName)); return impl->getBlob(layerName); } AsyncArray Net::forwardAsync(const String& outputName) { CV_TRACE_FUNCTION(); #ifdef CV_CXX11 String layerName = outputName; if (layerName.empty()) layerName = getLayerNames().back(); std::vector pins(1, impl->getPinByAlias(layerName)); impl->setUpNet(pins); if (impl->preferableBackend != DNN_BACKEND_INFERENCE_ENGINE) CV_Error(Error::StsNotImplemented, "Asynchronous forward for backend which is different from DNN_BACKEND_INFERENCE_ENGINE"); impl->isAsync = true; impl->forwardToLayer(impl->getLayerData(layerName)); impl->isAsync = false; return impl->getBlobAsync(layerName); #else CV_Error(Error::StsNotImplemented, "Asynchronous forward without C++11"); #endif // CV_CXX11 } void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName) { CV_TRACE_FUNCTION(); String layerName = outputName; if (layerName.empty()) layerName = getLayerNames().back(); std::vector pins(1, impl->getPinByAlias(layerName)); impl->setUpNet(pins); impl->forwardToLayer(impl->getLayerData(layerName)); LayerPin pin = impl->getPinByAlias(layerName); LayerData &ld = impl->layers[pin.lid]; if (outputBlobs.isUMat()) { impl->getBlob(layerName).copyTo(outputBlobs); } else if (outputBlobs.isMat()) { outputBlobs.assign(impl->getBlob(layerName)); } else if (outputBlobs.isMatVector()) { if (impl->preferableTarget != DNN_TARGET_CPU) { for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) { CV_Assert(!ld.outputBlobsWrappers[i].empty()); ld.outputBlobsWrappers[i]->copyToHost(); } } if (ld.outputBlobs[0].depth() == CV_32F) { std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); outputvec = ld.outputBlobs; } else { std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); outputvec.resize(ld.outputBlobs.size()); for (int i = 0; i < outputvec.size(); i++) convertFp16(ld.outputBlobs[i], outputvec[i]); } } else if (outputBlobs.isUMatVector()) { std::vector & outputvec = *(std::vector 
*)outputBlobs.getObj(); if (impl->preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(impl->preferableTarget)) { if (impl->preferableTarget == DNN_TARGET_OPENCL) outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16) { std::vector out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); outputvec.resize(out_vec.size()); for (int i = 0; i < out_vec.size(); i++) convertFp16(out_vec[i], outputvec[i]); } } else { outputvec.resize(ld.outputBlobs.size()); for (int i = 0; i < outputvec.size(); ++i) ld.outputBlobs[i].copyTo(outputvec[i]); } } } void Net::forward(OutputArrayOfArrays outputBlobs, const std::vector& outBlobNames) { CV_TRACE_FUNCTION(); std::vector pins; for (int i = 0; i < outBlobNames.size(); i++) { pins.push_back(impl->getPinByAlias(outBlobNames[i])); } impl->setUpNet(pins); LayerPin out = impl->getLatestLayerPin(pins); impl->forwardToLayer(impl->getLayerData(out.lid)); std::vector matvec; for (int i = 0; i < pins.size(); i++) { matvec.push_back(impl->getBlob(pins[i])); } std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); outputvec = matvec; } void Net::forward(std::vector >& outputBlobs, const std::vector& outBlobNames) { CV_TRACE_FUNCTION(); std::vector pins; for (int i = 0; i < outBlobNames.size(); i++) { pins.push_back(impl->getPinByAlias(outBlobNames[i])); } impl->setUpNet(pins); LayerPin out = impl->getLatestLayerPin(pins); impl->forwardToLayer(impl->getLayerData(out.lid)); outputBlobs.resize(outBlobNames.size()); for (int i = 0; i < outBlobNames.size(); i++) { std::vector lp = impl->getLayerOutPins(outBlobNames[i]); outputBlobs[i].resize(lp.size()); for (int j = 0; j < lp.size(); j++) { outputBlobs[i][j] = impl->getBlob(lp[j]); } } } void Net::setPreferableBackend(int backendId) { CV_TRACE_FUNCTION(); CV_TRACE_ARG(backendId); if( impl->preferableBackend != backendId ) { impl->preferableBackend = backendId; impl->netWasAllocated = false; 
impl->clear(); } } void Net::setPreferableTarget(int targetId) { CV_TRACE_FUNCTION(); CV_TRACE_ARG(targetId); if( impl->preferableTarget != targetId ) { impl->preferableTarget = targetId; if (IS_DNN_OPENCL_TARGET(targetId)) { #ifndef HAVE_OPENCL #ifdef HAVE_INF_ENGINE if (impl->preferableBackend == DNN_BACKEND_OPENCV) #else if (impl->preferableBackend == DNN_BACKEND_DEFAULT || impl->preferableBackend == DNN_BACKEND_OPENCV) #endif // HAVE_INF_ENGINE impl->preferableTarget = DNN_TARGET_CPU; #else bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16"); if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16) impl->preferableTarget = DNN_TARGET_OPENCL; #endif } impl->netWasAllocated = false; impl->clear(); } } void Net::setInputsNames(const std::vector &inputBlobNames) { CV_TRACE_FUNCTION(); impl->netInputLayer->setNames(inputBlobNames); } void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); LayerPin pin; pin.lid = 0; pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name); if (!pin.valid()) CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found"); LayerData &ld = impl->layers[pin.lid]; const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size()); ld.outputBlobs.resize(numInputs); ld.outputBlobsWrappers.resize(numInputs); impl->netInputLayer->inputsData.resize(numInputs); impl->netInputLayer->scaleFactors.resize(numInputs); impl->netInputLayer->means.resize(numInputs); MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]); Mat blob_ = blob.getMat(); bool oldShape = prevShape == shape(blob_); if (oldShape) { blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]); } else { ld.outputBlobs[pin.oid] = blob_.clone(); impl->netInputLayer->inputsData[pin.oid] = ld.outputBlobs[pin.oid]; } if (!ld.outputBlobsWrappers[pin.oid].empty()) { ld.outputBlobsWrappers[pin.oid]->setHostDirty(); } 
impl->netInputLayer->scaleFactors[pin.oid] = scalefactor; impl->netInputLayer->means[pin.oid] = mean; impl->netWasAllocated = impl->netWasAllocated && oldShape; } Mat Net::getParam(LayerId layer, int numParam) { LayerData &ld = impl->getLayerData(layer); std::vector &layerBlobs = ld.getLayerInstance()->blobs; CV_Assert(numParam < (int)layerBlobs.size()); return layerBlobs[numParam]; } void Net::setParam(LayerId layer, int numParam, const Mat &blob) { LayerData &ld = impl->getLayerData(layer); std::vector &layerBlobs = ld.getLayerInstance()->blobs; CV_Assert(numParam < (int)layerBlobs.size()); //we don't make strong checks, use this function carefully layerBlobs[numParam] = blob; } int Net::getLayerId(const String &layer) { return impl->getLayerId(layer); } String parseLayerParams(const String& name, const LayerParams& lp) { DictValue param = lp.get(name); std::ostringstream out; out << name << " "; switch (param.size()) { case 1: out << ": "; break; case 2: out << "(HxW): "; break; case 3: out << "(DxHxW): "; break; default: CV_Error(Error::StsNotImplemented, format("Unsupported %s size = %d", name.c_str(), param.size())); } for (size_t i = 0; i < param.size() - 1; i++) { out << param.get(i) << " x "; } out << param.get(param.size() - 1) << "\\l"; return out.str(); } String Net::dump() { CV_Assert(!empty()); if (impl->netInputLayer->inputsData.empty()) CV_Error(Error::StsError, "Requested set input"); if (!impl->netWasAllocated) impl->setUpNet(); std::ostringstream out; std::map& map = impl->layers; int prefBackend = impl->preferableBackend; std::vector > skippedLayers; std::vector skipId; std::vector allLayers(map.size(), -1); int idPrev = -1; Ptr prevNode; for (std::map::reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit) { std::map >::iterator itBackend = rit->second.backendNodes.find(prefBackend); if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() || itBackend->second.empty()) { if (rit->second.skip) 
skipId.push_back(rit->first); else if (!skipId.empty()) { if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty()) skipId.push_back(rit->first); else if (idPrev != -1) skipId.push_back(idPrev); std::sort(skipId.begin(), skipId.end()); for (int i = 0; i < skipId.size(); i++) { allLayers[skipId[i]] = skippedLayers.size(); } skippedLayers.push_back(skipId); skipId.clear(); } } else { if (itBackend->second == prevNode) skipId.push_back(idPrev); else if (!skipId.empty()) { skipId.push_back(idPrev); std::sort(skipId.begin(), skipId.end()); for (int i = 0; i < skipId.size(); i++) { allLayers[skipId[i]] = skippedLayers.size(); } skippedLayers.push_back(skipId); skipId.clear(); } idPrev = rit->first; prevNode = itBackend->second; } } String colors[] = {"#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462"}; String backend; switch (prefBackend) { case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break; case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break; case DNN_BACKEND_INFERENCE_ENGINE: backend = "DLIE/"; break; case DNN_BACKEND_OPENCV: backend = "OCV/"; break; } out << "digraph G {" << '\n'; // Add nodes for (std::map::iterator it = map.begin(); it != map.end(); ++it) { String name = it->second.params.name; if (allLayers[it->first] == -1 && !name.empty()) { out << " " << "\"" << name << "\"" << " [label=\""; skipId.clear(); skipId.push_back(it->first); } else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0]) continue; else { // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0] int cluster = allLayers[it->first]; out << " " << "\"" << "cluster_" << cluster << "\"" << " [label=\"{"; skipId = skippedLayers[allLayers[it->first]]; // vertices in current cluster } for (int i = 0; i < skipId.size(); i++) { LayerParams& lp = map[skipId[i]].params; if (!lp.name.empty()) { if (i > 0) { out << " | "; } out << lp.name << "\\n" << lp.type << "\\n"; if (lp.has("kernel_size")) { String kernel = parseLayerParams("kernel_size", 
lp); out << kernel; } else if (lp.has("kernel_h") && lp.has("kernel_w")) { DictValue h = lp.get("kernel_h"); DictValue w = lp.get("kernel_w"); out << "kernel (HxW): " << h << " x " << w << "\\l"; } if (lp.has("stride")) { String stride = parseLayerParams("stride", lp); out << stride; } else if (lp.has("stride_h") && lp.has("stride_w")) { DictValue h = lp.get("stride_h"); DictValue w = lp.get("stride_w"); out << "stride (HxW): " << h << " x " << w << "\\l"; } if (lp.has("dilation")) { String dilation = parseLayerParams("dilation", lp); out << dilation; } else if (lp.has("dilation_h") && lp.has("dilation_w")) { DictValue h = lp.get("dilation_h"); DictValue w = lp.get("dilation_w"); out << "dilation (HxW): " << h << " x " << w << "\\l"; } if (lp.has("pad")) { DictValue pad = lp.get("pad"); out << "pad "; switch (pad.size()) { case 1: out << ": " << pad << "\\l"; break; case 2: out << "(HxW): (" << pad.get(0) << " x " << pad.get(1) << ")" << "\\l"; break; case 4: out << "(HxW): (" << pad.get(0) << ", " << pad.get(2) << ") x (" << pad.get(1) << ", " << pad.get(3) << ")" << "\\l"; break; case 6: out << "(DxHxW): (" << pad.get(0) << ", " << pad.get(3) << ") x (" << pad.get(1) << ", " << pad.get(4) << ") x (" << pad.get(2) << ", " << pad.get(5) << ")" << "\\l"; break; default: CV_Error(Error::StsNotImplemented, format("Unsupported pad size = %d", pad.size())); } } else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b")) { DictValue l = lp.get("pad_l"); DictValue t = lp.get("pad_t"); DictValue r = lp.get("pad_r"); DictValue b = lp.get("pad_b"); out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")" << "\\l"; } else if (lp.has("pooled_w") || lp.has("pooled_h")) { DictValue h = lp.get("pooled_h"); DictValue w = lp.get("pooled_w"); out << "pad (HxW): " << h << " x " << w << "\\l"; } if (lp.has("pool")) { out << "pool: " << lp.get("pool") << "\\l"; } if (lp.has("global_pooling")) { out << "global_pooling: " << 
lp.get("global_pooling") << "\\l"; } if (lp.has("group")) { out << "group: " << lp.get("group") << "\\l"; } } } if (!it->second.outputBlobs.empty()) out << "output: " << it->second.outputBlobs[0].size << "\\l"; Ptr layerBackend = it->second.backendNodes[prefBackend]; out << (!layerBackend.empty() ? backend : "OCV/"); int colorId = 0; switch (it->second.layerInstance->preferableTarget) { case DNN_TARGET_CPU: out << "CPU\\n"; colorId = layerBackend.empty() ? 0 : 5; break; case DNN_TARGET_OPENCL: out << "OCL\\n"; colorId = 1; break; case DNN_TARGET_OPENCL_FP16: out << "OCL_FP16\\n"; colorId = 2; break; case DNN_TARGET_MYRIAD: out << "MYRIAD\\n"; colorId = 3; break; case DNN_TARGET_FPGA: out << "FPGA\\n"; colorId = 4; break; } out << ((skipId.size() == 1)? "\" " : " }\" "); out << "fillcolor=\"" << colors[colorId] << "\" "; out << "style=filled "; out << "shape=" << ((skipId.size() == 1)? "box" : "record") << "]" << '\n'; } out << '\n'; // Add edges int inputsSize = impl->netInputLayer->outNames.size(); for (std::map::iterator it = map.begin(); it != map.end(); ++it) { if (allLayers[it->first] == -1) // node { for (int i = 0; i < it->second.consumers.size(); i++) { int outId = it->second.consumers[i].lid; if (it == map.begin() && inputsSize > 1) out << " " << "\"" << it->second.name << "_" << i << "\"" << " -> "; else out << " " << "\"" << it->second.name << "\"" << " -> "; if (allLayers[outId] == -1) // node out << "\"" << map[outId].name << "\"" << '\n'; else // cluster out << "\"" << "cluster_" << allLayers[outId] << "\"" << '\n'; } } else if (it->first == skippedLayers[allLayers[it->first]].back()) // edges from last layer in cluster { for (int i = 0; i < it->second.consumers.size(); i++) { int outId = it->second.consumers[i].lid; if (allLayers[outId] == -1) { // node out << " " << "\"" << "cluster_" << allLayers[it->first] << "\"" << " -> "; out << "\"" << map[outId].name << "\"" << '\n'; } else if (allLayers[outId] != allLayers[it->first]) { // another cluster 
out << " " << "\"" << "cluster_" << allLayers[it->first] << "\"" << " -> "; out << "\"" << "cluster_" << allLayers[outId] << "\"" << '\n'; } } } } out << "}"; return out.str(); } void Net::dumpToFile(const String& path) { std::ofstream file(path.c_str()); file << dump(); file.close(); } Ptr Net::getLayer(LayerId layerId) { LayerData &ld = impl->getLayerData(layerId); return ld.getLayerInstance(); } std::vector > Net::getLayerInputs(LayerId layerId) { LayerData &ld = impl->getLayerData(layerId); if (!ld.layerInstance) CV_Error(Error::StsNullPtr, format("Requested layer \"%s\" was not initialized", ld.name.c_str())); std::vector > inputLayers; inputLayers.reserve(ld.inputLayersId.size()); std::set::iterator it; for (it = ld.inputLayersId.begin(); it != ld.inputLayersId.end(); ++it) { inputLayers.push_back(getLayer(*it)); } return inputLayers; } std::vector Net::getLayerNames() const { std::vector res; res.reserve(impl->layers.size()); Impl::MapIdToLayerData::iterator it; for (it = impl->layers.begin(); it != impl->layers.end(); it++) { if (it->second.id) //skip Data layer res.push_back(it->second.name); } return res; } bool Net::empty() const { return impl->layers.size() <= 1; //first layer is default Data layer } std::vector Net::getUnconnectedOutLayers() const { std::vector layersIds; Impl::MapIdToLayerData::iterator it; for (it = impl->layers.begin(); it != impl->layers.end(); it++) { int lid = it->first; LayerData &ld = it->second; if (ld.requiredOutputs.size() == 0) layersIds.push_back(lid); } return layersIds; } std::vector Net::getUnconnectedOutLayersNames() const { std::vector ids = getUnconnectedOutLayers(); const size_t n = ids.size(); std::vector names(n); for (size_t i = 0; i < n; ++i) { names[i] = impl->layers[ids[i]].name; } return names; } void Net::getLayersShapes(const ShapesVec& netInputShapes, std::vector& layersIds, std::vector& inLayersShapes, std::vector& outLayersShapes) const { layersIds.clear(); inLayersShapes.clear(); 
outLayersShapes.clear(); Impl::LayersShapesMap inOutShapes; impl->getLayersShapes(netInputShapes, inOutShapes); for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin(); it != inOutShapes.end(); it++) { layersIds.push_back(it->first); inLayersShapes.push_back(it->second.in); outLayersShapes.push_back(it->second.out); } } void Net::getLayersShapes(const MatShape& netInputShape, std::vector& layerIds, std::vector& inLayersShapes, std::vector& outLayersShapes) const { getLayersShapes(ShapesVec(1, netInputShape), layerIds, inLayersShapes, outLayersShapes); } void Net::getLayerShapes(const MatShape& netInputShape, const int layerId, ShapesVec& inLayerShapes, ShapesVec& outLayerShapes) const { getLayerShapes(ShapesVec(1, netInputShape), layerId, inLayerShapes, outLayerShapes); } void Net::getLayerShapes(const ShapesVec& netInputShapes, const int layerId, ShapesVec& inLayerShapes, ShapesVec& outLayerShapes) const { LayerShapes shapes; impl->getLayerShapes(netInputShapes, layerId, shapes); inLayerShapes = shapes.in; outLayerShapes = shapes.out; } int64 Net::getFLOPS(const std::vector& netInputShapes) const { CV_TRACE_FUNCTION(); int64 flops = 0; std::vector ids; std::vector > inShapes, outShapes; getLayersShapes(netInputShapes, ids, inShapes, outShapes); CV_Assert(inShapes.size() == outShapes.size()); CV_Assert(inShapes.size() == ids.size()); for(int i = 0; i < ids.size(); i++) { flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i], outShapes[i]); } return flops; } int64 Net::getFLOPS(const MatShape& netInputShape) const { return getFLOPS(std::vector(1, netInputShape)); } int64 Net::getFLOPS(const int layerId, const std::vector& netInputShapes) const { Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId); CV_Assert(layer != impl->layers.end()); LayerShapes shapes; impl->getLayerShapes(netInputShapes, layerId, shapes); return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out); } int64 Net::getFLOPS(const int layerId, 
const MatShape& netInputShape) const { return getFLOPS(layerId, std::vector(1, netInputShape)); } void Net::getLayerTypes(std::vector& layersTypes) const { layersTypes.clear(); std::map layers; for (Impl::MapIdToLayerData::iterator it = impl->layers.begin(); it != impl->layers.end(); it++) { if (layers.find(it->second.type) == layers.end()) layers[it->second.type] = 0; layers[it->second.type]++; } for (std::map::iterator it = layers.begin(); it != layers.end(); it++) { layersTypes.push_back(it->first); } } int Net::getLayersCount(const String& layerType) const { int count = 0; for (Impl::MapIdToLayerData::iterator it = impl->layers.begin(); it != impl->layers.end(); it++) { if (it->second.type == layerType) count++; } return count; } void Net::getMemoryConsumption(const int layerId, const std::vector& netInputShapes, size_t& weights, size_t& blobs) const { CV_TRACE_FUNCTION(); Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId); CV_Assert(layer != impl->layers.end()); weights = blobs = 0; for(int i = 0; i < layer->second.params.blobs.size(); i++) { const Mat& weightsBlob = layer->second.params.blobs[i]; weights += weightsBlob.total()*weightsBlob.elemSize(); } ShapesVec inLayerShapes, outLayerShapes; getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes); for(int i = 0; i < outLayerShapes.size(); i++) { blobs += total(outLayerShapes[i]) * sizeof(float); } } void Net::getMemoryConsumption(const std::vector& netInputShapes, size_t& weights, size_t& blobs) const { CV_TRACE_FUNCTION(); std::vector layerIds; std::vector w, b; getMemoryConsumption(netInputShapes, layerIds, w, b); weights = blobs = 0; for(int i = 0; i < layerIds.size(); i++) { weights += w[i]; blobs += b[i]; } } void Net::getMemoryConsumption(const int layerId, const MatShape& netInputShape, size_t& weights, size_t& blobs) const { getMemoryConsumption(layerId, std::vector(1, netInputShape), weights, blobs); } void Net::getMemoryConsumption(const MatShape& netInputShape, 
size_t& weights, size_t& blobs) const { getMemoryConsumption(std::vector(1, netInputShape), weights, blobs); } void Net::getMemoryConsumption(const std::vector& netInputShapes, std::vector& layerIds, std::vector& weights, std::vector& blobs) const { CV_TRACE_FUNCTION(); layerIds.clear(); weights.clear(); blobs.clear(); std::vector > inLayerShapes, outLayerShapes; getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes); for(int i = 0; i < layerIds.size(); i++) { int w = 0, b = 0; Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]); CV_Assert(layer != impl->layers.end()); for(int j = 0; j < layer->second.params.blobs.size(); j++) { const Mat& weightsBlob = layer->second.params.blobs[j]; w += weightsBlob.total()*weightsBlob.elemSize(); } for(int j = 0; j < outLayerShapes[i].size(); j++) { b += total(outLayerShapes[i][j]) * sizeof(float); } weights.push_back(w); blobs.push_back(b); } } void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector& layerIds, std::vector& weights, std::vector& blobs) const { getMemoryConsumption(std::vector(1, netInputShape), layerIds, weights, blobs); } void Net::enableFusion(bool fusion) { if( impl->fusion != fusion ) { impl->fusion = fusion; impl->netWasAllocated = false; impl->clear(); } } void Net::setHalideScheduler(const String& scheduler) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str()); impl->halideConfigFile = scheduler; } int64 Net::getPerfProfile(std::vector& timings) { timings = std::vector(impl->layersTimings.begin() + 1, impl->layersTimings.end()); int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0); return total; } ////////////////////////////////////////////////////////////////////////// Layer::Layer() { preferableTarget = DNN_TARGET_CPU; } Layer::Layer(const LayerParams ¶ms) : blobs(params.blobs), name(params.name), type(params.type) { preferableTarget = DNN_TARGET_CPU; } void Layer::setParamsFrom(const LayerParams 
¶ms) { blobs = params.blobs; name = params.name; type = params.type; } int Layer::inputNameToIndex(String) { return -1; } int Layer::outputNameToIndex(const String&) { return 0; } bool Layer::supportBackend(int backendId) { return backendId == DNN_BACKEND_OPENCV; } Ptr Layer::initHalide(const std::vector > &) { CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type + " layers is not defined."); return Ptr(); } Ptr Layer::initInfEngine(const std::vector > &) { CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type + " layers is not defined."); return Ptr(); } void Layer::applyHalideScheduler(Ptr& node, const std::vector &inputs, const std::vector &outputs, int targetId) const { #ifdef HAVE_HALIDE CV_TRACE_FUNCTION(); Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"), xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile"); Halide::Func& top = node.dynamicCast()->funcs.back(); int outW, outH, outC, outN; getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN); if (targetId == DNN_TARGET_CPU) { if (outW == 1 && outH == 1) { if (outC + outN == 1) return; if (outC > 8) top.split(c, co, ci, 8) .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile) .parallel(tile) .vectorize(ci, 8); else top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile) .parallel(tile); } else { if (outH > 2) { top.reorder(x, c, y) .split(y, yo, yi, 2) .fuse(yo, n, tile) .parallel(tile) .unroll(yi) .vectorize(x, outW >= 16 ? 16 : outW); } } } else if (targetId == DNN_TARGET_OPENCL) { if (outW == 1 && outH == 1) { int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC; top.split(c, co, ci, c_split) .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile) .gpu_blocks(tile) .gpu_threads(ci); } else { int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW; int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH; // Supported vectorization widths: 2, 3, 4, 8, 16 int c_split = outC > 8 ? (outC > 16 ? 
8 : 4) : std::min(4, outC); top.split(x, xo, xi, x_split).split(y, yo, yi, y_split) .split(c, co, ci, c_split) .gpu_blocks(xo, yo, co) .gpu_threads(xi, yi) .reorder(xi, yi, ci, xo, yo, co) .vectorize(ci); } } else CV_Error(Error::StsNotImplemented, "Unknown target identifier"); #endif // HAVE_HALIDE } Ptr Layer::tryAttach(const Ptr& node) { return Ptr(); } bool Layer::setActivation(const Ptr&) { return false; } bool Layer::tryFuse(Ptr&) { return false; } void Layer::getScaleShift(Mat& scale, Mat& shift) const { scale = Mat(); shift = Mat(); } void Layer::unsetAttached() { setActivation(Ptr()); } template static void vecToPVec(const std::vector &v, std::vector &pv) { pv.resize(v.size()); for (size_t i = 0; i < v.size(); i++) pv[i] = const_cast(&v[i]); } void Layer::finalize(const std::vector &inputs, std::vector &outputs) { CV_TRACE_FUNCTION(); this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs); } void Layer::finalize(const std::vector &input, std::vector &output) { CV_UNUSED(input);CV_UNUSED(output); } void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) { CV_TRACE_FUNCTION(); std::vector inputs, outputs; inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); std::vector inputsp; vecToPVec(inputs, inputsp); this->finalize(inputsp, outputs); } std::vector Layer::finalize(const std::vector &inputs) { CV_TRACE_FUNCTION(); std::vector outputs; this->finalize(inputs, outputs); return outputs; } void Layer::forward(std::vector &input, std::vector &output, std::vector &internals) { // We kept this method for compatibility. DNN calls it now only to support users' implementations. 
} void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S) { std::vector inputs; std::vector outputs; std::vector internals; std::vector orig_inputs; std::vector orig_outputs; std::vector orig_internals; inputs_arr.getUMatVector(orig_inputs); outputs_arr.getUMatVector(orig_outputs); internals_arr.getUMatVector(orig_internals); inputs.resize(orig_inputs.size()); for (size_t i = 0; i < orig_inputs.size(); i++) convertFp16(orig_inputs[i], inputs[i]); outputs.resize(orig_outputs.size()); for (size_t i = 0; i < orig_outputs.size(); i++) outputs[i].create(shape(orig_outputs[i]), CV_32F); internals.resize(orig_internals.size()); for (size_t i = 0; i < orig_internals.size(); i++) internals[i].create(shape(orig_internals[i]), CV_32F); forward(inputs, outputs, internals); for (size_t i = 0; i < outputs.size(); i++) convertFp16(outputs[i], orig_outputs[i]); // sync results back outputs_arr.assign(orig_outputs); internals_arr.assign(orig_internals); return; } std::vector inpvec; std::vector outputs; std::vector internals; inputs_arr.getMatVector(inpvec); outputs_arr.getMatVector(outputs); internals_arr.getMatVector(internals); std::vector inputs(inpvec.size()); for (int i = 0; i < inpvec.size(); i++) inputs[i] = &inpvec[i]; this->forward(inputs, outputs, internals); // sync results back outputs_arr.assign(outputs); internals_arr.assign(internals); } void Layer::run(const std::vector &inputs, std::vector &outputs, std::vector &internals) { CV_TRACE_FUNCTION(); this->finalize(inputs, outputs); 
this->forward(inputs, outputs, internals); } Layer::~Layer() {} bool Layer::getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, std::vector &internals) const { CV_Assert(inputs.size()); outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]); return false; } ////////////////////////////////////////////////////////////////////////// static Mutex& getLayerFactoryMutex() { static Mutex* volatile instance = NULL; if (instance == NULL) { cv::AutoLock lock(getInitializationMutex()); if (instance == NULL) instance = new Mutex(); } return *instance; } typedef std::map > LayerFactory_Impl; static LayerFactory_Impl& getLayerFactoryImpl_() { static LayerFactory_Impl impl; return impl; } static LayerFactory_Impl& getLayerFactoryImpl() { static LayerFactory_Impl* volatile instance = NULL; if (instance == NULL) { cv::AutoLock lock(getLayerFactoryMutex()); if (instance == NULL) { instance = &getLayerFactoryImpl_(); initializeLayerFactory(); } } return *instance; } void LayerFactory::registerLayer(const String &type, Constructor constructor) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(type, "type", type.c_str()); cv::AutoLock lock(getLayerFactoryMutex()); String type_ = type.toLowerCase(); LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_); if (it != getLayerFactoryImpl().end()) { if (it->second.back() == constructor) CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered"); it->second.push_back(constructor); } getLayerFactoryImpl().insert(std::make_pair(type_, std::vector(1, constructor))); } void LayerFactory::unregisterLayer(const String &type) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(type, "type", type.c_str()); cv::AutoLock lock(getLayerFactoryMutex()); String type_ = type.toLowerCase(); LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_); if (it != getLayerFactoryImpl().end()) { if (it->second.size() > 1) it->second.pop_back(); else getLayerFactoryImpl().erase(it); 
} } Ptr LayerFactory::createLayerInstance(const String &type, LayerParams& params) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(type, "type", type.c_str()); cv::AutoLock lock(getLayerFactoryMutex()); String type_ = type.toLowerCase(); LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type_); if (it != getLayerFactoryImpl().end()) { CV_Assert(!it->second.empty()); return it->second.back()(params); } else { return Ptr(); //NULL } } BackendNode::BackendNode(int backendId) : backendId(backendId) {} BackendNode::~BackendNode() {}; BackendWrapper::BackendWrapper(int backendId, int targetId) : backendId(backendId), targetId(targetId) {} BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m) { CV_Error(Error::StsNotImplemented, "Constructor of backend wrapper must be implemented"); } BackendWrapper::BackendWrapper(const Ptr& base, const MatShape& shape) { CV_Error(Error::StsNotImplemented, "Constructor of backend wrapper must be implemented"); } BackendWrapper::~BackendWrapper() {} Net readNet(const String& _model, const String& _config, const String& _framework) { String framework = _framework.toLowerCase(); String model = _model; String config = _config; const std::string modelExt = model.substr(model.rfind('.') + 1); const std::string configExt = config.substr(config.rfind('.') + 1); if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" || modelExt == "prototxt" || configExt == "prototxt") { if (modelExt == "prototxt" || configExt == "caffemodel") std::swap(model, config); return readNetFromCaffe(config, model); } if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" || modelExt == "pbtxt" || configExt == "pbtxt") { if (modelExt == "pbtxt" || configExt == "pb") std::swap(model, config); return readNetFromTensorflow(model, config); } if (framework == "torch" || modelExt == "t7" || modelExt == "net" || configExt == "t7" || configExt == "net") { return readNetFromTorch(model.empty() ? 
config : model); } if (framework == "darknet" || modelExt == "weights" || configExt == "weights" || modelExt == "cfg" || configExt == "cfg") { if (modelExt == "cfg" || configExt == "weights") std::swap(model, config); return readNetFromDarknet(config, model); } if (framework == "dldt" || modelExt == "bin" || configExt == "bin" || modelExt == "xml" || configExt == "xml") { if (modelExt == "xml" || configExt == "bin") std::swap(model, config); return readNetFromModelOptimizer(config, model); } if (framework == "onnx" || modelExt == "onnx") { return readNetFromONNX(model); } CV_Error(Error::StsError, "Cannot determine an origin framework of files: " + model + (config.empty() ? "" : ", " + config)); } Net readNet(const String& _framework, const std::vector& bufferModel, const std::vector& bufferConfig) { String framework = _framework.toLowerCase(); if (framework == "caffe") return readNetFromCaffe(bufferConfig, bufferModel); else if (framework == "tensorflow") return readNetFromTensorflow(bufferModel, bufferConfig); else if (framework == "darknet") return readNetFromDarknet(bufferConfig, bufferModel); else if (framework == "torch") CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers"); else if (framework == "dldt") CV_Error(Error::StsNotImplemented, "Reading Intel's Model Optimizer models from buffers"); CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework); } Net readNetFromModelOptimizer(const String &xml, const String &bin) { return Net::readFromModelOptimizer(xml, bin); } CV__DNN_EXPERIMENTAL_NS_END }} // namespace