/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" #include "op_halide.hpp" #include "halide_scheduler.hpp" #include #include #include #include #include #include #include namespace cv { namespace dnn { CV__DNN_EXPERIMENTAL_NS_BEGIN using std::vector; using std::map; using std::make_pair; using std::set; namespace { typedef std::vector ShapesVec; struct LayerShapes { ShapesVec in, out, internal; // No guarantees that layer which support in-place computations // will be computed in-place (input.data_ptr == output.data_ptr). // If layer said that it could work in-place and layers after it // no longer use input blob, we'll set output = input. bool supportInPlace; LayerShapes() {supportInPlace = false;} }; } template static String toString(const T &v) { std::ostringstream ss; ss << v; return ss.str(); } Mat blobFromImage(const Mat& image, double scalefactor, const Size& size, const Scalar& mean, bool swapRB) { CV_TRACE_FUNCTION(); std::vector images(1, image); return blobFromImages(images, scalefactor, size, mean, swapRB); } Mat blobFromImages(const std::vector& images_, double scalefactor, Size size, const Scalar& mean_, bool swapRB) { CV_TRACE_FUNCTION(); std::vector images = images_; for (int i = 0; i < images.size(); i++) { Size imgSize = images[i].size(); if (size == Size()) size = imgSize; if (size != imgSize) { float resizeFactor = std::max(size.width / (float)imgSize.width, size.height / (float)imgSize.height); resize(images[i], images[i], Size(), resizeFactor, resizeFactor); Rect crop(Point(0.5 * (images[i].cols - size.width), 0.5 * (images[i].rows - size.height)), size); images[i] = images[i](crop); } if(images[i].depth() == CV_8U) images[i].convertTo(images[i], CV_32F); Scalar mean = mean_; if (swapRB) std::swap(mean[0], mean[2]); images[i] -= mean; images[i] *= scalefactor; } size_t i, nimages = images.size(); if(nimages == 0) return Mat(); Mat image0 = images[0]; int nch = image0.channels(); CV_Assert(image0.dims == 2); Mat blob, image; if (nch == 3 || nch == 4) { int sz[] = { (int)nimages, 3, image0.rows, image0.cols }; blob = Mat(4, sz, CV_32F); Mat ch[4]; for( i = 0; i < nimages; i++ ) { image = images[i]; CV_Assert(image.depth() == CV_32F); nch = image.channels(); CV_Assert(image.dims == 2 && (nch == 3 || nch == 4)); CV_Assert(image.size() == image0.size()); for( int j = 0; j < 3; j++ ) ch[j] = Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, j)); if(swapRB) std::swap(ch[0], ch[2]); split(image, ch); } } else { CV_Assert(nch == 1); int sz[] = { (int)nimages, 1, image0.rows, image0.cols }; blob = Mat(4, sz, CV_32F); for( i = 0; i < nimages; i++ ) { Mat image = images[i]; CV_Assert(image.depth() == CV_32F); nch = image.channels(); CV_Assert(image.dims == 2 && (nch == 1)); CV_Assert(image.size() == image0.size()); image.copyTo(Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, 0))); } } return blob; } struct LayerPin { int lid; int oid; LayerPin(int layerId = -1, int outputId = -1) : lid(layerId), oid(outputId) {} bool valid() const { return (lid >= 0 && oid >= 0); } bool equal(const LayerPin &r) const { return (lid == r.lid && oid == r.oid); } bool operator<(const LayerPin &r) const { return lid < r.lid || lid == r.lid && oid < r.oid; } bool operator ==(const LayerPin &r) const { return lid == r.lid && oid == r.oid; } }; // Objects of this class manages wrappers. For every CPU memory pointer and shape // one and only wrapper. Now it support wrapping for single backend and target. class BackendWrapManager { public: Ptr wrap(const Mat& m, int backendId, int targetId) { CV_TRACE_FUNCTION(); CV_Assert(backendId != DNN_BACKEND_DEFAULT); std::map >::iterator hostsIt; // Check that the same CPU memory was previously wrapped. hostsIt = hostWrappers.find(m.data); if (hostsIt == hostWrappers.end()) { // If not wrapped before. return (hostWrappers[m.data] = wrapHost(m, backendId, targetId)); } else { // Find if wrapper of this host and shape was created before. std::map, Ptr >::iterator it; std::pair key(m.data, m.size); it = extraWrappers.find(key); if (it == extraWrappers.end()) { MatShape shape(m.dims); for (int i = 0; i < m.dims; ++i) shape[i] = m.size.p[i]; return (extraWrappers[key] = wrapUser(hostsIt->second, shape)); } else return it->second; } } std::vector > wrap(const std::vector& mats, int backendId, int targetId) { const int num = mats.size(); std::vector > dst(num); for (int i = 0; i < num; ++i) { dst[i] = wrap(*mats[i], backendId, targetId); } return dst; } std::vector > wrap(const std::vector& mats, int backendId, int targetId) { const int num = mats.size(); std::vector > dst(num); for (int i = 0; i < num; ++i) { dst[i] = wrap(mats[i], backendId, targetId); } return dst; } void reset() { CV_TRACE_FUNCTION(); hostWrappers.clear(); extraWrappers.clear(); } private: // Backend-specific wrapping function. Ptr wrapHost(const Mat& m, int backendId, int targetId) { if (backendId == DNN_BACKEND_DEFAULT) { return Ptr(); } else if (backendId == DNN_BACKEND_HALIDE) { CV_Assert(haveHalide()); #ifdef HAVE_HALIDE return Ptr(new HalideBackendWrapper(targetId, m)); #endif // HAVE_HALIDE } else { CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); } return Ptr(); } // Backend-specific wrapping function. Ptr wrapUser(const Ptr& host, const MatShape& shape) { int backendId = host->backendId; if (backendId == DNN_BACKEND_DEFAULT) { return Ptr(); } else if (backendId == DNN_BACKEND_HALIDE) { CV_Assert(haveHalide()); #ifdef HAVE_HALIDE return Ptr(new HalideBackendWrapper(host, shape)); #endif // HAVE_HALIDE } else { CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); } return Ptr(); } // Wrappers that initialized for memory hosts (first wrapping of CPU data). std::map > hostWrappers; // The rest of wrappers. They initialized for non-host cv::Mat. std::map, Ptr > extraWrappers; }; struct LayerData { LayerData() : id(-1), flag(0) {} LayerData(int _id, const String &_name, const String &_type, LayerParams &_params) : id(_id), name(_name), type(_type), params(_params), flag(0) { CV_TRACE_FUNCTION(); //add logging info params.name = name; params.type = type; } int id; String name; String type; LayerParams params; std::vector inputBlobsId; std::set inputLayersId; std::set requiredOutputs; std::vector consumers; Ptr layerInstance; std::vector outputBlobs; std::vector inputBlobs; std::vector internals; // Computation nodes of implemented backends (except DEFAULT). std::map > backendNodes; // Flag for skip layer computation for specific backend. std::map skipFlags; int flag; Ptr getLayerInstance() { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(type, "type", type.c_str()); if (layerInstance) return layerInstance; layerInstance = LayerFactory::createLayerInstance(type, params); if (!layerInstance) { CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\""); } return layerInstance; } }; //fake layer containing network input blobs struct DataLayer : public Layer { void finalize(const std::vector&, std::vector&) {} void forward(std::vector&, std::vector&, std::vector &) {} int outputNameToIndex(String tgtName) { int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin()); return (idx < (int)outNames.size()) ? idx : -1; } void setNames(const std::vector &names) { outNames.assign(names.begin(), names.end()); } private: std::vector outNames; }; struct BlobManager { public: // Increase references counter to layer output. void addReference(const LayerPin& lp) { std::map::iterator it = refCounter.find(lp); if (it == refCounter.end()) refCounter[lp] = 1; else it->second += 1; } void addReferences(const std::vector& pins) { for (int i = 0; i < pins.size(); i++) { addReference(pins[i]); } } // Returns number of references to allocated memory that used in specific // layer blob. int numReferences(const LayerPin& lp) { std::map::iterator mapIt = reuseMap.find(lp); CV_Assert(mapIt != reuseMap.end()); LayerPin memHost = mapIt->second; std::map::iterator refIt = refCounter.find(memHost); CV_Assert(refIt != refCounter.end()); return refIt->second; } // Reuse data allocated in inside the blob. void reuse(const LayerPin& host, const LayerPin& user) { CV_Assert(reuseMap.find(user) == reuseMap.end()); CV_Assert(reuseMap.find(host) != reuseMap.end()); LayerPin memHost = reuseMap[host]; reuseMap[user] = memHost; if (refCounter.find(memHost) != refCounter.end()) { std::map::iterator userRefIt = refCounter.find(user); if (userRefIt != refCounter.end()) { refCounter[memHost] += userRefIt->second; refCounter.erase(userRefIt); } else refCounter[memHost] += 1; } } // Decrease references counter to allocated memory inside specific blob. void releaseReference(const LayerPin& lp) { std::map::iterator mapIt = reuseMap.find(lp); CV_Assert(mapIt != reuseMap.end()); std::map::iterator refIt = refCounter.find(mapIt->second); CV_Assert(refIt != refCounter.end()); CV_Assert(refIt->second > 0); refIt->second -= 1; } void releaseReferences(const std::vector& pins) { for (int i = 0; i < pins.size(); i++) { releaseReference(pins[i]); } } void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force) { Mat bestBlob; LayerPin bestBlobPin; if( !force ) { std::map::iterator hostIt; std::map::iterator refIt; const int targetTotal = total(shape); int bestBlobTotal = INT_MAX; for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt) { refIt = refCounter.find(hostIt->first); // Use only blobs that had references before because if not, // it might be used as output. if (refIt != refCounter.end() && refIt->second == 0) { Mat& unusedBlob = hostIt->second; if (unusedBlob.total() >= targetTotal && unusedBlob.total() < bestBlobTotal) { bestBlobPin = hostIt->first; bestBlob = unusedBlob; bestBlobTotal = unusedBlob.total(); } } } } if (!bestBlob.empty()) { reuse(bestBlobPin, lp); dst = Mat(shape, CV_32F, bestBlob.data); } else { // if dst already has been allocated with total(shape) elements, // it won't be recrreated and pointer of dst.data remains the same. dst.create(shape, CV_32F); addHost(lp, dst); } } void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes, std::vector& pinsForInternalBlobs, bool maximizeReuse) { CV_TRACE_FUNCTION(); pinsForInternalBlobs.clear(); std::vector& outputBlobs = ld.outputBlobs, &internalBlobs = ld.internals; const ShapesVec& outShapes = layerShapes.out, internalShapes = layerShapes.internal; outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob internalBlobs.resize(internalShapes.size()); CV_Assert(ld.requiredOutputs.size() <= outShapes.size()); // Check that layer could work in-place. bool inPlace = false; if (layerShapes.supportInPlace) { if (ld.inputBlobs.size() == 1) { // Get number of references to the input memory. int numRef = numReferences(ld.inputBlobsId[0]); // If current layer is one and only customer of this blob. inPlace = numRef == 1; } } ShapesVec shapes(outShapes); shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end()); std::vector blobs; for(int i = 0; i < outputBlobs.size(); i++) { blobs.push_back(&outputBlobs[i]); } for(int i = 0; i < internalBlobs.size(); i++) { blobs.push_back(&internalBlobs[i]); if (total(internalShapes[i])) { pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i)); } } addReferences(pinsForInternalBlobs); std::map > idxSizes; for(int i = 0; i < shapes.size(); i++) { idxSizes[total(shapes[i])].push_back(i); } std::map >::reverse_iterator it; bool force = !maximizeReuse && ld.inputBlobsId.size() > 1; for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++) { for(int j = 0; j < it->second.size(); j++) { int index = it->second[j]; if (total(shapes[index])) { LayerPin blobPin(ld.id, index); if (index < outShapes.size() && inPlace && !force) { CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index])); ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]); reuse(ld.inputBlobsId[0], blobPin); } else { reuseOrCreate(shapes[index], blobPin, *blobs[index], force); } } } } } // Clear internal state. Calls before an every reallocation. void reset() { CV_TRACE_FUNCTION(); refCounter.clear(); reuseMap.clear(); memHosts.clear(); } private: // Register allocated memory. void addHost(const LayerPin& lp, const Mat& mat) { CV_Assert(memHosts.find(lp) == memHosts.end()); reuseMap[lp] = lp; memHosts[lp] = mat; } std::map refCounter; // Maps pin to origin blob (for whom memory was allocated firstly). // For origin blobs key == value. std::map reuseMap; std::map memHosts; }; struct Net::Impl { typedef std::map LayersShapesMap; typedef std::map MapIdToLayerData; Impl() { //allocate fake net input layer netInputLayer = Ptr(new DataLayer()); LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second; inpl.id = 0; inpl.name = "_input"; inpl.type = "__NetInputLayer__"; inpl.layerInstance = netInputLayer; layerNameToId.insert(std::make_pair(inpl.name, inpl.id)); lastLayerId = 1; netWasAllocated = false; fusion = true; preferableBackend = DNN_BACKEND_DEFAULT; preferableTarget = DNN_TARGET_CPU; } Ptr netInputLayer; std::vector netOutputs; std::vector blobsToKeep; MapIdToLayerData layers; std::map layerNameToId; BlobManager blobManager; int preferableBackend; int preferableTarget; String halideConfigFile; // Backend-specific wrapping manager. BackendWrapManager backendWrapper; int lastLayerId; bool netWasAllocated; bool fusion; void compileHalide() { CV_TRACE_FUNCTION(); CV_Assert(preferableBackend == DNN_BACKEND_HALIDE); HalideScheduler scheduler(halideConfigFile); MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end(); ++it) { LayerData &ld = it->second; Ptr layer = ld.layerInstance; if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skipFlags[DNN_BACKEND_HALIDE]) { CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty()); bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]); if (!scheduled) { // Use automatic scheduling provided by layer. layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE], ld.inputBlobs, ld.outputBlobs, preferableTarget); } dnn::compileHalide(ld.outputBlobs, ld.backendNodes[DNN_BACKEND_HALIDE], preferableTarget); } } } void clear() { CV_TRACE_FUNCTION(); MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end(); it++) { if (it->second.id != 0) { it->second.inputBlobs.clear(); it->second.outputBlobs.clear(); it->second.internals.clear(); } it->second.skipFlags.clear(); //it->second.consumers.clear(); Ptr currLayer = it->second.layerInstance; if( currLayer.empty() ) continue; currLayer->unsetAttached(); Ptr poolingLayer = currLayer.dynamicCast(); if( !poolingLayer.empty() ) { poolingLayer->computeMaxIdx = true; } } it = layers.find(0); CV_Assert(it != layers.end()); it->second.skipFlags[DNN_BACKEND_DEFAULT] = true; } void setUpNet(const std::vector& blobsToKeep_ = std::vector()) { CV_TRACE_FUNCTION(); if (!netWasAllocated || this->blobsToKeep != blobsToKeep_) { clear(); allocateLayers(blobsToKeep_); computeNetOutputLayers(); initBackend(); if (!netWasAllocated ) { // If user didn't call compileHalide() between // setPreferableBackend(DNN_BACKEND_HALIDE) and forward(). if (preferableBackend == DNN_BACKEND_HALIDE) compileHalide(); } netWasAllocated = true; this->blobsToKeep = blobsToKeep_; } } int getLayerId(const String &layerName) { std::map::iterator it = layerNameToId.find(layerName); return (it != layerNameToId.end()) ? it->second : -1; } int getLayerId(int id) { MapIdToLayerData::iterator it = layers.find(id); return (it != layers.end()) ? id : -1; } int getLayerId(DictValue &layerDesc) { if (layerDesc.isInt()) return getLayerId(layerDesc.get()); else if (layerDesc.isString()) return getLayerId(layerDesc.get()); CV_Assert(layerDesc.isInt() || layerDesc.isString()); return -1; } String getLayerName(int id) { MapIdToLayerData::iterator it = layers.find(id); return (it != layers.end()) ? it->second.name : "(unknown layer)"; } LayerData& getLayerData(int id) { MapIdToLayerData::iterator it = layers.find(id); if (it == layers.end()) CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id)); return it->second; } LayerData& getLayerData(const String &layerName) { int id = getLayerId(layerName); if (id < 0) CV_Error(Error::StsError, "Requsted layer \"" + layerName + "\" not found"); return getLayerData(id); } LayerData& getLayerData(const DictValue &layerDesc) { CV_Assert(layerDesc.isInt() || layerDesc.isString()); if (layerDesc.isInt()) return getLayerData(layerDesc.get()); else /*if (layerDesc.isString())*/ return getLayerData(layerDesc.get()); } static void addLayerInput(LayerData &ld, int inNum, LayerPin from) { if ((int)ld.inputBlobsId.size() <= inNum) { ld.inputBlobsId.resize(inNum + 1); } else { LayerPin storedFrom = ld.inputBlobsId[inNum]; if (storedFrom.valid() && !storedFrom.equal(from)) CV_Error(Error::StsError, "Input #" + toString(inNum) + "of layer \"" + ld.name + "\" already was connected"); } ld.inputBlobsId[inNum] = from; } static void splitPin(const String &pinAlias, String &layerName, String &outName) { size_t delimPos = pinAlias.find('.'); layerName = pinAlias.substr(0, delimPos); outName = (delimPos == String::npos) ? String() : pinAlias.substr(delimPos + 1); } int resolvePinOutputName(LayerData &ld, const String &outName) { if (outName.empty()) return 0; if (std::isdigit(outName[0])) { char *lastChar; long inum = std::strtol(outName.c_str(), &lastChar, 10); if (*lastChar == 0) { CV_Assert(inum == (int)inum); return (int)inum; } } return ld.getLayerInstance()->outputNameToIndex(outName); } LayerPin getPinByAlias(const String &pinAlias) { LayerPin pin; String layerName, outName; splitPin(pinAlias, layerName, outName); pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName); if (pin.lid >= 0) pin.oid = resolvePinOutputName(getLayerData(pin.lid), outName); return pin; } std::vector getLayerOutPins(const String &pinAlias) { String layerName, outName; splitPin(pinAlias, layerName, outName); int lid = (layerName.empty()) ? 0 : getLayerId(layerName); std::vector pins; for (int i = 0; i < layers[lid].outputBlobs.size(); i++) { pins.push_back(LayerPin(lid, i)); } return pins; } void connect(int outLayerId, int outNum, int inLayerId, int inNum) { CV_Assert(outLayerId < inLayerId); LayerData &ldOut = getLayerData(outLayerId); LayerData &ldInp = getLayerData(inLayerId); addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum)); ldOut.requiredOutputs.insert(outNum); ldOut.consumers.push_back(LayerPin(inLayerId, outNum)); } void computeNetOutputLayers() { CV_TRACE_FUNCTION(); netOutputs.clear(); MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end(); it++) { int lid = it->first; LayerData &ld = it->second; if (ld.requiredOutputs.size() == 0) netOutputs.push_back(lid); } #ifndef NDEBUG std::cout << "\nNet Outputs(" << netOutputs.size() << "):\n"; for (size_t i = 0; i < netOutputs.size(); i++) std::cout << layers[netOutputs[i]].name << "\n"; #endif } void initBackend() { CV_TRACE_FUNCTION(); backendWrapper.reset(); if (preferableBackend == DNN_BACKEND_DEFAULT) { CV_Assert(preferableTarget == DNN_TARGET_CPU); return; } // Iterator to current layer. MapIdToLayerData::iterator it = layers.begin(); // Iterator to base layer for fusion. In example, in case of conv+bn+relu // it'll be a conv layer. MapIdToLayerData::iterator baseIt = layers.begin(); for (; it != layers.end(); it++) { LayerData &ldTop = it->second; Ptr layerTop = ldTop.layerInstance; if (!layerTop->supportBackend(preferableBackend)) { // Move base iterator to layer that don't support preferable // backend to prevent fusion over layer of different backend. baseIt = it; continue; } // Try to do layers fusion. LayerData &ldBot = baseIt->second; Ptr layerBot = ldBot.layerInstance; // 1. Check that bottom and top from the same backends. if (it != layers.begin() && layerBot->supportBackend(preferableBackend)) { // 2. Check that current layer works in-place. bool inPlace = ldTop.inputBlobs.size() == 1 && ldBot.outputBlobs.size() == 1 && ldTop.inputBlobs[0]->data == ldBot.outputBlobs[0].data; if (inPlace) { // 3. Try to attach node. CV_Assert(!ldBot.backendNodes[preferableBackend].empty()); Ptr fusedNode = layerTop->tryAttach(ldBot.backendNodes[preferableBackend]); if (!fusedNode.empty()) { ldTop.skipFlags[preferableBackend] = true; ldBot.backendNodes[preferableBackend] = fusedNode; continue; } } } // No layers fusion. ldTop.skipFlags[preferableBackend] = false; std::vector > inputs = backendWrapper.wrap(ldTop.inputBlobs, preferableBackend, preferableTarget); if (preferableBackend == DNN_BACKEND_HALIDE) { ldTop.backendNodes[DNN_BACKEND_HALIDE] = layerTop->initHalide(inputs); baseIt = it; } else { CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); } } } void allocateLayer(int lid, const LayersShapesMap& layersShapes) { CV_TRACE_FUNCTION(); LayerData &ld = layers[lid]; //already allocated if (ld.flag) return; size_t ninputs = ld.inputBlobsId.size(); #if 0 printf("layer %s:", ld.name.c_str()); for (size_t i = 0; i < ninputs; i++) { int inp_lid = ld.inputBlobsId[i].lid; LayerData &inp_ld = layers[inp_lid]; int inp_outputs = (int)inp_ld.outputBlobs.size(); std::cout << " " << inp_ld.name << "(" << inp_outputs; for( int j = 0; j < inp_outputs; j++ ) { std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size; } std::cout << ")"; } printf("\n"); #endif //determine parent layers for (size_t i = 0; i < ninputs; i++) ld.inputLayersId.insert(ld.inputBlobsId[i].lid); //allocate parents for (set::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++) allocateLayer(*i, layersShapes); //bind inputs ld.inputBlobs.resize(ninputs); for (size_t i = 0; i < ninputs; i++) { LayerPin from = ld.inputBlobsId[i]; CV_Assert(from.valid()); CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid); ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid]; } LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid); CV_Assert(layerShapesIt != layersShapes.end()); std::vector pinsForInternalBlobs; bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE; blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse); Ptr layerPtr = ld.getLayerInstance(); { layerPtr->finalize(ld.inputBlobs, ld.outputBlobs); #if 0 std::cout << "\toutputs:"; size_t noutputs = ld.outputBlobs.size(); for (size_t j = 0; j < noutputs; j++) { std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size; } std::cout << "\n"; #endif } // After allocation of layer, we decrease counters to it's input blobs. blobManager.releaseReferences(ld.inputBlobsId); blobManager.releaseReferences(pinsForInternalBlobs); ld.flag = 1; } #if 0 #define printf_(args) printf args #else #define printf_(args) #endif void fuseLayers(const std::vector& blobsToKeep_) { if( !fusion || preferableBackend == DNN_BACKEND_HALIDE ) return; CV_TRACE_FUNCTION(); // scan through all the layers. If there is convolution layer followed by the activation layer, // we try to embed this activation into the convolution and disable separate execution of the activation std::vector outnames; std::set pinsToKeep(blobsToKeep_.begin(), blobsToKeep_.end()); MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end(); it++) { int lid = it->first; LayerData& ld = layers[lid]; if( ld.skipFlags[DNN_BACKEND_DEFAULT] ) { printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); continue; } printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); if( ld.consumers.size() == 0 ) outnames.push_back(ld.layerInstance->name); // the optimization #1. try to fuse batch norm, scaling and/or activation layers // with the current layer if they follow it. Normally, the are fused with the convolution layer, // but some of them (like activation) may be fused with fully-connected, elemwise (+) and // some other layers. Ptr& currLayer = ld.layerInstance; if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 ) { LayerData* nextData = &layers[ld.consumers[0].lid]; Ptr nextBNormLayer = nextData->layerInstance.dynamicCast(); LayerPin lpNext(ld.consumers[0].lid, 0); if( !nextBNormLayer.empty() && pinsToKeep.count(lpNext) == 0 ) { LayerData* bnormData = nextData; nextData = 0; if( currLayer->setBatchNorm(nextBNormLayer) ) { printf_(("\tfused with %s\n", nextBNormLayer->name.c_str())); bnormData->skipFlags[DNN_BACKEND_DEFAULT] = true; ld.outputBlobs = layers[lpNext.lid].outputBlobs; if( bnormData->consumers.size() == 1 ) { nextData = &layers[bnormData->consumers[0].lid]; lpNext = LayerPin(bnormData->consumers[0].lid, 0); } } } Ptr nextScaleLayer; if( nextData ) nextScaleLayer = nextData->layerInstance.dynamicCast(); if( !nextScaleLayer.empty() && pinsToKeep.count(lpNext) == 0 ) { LayerData* scaleData = nextData; nextData = 0; if( currLayer->setScale(nextScaleLayer) ) { printf_(("\tfused with %s\n", nextScaleLayer->name.c_str())); scaleData->skipFlags[DNN_BACKEND_DEFAULT] = true; ld.outputBlobs = layers[lpNext.lid].outputBlobs; if( scaleData->consumers.size() == 1 ) { nextData = &layers[scaleData->consumers[0].lid]; lpNext = LayerPin(scaleData->consumers[0].lid, 0); } } } Ptr nextActivLayer; if( nextData ) nextActivLayer = nextData->layerInstance.dynamicCast(); if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 && currLayer->setActivation(nextActivLayer) ) { printf_(("\tfused with %s\n", nextActivLayer->name.c_str())); nextData->skipFlags[DNN_BACKEND_DEFAULT] = true; ld.outputBlobs = layers[lpNext.lid].outputBlobs; } } // the optimization #2. if there is no layer that takes max pooling layer's computed // max indices (and only some semantical segmentation networks might need this; // many others only take the maximum values), then we switch the max pooling // layer to the faster operating mode. Ptr poolingLayer = ld.layerInstance.dynamicCast(); if( !poolingLayer.empty() && !ld.consumers.empty() ) { size_t i = 0, nconsumers = ld.consumers.size(); for( ; i < nconsumers; i++ ) if( ld.consumers[i].oid > 0 ) break; // if there is no layer that takes the second output pin of the pooling layer // on input then we don't need to compute the indices if( i >= nconsumers ) { poolingLayer->computeMaxIdx = false; printf_(("\tsimplified pooling layer %s\n", poolingLayer->name.c_str())); } } // the optimization #3. if there is concat layer that concatenates channels // from the inputs together (i.e. axis == 1) then we make the inputs of // the concat layer to write to the concatetion output buffer // (and so we eliminate the concatenation layer, because the channels // are concatenated implicitly). Ptr concatLayer = ld.layerInstance.dynamicCast(); if( !concatLayer.empty() && concatLayer->axis == 1 && ld.outputBlobs.size() == 1 ) { Mat& output = ld.outputBlobs[0]; // TODO: in general, this optimization can always be done, but // many layers currently check that the input/output blobs are // continuous arrays. Unfortunately, this is not true when // the concatenation optimization is applied with batch_size > 1. // so, for now, we only apply this optimization in the most popular // case batch_size == 1. if( output.dims == 4 && output.size[0] == 1 ) { size_t i, ninputs = ld.inputBlobsId.size(); std::vector realinputs(ninputs); for( i = 0; i < ninputs; i++ ) { LayerPin pin = ld.inputBlobsId[i]; LayerData* inp_i_data = &layers[pin.lid]; while(inp_i_data->skipFlags[DNN_BACKEND_DEFAULT] && inp_i_data->inputBlobsId.size() == 1) { pin = inp_i_data->inputBlobsId[0]; inp_i_data = &layers[pin.lid]; } printf_(("\treal input for %s is %s\n", layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(), inp_i_data->getLayerInstance()->name.c_str())); if(inp_i_data->skipFlags[DNN_BACKEND_DEFAULT]) break; realinputs[i] = pin; } if( i >= ninputs ) { Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() }; int ofs = 0; for( i = 0; i < ninputs; i++ ) { LayerPin pin = realinputs[i]; LayerData* inp_i_data = &layers[pin.lid]; int channels_i = ld.inputBlobs[i]->size[1]; chrange[1] = Range(ofs, ofs + channels_i); printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(), pin.oid, ofs, ofs + channels_i)); ofs += channels_i; Mat output_slice = output(chrange); Mat& curr_output = inp_i_data->outputBlobs[pin.oid]; CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size); curr_output = output_slice; } ld.skipFlags[DNN_BACKEND_DEFAULT] = true; printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str())); } } } } } void allocateLayers(const std::vector& blobsToKeep_) { CV_TRACE_FUNCTION(); MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end(); it++) it->second.flag = 0; CV_Assert(!layers[0].outputBlobs.empty()); ShapesVec inputShapes; for(int i = 0; i < layers[0].outputBlobs.size(); i++) { CV_Assert(layers[0].outputBlobs[i].total()); inputShapes.push_back(shape(layers[0].outputBlobs[i])); } LayersShapesMap layersShapes; getLayersShapes(inputShapes, layersShapes); blobManager.reset(); for (it = layers.begin(); it != layers.end(); ++it) { const LayerData& ld = it->second; blobManager.addReferences(ld.inputBlobsId); } for (int i = 0; i < blobsToKeep_.size(); i++) { blobManager.addReference(blobsToKeep_[i]); } for (it = layers.begin(); it != layers.end(); it++) { int lid = it->first; allocateLayer(lid, layersShapes); } fuseLayers(blobsToKeep_); } void forwardLayer(LayerData &ld) { CV_TRACE_FUNCTION(); Ptr layer = ld.layerInstance; if (preferableBackend == DNN_BACKEND_DEFAULT || !layer->supportBackend(preferableBackend)) { if( !ld.skipFlags[DNN_BACKEND_DEFAULT] ) layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals); } else if (!ld.skipFlags[preferableBackend]) { std::vector > outputs = backendWrapper.wrap(ld.outputBlobs, preferableBackend, preferableTarget); Ptr node = ld.backendNodes[preferableBackend]; if (preferableBackend == DNN_BACKEND_HALIDE) { forwardHalide(outputs, node); } else { CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); } } ld.flag = 1; } void forwardToLayer(LayerData &ld, bool clearFlags = true) { CV_TRACE_FUNCTION(); if (clearFlags) { MapIdToLayerData::iterator it; for (it = layers.begin(); it != layers.end(); it++) it->second.flag = 0; } //already was forwarded if (ld.flag) return; //forward parents MapIdToLayerData::iterator it; for (it = layers.begin(); it->second.id < ld.id; it++) { LayerData &ld = it->second; if (ld.flag) continue; forwardLayer(ld); } //forward itself forwardLayer(ld); } void forwardAll() { CV_TRACE_FUNCTION(); forwardToLayer(layers.rbegin()->second, true); } void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes) { std::vector& inputLayerIds = layers[id].inputBlobsId; if (inOutShapes[id].in.empty()) { for(int i = 0; i < inputLayerIds.size(); i++) { int layerId = inputLayerIds[i].lid; LayersShapesMap::iterator it = inOutShapes.find(layerId); if(it == inOutShapes.end() || it->second.out.empty()) { getLayerShapesRecursively(layerId, inOutShapes); } const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid]; inOutShapes[id].in.push_back(shape); } } const ShapesVec& is = inOutShapes[id].in; ShapesVec& os = inOutShapes[id].out; ShapesVec& ints = inOutShapes[id].internal; int requiredOutputs = layers[id].requiredOutputs.size(); inOutShapes[id].supportInPlace = layers[id].getLayerInstance()->getMemoryShapes(is, requiredOutputs, os, ints); } void getLayersShapes(const ShapesVec& netInputShapes, LayersShapesMap& inOutShapes) { inOutShapes.clear(); inOutShapes[0].in = netInputShapes; //insert shape for first input layer for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) { getLayerShapesRecursively(it->first, inOutShapes); } } void getLayerShapes(const ShapesVec& netInputShapes, const int layerId, LayerShapes& shapes) { LayersShapesMap inOutShapes; inOutShapes[0].in = netInputShapes; //insert shape for first input layer getLayerShapesRecursively(layerId, inOutShapes); shapes = inOutShapes[layerId]; } LayerPin getLatestLayerPin(const std::vector& pins) { return *std::max_element(pins.begin(), pins.end()); } Mat getBlob(const LayerPin& pin) { CV_TRACE_FUNCTION(); if (!pin.valid()) CV_Error(Error::StsObjectNotFound, "Requested blob not found"); LayerData &ld = layers[pin.lid]; if ((size_t)pin.oid >= ld.outputBlobs.size()) { CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produce only " + toString(ld.outputBlobs.size()) + " outputs, the #" + toString(pin.oid) + " was requsted"); } if (preferableBackend != DNN_BACKEND_DEFAULT) { // Transfer data to CPU if it's require. backendWrapper.wrap(ld.outputBlobs[pin.oid], preferableBackend, preferableTarget)->copyToHost(); } else { CV_Assert(preferableTarget == DNN_TARGET_CPU); } return ld.outputBlobs[pin.oid]; } Mat getBlob(String outputName) { return getBlob(getPinByAlias(outputName)); } }; Net::Net() : impl(new Net::Impl) { } Net::~Net() { } int Net::addLayer(const String &name, const String &type, LayerParams ¶ms) { CV_TRACE_FUNCTION(); if (name.find('.') != String::npos) { CV_Error(Error::StsBadArg, "Added layer name \"" + name + "\" must not contain dot symbol"); return -1; } if (impl->getLayerId(name) >= 0) { CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net"); return -1; } int id = ++impl->lastLayerId; impl->layerNameToId.insert(std::make_pair(name, id)); impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params))); return id; } int Net::addLayerToPrev(const String &name, const String &type, LayerParams ¶ms) { CV_TRACE_FUNCTION(); int prvLid = impl->lastLayerId; int newLid = this->addLayer(name, type, params); this->connect(prvLid, 0, newLid, 0); return newLid; } void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum) { CV_TRACE_FUNCTION(); impl->connect(outLayerId, outNum, inpLayerId, inpNum); } void Net::connect(String _outPin, String _inPin) { CV_TRACE_FUNCTION(); LayerPin outPin = impl->getPinByAlias(_outPin); LayerPin inpPin = impl->getPinByAlias(_inPin); CV_Assert(outPin.valid() && inpPin.valid()); impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid); } Mat Net::forward(const String& outputName) { CV_TRACE_FUNCTION(); String layerName = outputName; if (layerName.empty()) layerName = getLayerNames().back(); impl->setUpNet(); impl->forwardToLayer(impl->getLayerData(layerName)); return impl->getBlob(layerName); } void Net::forward(std::vector& outputBlobs, const String& outputName) { CV_TRACE_FUNCTION(); impl->setUpNet(); String layerName = outputName; if (layerName.empty()) layerName = getLayerNames().back(); impl->forwardToLayer(impl->getLayerData(layerName)); LayerPin pin = impl->getPinByAlias(layerName); LayerData &ld = impl->layers[pin.lid]; outputBlobs = ld.outputBlobs; } void Net::forward(std::vector& outputBlobs, const std::vector& outBlobNames) { CV_TRACE_FUNCTION(); std::vector pins; for (int i = 0; i < outBlobNames.size(); i++) { pins.push_back(impl->getPinByAlias(outBlobNames[i])); } impl->setUpNet(pins); LayerPin out = impl->getLatestLayerPin(pins); impl->forwardToLayer(impl->getLayerData(out.lid)); outputBlobs.clear(); for (int i = 0; i < pins.size(); i++) { outputBlobs.push_back(impl->getBlob(pins[i])); } } void Net::forward(std::vector >& outputBlobs, const std::vector& outBlobNames) { CV_TRACE_FUNCTION(); std::vector pins; for (int i = 0; i < outBlobNames.size(); i++) { std::vector lp = impl->getLayerOutPins(outBlobNames[i]); pins.insert(pins.end(), lp.begin(), lp.end()); } impl->setUpNet(pins); LayerPin out = impl->getLatestLayerPin(pins); impl->forwardToLayer(impl->getLayerData(out.lid)); outputBlobs.resize(outBlobNames.size()); for (int i = 0; i < outBlobNames.size(); i++) { std::vector lp = impl->getLayerOutPins(outBlobNames[i]); for (int i = 0; i < lp.size(); i++) { outputBlobs[i].push_back(impl->getBlob(lp[i])); } } } void Net::setPreferableBackend(int backendId) { CV_TRACE_FUNCTION(); CV_TRACE_ARG(backendId); if( impl->preferableBackend != backendId ) { impl->preferableBackend = backendId; impl->netWasAllocated = false; impl->clear(); } } void Net::setPreferableTarget(int targetId) { CV_TRACE_FUNCTION(); CV_TRACE_ARG(targetId); if( impl->preferableTarget != targetId ) { impl->preferableTarget = targetId; impl->netWasAllocated = false; impl->clear(); } } void Net::setInputsNames(const std::vector &inputBlobNames) { CV_TRACE_FUNCTION(); impl->netInputLayer->setNames(inputBlobNames); } void Net::setInput(const Mat &blob_, const String& name) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); LayerPin pin; pin.lid = 0; pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name); if (!pin.valid()) CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found"); LayerData &ld = impl->layers[pin.lid]; ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) ); MatShape prevShape = shape(ld.outputBlobs[pin.oid]); bool oldShape = prevShape == shape(blob_); if (oldShape) blob_.copyTo(ld.outputBlobs[pin.oid]); else ld.outputBlobs[pin.oid] = blob_.clone(); impl->netWasAllocated = impl->netWasAllocated && oldShape; } Mat Net::getParam(LayerId layer, int numParam) { LayerData &ld = impl->getLayerData(layer); std::vector &layerBlobs = ld.layerInstance->blobs; CV_Assert(numParam < (int)layerBlobs.size()); return layerBlobs[numParam]; } void Net::setParam(LayerId layer, int numParam, const Mat &blob) { LayerData &ld = impl->getLayerData(layer); std::vector &layerBlobs = ld.layerInstance->blobs; CV_Assert(numParam < (int)layerBlobs.size()); //we don't make strong checks, use this function carefully layerBlobs[numParam] = blob; } int Net::getLayerId(const String &layer) { return impl->getLayerId(layer); } void Net::deleteLayer(LayerId) { CV_Error(Error::StsNotImplemented, ""); } Ptr Net::getLayer(LayerId layerId) { LayerData &ld = impl->getLayerData(layerId); return ld.getLayerInstance(); } std::vector > Net::getLayerInputs(LayerId layerId) { LayerData &ld = impl->getLayerData(layerId); if (!ld.layerInstance) CV_Error(Error::StsNullPtr, format("Requested layer \"%s\" was not initialized", ld.name.c_str())); std::vector > inputLayers; inputLayers.reserve(ld.inputLayersId.size()); std::set::iterator it; for (it = ld.inputLayersId.begin(); it != ld.inputLayersId.end(); ++it) { inputLayers.push_back(getLayer(*it)); } return inputLayers; } std::vector Net::getLayerNames() const { std::vector res; res.reserve(impl->layers.size()); Impl::MapIdToLayerData::iterator it; for (it = impl->layers.begin(); it != impl->layers.end(); it++) { if (it->second.id) //skip Data layer res.push_back(it->second.name); } return res; } bool Net::empty() const { return impl->layers.size() <= 1; //first layer is default Data layer } std::vector Net::getUnconnectedOutLayers() const { std::vector layersIds; Impl::MapIdToLayerData::iterator it; for (it = impl->layers.begin(); it != impl->layers.end(); it++) { int lid = it->first; LayerData &ld = it->second; if (ld.requiredOutputs.size() == 0) layersIds.push_back(lid); } return layersIds; } void Net::getLayersShapes(const ShapesVec& netInputShapes, std::vector* layersIds, std::vector* inLayersShapes, std::vector* outLayersShapes) const { if ((layersIds || inLayersShapes || outLayersShapes) == false) return; if (layersIds) layersIds->clear(); if (inLayersShapes) inLayersShapes->clear(); if (outLayersShapes) outLayersShapes->clear(); Impl::LayersShapesMap inOutShapes; impl->getLayersShapes(netInputShapes, inOutShapes); for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin(); it != inOutShapes.end(); it++) { if (layersIds) layersIds->push_back(it->first); if (inLayersShapes) inLayersShapes->push_back(it->second.in); if (outLayersShapes) outLayersShapes->push_back(it->second.out); } } void Net::getLayersShapes(const MatShape& netInputShape, std::vector* layerIds, std::vector* inLayersShapes, std::vector* outLayersShapes) const { getLayersShapes(ShapesVec(1, netInputShape), layerIds, inLayersShapes, outLayersShapes); } void Net::getLayerShapes(const MatShape& netInputShape, const int layerId, ShapesVec* inLayerShapes, ShapesVec* outLayerShapes) const { getLayerShapes(ShapesVec(1, netInputShape), layerId, inLayerShapes, outLayerShapes); } void Net::getLayerShapes(const ShapesVec& netInputShapes, const int layerId, ShapesVec* inLayerShapes, ShapesVec* outLayerShapes) const { LayerShapes shapes; impl->getLayerShapes(netInputShapes, layerId, shapes); if (inLayerShapes) *inLayerShapes = shapes.in; if (outLayerShapes) *outLayerShapes = shapes.out; } int64 Net::getFLOPS(const std::vector& netInputShapes) const { CV_TRACE_FUNCTION(); int64 flops = 0; std::vector ids; std::vector > inShapes, outShapes; getLayersShapes(netInputShapes, &ids, &inShapes, &outShapes); CV_Assert(inShapes.size() == outShapes.size()); CV_Assert(inShapes.size() == ids.size()); for(int i = 0; i < ids.size(); i++) { flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i], outShapes[i]); } return flops; } int64 Net::getFLOPS(const MatShape& netInputShape) const { return getFLOPS(std::vector(1, netInputShape)); } int64 Net::getFLOPS(const int layerId, const std::vector& netInputShapes) const { Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId); CV_Assert(layer != impl->layers.end()); LayerShapes shapes; impl->getLayerShapes(netInputShapes, layerId, shapes); return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out); } int64 Net::getFLOPS(const int layerId, const MatShape& netInputShape) const { return getFLOPS(layerId, std::vector(1, netInputShape)); } void Net::getLayerTypes(std::vector& layersTypes) const { layersTypes.clear(); std::map layers; for (Impl::MapIdToLayerData::iterator it = impl->layers.begin(); it != impl->layers.end(); it++) { if (layers.find(it->second.type) == layers.end()) layers[it->second.type] = 0; layers[it->second.type]++; } for (std::map::iterator it = layers.begin(); it != layers.end(); it++) { layersTypes.push_back(it->first); } } int Net::getLayersCount(const String& layerType) const { int count = 0; for (Impl::MapIdToLayerData::iterator it = impl->layers.begin(); it != impl->layers.end(); it++) { if (it->second.type == layerType) count++; } return count; } void Net::getMemoryConsumption(const int layerId, const std::vector& netInputShapes, size_t& weights, size_t& blobs) const { CV_TRACE_FUNCTION(); Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId); CV_Assert(layer != impl->layers.end()); weights = blobs = 0; for(int i = 0; i < layer->second.params.blobs.size(); i++) { const Mat& weightsBlob = layer->second.params.blobs[i]; weights += weightsBlob.total()*weightsBlob.elemSize(); } std::vector outLayerShapes; getLayerShapes(netInputShapes, layerId, 0, &outLayerShapes); for(int i = 0; i < outLayerShapes.size(); i++) { blobs += total(outLayerShapes[i]) * sizeof(float); } } void Net::getMemoryConsumption(const std::vector& netInputShapes, size_t& weights, size_t& blobs) const { CV_TRACE_FUNCTION(); std::vector layerIds; std::vector w, b; getMemoryConsumption(netInputShapes, layerIds, w, b); weights = blobs = 0; for(int i = 0; i < layerIds.size(); i++) { weights += w[i]; blobs += b[i]; } } void Net::getMemoryConsumption(const int layerId, const MatShape& netInputShape, size_t& weights, size_t& blobs) const { getMemoryConsumption(layerId, std::vector(1, netInputShape), weights, blobs); } void Net::getMemoryConsumption(const MatShape& netInputShape, size_t& weights, size_t& blobs) const { getMemoryConsumption(std::vector(1, netInputShape), weights, blobs); } void Net::getMemoryConsumption(const std::vector& netInputShapes, std::vector& layerIds, std::vector& weights, std::vector& blobs) const { CV_TRACE_FUNCTION(); layerIds.clear(); weights.clear(); blobs.clear(); std::vector > outLayerShapes; getLayersShapes(netInputShapes, &layerIds, 0, &outLayerShapes); for(int i = 0; i < layerIds.size(); i++) { int w = 0, b = 0; Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]); CV_Assert(layer != impl->layers.end()); for(int j = 0; j < layer->second.params.blobs.size(); j++) { const Mat& weightsBlob = layer->second.params.blobs[j]; w += weightsBlob.total()*weightsBlob.elemSize(); } for(int j = 0; j < outLayerShapes[i].size(); j++) { b += total(outLayerShapes[i][j]) * sizeof(float); } weights.push_back(w); blobs.push_back(b); } } void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector& layerIds, std::vector& weights, std::vector& blobs) const { getMemoryConsumption(std::vector(1, netInputShape), layerIds, weights, blobs); } void Net::enableFusion(bool fusion) { if( impl->fusion != fusion ) { impl->fusion = fusion; impl->netWasAllocated = false; impl->clear(); } } void Net::setHalideScheduler(const String& scheduler) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str()); impl->halideConfigFile = scheduler; } ////////////////////////////////////////////////////////////////////////// Importer::~Importer() {} Layer::Layer() {} Layer::Layer(const LayerParams ¶ms) : blobs(params.blobs), name(params.name), type(params.type) { } void Layer::setParamsFrom(const LayerParams ¶ms) { blobs = params.blobs; name = params.name; type = params.type; } int Layer::inputNameToIndex(String) { return -1; } int Layer::outputNameToIndex(String) { return -1; } bool Layer::supportBackend(int backendId) { return backendId == DNN_BACKEND_DEFAULT; } Ptr Layer::initHalide(const std::vector > &) { CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type + " layers is not defined."); return Ptr(); } void Layer::applyHalideScheduler(Ptr& node, const std::vector &inputs, const std::vector &outputs, int targetId) const { #ifdef HAVE_HALIDE CV_TRACE_FUNCTION(); Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"), xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile"); Halide::Func& top = node.dynamicCast()->funcs.back(); int outW, outH, outC, outN; getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN); if (targetId == DNN_TARGET_CPU) { if (outW == 1 && outH == 1) { if (outC + outN == 1) return; if (outC > 8) top.split(c, co, ci, 8) .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile) .parallel(tile) .vectorize(ci, 8); else top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile) .parallel(tile); } else { if (outH > 2) { top.reorder(x, c, y) .split(y, yo, yi, 2) .fuse(yo, n, tile) .parallel(tile) .unroll(yi) .vectorize(x, outW >= 16 ? 16 : outW); } } } else if (targetId == DNN_TARGET_OPENCL) { int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC; if (outW == 1 && outH == 1) { top.split(c, co, ci, c_split) .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile) .gpu_blocks(tile) .gpu_threads(ci); } else { int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW; int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH; top.split(x, xo, xi, x_split).split(y, yo, yi, y_split) .split(c, co, ci, c_split) .gpu_blocks(xo, yo, co) .gpu_threads(xi, yi) .reorder(xi, yi, ci, xo, yo, co) .vectorize(ci); } } else CV_Error(Error::StsNotImplemented, "Unknown target identifier"); #endif // HAVE_HALIDE } Ptr Layer::tryAttach(const Ptr& node) { return Ptr(); } bool Layer::setActivation(const Ptr&) { return false; } bool Layer::setBatchNorm(const Ptr&) { return false; } bool Layer::setScale(const Ptr&) { return false; } void Layer::unsetAttached() { setActivation(Ptr()); setBatchNorm(Ptr()); setScale(Ptr()); } template static void vecToPVec(const std::vector &v, std::vector &pv) { pv.resize(v.size()); for (size_t i = 0; i < v.size(); i++) pv[i] = const_cast(&v[i]); } void Layer::finalize(const std::vector &inputs, std::vector &outputs) { CV_TRACE_FUNCTION(); std::vector inputsp; vecToPVec(inputs, inputsp); this->finalize(inputsp, outputs); } void Layer::finalize(const std::vector &input, std::vector &output) { (void)input;(void)output; } std::vector Layer::finalize(const std::vector &inputs) { CV_TRACE_FUNCTION(); std::vector outputs; this->finalize(inputs, outputs); return outputs; } void Layer::forward(const std::vector &inputs, std::vector &outputs, std::vector &internals) { CV_TRACE_FUNCTION(); std::vector inputsp; vecToPVec(inputs, inputsp); this->forward(inputsp, outputs, internals); } void Layer::run(const std::vector &inputs, std::vector &outputs, std::vector &internals) { CV_TRACE_FUNCTION(); std::vector inputsp; vecToPVec(inputs, inputsp); this->finalize(inputsp, outputs); this->forward(inputsp, outputs, internals); } Layer::~Layer() {} bool Layer::getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, std::vector &internals) const { CV_Assert(inputs.size()); outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]); return false; } ////////////////////////////////////////////////////////////////////////// static Mutex& getLayerFactoryMutex() { static Mutex* volatile instance = NULL; if (instance == NULL) { cv::AutoLock lock(getInitializationMutex()); if (instance == NULL) instance = new Mutex(); } return *instance; } typedef std::map LayerFactory_Impl; static LayerFactory_Impl& getLayerFactoryImpl_() { static LayerFactory_Impl impl; return impl; } static LayerFactory_Impl& getLayerFactoryImpl() { static LayerFactory_Impl* volatile instance = NULL; if (instance == NULL) { cv::AutoLock lock(getLayerFactoryMutex()); if (instance == NULL) { instance = &getLayerFactoryImpl_(); initializeLayerFactory(); } } return *instance; } void LayerFactory::registerLayer(const String &type, Constuctor constructor) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(type, "type", type.c_str()); cv::AutoLock lock(getLayerFactoryMutex()); String type_ = type.toLowerCase(); LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type_); if (it != getLayerFactoryImpl().end() && it->second != constructor) { CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered"); } getLayerFactoryImpl().insert(std::make_pair(type_, constructor)); } void LayerFactory::unregisterLayer(const String &type) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(type, "type", type.c_str()); cv::AutoLock lock(getLayerFactoryMutex()); String type_ = type.toLowerCase(); getLayerFactoryImpl().erase(type_); } Ptr LayerFactory::createLayerInstance(const String &type, LayerParams& params) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(type, "type", type.c_str()); cv::AutoLock lock(getLayerFactoryMutex()); String type_ = type.toLowerCase(); LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type_); if (it != getLayerFactoryImpl().end()) { return it->second(params); } else { return Ptr(); //NULL } } BackendNode::BackendNode(int backendId) : backendId(backendId) {} BackendNode::~BackendNode() {}; BackendWrapper::BackendWrapper(int backendId, int targetId) : backendId(backendId), targetId(targetId) {} BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m) { CV_Error(Error::StsNotImplemented, "Constructor of backend wrapper must be implemented"); } BackendWrapper::BackendWrapper(const Ptr& base, const MatShape& shape) { CV_Error(Error::StsNotImplemented, "Constructor of backend wrapper must be implemented"); } BackendWrapper::~BackendWrapper() {} CV__DNN_EXPERIMENTAL_NS_END }} // namespace