diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 77c62475a2..dce7ba11ec 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -97,3 +97,8 @@ if(BUILD_PERF_TESTS) endif() endif() endif() + +ocv_option(${the_module}_REUSE_MEMORY "Enable reusing strategy of memory management" ON) +if (${the_module}_REUSE_MEMORY) + add_definitions(-DREUSE_DNN_MEMORY=1) +endif() diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 10e4b0e349..8889d6020b 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -367,43 +367,42 @@ public: } } - void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force) + void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst) { +#ifdef REUSE_DNN_MEMORY Mat bestBlob; LayerPin bestBlobPin; - if( !force ) + std::map::iterator hostIt; + std::map::iterator refIt; + + const int targetTotal = total(shape); + int bestBlobTotal = INT_MAX; + + for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt) { - std::map::iterator hostIt; - std::map::iterator refIt; - - const int targetTotal = total(shape); - int bestBlobTotal = INT_MAX; - - for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt) + refIt = refCounter.find(hostIt->first); + // Use only blobs that had references before because if not, + // it might be used as output. + if (refIt != refCounter.end() && refIt->second == 0) { - refIt = refCounter.find(hostIt->first); - // Use only blobs that had references before because if not, - // it might be used as output. - if (refIt != refCounter.end() && refIt->second == 0) + Mat& unusedBlob = hostIt->second; + if (unusedBlob.total() >= targetTotal && + unusedBlob.total() < bestBlobTotal) { - Mat& unusedBlob = hostIt->second; - if (unusedBlob.total() >= targetTotal && - unusedBlob.total() < bestBlobTotal) - { - bestBlobPin = hostIt->first; - bestBlob = unusedBlob; - bestBlobTotal = unusedBlob.total(); - } + bestBlobPin = hostIt->first; + bestBlob = unusedBlob; + bestBlobTotal = unusedBlob.total(); } } } if (!bestBlob.empty()) { reuse(bestBlobPin, lp); - dst = Mat(shape, CV_32F, bestBlob.data); + dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape); } else +#endif // REUSE_DNN_MEMORY { // if dst already has been allocated with total(shape) elements, // it won't be recrreated and pointer of dst.data remains the same. @@ -412,34 +411,32 @@ public: } } - void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force) + void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst) { +#ifdef REUSE_DNN_MEMORY UMat bestBlob; LayerPin bestBlobPin; - if( !force ) + std::map::iterator hostIt; + std::map::iterator refIt; + + const int targetTotal = total(shape); + int bestBlobTotal = INT_MAX; + + for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt) { - std::map::iterator hostIt; - std::map::iterator refIt; - - const int targetTotal = total(shape); - int bestBlobTotal = INT_MAX; - - for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt) + refIt = refCounter.find(hostIt->first); + // Use only blobs that had references before because if not, + // it might be used as output. + if (refIt != refCounter.end() && refIt->second == 0) { - refIt = refCounter.find(hostIt->first); - // Use only blobs that had references before because if not, - // it might be used as output. - if (refIt != refCounter.end() && refIt->second == 0) + UMat& unusedBlob = hostIt->second; + if (unusedBlob.total() >= targetTotal && + unusedBlob.total() < bestBlobTotal) { - UMat& unusedBlob = hostIt->second; - if (unusedBlob.total() >= targetTotal && - unusedBlob.total() < bestBlobTotal) - { - bestBlobPin = hostIt->first; - bestBlob = unusedBlob; - bestBlobTotal = unusedBlob.total(); - } + bestBlobPin = hostIt->first; + bestBlob = unusedBlob; + bestBlobTotal = unusedBlob.total(); } } } @@ -449,6 +446,7 @@ public: umat_dst.create(shape, CV_32F); } else +#endif // REUSE_DNN_MEMORY { // if dst already has been allocated with total(shape) elements, // it won't be recrreated and pointer of dst.data remains the same. @@ -458,8 +456,7 @@ public: } void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes, - std::vector& pinsForInternalBlobs, - bool maximizeReuse) + std::vector& pinsForInternalBlobs) { CV_TRACE_FUNCTION(); bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT && @@ -530,7 +527,6 @@ public: } std::map >::reverse_iterator it; - bool force = !maximizeReuse && ld.inputBlobsId.size() > 1; for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++) { for(int j = 0; j < it->second.size(); j++) @@ -539,7 +535,7 @@ public: if (total(shapes[index])) { LayerPin blobPin(ld.id, index); - if (index < outShapes.size() && inPlace && !force) + if (index < outShapes.size() && inPlace) { if (use_umat) { @@ -558,9 +554,9 @@ public: else { if (use_umat) - reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force); + reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]); else - reuseOrCreate(shapes[index], blobPin, *blobs[index], force); + reuseOrCreate(shapes[index], blobPin, *blobs[index]); } } } @@ -1111,8 +1107,7 @@ struct Net::Impl CV_Assert(layerShapesIt != layersShapes.end()); std::vector pinsForInternalBlobs; - bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE; - blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse); + blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs); ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); for (int i = 0; i < ld.outputBlobs.size(); ++i) { @@ -1415,6 +1410,9 @@ struct Net::Impl if( i >= ninputs ) { + // Allocate new memory to prevent collisions during memory + // reusing (see https://github.com/opencv/opencv/pull/10456). + output = output.clone(); Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() }; int ofs = 0; for( i = 0; i < ninputs; i++ )