Allocate new memory for optimized concat to prevent collisions.

Add a flag to disable memory reuse in the dnn module.
2025-06-12 12:22:51 +08:00 · 2017-12-28 16:04:09 +03:00 · 2017-12-28 16:04:09 +03:00 · a9807d8f54
commit a9807d8f54
parent 3542c98d45
2 changed files with 54 additions and 51 deletions
--- a/modules/dnn/CMakeLists.txt
+++ b/modules/dnn/CMakeLists.txt
@ -97,3 +97,8 @@ if(BUILD_PERF_TESTS)
    endif()
  endif()
 endif()
 ocv_option(${the_module}_REUSE_MEMORY "Enable reusing strategy of memory management" ON)
 if (${the_module}_REUSE_MEMORY)
  add_definitions(-DREUSE_DNN_MEMORY=1)
 endif()
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@ -367,43 +367,42 @@ public:
        }
    }
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
    {
 #ifdef REUSE_DNN_MEMORY
        Mat bestBlob;
        LayerPin bestBlobPin;
-        if( !force )
+        std::map<LayerPin, Mat>::iterator hostIt;
        std::map<LayerPin, int>::iterator refIt;
        const int targetTotal = total(shape);
        int bestBlobTotal = INT_MAX;
        for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
        {
-            std::map<LayerPin, Mat>::iterator hostIt;
+            refIt = refCounter.find(hostIt->first);
-            std::map<LayerPin, int>::iterator refIt;
+            // Use only blobs that had references before because if not,
-
+            // it might be used as output.
-            const int targetTotal = total(shape);
+            if (refIt != refCounter.end() && refIt->second == 0)
            int bestBlobTotal = INT_MAX;
            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
            {
-                refIt = refCounter.find(hostIt->first);
+                Mat& unusedBlob = hostIt->second;
-                // Use only blobs that had references before because if not,
+                if (unusedBlob.total() >= targetTotal &&
-                // it might be used as output.
+                    unusedBlob.total() < bestBlobTotal)
                if (refIt != refCounter.end() && refIt->second == 0)
                {
-                    Mat& unusedBlob = hostIt->second;
+                    bestBlobPin = hostIt->first;
-                    if (unusedBlob.total() >= targetTotal &&
+                    bestBlob = unusedBlob;
-                        unusedBlob.total() < bestBlobTotal)
+                    bestBlobTotal = unusedBlob.total();
                    {
                        bestBlobPin = hostIt->first;
                        bestBlob = unusedBlob;
                        bestBlobTotal = unusedBlob.total();
                    }
                }
            }
        }
        if (!bestBlob.empty())
        {
            reuse(bestBlobPin, lp);
-            dst = Mat(shape, CV_32F, bestBlob.data);
+            dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
        }
        else
 #endif  // REUSE_DNN_MEMORY
        {
            // if dst already has been allocated with total(shape) elements,
            // it won't be recreated and pointer of dst.data remains the same.
@ -412,34 +411,32 @@ public:
        }
    }
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
    {
 #ifdef REUSE_DNN_MEMORY
        UMat bestBlob;
        LayerPin bestBlobPin;
-        if( !force )
+        std::map<LayerPin, UMat>::iterator hostIt;
        std::map<LayerPin, int>::iterator refIt;
        const int targetTotal = total(shape);
        int bestBlobTotal = INT_MAX;
        for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
        {
-            std::map<LayerPin, UMat>::iterator hostIt;
+            refIt = refCounter.find(hostIt->first);
-            std::map<LayerPin, int>::iterator refIt;
+            // Use only blobs that had references before because if not,
-
+            // it might be used as output.
-            const int targetTotal = total(shape);
+            if (refIt != refCounter.end() && refIt->second == 0)
            int bestBlobTotal = INT_MAX;
            for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
            {
-                refIt = refCounter.find(hostIt->first);
+                UMat& unusedBlob = hostIt->second;
-                // Use only blobs that had references before because if not,
+                if (unusedBlob.total() >= targetTotal &&
-                // it might be used as output.
+                    unusedBlob.total() < bestBlobTotal)
                if (refIt != refCounter.end() && refIt->second == 0)
                {
-                    UMat& unusedBlob = hostIt->second;
+                    bestBlobPin = hostIt->first;
-                    if (unusedBlob.total() >= targetTotal &&
+                    bestBlob = unusedBlob;
-                        unusedBlob.total() < bestBlobTotal)
+                    bestBlobTotal = unusedBlob.total();
                    {
                        bestBlobPin = hostIt->first;
                        bestBlob = unusedBlob;
                        bestBlobTotal = unusedBlob.total();
                    }
                }
            }
        }
@ -449,6 +446,7 @@ public:
            umat_dst.create(shape, CV_32F);
        }
        else
 #endif  // REUSE_DNN_MEMORY
        {
            // if dst already has been allocated with total(shape) elements,
            // it won't be recreated and pointer of dst.data remains the same.
@ -458,8 +456,7 @@ public:
    }
    void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
-                               std::vector<LayerPin>& pinsForInternalBlobs,
+                               std::vector<LayerPin>& pinsForInternalBlobs)
                               bool maximizeReuse)
    {
        CV_TRACE_FUNCTION();
        bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
@ -530,7 +527,6 @@ public:
        }
        std::map<int, std::vector<int> >::reverse_iterator it;
        bool force = !maximizeReuse && ld.inputBlobsId.size() > 1;
        for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
        {
            for(int j = 0; j < it->second.size(); j++)
@ -539,7 +535,7 @@ public:
                if (total(shapes[index]))
                {
                    LayerPin blobPin(ld.id, index);
-                    if (index < outShapes.size() && inPlace && !force)
+                    if (index < outShapes.size() && inPlace)
                    {
                        if (use_umat)
                        {
@ -558,9 +554,9 @@ public:
                    else
                    {
                        if (use_umat)
-                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
                        else
-                            reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *blobs[index]);
                    }
                }
            }
@ -1111,8 +1107,7 @@ struct Net::Impl
        CV_Assert(layerShapesIt != layersShapes.end());
        std::vector<LayerPin> pinsForInternalBlobs;
-        bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
+        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
        ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
        for (int i = 0; i < ld.outputBlobs.size(); ++i)
        {
@ -1415,6 +1410,9 @@ struct Net::Impl
                    if( i >= ninputs )
                    {
                        // Allocate new memory to prevent collisions during memory
                        // reusing (see https://github.com/opencv/opencv/pull/10456).
                        output = output.clone();
                        Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
                        int ofs = 0;
                        for( i = 0; i < ninputs; i++ )