Merge pull request #25181 from dkurt:release_conv_weights

Release convolution weightsMat after usage #25181

### Pull Request Readiness Checklist

related (but not resolved): https://github.com/opencv/opencv/issues/24134

Minor memory footprint improvement. Also, adds a test for VmHWM.

Peak RAM usage (reduced by ~230 MB for YOLOv3)

| YOLOv3 (237MB file) |   4.x   |    PR   |
|---------------------|---------|---------|
| no winograd         | 808 MB  | 581 MB  |
| winograd            | 1985 MB | 1750 MB |

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Dmitry Kurtaev 2024-03-25 09:03:28 +03:00 committed by GitHub
parent aae77b65a5
commit 0b6c9a2123
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 45 additions and 0 deletions

View File

@@ -1299,6 +1299,10 @@ public:
fastConvImpl = initFastConv(weightsMat, &biasvec[0], ngroups, K, C, kernel_size, strides,
dilations, pads_begin, pads_end, conv_dim,
preferableTarget == DNN_TARGET_CPU_FP16, canUseWinograd);
// This is legal to release weightsMat here as this is not used anymore for
// OpenCV inference. If network needs to be reinitialized (new shape, new backend)
// a new version of weightsMat is created at .finalize() from original weights
weightsMat.release();
}
runFastConv(inputs[0], outputs[0], fastConvImpl, nstripes, activ, reluslope, fusedAdd);
@@ -1405,6 +1409,7 @@ public:
params.set("input_zeropoint", inputZp);
params.set("input_scale", inputScale);
Mat weightsMat = blobs[0].reshape(1, numOutput);
Mat weightsQuantized(weightsMat.rows, weightsMat.cols, CV_8S);
Mat biasQuantized(1, numOutput, CV_32S);
Mat outputMultiplier(1, numOutput, CV_32F);

View File

@@ -232,6 +232,8 @@ public:
expectNoFallbacks(net);
}
size_t getTopMemoryUsageMB();
protected:
void checkBackend(Mat* inp = 0, Mat* ref = 0)
{

View File

@@ -15,6 +15,14 @@
#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/logger.hpp>
#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#include <psapi.h>
#endif // _WIN32
namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
@@ -502,4 +510,28 @@ void initDNNTests()
);
}
// Returns the process's peak (high-water-mark) resident memory usage in
// megabytes. On Windows this is PeakWorkingSetSize from the process memory
// counters; elsewhere it is the VmHWM field of /proc/self/status (Linux —
// other POSIX systems without procfs fall through to 0).
// Returns 0 if the value cannot be obtained.
size_t DNNTestLayer::getTopMemoryUsageMB()
{
#ifdef _WIN32
    PROCESS_MEMORY_COUNTERS proc;
    // GetProcessMemoryInfo can fail; 'proc' would then be uninitialized,
    // so bail out with 0 instead of returning garbage.
    if (!GetProcessMemoryInfo(GetCurrentProcess(), &proc, sizeof(proc)))
        return 0;
    return proc.PeakWorkingSetSize / (1024 * 1024);  // bytes to megabytes (integer math, no pow())
#else
    std::ifstream status("/proc/self/status");
    std::string line, title;
    while (std::getline(status, line))
    {
        std::istringstream iss(line);
        iss >> title;
        if (title == "VmHWM:")
        {
            // VmHWM is reported in kB; convert to MB.
            size_t mem = 0;
            iss >> mem;
            return mem / 1024;
        }
    }
    return 0;  // VmHWM not found (e.g. no procfs)
#endif
}
} // namespace

View File

@@ -2298,7 +2298,13 @@ TEST_P(Test_ONNX_nets, ResNet50v1)
applyTestTag(CV_TEST_TAG_MEMORY_512MB);
// output range: [-67; 75], after Softmax [0, 0.98]
size_t hwm0 = getTopMemoryUsageMB();
testONNXModels("resnet50v1", pb, default_l1, default_lInf, true, target != DNN_TARGET_MYRIAD);
size_t hwm1 = getTopMemoryUsageMB();
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU)
{
EXPECT_LE(hwm1 - hwm0, 350) << "Top allocated memory";
}
}
TEST_P(Test_ONNX_nets, ResNet50_Int8)