Mirror of https://github.com/opencv/opencv.git
Merge pull request #25181 from dkurt:release_conv_weights
Release convolution weightsMat after usage #25181

### Pull Request Readiness Checklist

Related (but not resolved): https://github.com/opencv/opencv/issues/24134

Minor memory footprint improvement. Also, adds a test for VmHWM.

RAM top memory usage (-230MB):

| YOLOv3 (237MB file) | 4.x     | PR      |
|---------------------|---------|---------|
| no winograd         | 808 MB  | 581 MB  |
| winograd            | 1985 MB | 1750 MB |

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
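For context, the numbers in the table can be reproduced outside the test suite with the same VmHWM-based measurement this patch adds to the tests. Below is a minimal sketch, assuming a Linux host and locally available `yolov3.cfg` / `yolov3.weights` files (both paths are placeholders, not part of the PR); `Net::enableWinograd` is used to switch between the two rows of the table.

```cpp
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <opencv2/dnn.hpp>

// Read the process' peak resident set size ("VmHWM") in MB from /proc/self/status.
// Linux-only; returns 0 if the field is not found.
static size_t peakMemoryMB()
{
    std::ifstream status("/proc/self/status");
    std::string line, title;
    while (std::getline(status, line))
    {
        std::istringstream iss(line);
        iss >> title;
        if (title == "VmHWM:")
        {
            size_t kb = 0;
            iss >> kb;          // the value is reported in kB
            return kb / 1024;
        }
    }
    return 0;
}

int main()
{
    // Placeholder model paths; adjust to where the YOLOv3 files actually live.
    cv::dnn::Net net = cv::dnn::readNetFromDarknet("yolov3.cfg", "yolov3.weights");
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    net.enableWinograd(false);  // toggle to compare the "winograd" / "no winograd" rows

    size_t before = peakMemoryMB();
    cv::Mat blob = cv::dnn::blobFromImage(cv::Mat::zeros(416, 416, CV_8UC3), 1.0 / 255);
    net.setInput(blob);
    net.forward();
    std::cout << "Peak RSS grew by ~" << peakMemoryMB() - before << " MB" << std::endl;
    return 0;
}
```

VmHWM only ever grows during a process' lifetime, so the difference between two readings captures new peaks reached in between; the ONNX test added at the end of this diff relies on the same property.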
This commit is contained in:
parent aae77b65a5
commit 0b6c9a2123
```diff
@@ -1299,6 +1299,10 @@ public:
             fastConvImpl = initFastConv(weightsMat, &biasvec[0], ngroups, K, C, kernel_size, strides,
                                         dilations, pads_begin, pads_end, conv_dim,
                                         preferableTarget == DNN_TARGET_CPU_FP16, canUseWinograd);
+            // This is legal to release weightsMat here as this is not used anymore for
+            // OpenCV inference. If network needs to be reinitialized (new shape, new backend)
+            // a new version of weightsMat is created at .finalize() from original weights
+            weightsMat.release();
         }
 
         runFastConv(inputs[0], outputs[0], fastConvImpl, nstripes, activ, reluslope, fusedAdd);
```
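The new comment asserts that releasing `weightsMat` is safe because `.finalize()` rebuilds it from the original blobs whenever the network is reinitialized. A hedged, user-level sketch of that scenario, reusing the `net` object from the sketch above (the 416/608 input sizes are arbitrary examples, not part of the patch):

```cpp
// Hypothetical helper, not part of the PR: run the same network twice with
// different input geometries to exercise the re-finalization path that the
// comment in the hunk above relies on.
static void forwardTwice(cv::dnn::Net& net)
{
    net.setInput(cv::dnn::blobFromImage(cv::Mat::zeros(416, 416, CV_8UC3), 1.0 / 255));
    net.forward();   // fast-conv impl initialized; weightsMat released afterwards
    net.setInput(cv::dnn::blobFromImage(cv::Mat::zeros(608, 608, CV_8UC3), 1.0 / 255));
    net.forward();   // new input shape -> layers finalized again, weightsMat recreated
}
```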
```diff
@@ -1405,6 +1409,7 @@ public:
         params.set("input_zeropoint", inputZp);
         params.set("input_scale", inputScale);
 
+        Mat weightsMat = blobs[0].reshape(1, numOutput);
         Mat weightsQuantized(weightsMat.rows, weightsMat.cols, CV_8S);
         Mat biasQuantized(1, numOutput, CV_32S);
         Mat outputMultiplier(1, numOutput, CV_32F);
```
```diff
@@ -232,6 +232,8 @@ public:
         expectNoFallbacks(net);
     }
 
+    size_t getTopMemoryUsageMB();
+
 protected:
     void checkBackend(Mat* inp = 0, Mat* ref = 0)
     {
```
```diff
@@ -15,6 +15,14 @@
 #include <opencv2/core/utils/configuration.private.hpp>
 #include <opencv2/core/utils/logger.hpp>
 
+#ifdef _WIN32
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#include <psapi.h>
+#endif // _WIN32
+
 namespace cv { namespace dnn {
 CV__DNN_INLINE_NS_BEGIN
 
```
```diff
@@ -502,4 +510,28 @@ void initDNNTests()
     );
 }
 
+size_t DNNTestLayer::getTopMemoryUsageMB()
+{
+#ifdef _WIN32
+    PROCESS_MEMORY_COUNTERS proc;
+    GetProcessMemoryInfo(GetCurrentProcess(), &proc, sizeof(proc));
+    return proc.PeakWorkingSetSize / pow(1024, 2); // bytes to megabytes
+#else
+    std::ifstream status("/proc/self/status");
+    std::string line, title;
+    while (std::getline(status, line))
+    {
+        std::istringstream iss(line);
+        iss >> title;
+        if (title == "VmHWM:")
+        {
+            size_t mem;
+            iss >> mem;
+            return mem / 1024;
+        }
+    }
+    return 0l;
+#endif
+}
+
 } // namespace
```
```diff
@@ -2298,7 +2298,13 @@ TEST_P(Test_ONNX_nets, ResNet50v1)
     applyTestTag(CV_TEST_TAG_MEMORY_512MB);
 
     // output range: [-67; 75], after Softmax [0, 0.98]
+    size_t hwm0 = getTopMemoryUsageMB();
     testONNXModels("resnet50v1", pb, default_l1, default_lInf, true, target != DNN_TARGET_MYRIAD);
+    size_t hwm1 = getTopMemoryUsageMB();
+    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU)
+    {
+        EXPECT_LE(hwm1 - hwm0, 350) << "Top allocated memory";
+    }
 }
 
 TEST_P(Test_ONNX_nets, ResNet50_Int8)
```