Merge pull request #25181 from dkurt:release_conv_weights

Release convolution weightsMat after usage #25181

### Pull Request Readiness Checklist

related (but not resolved): https://github.com/opencv/opencv/issues/24134

Minor memory footprint improvement. Also, adds a test for VmHWM.

Peak RAM usage (reduced by ~230 MB for YOLOv3)

| YOLOv3 (237MB file) |   4.x   |    PR   |
|---------------------|---------|---------|
| no winograd         | 808 MB  | 581 MB  |
| winograd            | 1985 MB | 1750 MB |

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Dmitry Kurtaev 2024-03-25 09:03:28 +03:00 committed by GitHub
parent aae77b65a5
commit 0b6c9a2123
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 45 additions and 0 deletions

View File

@@ -1299,6 +1299,10 @@ public:
fastConvImpl = initFastConv(weightsMat, &biasvec[0], ngroups, K, C, kernel_size, strides,
dilations, pads_begin, pads_end, conv_dim,
preferableTarget == DNN_TARGET_CPU_FP16, canUseWinograd);
// This is legal to release weightsMat here as this is not used anymore for
// OpenCV inference. If network needs to be reinitialized (new shape, new backend)
// a new version of weightsMat is created at .finalize() from original weights
weightsMat.release();
}
runFastConv(inputs[0], outputs[0], fastConvImpl, nstripes, activ, reluslope, fusedAdd);
@@ -1405,6 +1409,7 @@ public:
params.set("input_zeropoint", inputZp);
params.set("input_scale", inputScale);
Mat weightsMat = blobs[0].reshape(1, numOutput);
Mat weightsQuantized(weightsMat.rows, weightsMat.cols, CV_8S);
Mat biasQuantized(1, numOutput, CV_32S);
Mat outputMultiplier(1, numOutput, CV_32F);

View File

@@ -232,6 +232,8 @@ public:
expectNoFallbacks(net);
}
size_t getTopMemoryUsageMB();
protected:
void checkBackend(Mat* inp = 0, Mat* ref = 0)
{

View File

@@ -15,6 +15,14 @@
#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/logger.hpp>
#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#include <psapi.h>
#endif // _WIN32
namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
@@ -502,4 +510,28 @@ void initDNNTests()
);
}
// Returns the process's peak (high-water-mark) resident memory usage in
// megabytes. On Windows this is PeakWorkingSetSize from the process memory
// counters; elsewhere it is the VmHWM field of /proc/self/status (Linux —
// other POSIX systems without procfs fall through to 0).
// Returns 0 if the value cannot be obtained.
size_t DNNTestLayer::getTopMemoryUsageMB()
{
#ifdef _WIN32
    PROCESS_MEMORY_COUNTERS proc;
    // GetProcessMemoryInfo can fail; 'proc' would then be uninitialized,
    // so bail out with 0 instead of returning garbage.
    if (!GetProcessMemoryInfo(GetCurrentProcess(), &proc, sizeof(proc)))
        return 0;
    return proc.PeakWorkingSetSize / (1024 * 1024);  // bytes to megabytes (integer math, no pow())
#else
    std::ifstream status("/proc/self/status");
    std::string line, title;
    while (std::getline(status, line))
    {
        std::istringstream iss(line);
        iss >> title;
        if (title == "VmHWM:")
        {
            // VmHWM is reported in kB; convert to MB.
            size_t mem = 0;
            iss >> mem;
            return mem / 1024;
        }
    }
    return 0;  // VmHWM not found (e.g. no procfs)
#endif
}
} // namespace

View File

@@ -2298,7 +2298,13 @@ TEST_P(Test_ONNX_nets, ResNet50v1)
applyTestTag(CV_TEST_TAG_MEMORY_512MB);
// output range: [-67; 75], after Softmax [0, 0.98]
size_t hwm0 = getTopMemoryUsageMB();
testONNXModels("resnet50v1", pb, default_l1, default_lInf, true, target != DNN_TARGET_MYRIAD);
size_t hwm1 = getTopMemoryUsageMB();
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU)
{
EXPECT_LE(hwm1 - hwm0, 350) << "Top allocated memory";
}
}
TEST_P(Test_ONNX_nets, ResNet50_Int8)