mirror of
https://github.com/opencv/opencv.git
synced 2024-11-28 05:06:29 +08:00
Merge pull request #25181 from dkurt:release_conv_weights
Release convolution weightsMat after usage #25181

### Pull Request Readiness Checklist

Related (but not resolved): https://github.com/opencv/opencv/issues/24134

Minor memory footprint improvement. Also adds a test for VmHWM.

RAM top memory usage (-230MB)

| YOLOv3 (237MB file) | 4.x     | PR      |
|---------------------|---------|---------|
| no winograd         | 808 MB  | 581 MB  |
| winograd            | 1985 MB | 1750 MB |

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
aae77b65a5
commit
0b6c9a2123
@ -1299,6 +1299,10 @@ public:
|
||||
fastConvImpl = initFastConv(weightsMat, &biasvec[0], ngroups, K, C, kernel_size, strides,
|
||||
dilations, pads_begin, pads_end, conv_dim,
|
||||
preferableTarget == DNN_TARGET_CPU_FP16, canUseWinograd);
|
||||
// It is legal to release weightsMat here, as it is no longer used for
|
||||
// OpenCV inference. If the network needs to be reinitialized (new shape, new backend),
|
||||
// a new version of weightsMat is created in .finalize() from the original weights.
|
||||
weightsMat.release();
|
||||
}
|
||||
|
||||
runFastConv(inputs[0], outputs[0], fastConvImpl, nstripes, activ, reluslope, fusedAdd);
|
||||
@ -1405,6 +1409,7 @@ public:
|
||||
params.set("input_zeropoint", inputZp);
|
||||
params.set("input_scale", inputScale);
|
||||
|
||||
Mat weightsMat = blobs[0].reshape(1, numOutput);
|
||||
Mat weightsQuantized(weightsMat.rows, weightsMat.cols, CV_8S);
|
||||
Mat biasQuantized(1, numOutput, CV_32S);
|
||||
Mat outputMultiplier(1, numOutput, CV_32F);
|
||||
|
@ -232,6 +232,8 @@ public:
|
||||
expectNoFallbacks(net);
|
||||
}
|
||||
|
||||
size_t getTopMemoryUsageMB();
|
||||
|
||||
protected:
|
||||
void checkBackend(Mat* inp = 0, Mat* ref = 0)
|
||||
{
|
||||
|
@ -15,6 +15,14 @@
|
||||
#include <opencv2/core/utils/configuration.private.hpp>
|
||||
#include <opencv2/core/utils/logger.hpp>
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#include <psapi.h>
|
||||
#endif // _WIN32
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
@ -502,4 +510,28 @@ void initDNNTests()
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * @brief Returns the peak ("high water mark") memory usage of the current
 *        process, converted to megabytes.
 *
 * On Windows the value comes from PROCESS_MEMORY_COUNTERS::PeakWorkingSetSize
 * (reported in bytes). On other platforms it is parsed from the "VmHWM:" entry
 * of /proc/self/status and divided by 1024.
 *
 * @return Peak memory usage in MB, or 0 when the value cannot be obtained
 *         (query failed, /proc/self/status is missing, or the entry is
 *         absent or unparsable).
 */
size_t DNNTestLayer::getTopMemoryUsageMB()
{
#ifdef _WIN32
    // Zero-initialise so that no indeterminate field is ever read.
    PROCESS_MEMORY_COUNTERS proc = {};
    if (!GetProcessMemoryInfo(GetCurrentProcess(), &proc, sizeof(proc)))
        return 0;  // query failed: report "unknown" instead of garbage

    // bytes to megabytes, in integer arithmetic (no floating-point pow())
    const size_t bytesPerMB = static_cast<size_t>(1024) * 1024;
    return static_cast<size_t>(proc.PeakWorkingSetSize) / bytesPerMB;
#else
    std::ifstream status("/proc/self/status");
    std::string line, title;
    while (std::getline(status, line))
    {
        std::istringstream iss(line);
        // Skip blank lines and every entry except the peak resident set size.
        if (!(iss >> title) || title != "VmHWM:")
            continue;

        size_t mem = 0;
        if (!(iss >> mem))
            return 0;  // malformed entry
        return mem / 1024;
    }
    // File not available (e.g. no procfs) or entry not found.
    return 0;
#endif
}
|
||||
|
||||
} // namespace
|
||||
|
@ -2298,7 +2298,13 @@ TEST_P(Test_ONNX_nets, ResNet50v1)
|
||||
applyTestTag(CV_TEST_TAG_MEMORY_512MB);
|
||||
|
||||
// output range: [-67; 75], after Softmax [0, 0.98]
|
||||
size_t hwm0 = getTopMemoryUsageMB();
|
||||
testONNXModels("resnet50v1", pb, default_l1, default_lInf, true, target != DNN_TARGET_MYRIAD);
|
||||
size_t hwm1 = getTopMemoryUsageMB();
|
||||
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU)
|
||||
{
|
||||
EXPECT_LE(hwm1 - hwm0, 350) << "Top allocated memory";
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Test_ONNX_nets, ResNet50_Int8)
|
||||
|
Loading…
Reference in New Issue
Block a user