Mirror of https://github.com/opencv/opencv.git
Merge pull request #25181 from dkurt:release_conv_weights
Release convolution weightsMat after usage #25181

### Pull Request Readiness Checklist

Related (but not resolved): https://github.com/opencv/opencv/issues/24134

Minor memory footprint improvement. Also, adds a test for VmHWM.

RAM top memory usage (-230MB):

| YOLOv3 (237MB file) | 4.x     | PR      |
|---------------------|---------|---------|
| no winograd         | 808 MB  | 581 MB  |
| winograd            | 1985 MB | 1750 MB |

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
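For context, the numbers in the table can be reproduced outside the test suite with the same VmHWM-based measurement this patch adds to the tests. Below is a minimal sketch, assuming a Linux host and locally available `yolov3.cfg` / `yolov3.weights` files (both paths are placeholders, not part of the PR); `Net::enableWinograd` is used to switch between the two rows of the table.

```cpp
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <opencv2/dnn.hpp>

// Read the process' peak resident set size ("VmHWM") in MB from /proc/self/status.
// Linux-only; returns 0 if the field is not found.
static size_t peakMemoryMB()
{
    std::ifstream status("/proc/self/status");
    std::string line, title;
    while (std::getline(status, line))
    {
        std::istringstream iss(line);
        iss >> title;
        if (title == "VmHWM:")
        {
            size_t kb = 0;
            iss >> kb;          // the value is reported in kB
            return kb / 1024;
        }
    }
    return 0;
}

int main()
{
    // Placeholder model paths; adjust to where the YOLOv3 files actually live.
    cv::dnn::Net net = cv::dnn::readNetFromDarknet("yolov3.cfg", "yolov3.weights");
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    net.enableWinograd(false);  // toggle to compare the "winograd" / "no winograd" rows

    size_t before = peakMemoryMB();
    cv::Mat blob = cv::dnn::blobFromImage(cv::Mat::zeros(416, 416, CV_8UC3), 1.0 / 255);
    net.setInput(blob);
    net.forward();
    std::cout << "Peak RSS grew by ~" << peakMemoryMB() - before << " MB" << std::endl;
    return 0;
}
```

VmHWM only ever grows during a process' lifetime, so the difference between two readings captures new peaks reached in between; the ONNX test added at the end of this diff relies on the same property.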
This commit is contained in:
parent aae77b65a5
commit 0b6c9a2123
```diff
@@ -1299,6 +1299,10 @@ public:
             fastConvImpl = initFastConv(weightsMat, &biasvec[0], ngroups, K, C, kernel_size, strides,
                                         dilations, pads_begin, pads_end, conv_dim,
                                         preferableTarget == DNN_TARGET_CPU_FP16, canUseWinograd);
+            // This is legal to release weightsMat here as this is not used anymore for
+            // OpenCV inference. If network needs to be reinitialized (new shape, new backend)
+            // a new version of weightsMat is created at .finalize() from original weights
+            weightsMat.release();
         }
 
         runFastConv(inputs[0], outputs[0], fastConvImpl, nstripes, activ, reluslope, fusedAdd);
```
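The new comment asserts that releasing `weightsMat` is safe because `.finalize()` rebuilds it from the original blobs whenever the network is reinitialized. A hedged, user-level sketch of that scenario, reusing the `net` object from the sketch above (the 416/608 input sizes are arbitrary examples, not part of the patch):

```cpp
// Hypothetical helper, not part of the PR: run the same network twice with
// different input geometries to exercise the re-finalization path that the
// comment in the hunk above relies on.
static void forwardTwice(cv::dnn::Net& net)
{
    net.setInput(cv::dnn::blobFromImage(cv::Mat::zeros(416, 416, CV_8UC3), 1.0 / 255));
    net.forward();   // fast-conv impl initialized; weightsMat released afterwards
    net.setInput(cv::dnn::blobFromImage(cv::Mat::zeros(608, 608, CV_8UC3), 1.0 / 255));
    net.forward();   // new input shape -> layers finalized again, weightsMat recreated
}
```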
```diff
@@ -1405,6 +1409,7 @@ public:
         params.set("input_zeropoint", inputZp);
         params.set("input_scale", inputScale);
 
+        Mat weightsMat = blobs[0].reshape(1, numOutput);
         Mat weightsQuantized(weightsMat.rows, weightsMat.cols, CV_8S);
         Mat biasQuantized(1, numOutput, CV_32S);
         Mat outputMultiplier(1, numOutput, CV_32F);
```
```diff
@@ -232,6 +232,8 @@ public:
         expectNoFallbacks(net);
     }
 
+    size_t getTopMemoryUsageMB();
+
 protected:
     void checkBackend(Mat* inp = 0, Mat* ref = 0)
     {
```
```diff
@@ -15,6 +15,14 @@
 #include <opencv2/core/utils/configuration.private.hpp>
 #include <opencv2/core/utils/logger.hpp>
 
+#ifdef _WIN32
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#include <psapi.h>
+#endif // _WIN32
+
 namespace cv { namespace dnn {
 CV__DNN_INLINE_NS_BEGIN
 
```
```diff
@@ -502,4 +510,28 @@ void initDNNTests()
     );
 }
 
+size_t DNNTestLayer::getTopMemoryUsageMB()
+{
+#ifdef _WIN32
+    PROCESS_MEMORY_COUNTERS proc;
+    GetProcessMemoryInfo(GetCurrentProcess(), &proc, sizeof(proc));
+    return proc.PeakWorkingSetSize / pow(1024, 2); // bytes to megabytes
+#else
+    std::ifstream status("/proc/self/status");
+    std::string line, title;
+    while (std::getline(status, line))
+    {
+        std::istringstream iss(line);
+        iss >> title;
+        if (title == "VmHWM:")
+        {
+            size_t mem;
+            iss >> mem;
+            return mem / 1024;
+        }
+    }
+    return 0l;
+#endif
+}
+
 } // namespace
```
```diff
@@ -2298,7 +2298,13 @@ TEST_P(Test_ONNX_nets, ResNet50v1)
     applyTestTag(CV_TEST_TAG_MEMORY_512MB);
 
     // output range: [-67; 75], after Softmax [0, 0.98]
+    size_t hwm0 = getTopMemoryUsageMB();
     testONNXModels("resnet50v1", pb, default_l1, default_lInf, true, target != DNN_TARGET_MYRIAD);
+    size_t hwm1 = getTopMemoryUsageMB();
+    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU)
+    {
+        EXPECT_LE(hwm1 - hwm0, 350) << "Top allocated memory";
+    }
 }
 
 TEST_P(Test_ONNX_nets, ResNet50_Int8)
```