Allocate new memory for optimized concat to prevent collisions.
Add a flag to disable memory reusing in dnn module.
parent 3542c98d45
commit a9807d8f54
modules/dnn/CMakeLists.txt
@@ -97,3 +97,8 @@ if(BUILD_PERF_TESTS)
     endif()
   endif()
 endif()
+
+ocv_option(${the_module}_REUSE_MEMORY "Enable reusing strategy of memory management" ON)
+if (${the_module}_REUSE_MEMORY)
+  add_definitions(-DREUSE_DNN_MEMORY=1)
+endif()
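
Note: with this option the memory-reuse path below is compiled in only when REUSE_DNN_MEMORY is defined. Configuring with the option turned off, e.g. cmake -Dopencv_dnn_REUSE_MEMORY=OFF (assuming ${the_module} expands to opencv_dnn, as it does for the dnn module), leaves the macro undefined, so every reuseOrCreate call below falls through to a fresh allocation.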
modules/dnn/src/dnn.cpp
@@ -367,43 +367,42 @@ public:
             }
         }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
     {
+#ifdef REUSE_DNN_MEMORY
         Mat bestBlob;
         LayerPin bestBlobPin;
 
-        if( !force )
-        {
-            std::map<LayerPin, Mat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
-
-            const int targetTotal = total(shape);
-            int bestBlobTotal = INT_MAX;
-
-            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
-            {
-                refIt = refCounter.find(hostIt->first);
-                // Use only blobs that had references before because if not,
-                // it might be used as output.
-                if (refIt != refCounter.end() && refIt->second == 0)
-                {
-                    Mat& unusedBlob = hostIt->second;
-                    if (unusedBlob.total() >= targetTotal &&
-                        unusedBlob.total() < bestBlobTotal)
-                    {
-                        bestBlobPin = hostIt->first;
-                        bestBlob = unusedBlob;
-                        bestBlobTotal = unusedBlob.total();
-                    }
-                }
-            }
-        }
+        std::map<LayerPin, Mat>::iterator hostIt;
+        std::map<LayerPin, int>::iterator refIt;
+
+        const int targetTotal = total(shape);
+        int bestBlobTotal = INT_MAX;
+
+        for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
+        {
+            refIt = refCounter.find(hostIt->first);
+            // Use only blobs that had references before because if not,
+            // it might be used as output.
+            if (refIt != refCounter.end() && refIt->second == 0)
+            {
+                Mat& unusedBlob = hostIt->second;
+                if (unusedBlob.total() >= targetTotal &&
+                    unusedBlob.total() < bestBlobTotal)
+                {
+                    bestBlobPin = hostIt->first;
+                    bestBlob = unusedBlob;
+                    bestBlobTotal = unusedBlob.total();
+                }
+            }
+        }
         if (!bestBlob.empty())
         {
             reuse(bestBlobPin, lp);
-            dst = Mat(shape, CV_32F, bestBlob.data);
+            dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
         }
         else
+#endif // REUSE_DNN_MEMORY
         {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recrreated and pointer of dst.data remains the same.
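
Note on the changed assignment above: instead of wrapping the raw bestBlob.data pointer, the reused blob is now exposed as a reshaped view over exactly total(shape) of its elements. A minimal standalone sketch of the same cv::Mat mechanics (illustrative only, with made-up shapes; not code from the patch):

    #include <opencv2/core.hpp>
    #include <iostream>

    int main()
    {
        // A previously allocated "pool" blob with more elements (12) than needed.
        cv::Mat pool(3, 4, CV_32F, cv::Scalar(0));

        // Borrow the first 6 floats of the pool and view them as a 2x3 blob,
        // mirroring dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
        const int targetTotal = 6;
        cv::Mat dst = pool.reshape(1, 1).colRange(0, targetTotal).reshape(1, 2); // 2 rows -> 2x3

        dst.at<float>(0, 0) = 42.f;

        // The view shares the pool's buffer: the write is visible through pool,
        // but dst reports only the 6 elements it was given.
        std::cout << "same buffer: " << (dst.data == pool.data) << "\n"; // 1
        std::cout << "pool(0,0):   " << pool.at<float>(0, 0) << "\n";    // 42
        std::cout << "dst.total(): " << dst.total() << "\n";             // 6
        return 0;
    }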
@@ -412,34 +411,32 @@ public:
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
     {
+#ifdef REUSE_DNN_MEMORY
         UMat bestBlob;
         LayerPin bestBlobPin;
 
-        if( !force )
-        {
-            std::map<LayerPin, UMat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
-
-            const int targetTotal = total(shape);
-            int bestBlobTotal = INT_MAX;
-
-            for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
-            {
-                refIt = refCounter.find(hostIt->first);
-                // Use only blobs that had references before because if not,
-                // it might be used as output.
-                if (refIt != refCounter.end() && refIt->second == 0)
-                {
-                    UMat& unusedBlob = hostIt->second;
-                    if (unusedBlob.total() >= targetTotal &&
-                        unusedBlob.total() < bestBlobTotal)
-                    {
-                        bestBlobPin = hostIt->first;
-                        bestBlob = unusedBlob;
-                        bestBlobTotal = unusedBlob.total();
-                    }
-                }
-            }
-        }
+        std::map<LayerPin, UMat>::iterator hostIt;
+        std::map<LayerPin, int>::iterator refIt;
+
+        const int targetTotal = total(shape);
+        int bestBlobTotal = INT_MAX;
+
+        for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
+        {
+            refIt = refCounter.find(hostIt->first);
+            // Use only blobs that had references before because if not,
+            // it might be used as output.
+            if (refIt != refCounter.end() && refIt->second == 0)
+            {
+                UMat& unusedBlob = hostIt->second;
+                if (unusedBlob.total() >= targetTotal &&
+                    unusedBlob.total() < bestBlobTotal)
+                {
+                    bestBlobPin = hostIt->first;
+                    bestBlob = unusedBlob;
+                    bestBlobTotal = unusedBlob.total();
+                }
+            }
+        }
@@ -449,6 +446,7 @@ public:
             umat_dst.create(shape, CV_32F);
         }
         else
+#endif // REUSE_DNN_MEMORY
         {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recrreated and pointer of dst.data remains the same.
@@ -458,8 +456,7 @@ public:
     }
 
     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
-                               std::vector<LayerPin>& pinsForInternalBlobs,
-                               bool maximizeReuse)
+                               std::vector<LayerPin>& pinsForInternalBlobs)
     {
         CV_TRACE_FUNCTION();
         bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
@@ -530,7 +527,6 @@ public:
         }
 
         std::map<int, std::vector<int> >::reverse_iterator it;
-        bool force = !maximizeReuse && ld.inputBlobsId.size() > 1;
         for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
         {
             for(int j = 0; j < it->second.size(); j++)
@@ -539,7 +535,7 @@ public:
                 if (total(shapes[index]))
                 {
                     LayerPin blobPin(ld.id, index);
-                    if (index < outShapes.size() && inPlace && !force)
+                    if (index < outShapes.size() && inPlace)
                     {
                         if (use_umat)
                         {
@@ -558,9 +554,9 @@ public:
                     else
                     {
                         if (use_umat)
-                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
                         else
-                            reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *blobs[index]);
                     }
                 }
             }
@@ -1111,8 +1107,7 @@ struct Net::Impl
         CV_Assert(layerShapesIt != layersShapes.end());
 
         std::vector<LayerPin> pinsForInternalBlobs;
-        bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
-        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
+        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
         ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
         for (int i = 0; i < ld.outputBlobs.size(); ++i)
         {
@@ -1415,6 +1410,9 @@ struct Net::Impl
 
                     if( i >= ninputs )
                     {
+                        // Allocate new memory to prevent collisions during memory
+                        // reusing (see https://github.com/opencv/opencv/pull/10456).
+                        output = output.clone();
                         Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
                         int ofs = 0;
                         for( i = 0; i < ninputs; i++ )
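
The added clone is what decouples the optimized concat output from the blob manager's reuse pool: the per-input channel-range views then point into memory that no other layer's blob can alias. A standalone sketch of the aliasing it avoids (illustrative only; the blob names and shapes are made up and not taken from the patch):

    #include <opencv2/core.hpp>
    #include <iostream>

    int main()
    {
        // Hypothetical NCHW concat output (1x4x2x2) whose buffer came from the
        // reuse pool, so another blob may alias the same memory.
        const int sz[] = {1, 4, 2, 2};
        cv::Mat pooled(4, sz, CV_32F, cv::Scalar(0));
        cv::Mat otherBlob = pooled; // another layer's blob sharing the pooled buffer

        // With the fix, the concat output gets its own freshly allocated memory...
        cv::Mat output = pooled.clone();

        // ...so writing through a per-input channel-range view cannot corrupt otherBlob.
        cv::Range chrange[] = { cv::Range::all(), cv::Range(0, 2), cv::Range::all(), cv::Range::all() };
        cv::Mat inputView = output(chrange);
        inputView.setTo(cv::Scalar(1.0f));

        const int idx[] = {0, 0, 0, 0};
        std::cout << "output aliases pool: " << (output.data == pooled.data) << "\n"; // 0
        std::cout << "otherBlob(0,0,0,0):  " << otherBlob.at<float>(idx) << "\n";     // still 0
        return 0;
    }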