mirror of
https://github.com/opencv/opencv.git
synced 2025-06-12 12:22:51 +08:00
Allocate new memory for optimized concat to prevent collisions.
Add a flag to disable memory reuse in the dnn module.
This commit is contained in:
parent
3542c98d45
commit
a9807d8f54
@ -97,3 +97,8 @@ if(BUILD_PERF_TESTS)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
ocv_option(${the_module}_REUSE_MEMORY "Enable reusing strategy of memory management" ON)
|
||||||
|
if (${the_module}_REUSE_MEMORY)
|
||||||
|
add_definitions(-DREUSE_DNN_MEMORY=1)
|
||||||
|
endif()
|
||||||
|
@ -367,43 +367,42 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force)
|
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
|
||||||
{
|
{
|
||||||
|
#ifdef REUSE_DNN_MEMORY
|
||||||
Mat bestBlob;
|
Mat bestBlob;
|
||||||
LayerPin bestBlobPin;
|
LayerPin bestBlobPin;
|
||||||
|
|
||||||
if( !force )
|
std::map<LayerPin, Mat>::iterator hostIt;
|
||||||
|
std::map<LayerPin, int>::iterator refIt;
|
||||||
|
|
||||||
|
const int targetTotal = total(shape);
|
||||||
|
int bestBlobTotal = INT_MAX;
|
||||||
|
|
||||||
|
for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
|
||||||
{
|
{
|
||||||
std::map<LayerPin, Mat>::iterator hostIt;
|
refIt = refCounter.find(hostIt->first);
|
||||||
std::map<LayerPin, int>::iterator refIt;
|
// Use only blobs that had references before because if not,
|
||||||
|
// it might be used as output.
|
||||||
const int targetTotal = total(shape);
|
if (refIt != refCounter.end() && refIt->second == 0)
|
||||||
int bestBlobTotal = INT_MAX;
|
|
||||||
|
|
||||||
for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
|
|
||||||
{
|
{
|
||||||
refIt = refCounter.find(hostIt->first);
|
Mat& unusedBlob = hostIt->second;
|
||||||
// Use only blobs that had references before because if not,
|
if (unusedBlob.total() >= targetTotal &&
|
||||||
// it might be used as output.
|
unusedBlob.total() < bestBlobTotal)
|
||||||
if (refIt != refCounter.end() && refIt->second == 0)
|
|
||||||
{
|
{
|
||||||
Mat& unusedBlob = hostIt->second;
|
bestBlobPin = hostIt->first;
|
||||||
if (unusedBlob.total() >= targetTotal &&
|
bestBlob = unusedBlob;
|
||||||
unusedBlob.total() < bestBlobTotal)
|
bestBlobTotal = unusedBlob.total();
|
||||||
{
|
|
||||||
bestBlobPin = hostIt->first;
|
|
||||||
bestBlob = unusedBlob;
|
|
||||||
bestBlobTotal = unusedBlob.total();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!bestBlob.empty())
|
if (!bestBlob.empty())
|
||||||
{
|
{
|
||||||
reuse(bestBlobPin, lp);
|
reuse(bestBlobPin, lp);
|
||||||
dst = Mat(shape, CV_32F, bestBlob.data);
|
dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
#endif // REUSE_DNN_MEMORY
|
||||||
{
|
{
|
||||||
// if dst already has been allocated with total(shape) elements,
|
// if dst already has been allocated with total(shape) elements,
|
||||||
// it won't be recreated and the pointer dst.data remains the same.
|
// it won't be recreated and the pointer dst.data remains the same.
|
||||||
@ -412,34 +411,32 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force)
|
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
|
||||||
{
|
{
|
||||||
|
#ifdef REUSE_DNN_MEMORY
|
||||||
UMat bestBlob;
|
UMat bestBlob;
|
||||||
LayerPin bestBlobPin;
|
LayerPin bestBlobPin;
|
||||||
|
|
||||||
if( !force )
|
std::map<LayerPin, UMat>::iterator hostIt;
|
||||||
|
std::map<LayerPin, int>::iterator refIt;
|
||||||
|
|
||||||
|
const int targetTotal = total(shape);
|
||||||
|
int bestBlobTotal = INT_MAX;
|
||||||
|
|
||||||
|
for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
|
||||||
{
|
{
|
||||||
std::map<LayerPin, UMat>::iterator hostIt;
|
refIt = refCounter.find(hostIt->first);
|
||||||
std::map<LayerPin, int>::iterator refIt;
|
// Use only blobs that had references before because if not,
|
||||||
|
// it might be used as output.
|
||||||
const int targetTotal = total(shape);
|
if (refIt != refCounter.end() && refIt->second == 0)
|
||||||
int bestBlobTotal = INT_MAX;
|
|
||||||
|
|
||||||
for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
|
|
||||||
{
|
{
|
||||||
refIt = refCounter.find(hostIt->first);
|
UMat& unusedBlob = hostIt->second;
|
||||||
// Use only blobs that had references before because if not,
|
if (unusedBlob.total() >= targetTotal &&
|
||||||
// it might be used as output.
|
unusedBlob.total() < bestBlobTotal)
|
||||||
if (refIt != refCounter.end() && refIt->second == 0)
|
|
||||||
{
|
{
|
||||||
UMat& unusedBlob = hostIt->second;
|
bestBlobPin = hostIt->first;
|
||||||
if (unusedBlob.total() >= targetTotal &&
|
bestBlob = unusedBlob;
|
||||||
unusedBlob.total() < bestBlobTotal)
|
bestBlobTotal = unusedBlob.total();
|
||||||
{
|
|
||||||
bestBlobPin = hostIt->first;
|
|
||||||
bestBlob = unusedBlob;
|
|
||||||
bestBlobTotal = unusedBlob.total();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -449,6 +446,7 @@ public:
|
|||||||
umat_dst.create(shape, CV_32F);
|
umat_dst.create(shape, CV_32F);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
#endif // REUSE_DNN_MEMORY
|
||||||
{
|
{
|
||||||
// if dst already has been allocated with total(shape) elements,
|
// if dst already has been allocated with total(shape) elements,
|
||||||
// it won't be recreated and the pointer dst.data remains the same.
|
// it won't be recreated and the pointer dst.data remains the same.
|
||||||
@ -458,8 +456,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
|
void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
|
||||||
std::vector<LayerPin>& pinsForInternalBlobs,
|
std::vector<LayerPin>& pinsForInternalBlobs)
|
||||||
bool maximizeReuse)
|
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
|
bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
|
||||||
@ -530,7 +527,6 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::map<int, std::vector<int> >::reverse_iterator it;
|
std::map<int, std::vector<int> >::reverse_iterator it;
|
||||||
bool force = !maximizeReuse && ld.inputBlobsId.size() > 1;
|
|
||||||
for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
|
for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
|
||||||
{
|
{
|
||||||
for(int j = 0; j < it->second.size(); j++)
|
for(int j = 0; j < it->second.size(); j++)
|
||||||
@ -539,7 +535,7 @@ public:
|
|||||||
if (total(shapes[index]))
|
if (total(shapes[index]))
|
||||||
{
|
{
|
||||||
LayerPin blobPin(ld.id, index);
|
LayerPin blobPin(ld.id, index);
|
||||||
if (index < outShapes.size() && inPlace && !force)
|
if (index < outShapes.size() && inPlace)
|
||||||
{
|
{
|
||||||
if (use_umat)
|
if (use_umat)
|
||||||
{
|
{
|
||||||
@ -558,9 +554,9 @@ public:
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (use_umat)
|
if (use_umat)
|
||||||
reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force);
|
reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
|
||||||
else
|
else
|
||||||
reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
|
reuseOrCreate(shapes[index], blobPin, *blobs[index]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1111,8 +1107,7 @@ struct Net::Impl
|
|||||||
CV_Assert(layerShapesIt != layersShapes.end());
|
CV_Assert(layerShapesIt != layersShapes.end());
|
||||||
|
|
||||||
std::vector<LayerPin> pinsForInternalBlobs;
|
std::vector<LayerPin> pinsForInternalBlobs;
|
||||||
bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
|
blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
|
||||||
blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
|
|
||||||
ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
|
ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
|
||||||
for (int i = 0; i < ld.outputBlobs.size(); ++i)
|
for (int i = 0; i < ld.outputBlobs.size(); ++i)
|
||||||
{
|
{
|
||||||
@ -1415,6 +1410,9 @@ struct Net::Impl
|
|||||||
|
|
||||||
if( i >= ninputs )
|
if( i >= ninputs )
|
||||||
{
|
{
|
||||||
|
// Allocate new memory to prevent collisions during memory
|
||||||
|
// reusing (see https://github.com/opencv/opencv/pull/10456).
|
||||||
|
output = output.clone();
|
||||||
Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
|
Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
|
||||||
int ofs = 0;
|
int ofs = 0;
|
||||||
for( i = 0; i < ninputs; i++ )
|
for( i = 0; i < ninputs; i++ )
|
||||||
|
Loading…
Reference in New Issue
Block a user