UMat usageFlags fixes opencv/opencv#19807

- corrects code to support non-USAGE_DEFAULT settings
- adds accuracy, regression, and perf test cases
- not tested on the 3.x branch
This commit is contained in:
Dale Phurrough 2021-05-04 22:29:15 +02:00
parent 0e8431d17b
commit c2ce3d927a
No known key found for this signature in database
GPG Key ID: E53384A29713D41F
5 changed files with 79 additions and 26 deletions

View File

@ -2572,27 +2572,38 @@ public:
- number of channels
*/
int flags;
//! the matrix dimensionality, >= 2
int dims;
//! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
int rows, cols;
//! number of rows in the matrix; -1 when the matrix has more than 2 dimensions
int rows;
//! number of columns in the matrix; -1 when the matrix has more than 2 dimensions
int cols;
//! custom allocator
MatAllocator* allocator;
UMatUsageFlags usageFlags; // usage flags for allocator
//! usage flags for allocator; setting this field directly is not recommended -- set it via the constructor, create(), or getUMat() instead
UMatUsageFlags usageFlags;
//! and the standard allocator
static MatAllocator* getStdAllocator();
//! internal use method: updates the continuity flag
void updateContinuityFlag();
// black-box container of UMat data
//! black-box container of UMat data
UMatData* u;
// offset of the submatrix (or 0)
//! offset of the submatrix (or 0)
size_t offset;
//! dimensional size of the matrix; accessible in various formats
MatSize size;
//! number of bytes each matrix element/row/plane/dimension occupies
MatStep step;
protected:

View File

@ -12,25 +12,33 @@
namespace opencv_test {
namespace ocl {
typedef TestBaseWithParam<tuple<cv::Size, bool> > UsageFlagsBoolFixture;
typedef TestBaseWithParam<tuple<cv::Size, UMatUsageFlags, UMatUsageFlags, UMatUsageFlags>> SizeUsageFlagsFixture;
OCL_PERF_TEST_P(UsageFlagsBoolFixture, UsageFlags_AllocHostMem, ::testing::Combine(OCL_TEST_SIZES, Bool()))
OCL_PERF_TEST_P(SizeUsageFlagsFixture, UsageFlags_AllocMem,
::testing::Combine(
OCL_TEST_SIZES,
testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY), // USAGE_ALLOCATE_SHARED_MEMORY
testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY), // USAGE_ALLOCATE_SHARED_MEMORY
testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY) // USAGE_ALLOCATE_SHARED_MEMORY
))
{
Size sz = get<0>(GetParam());
bool allocHostMem = get<1>(GetParam());
UMatUsageFlags srcAllocMem = get<1>(GetParam());
UMatUsageFlags dstAllocMem = get<2>(GetParam());
UMatUsageFlags finalAllocMem = get<3>(GetParam());
UMat src(sz, CV_8UC1, Scalar::all(128));
UMat src(sz, CV_8UC1, Scalar::all(128), srcAllocMem);
OCL_TEST_CYCLE()
{
UMat dst(allocHostMem ? USAGE_ALLOCATE_HOST_MEMORY : USAGE_DEFAULT);
UMat dst(dstAllocMem);
cv::add(src, Scalar::all(1), dst);
{
Mat canvas = dst.getMat(ACCESS_RW);
cv::putText(canvas, "Test", Point(20, 20), FONT_HERSHEY_PLAIN, 1, Scalar::all(255));
}
UMat final;
UMat final(finalAllocMem);
cv::subtract(dst, Scalar::all(1), final);
}

View File

@ -5518,13 +5518,19 @@ public:
&& !(u->originalUMatData && u->originalUMatData->handle)
)
{
handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
// Change the host-side origdata[size] to "pinned memory" that enables fast
// DMA-transfers over PCIe to the device. Often used with clEnqueueMapBuffer/clEnqueueUnmapMemObject
handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|(createFlags & ~CL_MEM_ALLOC_HOST_PTR),
u->size, u->origdata, &retval);
CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|createFlags, sz=%lld, origdata=%p) => %p",
CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|(createFlags & ~CL_MEM_ALLOC_HOST_PTR), sz=%lld, origdata=%p) => %p",
(long long int)u->size, u->origdata, (void*)handle).c_str());
}
if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
{
// Allocate device-side memory and immediately copy data from the host-side pointer origdata[size].
// If createFlags=CL_MEM_ALLOC_HOST_PTR (aka cv::USAGE_ALLOCATE_HOST_MEMORY), then
// additionally allocate a host-side "pinned" duplicate of the origdata that is
// managed by OpenCL. This is potentially faster in unaligned/unmanaged scenarios.
handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
u->size, u->origdata, &retval);
CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",

View File

@ -307,8 +307,7 @@ UMat& UMat::operator=(const UMat& m)
else
copySize(m);
allocator = m.allocator;
if (usageFlags == USAGE_DEFAULT)
usageFlags = m.usageFlags;
usageFlags = m.usageFlags;
u = m.u;
offset = m.offset;
}
@ -332,9 +331,6 @@ void UMat::assignTo(UMat& m, int _type) const
/** @brief Allocates (or re-uses) storage for a 2D matrix.

Thin convenience wrapper: packs the two extents into a size array and forwards
to the n-dimensional create() overload.

@param _rows number of rows
@param _cols number of columns
@param _type element type; forwarded as-is, the n-dimensional overload reduces it with CV_MAT_TYPE()
@param _usageFlags requested allocator usage flags, forwarded unchanged
*/
void UMat::create(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags)
{
    // Deliberately no "same shape and type -> return" shortcut here: such a check
    // does not look at _usageFlags, so a request for different usage flags would be
    // silently ignored. The n-dimensional overload performs the re-use check itself,
    // taking the usage flags into account.
    int sz[] = {_rows, _cols};
    create(2, sz, _type, _usageFlags);
}
@ -426,7 +422,9 @@ UMat& UMat::operator=(UMat&& m)
m.step.p = m.step.buf;
m.size.p = &m.rows;
}
m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0;
m.flags = MAGIC_VAL;
m.usageFlags = USAGE_DEFAULT;
m.dims = m.rows = m.cols = 0;
m.allocator = NULL;
m.u = NULL;
m.offset = 0;
@ -600,6 +598,7 @@ UMat Mat::getUMat(AccessFlag accessFlags, UMatUsageFlags usageFlags) const
CV_XADD(&(u->urefcount), 1);
}
hdr.flags = flags;
hdr.usageFlags = usageFlags;
setSize(hdr, dims, size.p, step.p);
finalizeHdr(hdr);
hdr.u = new_u;
@ -610,16 +609,21 @@ UMat Mat::getUMat(AccessFlag accessFlags, UMatUsageFlags usageFlags) const
void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlags)
{
this->usageFlags = _usageFlags;
int i;
CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
_type = CV_MAT_TYPE(_type);
if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
// If _usageFlags is USAGE_DEFAULT (whether passed explicitly or through the default argument),
// keep the existing usageFlags unchanged.
// Consequently create() cannot switch a UMat from a non-default usage back to USAGE_DEFAULT;
// construct a new UMat() for that instead.
if (_usageFlags == cv::USAGE_DEFAULT)
{
_usageFlags = usageFlags;
}
if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() && _usageFlags == usageFlags )
{
if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
return;
for( i = 0; i < d; i++ )
if( size[i] != _sizes[i] )
break;
@ -636,6 +640,7 @@ void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlag
}
release();
usageFlags = _usageFlags;
if( d == 0 )
return;
flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL;

View File

@ -207,9 +207,32 @@ TEST_P(OCL_OpenCLExecutionContext_P, ScopeTest)
executeUMatCall();
}
INSTANTIATE_TEST_CASE_P(/*nothing*/, OCL_OpenCLExecutionContext_P, getOpenCLTestConfigurations());
// Value-parameterized fixture: every instance receives a single UMatUsageFlags value.
using UsageFlagsFixture = testing::TestWithParam<UMatUsageFlags>;

// Verifies that a UMat still reports the usage flags it was constructed with
// after being used as the source / destination of cv::flip.
OCL_TEST_P(UsageFlagsFixture, UsageFlagsRetained)
{
    // Nothing to verify when OpenCL is not in use.
    if (!cv::ocl::useOpenCL())
        throw SkipTestException("OpenCL is not available / disabled");

    const UMatUsageFlags usage = GetParam();

    // The source is allocated up front; the destination starts empty and only carries the flags.
    cv::UMat flip_in(10, 10, CV_32F, usage);
    cv::UMat flip_out(usage);

    cv::flip(flip_in, flip_out, 1);
    cv::ocl::finish();

    // Both the input and the output must still report the requested flags.
    ASSERT_EQ(usage, flip_in.usageFlags);
    ASSERT_EQ(usage, flip_out.usageFlags);
}

INSTANTIATE_TEST_CASE_P(/*nothing*/, UsageFlagsFixture,
                        testing::Values(USAGE_DEFAULT,
                                        USAGE_ALLOCATE_HOST_MEMORY,
                                        USAGE_ALLOCATE_DEVICE_MEMORY));
} } // namespace opencv_test::ocl