Merge pull request #16956 from alalek:ocl_control_buffer_mapping

This commit is contained in:
Alexander Alekhin 2020-04-02 17:56:50 +00:00
commit 388a7ce86e
2 changed files with 155 additions and 1 deletions

View File

@ -146,6 +146,141 @@ OCL_PERF_TEST_P(CopyToFixture, CopyToWithMaskUninit,
SANITY_CHECK(dst);
}
// Selects which part of the full buffer a benchmark maps.
// getROI() translates each value into a rectangle anchored at (0, 0).
enum ROIType {
    ROI_FULL   = 0,  // the whole buffer
    ROI_2_RECT = 1,  // about half of the area: both sides scaled by 71/100
    ROI_2_TOP  = 2,  // half of the area: full width, half height (contiguous memory block)
    ROI_2_LEFT = 3,  // half of the area: half width, full height
    ROI_4      = 4,  // quarter of the area: half width, half height
    ROI_16     = 5   // 1/16 of the area: quarter width, quarter height
};
// Maps a ROIType onto a rectangle inside a buffer of size `sz`.
// Every rectangle starts at the (0, 0) corner; only its extent depends on `t`.
// A value that matches none of the known ROI kinds ends in CV_Assert(false).
static Rect getROI(enum ROIType t, const Size& sz)
{
    const int fullWidth = sz.width;
    const int fullHeight = sz.height;

    if (t == ROI_FULL)
        return Rect(0, 0, fullWidth, fullHeight);
    if (t == ROI_2_RECT)
        return Rect(0, 0, fullWidth * 71 / 100, fullHeight * 71 / 100);  // 71 = sqrt(1/2) * 100, so the area is ~1/2
    if (t == ROI_2_TOP)
        return Rect(0, 0, fullWidth, fullHeight / 2);  // full-width rows, half of them
    if (t == ROI_2_LEFT)
        return Rect(0, 0, fullWidth / 2, fullHeight);  // every row, half of its width
    if (t == ROI_4)
        return Rect(0, 0, fullWidth / 2, fullHeight / 2);
    if (t == ROI_16)
        return Rect(0, 0, fullWidth / 4, fullHeight / 4);

    CV_Assert(false);
}
// Perf-test fixture parameterized by (buffer size, matrix type, ROI kind).
typedef TestBaseWithParam< tuple<cv::Size, MatType, ROIType> > OpenCLBuffer;
// Prints a test parameter tuple as "(<size>, <type name>, <ROI label>)".
// The label prefix is the nominal share of the buffer, so labels sort by covered area.
// NOTE(review): ROI_16 is 1/16 of the area in getROI(), yet its label starts with "012";
// the text is left untouched here because it is part of the emitted parameter description.
static inline void PrintTo(const tuple<cv::Size, MatType, enum ROIType>& v, std::ostream* os)
{
    *os << "(" << get<0>(v) << ", " << typeToString(get<1>(v)) << ", ";

    // Anything that is not one of the explicitly listed kinds falls back to the 1/16 label.
    const char* roiLabel = "ROI_012_1/16";
    switch (get<2>(v))
    {
    case ROI_FULL:   roiLabel = "ROI_100_FULL"; break;
    case ROI_2_RECT: roiLabel = "ROI_050_RECT_HALF"; break;
    case ROI_2_TOP:  roiLabel = "ROI_050_TOP_HALF"; break;
    case ROI_2_LEFT: roiLabel = "ROI_050_LEFT_HALF"; break;
    case ROI_4:      roiLabel = "ROI_025_1/4"; break;
    default:         break;
    }

    *os << roiLabel;
    *os << ")";
}
// Benchmarks CPU write access to a UMat region: each cycle maps the ROI with
// ACCESS_WRITE and fills it with a constant via setTo().
PERF_TEST_P_(OpenCLBuffer, cpu_write)
{
    // Test parameters: (buffer size, matrix type, ROI kind).
    const tuple<cv::Size, MatType, ROIType>& params = GetParam();
    const Size fullSize = get<0>(params);
    const int matType = get<1>(params);
    const Rect roiRect = getROI(get<2>(params), fullSize);

    checkDeviceMaxMemoryAllocSize(fullSize, matType);

    UMat buffer(fullSize, matType);
    declare.in(buffer(roiRect), WARMUP_NONE);

    OCL_TEST_CYCLE()
    {
        // The Mat lives only for one cycle, so it is created and destroyed inside the timed body.
        Mat mapped = buffer(roiRect).getMat(ACCESS_WRITE);
        mapped.setTo(Scalar(1, 2, 3, 4));
    }

    SANITY_CHECK_NOTHING();
}
// Benchmarks CPU read access to a UMat region: each cycle maps the ROI with
// ACCESS_READ and sums every byte of every row.
PERF_TEST_P_(OpenCLBuffer, cpu_read)
{
    // Test parameters: (buffer size, matrix type, ROI kind).
    const Size srcSize = get<0>(GetParam());
    const int type = get<1>(GetParam());
    const Rect roi = getROI(get<2>(GetParam()), srcSize);

    checkDeviceMaxMemoryAllocSize(srcSize, type);

    UMat src(srcSize, type, Scalar(1, 2, 3, 4));
    declare.in(src(roi), WARMUP_NONE);

    // Volatile sink for the per-cycle byte sum. Without an observable use of the sum
    // the optimizer is allowed to drop the whole read loop, and the benchmark would
    // then time only the map/unmap calls.
    volatile unsigned checksum = 0;

    OCL_TEST_CYCLE()
    {
        unsigned counter = 0;
        Mat m = src(roi).getMat(ACCESS_READ);
        for (int y = 0; y < m.rows; y++)
        {
            const uchar* ptr = m.ptr(y);  // read-only access to the mapped row
            size_t width_bytes = m.cols * m.elemSize();
            for (size_t x_bytes = 0; x_bytes < width_bytes; x_bytes++)
                counter += (unsigned)(ptr[x_bytes]);
        }
        checksum = counter;  // one volatile store per cycle keeps the reads above alive
    }
    (void)checksum;  // the value itself is irrelevant; this only marks the sink as used

    SANITY_CHECK_NOTHING();
}
// Benchmarks CPU read-modify-write access to a UMat region: each cycle maps the ROI
// with ACCESS_READ | ACCESS_WRITE and increments every byte of every row.
PERF_TEST_P_(OpenCLBuffer, cpu_update)
{
    // Test parameters: (buffer size, matrix type, ROI kind).
    const tuple<cv::Size, MatType, ROIType>& params = GetParam();
    const Size fullSize = get<0>(params);
    const int matType = get<1>(params);
    const Rect roiRect = getROI(get<2>(params), fullSize);

    checkDeviceMaxMemoryAllocSize(fullSize, matType);

    UMat buffer(fullSize, matType, Scalar(1, 2, 3, 4));
    declare.in(buffer(roiRect), WARMUP_NONE);

    OCL_TEST_CYCLE()
    {
        Mat mapped = buffer(roiRect).getMat(ACCESS_READ | ACCESS_WRITE);
        for (int row = 0; row < mapped.rows; row++)
        {
            uchar* rowData = mapped.ptr(row);
            size_t rowBytes = mapped.cols * mapped.elemSize();
            for (size_t i = 0; i < rowBytes; i++)
                rowData[i] += 1;
        }
    }

    SANITY_CHECK_NOTHING();
}
// Whole-buffer runs: four frame sizes x 8-bit types with 1..4 channels, ROI_FULL only.
// The instantiation prefix is left empty; "FULL" appears only as a comment.
INSTANTIATE_TEST_CASE_P(/*FULL*/, OpenCLBuffer,
testing::Combine(
testing::Values(szVGA, sz720p, sz1080p, sz2160p),
testing::Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4),
testing::Values(ROI_FULL)
)
);
// ROI sweep: the two largest frame sizes, single-channel 8-bit data, every ROI kind
// listed from the smallest region (ROI_16) up to the whole buffer (ROI_FULL).
INSTANTIATE_TEST_CASE_P(ROI, OpenCLBuffer,
testing::Combine(
testing::Values(sz1080p, sz2160p),
testing::Values(CV_8UC1),
testing::Values(ROI_16, ROI_4, ROI_2_RECT, ROI_2_LEFT, ROI_2_TOP, ROI_FULL)
)
);
} } // namespace opencv_test::ocl
#endif // HAVE_OPENCL

View File

@ -4607,6 +4607,17 @@ public:
return u;
}
// Reports whether use of the clEnqueueMapBuffer / clEnqueueUnmapMemObject OpenCL API is forced.
// Driven by the OPENCV_OPENCL_BUFFER_FORCE_MAPPING configuration parameter (false is passed
// as the fallback value). The parameter is queried on the first call and the result is reused.
static bool isOpenCLMapForced()
{
    static const bool forceMapping =
        cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_MAPPING", false);
    return forceMapping;
}
// Reports whether use of the clEnqueueReadBuffer[Rect] / clEnqueueWriteBuffer[Rect] OpenCL API
// is forced. Driven by the OPENCV_OPENCL_BUFFER_FORCE_COPYING configuration parameter (false is
// passed as the fallback value). The parameter is queried on the first call and the result is reused.
static bool isOpenCLCopyingForced()
{
    static const bool forceCopying =
        cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_COPYING", false);
    return forceCopying;
}
void getBestFlags(const Context& ctx, int /*flags*/, UMatUsageFlags usageFlags, int& createFlags, int& flags0) const
{
const Device& dev = ctx.device(0);
@ -4614,7 +4625,15 @@ public:
if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
createFlags |= CL_MEM_ALLOC_HOST_PTR;
if( dev.hostUnifiedMemory() )
if (!isOpenCLCopyingForced() &&
(isOpenCLMapForced() ||
(dev.hostUnifiedMemory()
#ifndef __APPLE__
|| dev.isIntel()
#endif
)
)
)
flags0 = 0;
else
flags0 = UMatData::COPY_ON_MAP;