mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge pull request #16608 from vpisarev:fix_mac_ocl_tests
* fixed several problems when running tests on Mac: * OCL_pyrUp * OCL_flip * some basic UMat tests * histogram badarg test (out of range access) * retained the storepix fix in ocl_flip only for 16U/16S datatype, where the OpenCL compiler on Mac generates incorrect code * moved deletion of ACCESS_FAST flag to non-SVM branch (where SVM is shared virtual memory (in OpenCL 2.x), not support vector machine) * force OpenCL to use read/write for GPU<=>CPU memory transfers on machines with discrete video only on Macs. On Windows/Linux the drivers are seemingly smart enough to implement map/unmap properly (and maybe more efficiently than explicit read/write)
This commit is contained in:
parent
150c29356a
commit
07b475062f
@ -916,9 +916,9 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
|
|||||||
kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn;
|
kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn;
|
||||||
|
|
||||||
ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
|
ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
|
||||||
format( "-D T=%s -D T1=%s -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d",
|
format( "-D T=%s -D T1=%s -D DEPTH=%d -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d",
|
||||||
kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)),
|
kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)),
|
||||||
kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), cn, pxPerWIy, kercn));
|
kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), depth, cn, pxPerWIy, kercn));
|
||||||
if (k.empty())
|
if (k.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -4705,6 +4705,8 @@ public:
|
|||||||
int createFlags = 0, flags0 = 0;
|
int createFlags = 0, flags0 = 0;
|
||||||
getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
|
getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
|
||||||
|
|
||||||
|
bool copyOnMap = (flags0 & UMatData::COPY_ON_MAP) != 0;
|
||||||
|
|
||||||
cl_context ctx_handle = (cl_context)ctx.ptr();
|
cl_context ctx_handle = (cl_context)ctx.ptr();
|
||||||
int allocatorFlags = 0;
|
int allocatorFlags = 0;
|
||||||
int tempUMatFlags = 0;
|
int tempUMatFlags = 0;
|
||||||
@ -4764,8 +4766,15 @@ public:
|
|||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
|
if( copyOnMap )
|
||||||
|
accessFlags &= ~ACCESS_FAST;
|
||||||
|
|
||||||
tempUMatFlags = UMatData::TEMP_UMAT;
|
tempUMatFlags = UMatData::TEMP_UMAT;
|
||||||
if (CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
|
if (
|
||||||
|
#ifdef __APPLE__
|
||||||
|
!copyOnMap &&
|
||||||
|
#endif
|
||||||
|
CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
|
||||||
// There are OpenCL runtime issues for less aligned data
|
// There are OpenCL runtime issues for less aligned data
|
||||||
&& (CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR != 0
|
&& (CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR != 0
|
||||||
&& u->origdata == cv::alignPtr(u->origdata, (int)CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR))
|
&& u->origdata == cv::alignPtr(u->origdata, (int)CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR))
|
||||||
@ -4793,7 +4802,7 @@ public:
|
|||||||
u->handle = handle;
|
u->handle = handle;
|
||||||
u->prevAllocator = u->currAllocator;
|
u->prevAllocator = u->currAllocator;
|
||||||
u->currAllocator = this;
|
u->currAllocator = this;
|
||||||
u->flags |= tempUMatFlags;
|
u->flags |= tempUMatFlags | flags0;
|
||||||
u->allocatorFlags_ = allocatorFlags;
|
u->allocatorFlags_ = allocatorFlags;
|
||||||
}
|
}
|
||||||
if(accessFlags & ACCESS_WRITE)
|
if(accessFlags & ACCESS_WRITE)
|
||||||
|
@ -42,10 +42,25 @@
|
|||||||
#if kercn != 3
|
#if kercn != 3
|
||||||
#define loadpix(addr) *(__global const T *)(addr)
|
#define loadpix(addr) *(__global const T *)(addr)
|
||||||
#define storepix(val, addr) *(__global T *)(addr) = val
|
#define storepix(val, addr) *(__global T *)(addr) = val
|
||||||
|
#define storepix_2(val0, val1, addr0, addr1) \
|
||||||
|
*(__global T *)(addr0) = val0; *(__global T *)(addr1) = val1
|
||||||
#define TSIZE (int)sizeof(T)
|
#define TSIZE (int)sizeof(T)
|
||||||
#else
|
#else
|
||||||
#define loadpix(addr) vload3(0, (__global const T1 *)(addr))
|
#define loadpix(addr) vload3(0, (__global const T1 *)(addr))
|
||||||
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
|
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
|
||||||
|
#if DEPTH == 2 || DEPTH == 3
|
||||||
|
#define storepix_2(val0, val1, addr0, addr1) \
|
||||||
|
((__global T1 *)(addr0))[0] = val0.x; \
|
||||||
|
((__global T1 *)(addr1))[0] = val1.x; \
|
||||||
|
((__global T1 *)(addr0))[1] = val0.y; \
|
||||||
|
((__global T1 *)(addr1))[1] = val1.y; \
|
||||||
|
((__global T1 *)(addr0))[2] = val0.z; \
|
||||||
|
((__global T1 *)(addr1))[2] = val1.z
|
||||||
|
#else
|
||||||
|
#define storepix_2(val0, val1, addr0, addr1) \
|
||||||
|
storepix(val0, addr0); \
|
||||||
|
storepix(val1, addr1)
|
||||||
|
#endif
|
||||||
#define TSIZE ((int)sizeof(T1)*3)
|
#define TSIZE ((int)sizeof(T1)*3)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -69,8 +84,7 @@ __kernel void arithm_flip_rows(__global const uchar * srcptr, int src_step, int
|
|||||||
T src0 = loadpix(srcptr + src_index0);
|
T src0 = loadpix(srcptr + src_index0);
|
||||||
T src1 = loadpix(srcptr + src_index1);
|
T src1 = loadpix(srcptr + src_index1);
|
||||||
|
|
||||||
storepix(src1, dstptr + dst_index0);
|
storepix_2(src1, src0, dstptr + dst_index0, dstptr + dst_index1);
|
||||||
storepix(src0, dstptr + dst_index1);
|
|
||||||
|
|
||||||
src_index0 += src_step;
|
src_index0 += src_step;
|
||||||
src_index1 -= src_step;
|
src_index1 -= src_step;
|
||||||
@ -115,8 +129,7 @@ __kernel void arithm_flip_rows_cols(__global const uchar * srcptr, int src_step,
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
storepix(src1, dstptr + dst_index0);
|
storepix_2(src1, src0, dstptr + dst_index0, dstptr + dst_index1);
|
||||||
storepix(src0, dstptr + dst_index1);
|
|
||||||
|
|
||||||
src_index0 += src_step;
|
src_index0 += src_step;
|
||||||
src_index1 -= src_step;
|
src_index1 -= src_step;
|
||||||
@ -161,8 +174,7 @@ __kernel void arithm_flip_cols(__global const uchar * srcptr, int src_step, int
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
storepix(src1, dstptr + dst_index0);
|
storepix_2(src1, src0, dstptr + dst_index0, dstptr + dst_index1);
|
||||||
storepix(src0, dstptr + dst_index1);
|
|
||||||
|
|
||||||
src_index0 += src_step;
|
src_index0 += src_step;
|
||||||
src_index1 += src_step;
|
src_index1 += src_step;
|
||||||
|
@ -1078,7 +1078,7 @@ static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int
|
|||||||
UMat dst = _dst.getUMat();
|
UMat dst = _dst.getUMat();
|
||||||
|
|
||||||
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
|
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
|
||||||
const int local_size = 16;
|
const int local_size = channels == 1 ? 16 : 8;
|
||||||
char cvt[2][50];
|
char cvt[2][50];
|
||||||
String buildOptions = format(
|
String buildOptions = format(
|
||||||
"-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
|
"-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
|
||||||
@ -1092,22 +1092,17 @@ static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int
|
|||||||
size_t globalThreads[2] = { (size_t)dst.cols, (size_t)dst.rows };
|
size_t globalThreads[2] = { (size_t)dst.cols, (size_t)dst.rows };
|
||||||
size_t localThreads[2] = { (size_t)local_size, (size_t)local_size };
|
size_t localThreads[2] = { (size_t)local_size, (size_t)local_size };
|
||||||
ocl::Kernel k;
|
ocl::Kernel k;
|
||||||
if (ocl::Device::getDefault().isIntel() && channels == 1)
|
if (type == CV_8UC1 && src.cols % 2 == 0)
|
||||||
{
|
{
|
||||||
if (type == CV_8UC1 && src.cols % 2 == 0)
|
buildOptions.clear();
|
||||||
{
|
k.create("pyrUp_cols2", ocl::imgproc::pyramid_up_oclsrc, buildOptions);
|
||||||
buildOptions.clear();
|
globalThreads[0] = dst.cols/4; globalThreads[1] = dst.rows/2;
|
||||||
k.create("pyrUp_cols2", ocl::imgproc::pyramid_up_oclsrc, buildOptions);
|
|
||||||
globalThreads[0] = dst.cols/4; globalThreads[1] = dst.rows/2;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
k.create("pyrUp_unrolled", ocl::imgproc::pyr_up_oclsrc, buildOptions);
|
|
||||||
globalThreads[0] = dst.cols/2; globalThreads[1] = dst.rows/2;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
k.create("pyrUp", ocl::imgproc::pyr_up_oclsrc, buildOptions);
|
{
|
||||||
|
k.create("pyrUp_unrolled", ocl::imgproc::pyr_up_oclsrc, buildOptions);
|
||||||
|
globalThreads[0] = dst.cols/2; globalThreads[1] = dst.rows/2;
|
||||||
|
}
|
||||||
|
|
||||||
if (k.empty())
|
if (k.empty())
|
||||||
return false;
|
return false;
|
||||||
|
@ -1966,7 +1966,7 @@ TEST(Imgproc_Hist_Calc, badarg)
|
|||||||
Mat img = cv::Mat::zeros(10, 10, CV_8UC1);
|
Mat img = cv::Mat::zeros(10, 10, CV_8UC1);
|
||||||
Mat imgInt = cv::Mat::zeros(10, 10, CV_32SC1);
|
Mat imgInt = cv::Mat::zeros(10, 10, CV_32SC1);
|
||||||
Mat hist;
|
Mat hist;
|
||||||
const int hist_size[] = { 100 };
|
const int hist_size[] = { 100, 100 };
|
||||||
// base run
|
// base run
|
||||||
EXPECT_NO_THROW(cv::calcHist(&img, 1, channels, noArray(), hist, 1, hist_size, ranges, true));
|
EXPECT_NO_THROW(cv::calcHist(&img, 1, channels, noArray(), hist, 1, hist_size, ranges, true));
|
||||||
// bad parameters
|
// bad parameters
|
||||||
|
Loading…
Reference in New Issue
Block a user