mirror of
https://github.com/opencv/opencv.git
synced 2024-11-28 13:10:12 +08:00
Merge pull request #2470 from ilya-lavrenov:tapi_reduction
This commit is contained in:
commit
c5bf1c108c
@ -344,7 +344,7 @@ OCL_PERF_TEST_P(FlipFixture, Flip,
|
||||
typedef Size_MatType MinMaxLocFixture;
|
||||
|
||||
OCL_PERF_TEST_P(MinMaxLocFixture, MinMaxLoc,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES_134))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
@ -380,7 +380,7 @@ typedef Size_MatType SumFixture;
|
||||
|
||||
OCL_PERF_TEST_P(SumFixture, Sum,
|
||||
::testing::Combine(OCL_TEST_SIZES,
|
||||
OCL_TEST_TYPES))
|
||||
OCL_TEST_TYPES_134))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
@ -652,7 +652,8 @@ OCL_PERF_TEST_P(SetIdentityFixture, SetIdentity,
|
||||
typedef Size_MatType MeanStdDevFixture;
|
||||
|
||||
OCL_PERF_TEST_P(MeanStdDevFixture, MeanStdDev,
|
||||
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES))
|
||||
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3),
|
||||
OCL_TEST_TYPES_134))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
@ -688,7 +689,8 @@ typedef std::tr1::tuple<Size, MatType, NormType> NormParams;
|
||||
typedef TestBaseWithParam<NormParams> NormFixture;
|
||||
|
||||
OCL_PERF_TEST_P(NormFixture, Norm,
|
||||
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES, NormType::all()))
|
||||
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3),
|
||||
OCL_TEST_TYPES_134, NormType::all()))
|
||||
{
|
||||
const NormParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
@ -711,7 +713,8 @@ OCL_PERF_TEST_P(NormFixture, Norm,
|
||||
typedef Size_MatType UMatDotFixture;
|
||||
|
||||
OCL_PERF_TEST_P(UMatDotFixture, UMatDot,
|
||||
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES))
|
||||
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3),
|
||||
OCL_TEST_TYPES_134))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
@ -820,7 +823,8 @@ typedef tuple<Size, MatType, NormalizeModes> NormalizeParams;
|
||||
typedef TestBaseWithParam<NormalizeParams> NormalizeFixture;
|
||||
|
||||
OCL_PERF_TEST_P(NormalizeFixture, Normalize,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES, NormalizeModes::all()))
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES_134,
|
||||
NormalizeModes::all()))
|
||||
{
|
||||
const NormalizeParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
|
@ -52,6 +52,18 @@
|
||||
|
||||
#define noconvert
|
||||
|
||||
#if cn != 3
|
||||
#define loadpix(addr) *(__global const srcT *)(addr)
|
||||
#define storepix(val, addr) *(__global dstT *)(addr) = val
|
||||
#define srcTSIZE (int)sizeof(srcT)
|
||||
#define dstTSIZE (int)sizeof(dstT)
|
||||
#else
|
||||
#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
|
||||
#define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
|
||||
#define srcTSIZE ((int)sizeof(srcT1)*3)
|
||||
#define dstTSIZE ((int)sizeof(dstT1)*3)
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MASK
|
||||
#define EXTRA_PARAMS , __global const uchar * mask, int mask_step, int mask_offset
|
||||
#else
|
||||
@ -88,19 +100,20 @@
|
||||
|
||||
#ifdef HAVE_MASK
|
||||
#define REDUCE_GLOBAL \
|
||||
dstT temp = convertToDT(src[0]); \
|
||||
int mask_index = mad24(id / cols, mask_step, mask_offset + (id % cols)); \
|
||||
if (mask[mask_index]) \
|
||||
FUNC(accumulator, temp)
|
||||
{ \
|
||||
dstT temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||
FUNC(accumulator, temp); \
|
||||
}
|
||||
#elif defined OP_DOT
|
||||
#define REDUCE_GLOBAL \
|
||||
int src2_index = mad24(id / cols, src2_step, mad24(id % cols, (int)sizeof(srcT), src2_offset)); \
|
||||
__global const srcT * src2 = (__global const srcT *)(src2ptr + src2_index); \
|
||||
dstT temp = convertToDT(src[0]), temp2 = convertToDT(src2[0]); \
|
||||
int src2_index = mad24(id / cols, src2_step, mad24(id % cols, srcTSIZE, src2_offset)); \
|
||||
dstT temp = convertToDT(loadpix(srcptr + src_index)), temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
|
||||
FUNC(accumulator, temp, temp2)
|
||||
#else
|
||||
#define REDUCE_GLOBAL \
|
||||
dstT temp = convertToDT(src[0]); \
|
||||
dstT temp = convertToDT(loadpix(srcptr + src_index)); \
|
||||
FUNC(accumulator, temp)
|
||||
#endif
|
||||
|
||||
@ -111,8 +124,7 @@
|
||||
#define REDUCE_LOCAL_2 \
|
||||
localmem[lid] += localmem[lid2]
|
||||
#define CALC_RESULT \
|
||||
__global dstT * dst = (__global dstT *)(dstptr + (int)sizeof(dstT) * gid); \
|
||||
dst[0] = localmem[0]
|
||||
storepix(localmem[0], dstptr + dstTSIZE * gid)
|
||||
|
||||
// countNonZero stuff
|
||||
#elif defined OP_COUNT_NON_ZERO
|
||||
@ -123,7 +135,7 @@
|
||||
dstT accumulator = (dstT)(0); \
|
||||
srcT zero = (srcT)(0), one = (srcT)(1)
|
||||
#define REDUCE_GLOBAL \
|
||||
accumulator += src[0] == zero ? zero : one
|
||||
accumulator += loadpix(srcptr + src_index) == zero ? zero : one
|
||||
#define SET_LOCAL_1 \
|
||||
localmem[lid] = accumulator
|
||||
#define REDUCE_LOCAL_1 \
|
||||
@ -131,8 +143,7 @@
|
||||
#define REDUCE_LOCAL_2 \
|
||||
localmem[lid] += localmem[lid2]
|
||||
#define CALC_RESULT \
|
||||
__global dstT * dst = (__global dstT *)(dstptr + (int)sizeof(dstT) * gid); \
|
||||
dst[0] = localmem[0]
|
||||
storepix(localmem[0], dstptr + dstTSIZE * gid)
|
||||
|
||||
// minMaxLoc stuff
|
||||
#elif defined OP_MIN_MAX_LOC || defined OP_MIN_MAX_LOC_MASK
|
||||
@ -167,6 +178,8 @@
|
||||
#define MAX_VAL DBL_MAX
|
||||
#endif
|
||||
|
||||
#define dstT srcT
|
||||
|
||||
#define DECLARE_LOCAL_MEM \
|
||||
__local srcT localmem_min[WGS2_ALIGNED]; \
|
||||
__local srcT localmem_max[WGS2_ALIGNED]; \
|
||||
@ -181,7 +194,7 @@
|
||||
srcT temp; \
|
||||
int temploc
|
||||
#define REDUCE_GLOBAL \
|
||||
temp = src[0]; \
|
||||
temp = loadpix(srcptr + src_index); \
|
||||
temploc = id; \
|
||||
srcT temp_minval = minval, temp_maxval = maxval; \
|
||||
minval = min(minval, temp); \
|
||||
@ -217,10 +230,8 @@
|
||||
localmem_maxloc[lid] : (max1 == max2) ? (max1 == oldmax) ? min(localmem_maxloc[lid2],localmem_maxloc[lid]) : \
|
||||
localmem_maxloc[lid2] : localmem_maxloc[lid]
|
||||
#define CALC_RESULT \
|
||||
__global srcT * dstminval = (__global srcT *)(dstptr + (int)sizeof(srcT) * gid); \
|
||||
__global srcT * dstmaxval = (__global srcT *)(dstptr2 + (int)sizeof(srcT) * gid); \
|
||||
dstminval[0] = localmem_min[0]; \
|
||||
dstmaxval[0] = localmem_max[0]; \
|
||||
storepix(localmem_min[0], dstptr + dstTSIZE * gid); \
|
||||
storepix(localmem_max[0], dstptr2 + dstTSIZE * gid); \
|
||||
dstlocptr[gid] = localmem_minloc[0]; \
|
||||
dstlocptr2[gid] = localmem_maxloc[0]
|
||||
|
||||
@ -236,7 +247,7 @@
|
||||
int temploc
|
||||
#undef REDUCE_GLOBAL
|
||||
#define REDUCE_GLOBAL \
|
||||
temp = src[0]; \
|
||||
temp = loadpix(srcptr + src_index); \
|
||||
temploc = id; \
|
||||
int mask_index = mad24(id / cols, mask_step, mask_offset + (id % cols) * (int)sizeof(uchar)); \
|
||||
__global const uchar * mask = (__global const uchar *)(maskptr + mask_index); \
|
||||
@ -278,8 +289,7 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
|
||||
|
||||
for (int grain = groupnum * WGS; id < total; id += grain)
|
||||
{
|
||||
int src_index = mad24(id / cols, src_step, mad24(id % cols, (int)sizeof(srcT), src_offset));
|
||||
__global const srcT * src = (__global const srcT *)(srcptr + src_index);
|
||||
int src_index = mad24(id / cols, src_step, mad24(id % cols, srcTSIZE, src_offset));
|
||||
REDUCE_GLOBAL;
|
||||
}
|
||||
|
||||
|
@ -475,7 +475,7 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
|
||||
if ( (!doubleSupport && depth == CV_64F) || cn > 4 || cn == 3 )
|
||||
if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
|
||||
return false;
|
||||
|
||||
int dbsize = ocl::Device::getDefault().maxComputeUnits();
|
||||
@ -494,8 +494,11 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
|
||||
static const char * const opMap[3] = { "OP_SUM", "OP_SUM_ABS", "OP_SUM_SQR" };
|
||||
char cvt[40];
|
||||
ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
|
||||
format("-D srcT=%s -D dstT=%s -D ddepth=%d -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
|
||||
ocl::typeToStr(type), ocl::typeToStr(dtype), ddepth, ocl::convertTypeStr(depth, ddepth, cn, cvt),
|
||||
format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
|
||||
" -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
|
||||
ocl::typeToStr(type), ocl::typeToStr(depth),
|
||||
ocl::typeToStr(dtype), ocl::typeToStr(ddepth), ddepth, cn,
|
||||
ocl::convertTypeStr(depth, ddepth, cn, cvt),
|
||||
opMap[sum_op], (int)wgs, wgs2_aligned,
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
|
||||
haveMask ? " -D HAVE_MASK" : ""));
|
||||
|
Loading…
Reference in New Issue
Block a user