Optimize OpenCL version of conversScaleAbs function

This commit is contained in:
vbystricky 2014-10-20 17:43:18 +04:00
parent 54b59c3cfb
commit a8aa6381d9
3 changed files with 20 additions and 6 deletions

View File

@ -618,7 +618,7 @@ CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noAr
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
OclVectorStrategy strat = OCL_VECTOR_DEFAULT); OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
CV_EXPORTS int checkOptimalVectorWidth(int *vectorWidths, CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths,
InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),

View File

@ -3275,13 +3275,26 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta ) static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta )
{ {
const ocl::Device & d = ocl::Device::getDefault(); const ocl::Device & d = ocl::Device::getDefault();
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
kercn = ocl::predictOptimalVectorWidth(_src, _dst), rowsPerWI = d.isIntel() ? 4 : 1;
bool doubleSupport = d.doubleFPConfig() > 0;
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
bool doubleSupport = d.doubleFPConfig() > 0;
if (!doubleSupport && depth == CV_64F) if (!doubleSupport && depth == CV_64F)
return false; return false;
_dst.create(_src.size(), CV_8UC(cn));
int kercn = 1;
if (d.isIntel())
{
static const int vectorWidths[] = {4, 4, 4, 4, 4, 4, 4, -1};
kercn = ocl::checkOptimalVectorWidth( vectorWidths, _src, _dst,
noArray(), noArray(), noArray(),
noArray(), noArray(), noArray(),
noArray(), ocl::OCL_VECTOR_MAX);
}
else
kercn = ocl::predictOptimalVectorWidthMax(_src, _dst);
int rowsPerWI = d.isIntel() ? 4 : 1;
char cvt[2][50]; char cvt[2][50];
int wdepth = std::max(depth, CV_32F); int wdepth = std::max(depth, CV_32F);
String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s" String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s"
@ -3299,7 +3312,6 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
return false; return false;
UMat src = _src.getUMat(); UMat src = _src.getUMat();
_dst.create(src.size(), CV_8UC(cn));
UMat dst = _dst.getUMat(); UMat dst = _dst.getUMat();
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),

View File

@ -4531,12 +4531,14 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat); return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
} }
int checkOptimalVectorWidth(int *vectorWidths, int checkOptimalVectorWidth(const int *vectorWidths,
InputArray src1, InputArray src2, InputArray src3, InputArray src1, InputArray src2, InputArray src3,
InputArray src4, InputArray src5, InputArray src6, InputArray src4, InputArray src5, InputArray src6,
InputArray src7, InputArray src8, InputArray src9, InputArray src7, InputArray src8, InputArray src9,
OclVectorStrategy strat) OclVectorStrategy strat)
{ {
CV_Assert(vectorWidths);
int ref_type = src1.type(); int ref_type = src1.type();
std::vector<size_t> offsets, steps, cols; std::vector<size_t> offsets, steps, cols;