vectorized ocl::threshold for single channel images

This commit is contained in:
Ilya Lavrenov 2013-10-30 19:02:51 +04:00
parent c70fbb95a3
commit 1d5f5d2364
3 changed files with 91 additions and 14 deletions

View File

@ -366,21 +366,23 @@ PERF_TEST_P(resizeFixture, resize,
///////////// threshold////////////////////////
CV_ENUM(ThreshType, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV)
CV_ENUM(ThreshType, THRESH_BINARY, THRESH_TOZERO_INV)
typedef tuple<Size, ThreshType> ThreshParams;
typedef tuple<Size, MatType, ThreshType> ThreshParams;
typedef TestBaseWithParam<ThreshParams> ThreshFixture;
PERF_TEST_P(ThreshFixture, threshold,
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC4, CV_32FC1),
ThreshType::all()))
{
const ThreshParams params = GetParam();
const Size srcSize = get<0>(params);
const int threshType = get<1>(params);
const int srcType = get<1>(params);
const int threshType = get<2>(params);
const double maxValue = 220.0, threshold = 50;
Mat src(srcSize, CV_8U), dst(srcSize, CV_8U);
Mat src(srcSize, srcType), dst(srcSize, srcType);
randu(src, 0, 100);
declare.in(src).out(dst);

View File

@ -118,22 +118,20 @@ namespace cv
static void threshold_runner(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType)
{
bool ival = src.depth() < CV_32F;
int cn = src.channels(), vecSize = 4, depth = src.depth();
std::vector<uchar> thresholdValue = scalarToVector(cv::Scalar::all(ival ? cvFloor(thresh) : thresh), dst.depth(),
dst.oclchannels(), dst.channels());
std::vector<uchar> maxValue = scalarToVector(cv::Scalar::all(maxVal), dst.depth(), dst.oclchannels(), dst.channels());
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
"THRESH_TOZERO", "THRESH_TOZERO_INV" };
const char * const channelMap[] = { "", "", "2", "4", "4" };
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
std::string buildOptions = format("-D T=%s%s -D %s", typeMap[src.depth()], channelMap[src.channels()],
thresholdMap[thresholdType]);
std::string buildOptions = format("-D T=%s%s -D %s", typeMap[depth], channelMap[cn], thresholdMap[thresholdType]);
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
int elemSize = src.elemSize();
int src_step = src.step / elemSize, src_offset = src.offset / elemSize;
int dst_step = dst.step / elemSize, dst_offset = dst.offset / elemSize;
vector< pair<size_t, const void *> > args;
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
@ -142,11 +140,32 @@ namespace cv
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(thresholdValue.size(), (void *)&thresholdValue[0]));
args.push_back( make_pair(maxValue.size(), (void *)&maxValue[0]));
int max_index = dst.cols, cols = dst.cols;
if (cn == 1 && vecSize > 1)
{
CV_Assert(((vecSize - 1) & vecSize) == 0 && vecSize <= 16);
cols = divUp(cols, vecSize);
buildOptions += format(" -D VECTORIZED -D VT=%s%d -D VLOADN=vload%d -D VECSIZE=%d -D VSTOREN=vstore%d",
typeMap[depth], vecSize, vecSize, vecSize, vecSize);
int vecSizeBytes = vecSize * dst.elemSize1();
if ((dst.offset % dst.step) % vecSizeBytes == 0 && dst.step % vecSizeBytes == 0)
buildOptions += " -D DST_ALIGNED";
if ((src.offset % src.step) % vecSizeBytes == 0 && src.step % vecSizeBytes == 0)
buildOptions += " -D SRC_ALIGNED";
args.push_back( make_pair(sizeof(cl_int), (void *)&max_index));
}
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { cols, dst.rows, 1 };
openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args,
-1, -1, buildOptions.c_str());
}

View File

@ -51,9 +51,63 @@
#endif
#endif
#ifdef VECTORIZED
__kernel void threshold(__global const T * restrict src, int src_offset, int src_step,
__global T * dst, int dst_offset, int dst_step,
int rows, int cols, T thresh, T max_val)
T thresh, T max_val, int max_index, int rows, int cols)
{
int gx = get_global_id(0);
int gy = get_global_id(1);
if (gx < cols && gy < rows)
{
gx *= VECSIZE;
int src_index = mad24(gy, src_step, src_offset + gx);
int dst_index = mad24(gy, dst_step, dst_offset + gx);
#ifdef SRC_ALIGNED
VT sdata = *((__global VT *)(src + src_index));
#else
VT sdata = VLOADN(0, src + src_index);
#endif
VT vthresh = (VT)(thresh), zero = (VT)(0);
#ifdef THRESH_BINARY
VT vecValue = sdata > vthresh ? max_val : zero;
#elif defined THRESH_BINARY_INV
VT vecValue = sdata > vthresh ? zero : max_val;
#elif defined THRESH_TRUNC
VT vecValue = sdata > vthresh ? thresh : sdata;
#elif defined THRESH_TOZERO
VT vecValue = sdata > vthresh ? sdata : zero;
#elif defined THRESH_TOZERO_INV
VT vecValue = sdata > vthresh ? zero : sdata;
#endif
if (gx + VECSIZE <= max_index)
#ifdef DST_ALIGNED
*(__global VT*)(dst + dst_index) = vecValue;
#else
VSTOREN(vecValue, 0, dst + dst_index);
#endif
else
{
T array[VECSIZE];
VSTOREN(vecValue, 0, array);
#pragma unroll
for (int i = 0; i < VECSIZE; ++i)
if (gx + i < max_index)
dst[dst_index + i] = array[i];
}
}
}
#else
__kernel void threshold(__global const T * restrict src, int src_offset, int src_step,
__global T * dst, int dst_offset, int dst_step,
T thresh, T max_val, int rows, int cols)
{
int gx = get_global_id(0);
int gy = get_global_id(1);
@ -78,3 +132,5 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
#endif
}
}
#endif