mirror of
https://github.com/opencv/opencv.git
synced 2024-11-28 13:10:12 +08:00
vectorized ocl::threshold for single channel images
This commit is contained in:
parent
c70fbb95a3
commit
1d5f5d2364
@ -366,21 +366,23 @@ PERF_TEST_P(resizeFixture, resize,
|
||||
|
||||
///////////// threshold////////////////////////
|
||||
|
||||
CV_ENUM(ThreshType, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV)
|
||||
CV_ENUM(ThreshType, THRESH_BINARY, THRESH_TOZERO_INV)
|
||||
|
||||
typedef tuple<Size, ThreshType> ThreshParams;
|
||||
typedef tuple<Size, MatType, ThreshType> ThreshParams;
|
||||
typedef TestBaseWithParam<ThreshParams> ThreshFixture;
|
||||
|
||||
PERF_TEST_P(ThreshFixture, threshold,
|
||||
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
|
||||
OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC4, CV_32FC1),
|
||||
ThreshType::all()))
|
||||
{
|
||||
const ThreshParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int threshType = get<1>(params);
|
||||
const int srcType = get<1>(params);
|
||||
const int threshType = get<2>(params);
|
||||
const double maxValue = 220.0, threshold = 50;
|
||||
|
||||
Mat src(srcSize, CV_8U), dst(srcSize, CV_8U);
|
||||
Mat src(srcSize, srcType), dst(srcSize, srcType);
|
||||
randu(src, 0, 100);
|
||||
declare.in(src).out(dst);
|
||||
|
||||
|
@ -118,22 +118,20 @@ namespace cv
|
||||
static void threshold_runner(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType)
|
||||
{
|
||||
bool ival = src.depth() < CV_32F;
|
||||
int cn = src.channels(), vecSize = 4, depth = src.depth();
|
||||
std::vector<uchar> thresholdValue = scalarToVector(cv::Scalar::all(ival ? cvFloor(thresh) : thresh), dst.depth(),
|
||||
dst.oclchannels(), dst.channels());
|
||||
std::vector<uchar> maxValue = scalarToVector(cv::Scalar::all(maxVal), dst.depth(), dst.oclchannels(), dst.channels());
|
||||
|
||||
size_t localThreads[3] = { 16, 16, 1 };
|
||||
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
|
||||
|
||||
const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
|
||||
"THRESH_TOZERO", "THRESH_TOZERO_INV" };
|
||||
const char * const channelMap[] = { "", "", "2", "4", "4" };
|
||||
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
||||
std::string buildOptions = format("-D T=%s%s -D %s", typeMap[src.depth()], channelMap[src.channels()],
|
||||
thresholdMap[thresholdType]);
|
||||
std::string buildOptions = format("-D T=%s%s -D %s", typeMap[depth], channelMap[cn], thresholdMap[thresholdType]);
|
||||
|
||||
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
|
||||
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
|
||||
int elemSize = src.elemSize();
|
||||
int src_step = src.step / elemSize, src_offset = src.offset / elemSize;
|
||||
int dst_step = dst.step / elemSize, dst_offset = dst.offset / elemSize;
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||
@ -142,11 +140,32 @@ namespace cv
|
||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
|
||||
args.push_back( make_pair(thresholdValue.size(), (void *)&thresholdValue[0]));
|
||||
args.push_back( make_pair(maxValue.size(), (void *)&maxValue[0]));
|
||||
|
||||
int max_index = dst.cols, cols = dst.cols;
|
||||
if (cn == 1 && vecSize > 1)
|
||||
{
|
||||
CV_Assert(((vecSize - 1) & vecSize) == 0 && vecSize <= 16);
|
||||
cols = divUp(cols, vecSize);
|
||||
buildOptions += format(" -D VECTORIZED -D VT=%s%d -D VLOADN=vload%d -D VECSIZE=%d -D VSTOREN=vstore%d",
|
||||
typeMap[depth], vecSize, vecSize, vecSize, vecSize);
|
||||
|
||||
int vecSizeBytes = vecSize * dst.elemSize1();
|
||||
if ((dst.offset % dst.step) % vecSizeBytes == 0 && dst.step % vecSizeBytes == 0)
|
||||
buildOptions += " -D DST_ALIGNED";
|
||||
if ((src.offset % src.step) % vecSizeBytes == 0 && src.step % vecSizeBytes == 0)
|
||||
buildOptions += " -D SRC_ALIGNED";
|
||||
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&max_index));
|
||||
}
|
||||
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
|
||||
|
||||
size_t localThreads[3] = { 16, 16, 1 };
|
||||
size_t globalThreads[3] = { cols, dst.rows, 1 };
|
||||
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args,
|
||||
-1, -1, buildOptions.c_str());
|
||||
}
|
||||
|
@ -51,9 +51,63 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef VECTORIZED
|
||||
|
||||
__kernel void threshold(__global const T * restrict src, int src_offset, int src_step,
|
||||
__global T * dst, int dst_offset, int dst_step,
|
||||
int rows, int cols, T thresh, T max_val)
|
||||
T thresh, T max_val, int max_index, int rows, int cols)
|
||||
{
|
||||
int gx = get_global_id(0);
|
||||
int gy = get_global_id(1);
|
||||
|
||||
if (gx < cols && gy < rows)
|
||||
{
|
||||
gx *= VECSIZE;
|
||||
int src_index = mad24(gy, src_step, src_offset + gx);
|
||||
int dst_index = mad24(gy, dst_step, dst_offset + gx);
|
||||
|
||||
#ifdef SRC_ALIGNED
|
||||
VT sdata = *((__global VT *)(src + src_index));
|
||||
#else
|
||||
VT sdata = VLOADN(0, src + src_index);
|
||||
#endif
|
||||
VT vthresh = (VT)(thresh), zero = (VT)(0);
|
||||
|
||||
#ifdef THRESH_BINARY
|
||||
VT vecValue = sdata > vthresh ? max_val : zero;
|
||||
#elif defined THRESH_BINARY_INV
|
||||
VT vecValue = sdata > vthresh ? zero : max_val;
|
||||
#elif defined THRESH_TRUNC
|
||||
VT vecValue = sdata > vthresh ? thresh : sdata;
|
||||
#elif defined THRESH_TOZERO
|
||||
VT vecValue = sdata > vthresh ? sdata : zero;
|
||||
#elif defined THRESH_TOZERO_INV
|
||||
VT vecValue = sdata > vthresh ? zero : sdata;
|
||||
#endif
|
||||
|
||||
if (gx + VECSIZE <= max_index)
|
||||
#ifdef DST_ALIGNED
|
||||
*(__global VT*)(dst + dst_index) = vecValue;
|
||||
#else
|
||||
VSTOREN(vecValue, 0, dst + dst_index);
|
||||
#endif
|
||||
else
|
||||
{
|
||||
T array[VECSIZE];
|
||||
VSTOREN(vecValue, 0, array);
|
||||
#pragma unroll
|
||||
for (int i = 0; i < VECSIZE; ++i)
|
||||
if (gx + i < max_index)
|
||||
dst[dst_index + i] = array[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
__kernel void threshold(__global const T * restrict src, int src_offset, int src_step,
|
||||
__global T * dst, int dst_offset, int dst_step,
|
||||
T thresh, T max_val, int rows, int cols)
|
||||
{
|
||||
int gx = get_global_id(0);
|
||||
int gy = get_global_id(1);
|
||||
@ -78,3 +132,5 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user