Merge pull request #1633 from ilya-lavrenov:ocl_imgproc

This commit is contained in:
Andrey Pavlenko 2013-10-21 15:58:59 +04:00 committed by OpenCV Buildbot
commit bd1a1cc031
11 changed files with 1486 additions and 2497 deletions

View File

@ -52,25 +52,24 @@ using namespace cv::ocl;
void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
{
CV_Assert(src.type() == CV_32FC1);
dst.create(src.size(), src.type());
Context *clCxt = src.clCxt;
const std::string kernelName = "columnSum";
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
std::vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset));
size_t globalThreads[3] = {dst.cols, 1, 1};
size_t localThreads[3] = {256, 1, 1};
openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
openCLExecuteKernel(src.clCxt, &imgproc_columnsum, "columnSum", globalThreads, localThreads, args, src.oclchannels(), src.depth());
}

View File

@ -183,111 +183,89 @@ namespace cv
void remap( const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int borderType, const Scalar &borderValue )
{
Context *clCxt = src.clCxt;
bool supportsDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
if (!supportsDouble && src.depth() == CV_64F)
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device does not support double");
return;
}
CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST
|| interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4);
CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) ||
(map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
CV_Assert(!map2.data || map2.size() == map1.size());
CV_Assert(dst.size() == map1.size());
CV_Assert(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || borderType == BORDER_WRAP
|| borderType == BORDER_REFLECT_101 || borderType == BORDER_REFLECT);
dst.create(map1.size(), src.type());
string kernelName;
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
const char * const channelMap[] = { "", "", "2", "4", "4" };
const char * const interMap[] = { "INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_LINEAR", "INTER_LANCZOS" };
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
"BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
string kernelName = "remap";
if ( map1.type() == CV_32FC2 && !map2.data )
{
if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT)
kernelName = "remapLNFConstant";
else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT)
kernelName = "remapNNFConstant";
}
kernelName += "_32FC2";
else if (map1.type() == CV_16SC2 && !map2.data)
{
if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT)
kernelName = "remapLNSConstant";
else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT)
kernelName = "remapNNSConstant";
}
kernelName += "_16SC2";
else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
{
if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT)
kernelName = "remapLNF1Constant";
else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT)
kernelName = "remapNNF1Constant";
}
size_t blkSizeX = 16, blkSizeY = 16;
size_t glbSizeX;
int cols = dst.cols;
if (src.type() == CV_8UC1)
{
cols = (dst.cols + dst.offset % 4 + 3) / 4;
glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
}
else if (src.type() == CV_32FC1 && interpolation == INTER_LINEAR)
{
cols = (dst.cols + (dst.offset >> 2) % 4 + 3) / 4;
glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
}
kernelName += "_2_32FC1";
else
glbSizeX = dst.cols % blkSizeX == 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX;
CV_Error(CV_StsBadArg, "Unsupported map types");
size_t glbSizeY = dst.rows % blkSizeY == 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY;
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
int ocn = dst.oclchannels();
size_t localThreads[3] = { 16, 16, 1};
size_t globalThreads[3] = { dst.cols, dst.rows, 1};
Mat scalar(1, 1, CV_MAKE_TYPE(dst.depth(), ocn), borderValue);
std::string buildOptions = format("-D %s -D %s -D T=%s%s", interMap[interpolation],
borderMap[borderType], typeMap[src.depth()], channelMap[ocn]);
if (interpolation != INTER_NEAREST)
{
int wdepth = std::max(CV_32F, dst.depth());
if (!supportsDouble)
wdepth = std::min(CV_32F, wdepth);
buildOptions += format(" -D WT=%s%s -D convertToT=convert_%s%s%s -D convertToWT=convert_%s%s"
" -D convertToWT2=convert_%s2 -D WT2=%s2",
typeMap[wdepth], channelMap[ocn],
typeMap[src.depth()], channelMap[ocn], src.depth() < CV_32F ? "_sat_rte" : "",
typeMap[wdepth], channelMap[ocn],
typeMap[wdepth], typeMap[wdepth]);
}
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
int map1_step = map1.step / map1.elemSize(), map1_offset = map1.offset / map1.elemSize();
int map2_step = map2.step / map2.elemSize(), map2_offset = map2.offset / map2.elemSize();
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
vector< pair<size_t, const void *> > args;
if (map1.channels() == 2)
{
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
else
args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat));
}
if (map1.channels() == 1)
{
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
if (!map2.empty())
args.push_back( make_pair(sizeof(cl_mem), (void *)&map2.data));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
else
args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat));
}
openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
args.push_back( make_pair(sizeof(cl_int), (void *)&src_offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1_offset));
if (!map2.empty())
args.push_back( make_pair(sizeof(cl_int), (void *)&map2_offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&src_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1_step));
if (!map2.empty())
args.push_back( make_pair(sizeof(cl_int), (void *)&map2_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair(scalar.elemSize(), (void *)scalar.data));
openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
}
////////////////////////////////////////////////////////////////////////////////////////////
@ -448,31 +426,47 @@ namespace cv
void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int bordertype, const Scalar &scalar)
{
CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0);
if ((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
if (((bordertype & cv::BORDER_ISOLATED) == 0) &&
(bordertype != cv::BORDER_CONSTANT) &&
(bordertype != cv::BORDER_REPLICATE))
{
CV_Error(CV_StsBadArg, "Unsupported border type");
}
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device does not support double");
return;
}
oclMat _src = src;
CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0);
if( _src.offset != 0 && (bordertype & BORDER_ISOLATED) == 0 )
{
Size wholeSize;
Point ofs;
_src.locateROI(wholeSize, ofs);
int dtop = std::min(ofs.y, top);
int dbottom = std::min(wholeSize.height - _src.rows - ofs.y, bottom);
int dleft = std::min(ofs.x, left);
int dright = std::min(wholeSize.width - _src.cols - ofs.x, right);
_src.adjustROI(dtop, dbottom, dleft, dright);
top -= dtop;
left -= dleft;
bottom -= dbottom;
right -= dright;
}
bordertype &= ~cv::BORDER_ISOLATED;
// TODO need to remove this conditions and fix the code
if (bordertype == cv::BORDER_REFLECT || bordertype == cv::BORDER_WRAP)
{
CV_Assert((src.cols >= left) && (src.cols >= right) && (src.rows >= top) && (src.rows >= bottom));
CV_Assert((_src.cols >= left) && (_src.cols >= right) && (_src.rows >= top) && (_src.rows >= bottom));
}
else if (bordertype == cv::BORDER_REFLECT_101)
{
CV_Assert((src.cols > left) && (src.cols > right) && (src.rows > top) && (src.rows > bottom));
CV_Assert((_src.cols > left) && (_src.cols > right) && (_src.rows > top) && (_src.rows > bottom));
}
dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
int srcStep = src.step1() / src.oclchannels(), dstStep = dst.step1() / dst.oclchannels();
int srcOffset = src.offset / src.elemSize(), dstOffset = dst.offset / dst.elemSize();
int depth = src.depth(), ochannels = src.oclchannels();
dst.create(_src.rows + top + bottom, _src.cols + left + right, _src.type());
int srcStep = _src.step1() / _src.oclchannels(), dstStep = dst.step1() / dst.oclchannels();
int srcOffset = _src.offset / _src.elemSize(), dstOffset = dst.offset / dst.elemSize();
int depth = _src.depth(), ochannels = _src.oclchannels();
int __bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101};
const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
@ -483,19 +477,19 @@ namespace cv
break;
if (bordertype_index == sizeof(__bordertype) / sizeof(int))
CV_Error(CV_StsBadArg, "unsupported border type");
CV_Error(CV_StsBadArg, "Unsupported border type");
string kernelName = "copymakeborder";
size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&_src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&_src.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&_src.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep));
args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset));
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));
@ -1314,6 +1308,8 @@ namespace cv
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&clipLimit ));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&lutScale ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
String kernelName = "calcLut";
size_t localThreads[3] = { 32, 8, 1 };
@ -1333,7 +1329,7 @@ namespace cv
}
static void transform(const oclMat &src, oclMat &dst, const oclMat &lut,
const int tilesX, const int tilesY, const cv::Size tileSize)
const int tilesX, const int tilesY, const Size & tileSize)
{
cl_int2 tile_size;
tile_size.s[0] = tileSize.width;
@ -1351,6 +1347,9 @@ namespace cv
args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesY ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&lut.offset ));
size_t localThreads[3] = { 32, 8, 1 };
size_t globalThreads[3] = { src.cols, src.rows, 1 };
@ -1419,9 +1418,10 @@ namespace cv
}
else
{
cv::ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar());
ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0,
tilesX_ - (src.cols % tilesX_), BORDER_REFLECT_101, Scalar::all(0));
tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
tileSize = Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
srcForLut = srcExt_;
}
@ -1579,30 +1579,31 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st
{
dst.create(src.size(), src.type());
int channels = dst.oclchannels(), depth = dst.depth();
size_t vector_length = 1;
int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
int rows = dst.rows;
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { cols, rows, 1 };
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
int temp1_step = temp1.step / temp1.elemSize(), temp1_offset = temp1.offset / temp1.elemSize();
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&temp1.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1_step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1_offset ));
openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, dst.depth());
}
void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y)
{
CV_Assert(x.depth() == CV_32F && t.depth() == CV_32F);

View File

@ -53,12 +53,8 @@ int calc_lut(__local int* smem, int val, int tid)
barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0)
{
for (int i = 1; i < 256; ++i)
{
smem[i] += smem[i - 1];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
return smem[tid];
@ -71,69 +67,51 @@ void reduce(volatile __local int* smem, int val, int tid)
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128)
{
smem[tid] = val += smem[tid + 128];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64)
{
smem[tid] = val += smem[tid + 64];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
{
smem[tid] += smem[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
smem[tid] += smem[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
smem[tid] += smem[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
smem[tid] += smem[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
{
smem[tid] += smem[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1)
{
smem[256] = smem[tid] + smem[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
#else
void reduce(__local volatile int* smem, int val, int tid)
{
smem[tid] = val;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128)
{
smem[tid] = val += smem[tid + 128];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64)
{
smem[tid] = val += smem[tid + 64];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
@ -141,12 +119,17 @@ void reduce(__local volatile int* smem, int val, int tid)
smem[tid] += smem[tid + 32];
#if WAVE_SIZE < 32
} barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) {
if (tid < 16)
{
#endif
smem[tid] += smem[tid + 16];
#if WAVE_SIZE < 16
} barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8) {
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
#endif
smem[tid] += smem[tid + 8];
smem[tid] += smem[tid + 4];
@ -159,7 +142,8 @@ void reduce(__local volatile int* smem, int val, int tid)
__kernel void calcLut(__global __const uchar * src, __global uchar * lut,
const int srcStep, const int dstStep,
const int2 tileSize, const int tilesX,
const int clipLimit, const float lutScale)
const int clipLimit, const float lutScale,
const int src_offset, const int dst_offset)
{
__local int smem[512];
@ -173,25 +157,21 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1))
{
__global const uchar* srcPtr = src + mad24( ty * tileSize.y + i,
srcStep, tx * tileSize.x );
__global const uchar* srcPtr = src + mad24(ty * tileSize.y + i, srcStep, tx * tileSize.x + src_offset);
for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0))
{
const int data = srcPtr[j];
atomic_inc(&smem[data]);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
int tHistVal = smem[tid];
barrier(CLK_LOCAL_MEM_FENCE);
if (clipLimit > 0)
{
// clip histogram bar
int clipped = 0;
if (tHistVal > clipLimit)
{
@ -200,7 +180,6 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
}
// find number of overall clipped samples
reduce(smem, clipped, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
@ -229,7 +208,7 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
const int lutVal = calc_lut(smem, tHistVal, tid);
uint ires = (uint)convert_int_rte(lutScale * lutVal);
lut[(ty * tilesX + tx) * dstStep + tid] =
lut[(ty * tilesX + tx) * dstStep + tid + dst_offset] =
convert_uchar(clamp(ires, (uint)0, (uint)255));
}
@ -239,7 +218,8 @@ __kernel void transform(__global __const uchar * src,
const int srcStep, const int dstStep, const int lutStep,
const int cols, const int rows,
const int2 tileSize,
const int tilesX, const int tilesY)
const int tilesX, const int tilesY,
const int src_offset, const int dst_offset, int lut_offset)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
@ -261,15 +241,15 @@ __kernel void transform(__global __const uchar * src,
tx1 = max(tx1, 0);
tx2 = min(tx2, tilesX - 1);
const int srcVal = src[mad24(y, srcStep, x)];
const int srcVal = src[mad24(y, srcStep, x + src_offset)];
float res = 0;
res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (1.0f - ya));
res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (1.0f - ya));
res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (ya));
res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (ya));
res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (1.0f - ya));
res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (1.0f - ya));
res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (ya));
res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (ya));
uint ires = (uint)convert_int_rte(res);
dst[mad24(y, dstStep, x)] = convert_uchar(clamp(ires, (uint)0, (uint)255));
dst[mad24(y, dstStep, x + dst_offset)] = convert_uchar(clamp(ires, (uint)0, (uint)255));
}

View File

@ -43,38 +43,28 @@
//
//M*/
#pragma OPENCL EXTENSION cl_amd_printf : enable
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
////////////////////////////////////////////////////////////////////
///////////////////////// columnSum ////////////////////////////////
////////////////////////////////////////////////////////////////////
/// CV_32FC1
__kernel void columnSum_C1_D5(__global float* src,__global float* dst,int srcCols,int srcRows,int srcStep,int dstStep)
__kernel void columnSum_C1_D5(__global float * src, __global float * dst,
int cols, int rows, int src_step, int dst_step, int src_offset, int dst_offset)
{
const int x = get_global_id(0);
srcStep >>= 2;
dstStep >>= 2;
if (x < srcCols)
if (x < cols)
{
int srcIdx = x ;
int dstIdx = x ;
int srcIdx = x + src_offset;
int dstIdx = x + dst_offset;
float sum = 0;
for (int y = 0; y < srcRows; ++y)
for (int y = 0; y < rows; ++y)
{
sum += src[srcIdx];
dst[dstIdx] = sum;
srcIdx += srcStep;
dstIdx += dstStep;
srcIdx += src_step;
dstIdx += dst_step;
}
}
}

View File

@ -48,9 +48,12 @@
#elif defined (__NVIDIA__)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
/************************************** convolve **************************************/
__kernel void convolve_D5 (__global float *src, __global float *temp1, __global float *dst,
int rows, int cols, int src_step, int dst_step,int k_step, int kWidth, int kHeight)
__kernel void convolve_D5(__global float *src, __global float *temp1, __global float *dst,
int rows, int cols, int src_step, int dst_step,int k_step, int kWidth, int kHeight,
int src_offset, int dst_offset, int koffset)
{
__local float smem[16 + 2 * 8][16 + 2 * 8];
@ -65,7 +68,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
// 0 | 0 0 | 0
// -----------
// 0 | 0 0 | 0
smem[y][x] = src[min(max(gy - 8, 0), rows - 1)*(src_step >> 2) + min(max(gx - 8, 0), cols - 1)];
smem[y][x] = src[min(max(gy - 8, 0), rows - 1) * src_step + min(max(gx - 8, 0), cols - 1) + src_offset];
// 0 | 0 x | x
// -----------
@ -73,7 +76,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
// 0 | 0 0 | 0
// -----------
// 0 | 0 0 | 0
smem[y][x + 16] = src[min(max(gy - 8, 0), rows - 1)*(src_step >> 2) + min(gx + 8, cols - 1)];
smem[y][x + 16] = src[min(max(gy - 8, 0), rows - 1) * src_step + min(gx + 8, cols - 1) + src_offset];
// 0 | 0 0 | 0
// -----------
@ -81,7 +84,7 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
// x | x 0 | 0
// -----------
// x | x 0 | 0
smem[y + 16][x] = src[min(gy + 8, rows - 1)*(src_step >> 2) + min(max(gx - 8, 0), cols - 1)];
smem[y + 16][x] = src[min(gy + 8, rows - 1) * src_step + min(max(gx - 8, 0), cols - 1) + src_offset];
// 0 | 0 0 | 0
// -----------
@ -89,21 +92,18 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
// 0 | 0 x | x
// -----------
// 0 | 0 x | x
smem[y + 16][x + 16] = src[min(gy + 8, rows - 1)*(src_step >> 2) + min(gx + 8, cols - 1)];
smem[y + 16][x + 16] = src[min(gy + 8, rows - 1) * src_step + min(gx + 8, cols - 1) + src_offset];
barrier(CLK_LOCAL_MEM_FENCE);
if (gx < cols && gy < rows)
{
float res = 0;
float res = 0;
for (int i = 0; i < kHeight; ++i)
{
for (int j = 0; j < kWidth; ++j)
{
res += smem[y + 8 - kHeight / 2 + i][x + 8 - kWidth / 2 + j] * temp1[i * (k_step>>2) + j];
}
}
dst[gy*(dst_step >> 2)+gx] = res;
}
res += smem[y + 8 - kHeight / 2 + i][x + 8 - kWidth / 2 + j] * temp1[i * k_step + j + koffset];
dst[gy * dst_step + gx + dst_offset] = res;
}
}

View File

@ -34,6 +34,13 @@
//
//
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
#ifdef BORDER_CONSTANT
//BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii

File diff suppressed because it is too large Load Diff

View File

@ -62,8 +62,7 @@ PARAM_TEST_CASE(FilterTestBase, MatType,
int, // border type, or iteration
bool) // roi or not
{
int type, borderType;
int ksize;
int type, borderType, ksize;
bool useRoi;
Mat src, dst_whole, src_roi, dst_roi;
@ -92,8 +91,12 @@ PARAM_TEST_CASE(FilterTestBase, MatType,
void Near(double threshold = 0.0)
{
EXPECT_MAT_NEAR(dst_whole, Mat(gdst_whole), threshold);
EXPECT_MAT_NEAR(dst_roi, Mat(gdst_roi), threshold);
Mat roi, whole;
gdst_whole.download(whole);
gdst_roi.download(roi);
EXPECT_MAT_NEAR(dst_whole, whole, threshold);
EXPECT_MAT_NEAR(dst_roi, roi, threshold);
}
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,408 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Shengen Yan, yanshengen@gmail.com
// Jiang Liyuan, lyuan001.good@163.com
// Rock Li, Rock.Li@amd.com
// Wu Zailong, bullet@yeah.net
// Xu Pang, pangxu010@163.com
// Sen Liu, swjtuls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#ifdef HAVE_OPENCL
using namespace testing;
using namespace std;
using namespace cv;
typedef struct
{
short x;
short y;
} COOR;
COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, Size size, int sp, int sr, int maxIter, float eps, int *tab)
{
int isr2 = sr * sr;
int c0, c1, c2, c3;
int iter;
uchar *ptr = NULL;
uchar *pstart = NULL;
int revx = 0, revy = 0;
c0 = sptr[0];
c1 = sptr[1];
c2 = sptr[2];
c3 = sptr[3];
// iterate meanshift procedure
for(iter = 0; iter < maxIter; iter++ )
{
int count = 0;
int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
//mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
int minx = x0 - sp;
int miny = y0 - sp;
int maxx = x0 + sp;
int maxy = y0 + sp;
//deal with the image boundary
if(minx < 0) minx = 0;
if(miny < 0) miny = 0;
if(maxx >= size.width) maxx = size.width - 1;
if(maxy >= size.height) maxy = size.height - 1;
if(iter == 0)
{
pstart = sptr;
}
else
{
pstart = pstart + revy * sstep + (revx << 2); //point to the new position
}
ptr = pstart;
ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row
for( int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2))
{
int rowCount = 0;
int x = minx;
#if CV_ENABLE_UNROLLED
for( ; x + 4 <= maxx; x += 4, ptr += 16)
{
int t0, t1, t2;
t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x;
rowCount++;
}
t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 1;
rowCount++;
}
t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 2;
rowCount++;
}
t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 3;
rowCount++;
}
}
#endif
for(; x <= maxx; x++, ptr += 4)
{
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x;
rowCount++;
}
}
if(rowCount == 0)
continue;
count += rowCount;
sy += y * rowCount;
}
if( count == 0 )
break;
int x1 = sx / count;
int y1 = sy / count;
s0 = s0 / count;
s1 = s1 / count;
s2 = s2 / count;
bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
//revise the pointer corresponding to the new (y0,x0)
revx = x1 - x0;
revy = y1 - y0;
x0 = x1;
y0 = y1;
c0 = s0;
c1 = s1;
c2 = s2;
if( stopFlag )
break;
} //for iter
dptr[0] = (uchar)c0;
dptr[1] = (uchar)c1;
dptr[2] = (uchar)c2;
dptr[3] = (uchar)c3;
COOR coor;
coor.x = (short)x0;
coor.y = (short)y0;
return coor;
}
void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, TermCriteria crit)
{
if( src_roi.empty() )
CV_Error( CV_StsBadArg, "The input image is empty" );
if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) );
CV_Assert( !(dst_roi.step & 0x3) );
if( !(crit.type & TermCriteria::MAX_ITER) )
crit.maxCount = 5;
int maxIter = std::min(std::max(crit.maxCount, 1), 100);
float eps;
if( !(crit.type & TermCriteria::EPS) )
eps = 1.f;
eps = (float)std::max(crit.epsilon, 0.0);
int tab[512];
for(int i = 0; i < 512; i++)
tab[i] = (i - 255) * (i - 255);
uchar *sptr = src_roi.data;
uchar *dptr = dst_roi.data;
int sstep = (int)src_roi.step;
int dstep = (int)dst_roi.step;
Size size = src_roi.size();
for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
dptr += dstep - (size.width << 2))
{
for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4)
{
do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
}
}
}
void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, TermCriteria crit)
{
if( src_roi.empty() )
CV_Error( CV_StsBadArg, "The input image is empty" );
if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) &&
(src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows));
CV_Assert( !(dstCoor_roi.step & 0x3) );
if( !(crit.type & TermCriteria::MAX_ITER) )
crit.maxCount = 5;
int maxIter = std::min(std::max(crit.maxCount, 1), 100);
float eps;
if( !(crit.type & TermCriteria::EPS) )
eps = 1.f;
eps = (float)std::max(crit.epsilon, 0.0);
int tab[512];
for(int i = 0; i < 512; i++)
tab[i] = (i - 255) * (i - 255);
uchar *sptr = src_roi.data;
uchar *dptr = dst_roi.data;
short *dCoorptr = (short *)dstCoor_roi.data;
int sstep = (int)src_roi.step;
int dstep = (int)dst_roi.step;
int dCoorstep = (int)dstCoor_roi.step >> 1;
Size size = src_roi.size();
for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1))
{
for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2)
{
*((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
}
}
}
//////////////////////////////// meanShift //////////////////////////////////////////
PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, TermCriteria, bool)
{
int type, typeCoor;
int sp, sr;
TermCriteria crit;
bool useRoi;
// src mat
Mat src, src_roi;
Mat dst, dst_roi;
Mat dstCoor, dstCoor_roi;
// ocl dst mat
ocl::oclMat gsrc, gsrc_roi;
ocl::oclMat gdst, gdst_roi;
ocl::oclMat gdstCoor, gdstCoor_roi;
virtual void SetUp()
{
type = GET_PARAM(0);
typeCoor = GET_PARAM(1);
sp = GET_PARAM(2);
sr = GET_PARAM(3);
crit = GET_PARAM(4);
useRoi = GET_PARAM(5);
}
void random_roi()
{
Size roiSize = randomSize(1, MAX_VALUE);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
generateOclMat(gsrc, gsrc_roi, src, roiSize, srcBorder);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 5, 256);
generateOclMat(gdst, gdst_roi, dst, roiSize, dstBorder);
randomSubMat(dstCoor, dstCoor_roi, roiSize, dstBorder, typeCoor, 5, 256);
generateOclMat(gdstCoor, gdstCoor_roi, dstCoor, roiSize, dstBorder);
}
void Near(double threshold = 0.0)
{
Mat whole, roi;
gdst.download(whole);
gdst_roi.download(roi);
EXPECT_MAT_NEAR(dst, whole, threshold);
EXPECT_MAT_NEAR(dst_roi, roi, threshold);
}
void Near1(double threshold = 0.0)
{
Mat whole, roi;
gdstCoor.download(whole);
gdstCoor_roi.download(roi);
EXPECT_MAT_NEAR(dstCoor, whole, threshold);
EXPECT_MAT_NEAR(dstCoor_roi, roi, threshold);
}
};
/////////////////////////meanShiftFiltering/////////////////////////////
typedef meanShiftTestBase meanShiftFiltering;
OCL_TEST_P(meanShiftFiltering, Mat)
{
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
meanShiftFiltering_(src_roi, dst_roi, sp, sr, crit);
ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);
Near();
}
}
///////////////////////////meanShiftProc//////////////////////////////////
typedef meanShiftTestBase meanShiftProc;
OCL_TEST_P(meanShiftProc, Mat)
{
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
meanShiftProc_(src_roi, dst_roi, dstCoor_roi, sp, sr, crit);
ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit);
Near();
Near1();
}
}
/////////////////////////////////////////////////////////////////////////////////////
INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine(
Values((MatType)CV_8UC4),
Values((MatType)CV_16SC2),
Values(5),
Values(6),
Values(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1)),
Bool()
));
INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine(
Values((MatType)CV_8UC4),
Values((MatType)CV_16SC2),
Values(5),
Values(6),
Values(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 5, 1)),
Bool()
));
#endif // HAVE_OPENCL

View File

@ -0,0 +1,371 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Shengen Yan, yanshengen@gmail.com
// Jiang Liyuan, lyuan001.good@163.com
// Rock Li, Rock.Li@amd.com
// Wu Zailong, bullet@yeah.net
// Xu Pang, pangxu010@163.com
// Sen Liu, swjtuls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#ifdef HAVE_OPENCL
using namespace cv;
using namespace testing;
using namespace std;
static MatType noType = -1;
/////////////////////////////////////////////////////////////////////////////////////////////////
// warpAffine & warpPerspective
PARAM_TEST_CASE(WarpTestBase, MatType, Interpolation, bool, bool)
{
int type, interpolation;
Size dsize;
bool useRoi, mapInverse;
Mat src, dst_whole, src_roi, dst_roi;
ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
virtual void SetUp()
{
type = GET_PARAM(0);
interpolation = GET_PARAM(1);
mapInverse = GET_PARAM(2);
useRoi = GET_PARAM(3);
if (mapInverse)
interpolation |= WARP_INVERSE_MAP;
}
void random_roi()
{
Size roiSize = randomSize(1, MAX_VALUE);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst_whole, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
dsize = randomSize(1, MAX_VALUE);
}
void Near(double threshold = 0.0)
{
Mat whole, roi;
gdst_whole.download(whole);
gdst_roi.download(roi);
EXPECT_MAT_NEAR(dst_whole, whole, threshold);
EXPECT_MAT_NEAR(dst_roi, roi, threshold);
}
};
/////warpAffine
typedef WarpTestBase WarpAffine;
OCL_TEST_P(WarpAffine, Mat)
{
static const double coeffs[2][3] =
{
{ cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
{ sin(CV_PI / 6), cos(CV_PI / 6), -100.0 }
};
static Mat M(2, 3, CV_64FC1, (void *)coeffs);
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
warpAffine(src_roi, dst_roi, M, dsize, interpolation);
ocl::warpAffine(gsrc_roi, gdst_roi, M, dsize, interpolation);
Near(1.0);
}
}
// warpPerspective
typedef WarpTestBase WarpPerspective;
OCL_TEST_P(WarpPerspective, Mat)
{
static const double coeffs[3][3] =
{
{ cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
{ sin(CV_PI / 6), cos(CV_PI / 6), -100.0 },
{ 0.0, 0.0, 1.0 }
};
static Mat M(3, 3, CV_64FC1, (void *)coeffs);
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
warpPerspective(src_roi, dst_roi, M, dsize, interpolation);
ocl::warpPerspective(gsrc_roi, gdst_roi, M, dsize, interpolation);
Near(1.0);
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// remap
PARAM_TEST_CASE(Remap, MatDepth, Channels, pair<MatType, MatType>, Border, bool)
{
int srcType, map1Type, map2Type;
int borderType;
bool useRoi;
Scalar val;
Mat src, src_roi;
Mat dst, dst_roi;
Mat map1, map1_roi;
Mat map2, map2_roi;
// ocl mat with roi
ocl::oclMat gsrc, gsrc_roi;
ocl::oclMat gdst, gdst_roi;
ocl::oclMat gmap1, gmap1_roi;
ocl::oclMat gmap2, gmap2_roi;
virtual void SetUp()
{
srcType = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1));
map1Type = GET_PARAM(2).first;
map2Type = GET_PARAM(2).second;
borderType = GET_PARAM(3);
useRoi = GET_PARAM(4);
}
void random_roi()
{
val = randomScalar(-MAX_VALUE, MAX_VALUE);
Size srcROISize = randomSize(1, MAX_VALUE);
Size dstROISize = randomSize(1, MAX_VALUE);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, srcROISize, srcBorder, srcType, 5, 256);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, dstROISize, dstBorder, srcType, -MAX_VALUE, MAX_VALUE);
int mapMaxValue = MAX_VALUE << 2;
Border map1Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(map1, map1_roi, dstROISize, map1Border, map1Type, -mapMaxValue, mapMaxValue);
Border map2Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
if (map2Type != noType)
randomSubMat(map2, map2_roi, dstROISize, map2Border, map2Type, -mapMaxValue, mapMaxValue);
generateOclMat(gsrc, gsrc_roi, src, srcROISize, srcBorder);
generateOclMat(gdst, gdst_roi, dst, dstROISize, dstBorder);
generateOclMat(gmap1, gmap1_roi, map1, dstROISize, map1Border);
if (noType != map2Type)
generateOclMat(gmap2, gmap2_roi, map2, dstROISize, map2Border);
}
void Near(double threshold = 0.0)
{
Mat whole, roi;
gdst.download(whole);
gdst_roi.download(roi);
EXPECT_MAT_NEAR(dst, whole, threshold);
EXPECT_MAT_NEAR(dst_roi, roi, threshold);
}
};
typedef Remap Remap_INTER_NEAREST;
OCL_TEST_P(Remap_INTER_NEAREST, Mat)
{
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
remap(src_roi, dst_roi, map1_roi, map2_roi, INTER_NEAREST, borderType, val);
ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, INTER_NEAREST, borderType, val);
Near(1.0);
}
}
typedef Remap Remap_INTER_LINEAR;
OCL_TEST_P(Remap_INTER_LINEAR, Mat)
{
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
cv::remap(src_roi, dst_roi, map1_roi, map2_roi, INTER_LINEAR, borderType, val);
ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, INTER_LINEAR, borderType, val);
Near(2.0);
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// resize
PARAM_TEST_CASE(Resize, MatType, double, double, Interpolation, bool)
{
int type, interpolation;
double fx, fy;
bool useRoi;
Mat src, dst_whole, src_roi, dst_roi;
ocl::oclMat gsrc_whole, gsrc_roi, gdst_whole, gdst_roi;
virtual void SetUp()
{
type = GET_PARAM(0);
fx = GET_PARAM(1);
fy = GET_PARAM(2);
interpolation = GET_PARAM(3);
useRoi = GET_PARAM(4);
}
void random_roi()
{
Size srcRoiSize = randomSize(1, MAX_VALUE);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, srcRoiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
Size dstRoiSize;
dstRoiSize.width = cvRound(srcRoiSize.width * fx);
dstRoiSize.height = cvRound(srcRoiSize.height * fy);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst_whole, dst_roi, dstRoiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
generateOclMat(gsrc_whole, gsrc_roi, src, srcRoiSize, srcBorder);
generateOclMat(gdst_whole, gdst_roi, dst_whole, dstRoiSize, dstBorder);
}
void Near(double threshold = 0.0)
{
Mat whole, roi;
gdst_whole.download(whole);
gdst_roi.download(roi);
EXPECT_MAT_NEAR(dst_whole, whole, threshold);
EXPECT_MAT_NEAR(dst_roi, roi, threshold);
}
};
OCL_TEST_P(Resize, Mat)
{
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
resize(src_roi, dst_roi, Size(), fx, fy, interpolation);
ocl::resize(gsrc_roi, gdst_roi, Size(), fx, fy, interpolation);
Near(1.0);
}
}
/////////////////////////////////////////////////////////////////////////////////////
INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpAffine, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR, (Interpolation)INTER_CUBIC),
Bool(),
Bool()));
INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR, (Interpolation)INTER_CUBIC),
Bool(),
Bool()));
INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_LINEAR, Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
Values(1, 2, 3, 4),
Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
pair<MatType, MatType>((MatType)CV_32FC2, noType)),
Values((Border)BORDER_CONSTANT,
(Border)BORDER_REPLICATE,
(Border)BORDER_WRAP,
(Border)BORDER_REFLECT,
(Border)BORDER_REFLECT_101),
Bool()));
INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_NEAREST, Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
Values(1, 2, 3, 4),
Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
pair<MatType, MatType>((MatType)CV_32FC2, noType),
pair<MatType, MatType>((MatType)CV_16SC2, noType)),
Values((Border)BORDER_CONSTANT,
(Border)BORDER_REPLICATE,
(Border)BORDER_WRAP,
(Border)BORDER_REFLECT,
(Border)BORDER_REFLECT_101),
Bool()));
INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(0.5, 1.5, 2.0),
Values(0.5, 1.5, 2.0),
Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR),
Bool()));
#endif // HAVE_OPENCL