rewrote ocl::remap

This commit is contained in:
Ilya Lavrenov 2013-10-18 16:41:09 +04:00
parent 3fe74ad124
commit 2681cef5d7
2 changed files with 260 additions and 959 deletions

View File

@ -183,111 +183,88 @@ namespace cv
void remap( const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int borderType, const Scalar &borderValue )
{
Context *clCxt = src.clCxt;
bool supportsDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
if (!supportsDouble && src.depth() == CV_64F)
{
CV_Error(CV_OpenCLDoubleNotSupported, "Selected device does not support double");
return;
}
CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST
|| interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4);
CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) ||
(map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
CV_Assert(!map2.data || map2.size() == map1.size());
CV_Assert(dst.size() == map1.size());
CV_Assert(borderType == BORDER_CONSTANT);
dst.create(map1.size(), src.type());
string kernelName;
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
const char * const channelMap[] = { "", "", "2", "4", "4" };
const char * const interMap[] = { "INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_LINEAR", "INTER_LANCZOS" };
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
"BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
string kernelName = "remap";
if ( map1.type() == CV_32FC2 && !map2.data )
{
if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT)
kernelName = "remapLNFConstant";
else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT)
kernelName = "remapNNFConstant";
}
kernelName += "_32FC2";
else if (map1.type() == CV_16SC2 && !map2.data)
{
if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT)
kernelName = "remapLNSConstant";
else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT)
kernelName = "remapNNSConstant";
}
kernelName += "_16SC2";
else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
{
if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT)
kernelName = "remapLNF1Constant";
else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT)
kernelName = "remapNNF1Constant";
}
size_t blkSizeX = 16, blkSizeY = 16;
size_t glbSizeX;
int cols = dst.cols;
if (src.type() == CV_8UC1)
{
cols = (dst.cols + dst.offset % 4 + 3) / 4;
glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
}
else if (src.type() == CV_32FC1 && interpolation == INTER_LINEAR)
{
cols = (dst.cols + (dst.offset >> 2) % 4 + 3) / 4;
glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
}
kernelName += "_2_32FC1";
else
glbSizeX = dst.cols % blkSizeX == 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX;
CV_Error(CV_StsBadArg, "Unsupported map types");
size_t glbSizeY = dst.rows % blkSizeY == 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY;
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
int ocn = dst.oclchannels();
size_t localThreads[3] = { 16, 16, 1};
size_t globalThreads[3] = { dst.cols, dst.rows, 1};
Mat scalar(1, 1, CV_MAKE_TYPE(dst.depth(), ocn), borderValue);
std::string buildOptions = format("-D %s -D %s -D T=%s%s", interMap[interpolation],
borderMap[borderType], typeMap[src.depth()], channelMap[ocn]);
if (interpolation != INTER_NEAREST)
{
int wdepth = std::max(CV_32F, dst.depth());
if (!supportsDouble)
wdepth = std::min(CV_32F, wdepth);
buildOptions += format(" -D WT=%s%s -D convertToT=convert_%s%s%s -D convertToWT=convert_%s%s"
" -D convertToWT2=convert_%s2 -D WT2=%s2",
typeMap[wdepth], channelMap[ocn],
typeMap[src.depth()], channelMap[ocn], src.depth() < CV_32F ? "_sat_rte" : "",
typeMap[wdepth], channelMap[ocn],
typeMap[wdepth], typeMap[wdepth]);
}
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
int map1_step = map1.step / map1.elemSize(), map1_offset = map1.offset / map1.elemSize();
int map2_step = map2.step / map2.elemSize(), map2_offset = map2.offset / map2.elemSize();
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
vector< pair<size_t, const void *> > args;
if (map1.channels() == 2)
{
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
else
args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat));
}
if (map1.channels() == 1)
{
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
if (!map2.empty())
args.push_back( make_pair(sizeof(cl_mem), (void *)&map2.data));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
else
args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat));
}
openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
args.push_back( make_pair(sizeof(cl_int), (void *)&src_offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1_offset));
if (!map2.empty())
args.push_back( make_pair(sizeof(cl_int), (void *)&map2_offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&src_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1_step));
if (!map2.empty())
args.push_back( make_pair(sizeof(cl_int), (void *)&map2_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back( make_pair(scalar.elemSize(), (void *)scalar.data));
openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
}
////////////////////////////////////////////////////////////////////////////////////////////

File diff suppressed because it is too large Load Diff