diff --git a/modules/ocl/perf/perf_blend.cpp b/modules/ocl/perf/perf_blend.cpp index a5e057ffca..6f611bbc34 100644 --- a/modules/ocl/perf/perf_blend.cpp +++ b/modules/ocl/perf/perf_blend.cpp @@ -47,48 +47,61 @@ #include "perf_precomp.hpp" using namespace perf; +using namespace cv; +using std::tr1::get; ///////////// blend //////////////////////// template -static void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, - const cv::Mat &weights1, const cv::Mat &weights2, - cv::Mat &result_gold) +static void blendLinearGold(const Mat &img1, const Mat &img2, + const Mat &weights1, const Mat &weights2, + Mat &result_gold) { + CV_Assert(img1.size() == img2.size() && img1.type() == img2.type()); + CV_Assert(weights1.size() == weights2.size() && weights1.size() == img1.size() && + weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1); + result_gold.create(img1.size(), img1.type()); int cn = img1.channels(); + int step1 = img1.cols * img1.channels(); for (int y = 0; y < img1.rows; ++y) { - const float *weights1_row = weights1.ptr(y); - const float *weights2_row = weights2.ptr(y); - const T *img1_row = img1.ptr(y); - const T *img2_row = img2.ptr(y); - T *result_gold_row = result_gold.ptr(y); + const float * const weights1_row = weights1.ptr(y); + const float * const weights2_row = weights2.ptr(y); + const T * const img1_row = img1.ptr(y); + const T * const img2_row = img2.ptr(y); + T * const result_gold_row = result_gold.ptr(y); - for (int x = 0; x < img1.cols * cn; ++x) + for (int x = 0; x < step1; ++x) { - int x1 = x * cn; - float w1 = weights1_row[x]; - float w2 = weights2_row[x]; - result_gold_row[x] = static_cast((img1_row[x1] * w1 - + img2_row[x1] * w2) / (w1 + w2 + 1e-5f)); + int x1 = x / cn; + float w1 = weights1_row[x1], w2 = weights2_row[x1]; + result_gold_row[x] = saturate_cast(((float)img1_row[x] * w1 + + (float)img2_row[x] * w2) / (w1 + w2 + 1e-5f)); } } } -typedef TestBaseWithParam blendLinearFixture; +typedef void (*blendFunction)(const Mat &img1, const Mat &img2, + const Mat &weights1, const Mat &weights2, + Mat &result_gold); -PERF_TEST_P(blendLinearFixture, blendLinear, OCL_TYPICAL_MAT_SIZES) +typedef Size_MatType blendLinearFixture; + +PERF_TEST_P(blendLinearFixture, blendLinear, ::testing::Combine( + OCL_TYPICAL_MAT_SIZES, testing::Values(CV_8UC1, CV_8UC3, CV_32FC1))) { - const Size srcSize = GetParam(); - const int type = CV_8UC1; + Size_MatType_t params = GetParam(); + const Size srcSize = get<0>(params); + const int srcType = get<1>(params); + const double eps = CV_MAT_DEPTH(srcType) <= CV_32S ? 1.0 : 0.2; - Mat src1(srcSize, type), src2(srcSize, CV_8UC1), dst; + Mat src1(srcSize, srcType), src2(srcSize, srcType), dst(srcSize, srcType); Mat weights1(srcSize, CV_32FC1), weights2(srcSize, CV_32FC1); - declare.in(src1, src2, WARMUP_RNG); + declare.in(src1, src2, WARMUP_RNG).out(dst); randu(weights1, 0.0f, 1.0f); randu(weights2, 0.0f, 1.0f); @@ -97,17 +110,20 @@ PERF_TEST_P(blendLinearFixture, blendLinear, OCL_TYPICAL_MAT_SIZES) ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst; ocl::oclMat oclWeights1(weights1), oclWeights2(weights2); - OCL_TEST_CYCLE() cv::ocl::blendLinear(oclSrc1, oclSrc2, oclWeights1, oclWeights2, oclDst); + OCL_TEST_CYCLE() ocl::blendLinear(oclSrc1, oclSrc2, oclWeights1, oclWeights2, oclDst); oclDst.download(dst); - SANITY_CHECK(dst); + SANITY_CHECK(dst, eps); } else if (RUN_PLAIN_IMPL) { - TEST_CYCLE() blendLinearGold(src1, src2, weights1, weights2, dst); + blendFunction funcs[] = { (blendFunction)blendLinearGold, (blendFunction)blendLinearGold }; + int funcIdx = CV_MAT_DEPTH(srcType) == CV_8UC1 ? 0 : 1; - SANITY_CHECK(dst); + TEST_CYCLE() (funcs[funcIdx])(src1, src2, weights1, weights2, dst); + + SANITY_CHECK(dst, eps); } else OCL_PERF_ELSE diff --git a/modules/ocl/src/blend.cpp b/modules/ocl/src/blend.cpp index 1a5301f977..a2b70f033e 100644 --- a/modules/ocl/src/blend.cpp +++ b/modules/ocl/src/blend.cpp @@ -49,35 +49,51 @@ using namespace cv; using namespace cv::ocl; -void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, - oclMat &result) +void cv::ocl::blendLinear(const oclMat &src1, const oclMat &src2, const oclMat &weights1, const oclMat &weights2, + oclMat &dst) { - cv::ocl::Context *ctx = img1.clCxt; - assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt); - int channels = img1.oclchannels(); - int depth = img1.depth(); - int rows = img1.rows; - int cols = img1.cols; - int istep = img1.step1(); - int wstep = weights1.step1(); - size_t globalSize[] = {cols * channels / 4, rows, 1}; - size_t localSize[] = {256, 1, 1}; + CV_Assert(src1.depth() <= CV_32F); + CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); + CV_Assert(weights1.size() == weights2.size() && weights1.size() == src1.size() && + weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1); + + dst.create(src1.size(), src1.type()); + + size_t globalSize[] = { dst.cols, dst.rows, 1}; + size_t localSize[] = { 16, 16, 1 }; + + int depth = dst.depth(), ocn = dst.oclchannels(); + int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize(); + int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize(); + int weight1_step = weights1.step / weights1.elemSize(), weight1_offset = weights1.offset / weights1.elemSize(); + int weight2_step = weights2.step / weights2.elemSize(), weight2_offset = weights2.offset / weights2.elemSize(); + int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); + + const char * const channelMap[] = { "", "", "2", "4", "4" }; + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + std::string buildOptions = format("-D T=%s%s -D convertToT=convert_%s%s%s -D FT=float%s -D convertToFT=convert_float%s", + typeMap[depth], channelMap[ocn], typeMap[depth], channelMap[ocn], + depth >= CV_32S ? "" : "_sat_rte", channelMap[ocn], channelMap[ocn]); vector< pair > args; - result.create(img1.size(), CV_MAKE_TYPE(depth,img1.channels())); - if(globalSize[0] != 0) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&istep )); - args.push_back( make_pair( sizeof(cl_int), (void *)&wstep )); - std::string kernelName = "BlendLinear"; + args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_step )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_step )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols )); - openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth); - } + openCLExecuteKernel(src1.clCxt, &blend_linear, "blendLinear", globalSize, localSize, args, + -1, -1, buildOptions.c_str()); } diff --git a/modules/ocl/src/opencl/blend_linear.cl b/modules/ocl/src/opencl/blend_linear.cl index f612c03585..06a51f25cf 100644 --- a/modules/ocl/src/opencl/blend_linear.cl +++ b/modules/ocl/src/opencl/blend_linear.cl @@ -42,99 +42,37 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ -__kernel void BlendLinear_C1_D0( - __global uchar4 *dst, - __global uchar4 *img1, - __global uchar4 *img2, - __global float4 *weight1, - __global float4 *weight2, - int rows, - int cols, - int istep, - int wstep - ) + +#if defined (DOUBLE_SUPPORT) +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#endif + +__kernel void blendLinear(__global const T * src1, int src1_offset, int src1_step, + __global const T * src2, int src2_offset, int src2_step, + __global const float * weight1, int weight1_offset, int weight1_step, + __global const float * weight2, int weight2_offset, int weight2_step, + __global T * dst, int dst_offset, int dst_step, + int rows, int cols) { - int idx = get_global_id(0); - int idy = get_global_id(1); - if (idx << 2 < cols && idy < rows) + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) { - int pos = mad24(idy,istep >> 2,idx); - int wpos = mad24(idy,wstep >> 2,idx); - float4 w1 = weight1[wpos], w2 = weight2[wpos]; - dst[pos] = convert_uchar4((convert_float4(img1[pos]) * w1 + - convert_float4(img2[pos]) * w2) / (w1 + w2 + 1e-5f)); - } -} - -__kernel void BlendLinear_C4_D0( - __global uchar4 *dst, - __global uchar4 *img1, - __global uchar4 *img2, - __global float *weight1, - __global float *weight2, - int rows, - int cols, - int istep, - int wstep - ) -{ - int idx = get_global_id(0); - int idy = get_global_id(1); - if (idx < cols && idy < rows) - { - int pos = mad24(idy,istep >> 2,idx); - int wpos = mad24(idy,wstep, idx); - float w1 = weight1[wpos]; - float w2 = weight2[wpos]; - dst[pos] = convert_uchar4((convert_float4(img1[pos]) * w1 + - convert_float4(img2[pos]) * w2) / (w1 + w2 + 1e-5f)); - } -} - - -__kernel void BlendLinear_C1_D5( - __global float4 *dst, - __global float4 *img1, - __global float4 *img2, - __global float4 *weight1, - __global float4 *weight2, - int rows, - int cols, - int istep, - int wstep - ) -{ - int idx = get_global_id(0); - int idy = get_global_id(1); - if (idx << 2 < cols && idy < rows) - { - int pos = mad24(idy,istep >> 2,idx); - int wpos = mad24(idy,wstep >> 2,idx); - float4 w1 = weight1[wpos], w2 = weight2[wpos]; - dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); - } -} - -__kernel void BlendLinear_C4_D5( - __global float4 *dst, - __global float4 *img1, - __global float4 *img2, - __global float *weight1, - __global float *weight2, - int rows, - int cols, - int istep, - int wstep - ) -{ - int idx = get_global_id(0); - int idy = get_global_id(1); - if (idx < cols && idy < rows) - { - int pos = mad24(idy,istep >> 2,idx); - int wpos = mad24(idy,wstep, idx); - float w1 = weight1[wpos]; - float w2 = weight2[wpos]; - dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); + int src1_index = mad24(y, src1_step, src1_offset + x); + int src2_index = mad24(y, src2_step, src2_offset + x); + int weight1_index = mad24(y, weight1_step, weight1_offset + x); + int weight2_index = mad24(y, weight2_step, weight2_offset + x); + int dst_index = mad24(y, dst_step, dst_offset + x); + + FT w1 = (FT)(weight1[weight1_index]), w2 = (FT)(weight2[weight2_index]); + FT den = w1 + w2 + (FT)(1e-5f); + FT num = w1 * convertToFT(src1[src1_index]) + w2 * convertToFT(src2[src2_index]); + + dst[dst_index] = convertToT(num / den); } } diff --git a/modules/ocl/test/test_blend.cpp b/modules/ocl/test/test_blend.cpp index 63693749db..a5a61d1799 100644 --- a/modules/ocl/test/test_blend.cpp +++ b/modules/ocl/test/test_blend.cpp @@ -47,73 +47,124 @@ using namespace cv; using namespace cv::ocl; -using namespace cvtest; using namespace testing; using namespace std; -#ifdef HAVE_OPENCL + template -void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold) +static void blendLinearGold(const Mat &img1, const Mat &img2, + const Mat &weights1, const Mat &weights2, + Mat &result_gold) { + CV_Assert(img1.size() == img2.size() && img1.type() == img2.type()); + CV_Assert(weights1.size() == weights2.size() && weights1.size() == img1.size() && + weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1); + result_gold.create(img1.size(), img1.type()); int cn = img1.channels(); + int step1 = img1.cols * img1.channels(); for (int y = 0; y < img1.rows; ++y) { - const float *weights1_row = weights1.ptr(y); - const float *weights2_row = weights2.ptr(y); - const T *img1_row = img1.ptr(y); - const T *img2_row = img2.ptr(y); - T *result_gold_row = result_gold.ptr(y); + const float * const weights1_row = weights1.ptr(y); + const float * const weights2_row = weights2.ptr(y); + const T * const img1_row = img1.ptr(y); + const T * const img2_row = img2.ptr(y); + T * const result_gold_row = result_gold.ptr(y); - for (int x = 0; x < img1.cols * cn; ++x) + for (int x = 0; x < step1; ++x) { - float w1 = weights1_row[x / cn]; - float w2 = weights2_row[x / cn]; - result_gold_row[x] = static_cast((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f)); + int x1 = x / cn; + float w1 = weights1_row[x1], w2 = weights2_row[x1]; + result_gold_row[x] = saturate_cast(((float)img1_row[x] * w1 + + (float)img2_row[x] * w2) / (w1 + w2 + 1e-5f)); } } } -PARAM_TEST_CASE(Blend, cv::Size, MatType/*, UseRoi*/) +PARAM_TEST_CASE(Blend, MatDepth, int, bool) { - cv::Size size; - int type; + int depth, channels; bool useRoi; + Mat src1, src2, weights1, weights2, dst; + Mat src1_roi, src2_roi, weights1_roi, weights2_roi, dst_roi; + oclMat gsrc1, gsrc2, gweights1, gweights2, gdst, gst; + oclMat gsrc1_roi, gsrc2_roi, gweights1_roi, gweights2_roi, gdst_roi; + virtual void SetUp() { - size = GET_PARAM(0); - type = GET_PARAM(1); + depth = GET_PARAM(0); + channels = GET_PARAM(1); + useRoi = GET_PARAM(2); + } + + void random_roi() + { + const int type = CV_MAKE_TYPE(depth, channels); + + const double upValue = 1200; + + Size roiSize = randomSize(1, 20); + Border src1Border = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(src1, src1_roi, roiSize, src1Border, type, -upValue, upValue); + + Border src2Border = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(src2, src2_roi, roiSize, src2Border, type, -upValue, upValue); + + Border weights1Border = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(weights1, weights1_roi, roiSize, weights1Border, CV_32FC1, -upValue, upValue); + + Border weights2Border = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(weights2, weights2_roi, roiSize, weights2Border, CV_32FC1, -upValue, upValue); + + Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 5, 16); + + generateOclMat(gsrc1, gsrc1_roi, src1, roiSize, src1Border); + generateOclMat(gsrc2, gsrc2_roi, src2, roiSize, src2Border); + generateOclMat(gweights1, gweights1_roi, weights1, roiSize, weights1Border); + generateOclMat(gweights2, gweights2_roi, weights2, roiSize, weights2Border); + generateOclMat(gdst, gdst_roi, dst, roiSize, dstBorder); + } + + void Near(double eps = 0.0) + { + Mat whole, roi; + gdst.download(whole); + gdst_roi.download(roi); + + EXPECT_MAT_NEAR(dst, whole, eps); + EXPECT_MAT_NEAR(dst_roi, roi, eps); } }; +typedef void (*blendLinearFunc)(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold); + OCL_TEST_P(Blend, Accuracy) { - int depth = CV_MAT_DEPTH(type); + for (int i = 0; i < LOOP_TIMES; ++i) + { + random_roi(); - cv::Mat img1 = randomMat(size, type, 0.0, depth == CV_8U ? 255.0 : 1.0); - cv::Mat img2 = randomMat(size, type, 0.0, depth == CV_8U ? 255.0 : 1.0); - cv::Mat weights1 = randomMat(size, CV_32F, 0, 1); - cv::Mat weights2 = randomMat(size, CV_32F, 0, 1); + cv::ocl::blendLinear(gsrc1_roi, gsrc2_roi, gweights1_roi, gweights2_roi, gdst_roi); - cv::ocl::oclMat gimg1(img1), gimg2(img2), gweights1(weights1), gweights2(weights2); - cv::ocl::oclMat dst; + static blendLinearFunc funcs[] = { + blendLinearGold, + blendLinearGold, + blendLinearGold, + blendLinearGold, + blendLinearGold, + blendLinearGold, + }; - cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst); - cv::Mat result; - cv::Mat result_gold; - dst.download(result); - if (depth == CV_8U) - blendLinearGold(img1, img2, weights1, weights2, result_gold); - else - blendLinearGold(img1, img2, weights1, weights2, result_gold); + blendLinearFunc func = funcs[depth]; + func(src1_roi, src2_roi, weights1_roi, weights2_roi, dst_roi); - EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1.f : 1e-5f); + Near(depth <= CV_32S ? 1.0 : 0.2); + } } -INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Blend, Combine( - DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)) - )); -#endif +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Blend, + Combine(testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F), + testing::Range(1, 5), Bool()));