mirror of
https://github.com/opencv/opencv.git
synced 2024-11-29 22:00:25 +08:00
new scheme of sqrSum
This commit is contained in:
parent
0ef16125ae
commit
653b99c9bd
@ -68,35 +68,47 @@ inline float normAcc_SQDIFF(float num, float denum)
|
||||
|
||||
#ifdef CALC_SUM
|
||||
|
||||
__kernel void calcSum(__global const uchar * templateptr, int template_step, int template_offset,
|
||||
int template_rows, int template_cols, __global float * result)
|
||||
__kernel void calcSum(__global const uchar * srcptr, int src_step, int src_offset,
|
||||
int cols, int total, __global float * dst)
|
||||
{
|
||||
__global const T * template = (__global const T *)(templateptr + template_offset);
|
||||
int lid = get_local_id(0), id = get_global_id(0);
|
||||
|
||||
WT res = (WT)(0);
|
||||
__local WT localmem[WGS2_ALIGNED];
|
||||
WT accumulator = (WT)(0), tmp;
|
||||
|
||||
for (int y = 0; y < template_rows; ++y)
|
||||
for ( ; id < total; id += WGS)
|
||||
{
|
||||
for (int x = 0; x < template_cols; ++x)
|
||||
{
|
||||
WT value = convertToWT(template[x]);
|
||||
#ifdef SUM_2
|
||||
#if wdepth == 4
|
||||
res = mad24(value, value, res);
|
||||
#else
|
||||
res = mad(value, value, res);
|
||||
#endif
|
||||
#elif defined SUM_1
|
||||
res += value;
|
||||
#else
|
||||
#error "No operation is specified"
|
||||
#endif
|
||||
}
|
||||
int src_index = mad24(id / cols, src_step, mad24(id % cols, (int)sizeof(T), src_offset));
|
||||
__global const T * src = (__global const T *)(srcptr + src_index);
|
||||
|
||||
template = (__global const T *)((__global const uchar *)template + template_step);
|
||||
tmp = convertToWT(src[0]);
|
||||
#if wdepth == 4
|
||||
accumulator = mad24(tmp, tmp, accumulator);
|
||||
#else
|
||||
accumulator = mad(tmp, tmp, accumulator);
|
||||
#endif
|
||||
}
|
||||
|
||||
result[0] = convertToDT(res);
|
||||
if (lid < WGS2_ALIGNED)
|
||||
localmem[lid] = accumulator;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (lid >= WGS2_ALIGNED && total >= WGS2_ALIGNED)
|
||||
localmem[lid - WGS2_ALIGNED] += accumulator;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for (int lsize = WGS2_ALIGNED >> 1; lsize > 0; lsize >>= 1)
|
||||
{
|
||||
if (lid < lsize)
|
||||
{
|
||||
int lid2 = lsize + lid;
|
||||
localmem[lid] += localmem[lid2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (lid == 0)
|
||||
dst[0] = convertToDT(localmem[0]);
|
||||
}
|
||||
|
||||
#elif defined CCORR
|
||||
|
@ -40,7 +40,6 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#define CV_OPENCL_RUN_ASSERT
|
||||
#include "opencl_kernels.hpp"
|
||||
|
||||
////////////////////////////////////////////////// matchTemplate //////////////////////////////////////////////////////////
|
||||
@ -57,28 +56,36 @@ enum
|
||||
SUM_1 = 0, SUM_2 = 1
|
||||
};
|
||||
|
||||
static bool sumTemplate(InputArray _templ, UMat & result, int sum_type)
|
||||
static bool sumTemplate(InputArray _src, UMat & result)
|
||||
{
|
||||
CV_Assert(sum_type == SUM_1 || sum_type == SUM_2);
|
||||
|
||||
int type = _templ.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
int wdepth = std::max(CV_32S, depth), wtype = CV_MAKE_TYPE(wdepth, cn);
|
||||
size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
|
||||
|
||||
int wgs2_aligned = 1;
|
||||
while (wgs2_aligned < (int)wgs)
|
||||
wgs2_aligned <<= 1;
|
||||
wgs2_aligned >>= 1;
|
||||
|
||||
char cvt[40];
|
||||
const char * const sumTypeToStr[] = { "SUM_1", "SUM_2" };
|
||||
ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc,
|
||||
format("-D CALC_SUM -D %s -D T=%s -D WT=%s -D convertToWT=%s -D cn=%d -D wdepth=%d",
|
||||
sumTypeToStr[sum_type], ocl::typeToStr(type), ocl::typeToStr(wtype),
|
||||
ocl::convertTypeStr(depth, wdepth, cn, cvt), cn, wdepth));
|
||||
format("-D CALC_SUM -D T=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d -D wdepth=%d",
|
||||
ocl::typeToStr(type), ocl::typeToStr(wtype), cn,
|
||||
ocl::convertTypeStr(depth, wdepth, cn, cvt),
|
||||
(int)wgs, wgs2_aligned, wdepth));
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
UMat src = _src.getUMat();
|
||||
result.create(1, 1, CV_32FC1);
|
||||
UMat templ = _templ.getUMat();
|
||||
|
||||
k.args(ocl::KernelArg::ReadOnly(templ), ocl::KernelArg::PtrWriteOnly(result));
|
||||
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
|
||||
resarg = ocl::KernelArg::PtrWriteOnly(result);
|
||||
|
||||
return k.runTask(false);
|
||||
k.args(srcarg, src.cols, (int)src.total(), resarg);
|
||||
|
||||
size_t globalsize = wgs;
|
||||
return k.run(1, &globalsize, &wgs, false);
|
||||
}
|
||||
|
||||
static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
|
||||
@ -123,7 +130,7 @@ static bool matchTemplate_CCORR_NORMED(InputArray _image, InputArray _templ, Out
|
||||
integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
|
||||
|
||||
UMat templ_sqsum;
|
||||
if (!sumTemplate(templ, templ_sqsum, SUM_2))
|
||||
if (!sumTemplate(templ, templ_sqsum))
|
||||
return false;
|
||||
|
||||
k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
|
||||
@ -177,7 +184,7 @@ static bool matchTemplate_SQDIFF_NORMED(InputArray _image, InputArray _templ, Ou
|
||||
integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
|
||||
|
||||
UMat templ_sqsum;
|
||||
if (!sumTemplate(_templ, templ_sqsum, SUM_2))
|
||||
if (!sumTemplate(_templ, templ_sqsum))
|
||||
return false;
|
||||
|
||||
k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
|
||||
|
Loading…
Reference in New Issue
Block a user