mirror of
https://github.com/opencv/opencv.git
synced 2025-01-06 02:08:12 +08:00
859 lines
26 KiB
Common Lisp
859 lines
26 KiB
Common Lisp
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
//
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
// If you do not agree to this license, do not download, install,
|
|
// copy or use the software.
|
|
//
|
|
//
|
|
// License Agreement
|
|
// For Open Source Computer Vision Library
|
|
//
|
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
|
// Third party copyrights are property of their respective owners.
|
|
//
|
|
// @Authors
|
|
// Peng Xiao, pengxiao@multicorewareinc.com
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
// are permitted provided that the following conditions are met:
|
|
//
|
|
// * Redistribution's of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimer.
|
|
//
|
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
// and/or other oclMaterials provided with the distribution.
|
|
//
|
|
// * The name of the copyright holders may not be used to endorse or promote products
|
|
// derived from this software without specific prior written permission.
|
|
//
|
|
// This software is provided by the copyright holders and contributors as is and
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
//
|
|
//M*/
|
|
|
|
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
|
|
|
#if defined (DOUBLE_SUPPORT)
|
|
|
|
#ifdef cl_khr_fp64
|
|
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
|
#elif defined (cl_amd_fp64)
|
|
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
|
#endif
|
|
|
|
#define TYPE_IMAGE_SQSUM double
|
|
#else
|
|
#define TYPE_IMAGE_SQSUM float
|
|
#endif
|
|
|
|
#ifndef CN4
|
|
#define CN4 1
|
|
#else
|
|
#define CN4 4
|
|
#endif
|
|
|
|
//////////////////////////////////////////////////
|
|
// utilities
|
|
#define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, (gidx + img_sqsums_offset + ox) * CN4)
|
|
#define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox)
|
|
// normAcc* are accurate normalization routines which make GPU matchTemplate
|
|
// consistent with CPU one
|
|
float normAcc(float num, float denum)
|
|
{
|
|
if(fabs(num) < denum)
|
|
{
|
|
return num / denum;
|
|
}
|
|
if(fabs(num) < denum * 1.125f)
|
|
{
|
|
return num > 0 ? 1 : -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
float normAcc_SQDIFF(float num, float denum)
|
|
{
|
|
if(fabs(num) < denum)
|
|
{
|
|
return num / denum;
|
|
}
|
|
if(fabs(num) < denum * 1.125f)
|
|
{
|
|
return num > 0 ? 1 : -1;
|
|
}
|
|
return 1;
|
|
}
|
|
//////////////////////////////////////////////////////////////////////
|
|
// normalize
|
|
|
|
__kernel
|
|
void normalizeKernel_C1_D0
|
|
(
|
|
__global const float * img_sqsums,
|
|
__global float * res,
|
|
ulong tpl_sqsum,
|
|
int res_rows,
|
|
int res_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int img_sqsums_offset,
|
|
int img_sqsums_step,
|
|
int res_offset,
|
|
int res_step
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
img_sqsums_step /= sizeof(*img_sqsums);
|
|
img_sqsums_offset /= sizeof(*img_sqsums);
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
float image_sqsum_ = (float)(
|
|
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
|
|
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
|
|
res[res_idx] = normAcc(res[res_idx], sqrt(image_sqsum_ * tpl_sqsum));
|
|
}
|
|
}
|
|
|
|
__kernel
|
|
void matchTemplate_Prepared_SQDIFF_C1_D0
|
|
(
|
|
__global const TYPE_IMAGE_SQSUM * img_sqsums,
|
|
__global float * res,
|
|
ulong tpl_sqsum,
|
|
int res_rows,
|
|
int res_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int img_sqsums_offset,
|
|
int img_sqsums_step,
|
|
int res_offset,
|
|
int res_step
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
img_sqsums_step /= sizeof(*img_sqsums);
|
|
img_sqsums_offset /= sizeof(*img_sqsums);
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
float image_sqsum_ = (float)(
|
|
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
|
|
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
|
|
res[res_idx] = image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum;
|
|
}
|
|
}
|
|
|
|
__kernel
|
|
void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
|
|
(
|
|
__global const float * img_sqsums,
|
|
__global float * res,
|
|
ulong tpl_sqsum,
|
|
int res_rows,
|
|
int res_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int img_sqsums_offset,
|
|
int img_sqsums_step,
|
|
int res_offset,
|
|
int res_step
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
img_sqsums_step /= sizeof(*img_sqsums);
|
|
img_sqsums_offset /= sizeof(*img_sqsums);
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
float image_sqsum_ = (float)(
|
|
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
|
|
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
|
|
res[res_idx] = normAcc_SQDIFF(image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum,
|
|
sqrt(image_sqsum_ * tpl_sqsum));
|
|
}
|
|
}
|
|
|
|
//////////////////////////////////////////////////
|
|
// SQDIFF
|
|
__kernel
|
|
void matchTemplate_Naive_SQDIFF_C1_D0
|
|
(
|
|
__global const uchar * img,
|
|
__global const uchar * tpl,
|
|
__global float * res,
|
|
int img_rows,
|
|
int img_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int res_rows,
|
|
int res_cols,
|
|
int img_offset,
|
|
int tpl_offset,
|
|
int res_offset,
|
|
int img_step,
|
|
int tpl_step,
|
|
int res_step
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
int i,j;
|
|
int delta;
|
|
int sum = 0;
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
for(i = 0; i < tpl_rows; i ++)
|
|
{
|
|
// get specific rows of img data
|
|
__global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
|
__global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
|
for(j = 0; j < tpl_cols; j ++)
|
|
{
|
|
delta = img_ptr[j] - tpl_ptr[j];
|
|
sum = mad24(delta, delta, sum);
|
|
}
|
|
}
|
|
res[res_idx] = sum;
|
|
}
|
|
}
|
|
|
|
__kernel
|
|
void matchTemplate_Naive_SQDIFF_C1_D5
|
|
(
|
|
__global const float * img,
|
|
__global const float * tpl,
|
|
__global float * res,
|
|
int img_rows,
|
|
int img_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int res_rows,
|
|
int res_cols,
|
|
int img_offset,
|
|
int tpl_offset,
|
|
int res_offset,
|
|
int img_step,
|
|
int tpl_step,
|
|
int res_step
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
int i,j;
|
|
float delta;
|
|
float sum = 0;
|
|
img_step /= sizeof(*img);
|
|
img_offset /= sizeof(*img);
|
|
tpl_step /= sizeof(*tpl);
|
|
tpl_offset /= sizeof(*tpl);
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
for(i = 0; i < tpl_rows; i ++)
|
|
{
|
|
// get specific rows of img data
|
|
__global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
|
__global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
|
for(j = 0; j < tpl_cols; j ++)
|
|
{
|
|
delta = img_ptr[j] - tpl_ptr[j];
|
|
sum = mad(delta, delta, sum);
|
|
}
|
|
}
|
|
res[res_idx] = sum;
|
|
}
|
|
}
|
|
|
|
__kernel
|
|
void matchTemplate_Naive_SQDIFF_C4_D0
|
|
(
|
|
__global const uchar4 * img,
|
|
__global const uchar4 * tpl,
|
|
__global float * res,
|
|
int img_rows,
|
|
int img_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int res_rows,
|
|
int res_cols,
|
|
int img_offset,
|
|
int tpl_offset,
|
|
int res_offset,
|
|
int img_step,
|
|
int tpl_step,
|
|
int res_step
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
int i,j;
|
|
int4 delta;
|
|
int4 sum = (int4)(0, 0, 0, 0);
|
|
img_step /= sizeof(*img);
|
|
img_offset /= sizeof(*img);
|
|
tpl_step /= sizeof(*tpl);
|
|
tpl_offset /= sizeof(*tpl);
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
for(i = 0; i < tpl_rows; i ++)
|
|
{
|
|
// get specific rows of img data
|
|
__global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
|
__global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
|
for(j = 0; j < tpl_cols; j ++)
|
|
{
|
|
//delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect
|
|
delta.x = img_ptr[j].x - tpl_ptr[j].x;
|
|
delta.y = img_ptr[j].y - tpl_ptr[j].y;
|
|
delta.z = img_ptr[j].z - tpl_ptr[j].z;
|
|
delta.w = img_ptr[j].w - tpl_ptr[j].w;
|
|
sum = mad24(delta, delta, sum);
|
|
}
|
|
}
|
|
res[res_idx] = sum.x + sum.y + sum.z + sum.w;
|
|
}
|
|
}
|
|
|
|
__kernel
|
|
void matchTemplate_Naive_SQDIFF_C4_D5
|
|
(
|
|
__global const float4 * img,
|
|
__global const float4 * tpl,
|
|
__global float * res,
|
|
int img_rows,
|
|
int img_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int res_rows,
|
|
int res_cols,
|
|
int img_offset,
|
|
int tpl_offset,
|
|
int res_offset,
|
|
int img_step,
|
|
int tpl_step,
|
|
int res_step
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
int i,j;
|
|
float4 delta;
|
|
float4 sum = (float4)(0, 0, 0, 0);
|
|
img_step /= sizeof(*img);
|
|
img_offset /= sizeof(*img);
|
|
tpl_step /= sizeof(*tpl);
|
|
tpl_offset /= sizeof(*tpl);
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
for(i = 0; i < tpl_rows; i ++)
|
|
{
|
|
// get specific rows of img data
|
|
__global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
|
__global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
|
for(j = 0; j < tpl_cols; j ++)
|
|
{
|
|
//delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect
|
|
delta.x = img_ptr[j].x - tpl_ptr[j].x;
|
|
delta.y = img_ptr[j].y - tpl_ptr[j].y;
|
|
delta.z = img_ptr[j].z - tpl_ptr[j].z;
|
|
delta.w = img_ptr[j].w - tpl_ptr[j].w;
|
|
sum = mad(delta, delta, sum);
|
|
}
|
|
}
|
|
res[res_idx] = sum.x + sum.y + sum.z + sum.w;
|
|
}
|
|
}
|
|
|
|
//////////////////////////////////////////////////
|
|
// CCORR
|
|
__kernel
|
|
void matchTemplate_Naive_CCORR_C1_D0
|
|
(
|
|
__global const uchar * img,
|
|
__global const uchar * tpl,
|
|
__global float * res,
|
|
int img_rows,
|
|
int img_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int res_rows,
|
|
int res_cols,
|
|
int img_offset,
|
|
int tpl_offset,
|
|
int res_offset,
|
|
int img_step,
|
|
int tpl_step,
|
|
int res_step
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
int i,j;
|
|
int sum = 0;
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
for(i = 0; i < tpl_rows; i ++)
|
|
{
|
|
// get specific rows of img data
|
|
__global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
|
__global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
|
for(j = 0; j < tpl_cols; j ++)
|
|
{
|
|
sum = mad24(img_ptr[j], tpl_ptr[j], sum);
|
|
}
|
|
}
|
|
res[res_idx] = sum;
|
|
}
|
|
}
|
|
|
|
__kernel
|
|
void matchTemplate_Naive_CCORR_C1_D5
|
|
(
|
|
__global const float * img,
|
|
__global const float * tpl,
|
|
__global float * res,
|
|
int img_rows,
|
|
int img_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int res_rows,
|
|
int res_cols,
|
|
int img_offset,
|
|
int tpl_offset,
|
|
int res_offset,
|
|
int img_step,
|
|
int tpl_step,
|
|
int res_step
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
int i,j;
|
|
float sum = 0;
|
|
img_step /= sizeof(*img);
|
|
img_offset /= sizeof(*img);
|
|
tpl_step /= sizeof(*tpl);
|
|
tpl_offset /= sizeof(*tpl);
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
for(i = 0; i < tpl_rows; i ++)
|
|
{
|
|
// get specific rows of img data
|
|
__global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
|
__global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
|
for(j = 0; j < tpl_cols; j ++)
|
|
{
|
|
sum = mad(img_ptr[j], tpl_ptr[j], sum);
|
|
}
|
|
}
|
|
res[res_idx] = sum;
|
|
}
|
|
}
|
|
|
|
__kernel
|
|
void matchTemplate_Naive_CCORR_C4_D0
|
|
(
|
|
__global const uchar4 * img,
|
|
__global const uchar4 * tpl,
|
|
__global float * res,
|
|
int img_rows,
|
|
int img_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int res_rows,
|
|
int res_cols,
|
|
int img_offset,
|
|
int tpl_offset,
|
|
int res_offset,
|
|
int img_step,
|
|
int tpl_step,
|
|
int res_step
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
int i,j;
|
|
int4 sum = (int4)(0, 0, 0, 0);
|
|
img_step /= sizeof(*img);
|
|
img_offset /= sizeof(*img);
|
|
tpl_step /= sizeof(*tpl);
|
|
tpl_offset /= sizeof(*tpl);
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
for(i = 0; i < tpl_rows; i ++)
|
|
{
|
|
// get specific rows of img data
|
|
__global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
|
__global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
|
for(j = 0; j < tpl_cols; j ++)
|
|
{
|
|
sum = mad24(convert_int4(img_ptr[j]), convert_int4(tpl_ptr[j]), sum);
|
|
}
|
|
}
|
|
res[res_idx] = sum.x + sum.y + sum.z + sum.w;
|
|
}
|
|
}
|
|
|
|
__kernel
|
|
void matchTemplate_Naive_CCORR_C4_D5
|
|
(
|
|
__global const float4 * img,
|
|
__global const float4 * tpl,
|
|
__global float * res,
|
|
int img_rows,
|
|
int img_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int res_rows,
|
|
int res_cols,
|
|
int img_offset,
|
|
int tpl_offset,
|
|
int res_offset,
|
|
int img_step,
|
|
int tpl_step,
|
|
int res_step
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
int i,j;
|
|
float4 sum = (float4)(0, 0, 0, 0);
|
|
img_step /= sizeof(*img);
|
|
img_offset /= sizeof(*img);
|
|
tpl_step /= sizeof(*tpl);
|
|
tpl_offset /= sizeof(*tpl);
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
for(i = 0; i < tpl_rows; i ++)
|
|
{
|
|
// get specific rows of img data
|
|
__global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset);
|
|
__global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
|
|
for(j = 0; j < tpl_cols; j ++)
|
|
{
|
|
sum = mad(convert_float4(img_ptr[j]), convert_float4(tpl_ptr[j]), sum);
|
|
}
|
|
}
|
|
res[res_idx] = sum.x + sum.y + sum.z + sum.w;
|
|
}
|
|
}
|
|
|
|
//////////////////////////////////////////////////
|
|
// CCOFF
|
|
__kernel
|
|
void matchTemplate_Prepared_CCOFF_C1_D0
|
|
(
|
|
__global float * res,
|
|
int img_rows,
|
|
int img_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int res_rows,
|
|
int res_cols,
|
|
int res_offset,
|
|
int res_step,
|
|
__global const uint * img_sums,
|
|
int img_sums_offset,
|
|
int img_sums_step,
|
|
float tpl_sum
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
|
|
img_sums_offset /= sizeof(*img_sums);
|
|
img_sums_step /= sizeof(*img_sums);
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
float sum = (float)(
|
|
(img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)])
|
|
- (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)]));
|
|
res[res_idx] -= sum * tpl_sum;
|
|
}
|
|
}
|
|
__kernel
|
|
void matchTemplate_Prepared_CCOFF_C4_D0
|
|
(
|
|
__global float * res,
|
|
int img_rows,
|
|
int img_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int res_rows,
|
|
int res_cols,
|
|
int res_offset,
|
|
int res_step,
|
|
__global const uint * img_sums_c0,
|
|
__global const uint * img_sums_c1,
|
|
__global const uint * img_sums_c2,
|
|
__global const uint * img_sums_c3,
|
|
int img_sums_offset,
|
|
int img_sums_step,
|
|
float tpl_sum_c0,
|
|
float tpl_sum_c1,
|
|
float tpl_sum_c2,
|
|
float tpl_sum_c3
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
|
|
img_sums_offset /= sizeof(*img_sums_c0);
|
|
img_sums_step /= sizeof(*img_sums_c0);
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
float ccorr = res[res_idx];
|
|
ccorr -= tpl_sum_c0*(float)(
|
|
(img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)])
|
|
- (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)]));
|
|
ccorr -= tpl_sum_c1*(float)(
|
|
(img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)])
|
|
- (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)]));
|
|
ccorr -= tpl_sum_c2*(float)(
|
|
(img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)])
|
|
- (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)]));
|
|
ccorr -= tpl_sum_c3*(float)(
|
|
(img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)])
|
|
- (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)]));
|
|
res[res_idx] = ccorr;
|
|
}
|
|
}
|
|
|
|
__kernel
|
|
void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
|
|
(
|
|
__global float * res,
|
|
int img_rows,
|
|
int img_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int res_rows,
|
|
int res_cols,
|
|
int res_offset,
|
|
int res_step,
|
|
float weight,
|
|
__global const uint * img_sums,
|
|
int img_sums_offset,
|
|
int img_sums_step,
|
|
__global const float * img_sqsums,
|
|
int img_sqsums_offset,
|
|
int img_sqsums_step,
|
|
float tpl_sum,
|
|
float tpl_sqsum
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
|
|
img_sqsums_step /= sizeof(*img_sqsums);
|
|
img_sqsums_offset /= sizeof(*img_sqsums);
|
|
img_sums_offset /= sizeof(*img_sums);
|
|
img_sums_step /= sizeof(*img_sums);
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
|
|
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
float image_sum_ = (float)(
|
|
(img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)])
|
|
- (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)]));
|
|
|
|
float image_sqsum_ = (float)(
|
|
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) -
|
|
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)]));
|
|
res[res_idx] = normAcc(res[res_idx] - image_sum_ * tpl_sum,
|
|
sqrt(tpl_sqsum * (image_sqsum_ - weight * image_sum_ * image_sum_)));
|
|
}
|
|
}
|
|
__kernel
|
|
void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
|
|
(
|
|
__global float * res,
|
|
int img_rows,
|
|
int img_cols,
|
|
int tpl_rows,
|
|
int tpl_cols,
|
|
int res_rows,
|
|
int res_cols,
|
|
int res_offset,
|
|
int res_step,
|
|
float weight,
|
|
__global const uint * img_sums_c0,
|
|
__global const uint * img_sums_c1,
|
|
__global const uint * img_sums_c2,
|
|
__global const uint * img_sums_c3,
|
|
int img_sums_offset,
|
|
int img_sums_step,
|
|
__global const float * img_sqsums_c0,
|
|
__global const float * img_sqsums_c1,
|
|
__global const float * img_sqsums_c2,
|
|
__global const float * img_sqsums_c3,
|
|
int img_sqsums_offset,
|
|
int img_sqsums_step,
|
|
float tpl_sum_c0,
|
|
float tpl_sum_c1,
|
|
float tpl_sum_c2,
|
|
float tpl_sum_c3,
|
|
float tpl_sqsum
|
|
)
|
|
{
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
|
|
img_sqsums_step /= sizeof(*img_sqsums_c0);
|
|
img_sqsums_offset /= sizeof(*img_sqsums_c0);
|
|
img_sums_offset /= sizeof(*img_sums_c0);
|
|
img_sums_step /= sizeof(*img_sums_c0);
|
|
res_step /= sizeof(*res);
|
|
res_offset /= sizeof(*res);
|
|
|
|
int res_idx = mad24(gidy, res_step, res_offset + gidx);
|
|
|
|
if(gidx < res_cols && gidy < res_rows)
|
|
{
|
|
float image_sum_c0 = (float)(
|
|
(img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)])
|
|
- (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)]));
|
|
float image_sum_c1 = (float)(
|
|
(img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)])
|
|
- (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)]));
|
|
float image_sum_c2 = (float)(
|
|
(img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)])
|
|
- (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)]));
|
|
float image_sum_c3 = (float)(
|
|
(img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)])
|
|
- (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)]));
|
|
|
|
float image_sqsum_c0 = (float)(
|
|
(img_sqsums_c0[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(tpl_cols, 0)]) -
|
|
(img_sqsums_c0[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(0, 0)]));
|
|
float image_sqsum_c1 = (float)(
|
|
(img_sqsums_c1[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(tpl_cols, 0)]) -
|
|
(img_sqsums_c1[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(0, 0)]));
|
|
float image_sqsum_c2 = (float)(
|
|
(img_sqsums_c2[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(tpl_cols, 0)]) -
|
|
(img_sqsums_c2[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(0, 0)]));
|
|
float image_sqsum_c3 = (float)(
|
|
(img_sqsums_c3[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(tpl_cols, 0)]) -
|
|
(img_sqsums_c3[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(0, 0)]));
|
|
|
|
float num = res[res_idx] -
|
|
image_sum_c0 * tpl_sum_c0 -
|
|
image_sum_c1 * tpl_sum_c1 -
|
|
image_sum_c2 * tpl_sum_c2 -
|
|
image_sum_c3 * tpl_sum_c3;
|
|
float denum = sqrt( tpl_sqsum * (
|
|
image_sqsum_c0 - weight * image_sum_c0 * image_sum_c0 +
|
|
image_sqsum_c1 - weight * image_sum_c1 * image_sum_c1 +
|
|
image_sqsum_c2 - weight * image_sum_c2 * image_sum_c2 +
|
|
image_sqsum_c3 - weight * image_sum_c0 * image_sum_c3)
|
|
);
|
|
res[res_idx] = normAcc(num, denum);
|
|
}
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
// extractFirstChannel
|
|
__kernel
|
|
void extractFirstChannel
|
|
(
|
|
const __global float4* img,
|
|
__global float* res,
|
|
int rows,
|
|
int cols,
|
|
int img_offset,
|
|
int res_offset,
|
|
int img_step,
|
|
int res_step
|
|
)
|
|
{
|
|
img_step /= sizeof(float4);
|
|
res_step /= sizeof(float);
|
|
img_offset /= sizeof(float4);
|
|
res_offset /= sizeof(float);
|
|
img += img_offset;
|
|
res += res_offset;
|
|
int gidx = get_global_id(0);
|
|
int gidy = get_global_id(1);
|
|
if(gidx < cols && gidy < rows)
|
|
{
|
|
res[gidx + gidy * res_step] = img[gidx + gidy * img_step].x;
|
|
}
|
|
}
|