mirror of
https://github.com/opencv/opencv.git
synced 2024-11-25 03:30:34 +08:00
Merge pull request #883 from bitwangyaoyao:2.4_fixMoments
This commit is contained in:
commit
87765c0f16
@ -327,7 +327,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
|
||||
mom->m12 = dstsum[8];
|
||||
mom->m03 = dstsum[9];
|
||||
delete [] dstsum;
|
||||
|
||||
openCLSafeCall(clReleaseMemObject(sum));
|
||||
icvCompleteMomentState( mom );
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,48 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Sen Liu, swjtuls1987@126.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if defined (DOUBLE_SUPPORT)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
@ -609,22 +654,33 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
|
||||
int y = wgidy*TILE_SIZE; // real Y index of pixel
|
||||
int x = wgidx*TILE_SIZE; // real X index of pixel
|
||||
int kcn = (cn==2)?2:4;
|
||||
int rstep = min(src_step/4, TILE_SIZE);
|
||||
src_step /= sizeof(*src_data);
|
||||
int rstep = min(src_step, TILE_SIZE);
|
||||
tileSize_height = min(TILE_SIZE, src_rows - y);
|
||||
tileSize_width = min(TILE_SIZE, src_cols -x);
|
||||
if(tileSize_width < TILE_SIZE)
|
||||
for(int i = tileSize_width; i < rstep; i++ )
|
||||
*((__global float*)src_data+(y+lidy)*src_step/4+x+i) = 0;
|
||||
int maxIdx = mul24(src_rows, src_cols);
|
||||
int yOff = (y+lidy)*src_step;
|
||||
int index;
|
||||
if(tileSize_width < TILE_SIZE && yOff < src_rows)
|
||||
for(int i = tileSize_width; i < rstep && (yOff+x+i) < maxIdx; i++ )
|
||||
*(src_data+yOff+x+i) = 0;
|
||||
if( coi > 0 )
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_F)
|
||||
{
|
||||
#pragma unroll
|
||||
for(int j=0; j<4; j++)
|
||||
tmp_coi[j] = *(src_data+(y+lidy)*src_step/4+(x+i+j)*kcn+coi-1);
|
||||
{
|
||||
index = yOff+(x+i+j)*kcn+coi-1;
|
||||
if (index < maxIdx)
|
||||
tmp_coi[j] = *(src_data+index);
|
||||
else
|
||||
tmp_coi[j] = 0;
|
||||
}
|
||||
tmp[i/VLEN_F] = (float4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]);
|
||||
}
|
||||
else
|
||||
for(int i=0; i < tileSize_width; i+=VLEN_F)
|
||||
tmp[i/VLEN_F] = (float4)(*(src_data+(y+lidy)*src_step/4+x+i),*(src_data+(y+lidy)*src_step/4+x+i+1),*(src_data+(y+lidy)*src_step/4+x+i+2),*(src_data+(y+lidy)*src_step/4+x+i+3));
|
||||
for(int i=0; i < tileSize_width && (yOff+x+i) < maxIdx; i+=VLEN_F)
|
||||
tmp[i/VLEN_F] = (*(__global float4 *)(src_data+yOff+x+i));
|
||||
float4 zero = (float4)(0);
|
||||
float4 full = (float4)(255);
|
||||
if( binary )
|
||||
@ -714,35 +770,59 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
|
||||
// accumulate moments computed in each tile
|
||||
dst_step /= sizeof(F);
|
||||
|
||||
int dst_x_off = mad24(wgidy, dst_cols, wgidx);
|
||||
int dst_off = 0;
|
||||
int max_dst_index = 10 * blocky * get_global_size(1);
|
||||
|
||||
// + m00 ( = m00' )
|
||||
*(dst_m + mad24(DST_ROW_00 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[0];
|
||||
dst_off = mad24(DST_ROW_00 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[0];
|
||||
|
||||
// + m10 ( = m10' + x*m00' )
|
||||
*(dst_m + mad24(DST_ROW_10 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[1] + xm;
|
||||
dst_off = mad24(DST_ROW_10 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[1] + xm;
|
||||
|
||||
// + m01 ( = m01' + y*m00' )
|
||||
*(dst_m + mad24(DST_ROW_01 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[2] + ym;
|
||||
dst_off = mad24(DST_ROW_01 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[2] + ym;
|
||||
|
||||
// + m20 ( = m20' + 2*x*m10' + x*x*m00' )
|
||||
*(dst_m + mad24(DST_ROW_20 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[3] + x * (mom[1] * 2 + xm);
|
||||
dst_off = mad24(DST_ROW_20 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[3] + x * (mom[1] * 2 + xm);
|
||||
|
||||
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
|
||||
*(dst_m + mad24(DST_ROW_11 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[4] + x * (mom[2] + ym) + y * mom[1];
|
||||
dst_off = mad24(DST_ROW_11 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[4] + x * (mom[2] + ym) + y * mom[1];
|
||||
|
||||
// + m02 ( = m02' + 2*y*m01' + y*y*m00' )
|
||||
*(dst_m + mad24(DST_ROW_02 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[5] + y * (mom[2] * 2 + ym);
|
||||
dst_off = mad24(DST_ROW_02 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[5] + y * (mom[2] * 2 + ym);
|
||||
|
||||
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
|
||||
*(dst_m + mad24(DST_ROW_30 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
|
||||
dst_off = mad24(DST_ROW_30 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
|
||||
|
||||
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
|
||||
*(dst_m + mad24(DST_ROW_21 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
|
||||
dst_off = mad24(DST_ROW_21 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
|
||||
|
||||
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
|
||||
*(dst_m + mad24(DST_ROW_12 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
|
||||
dst_off = mad24(DST_ROW_12 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
|
||||
|
||||
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
|
||||
*(dst_m + mad24(DST_ROW_03 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
|
||||
dst_off = mad24(DST_ROW_03 * blocky, dst_step, dst_x_off);
|
||||
if (dst_off < max_dst_index)
|
||||
*(dst_m + dst_off) = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user