diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index a60eb36302..88be72358f 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -1773,6 +1773,8 @@ namespace cv const oclMat &bu, const oclMat &bv, float pos, oclMat &newFrame, oclMat &buf); + //! computes moments of the rasterized shape or a vector of points + CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage); } } #if defined _MSC_VER && _MSC_VER >= 1200 diff --git a/modules/ocl/src/kernels/moments.cl b/modules/ocl/src/kernels/moments.cl new file mode 100644 index 0000000000..bd3001eeef --- /dev/null +++ b/modules/ocl/src/kernels/moments.cl @@ -0,0 +1,938 @@ +#if defined (DOUBLE_SUPPORT) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#else +typedef float double; +typedef float4 double4; +#define convert_double4 convert_float4 +#endif +//#pragma OPENCL EXTENSION cl_amd_printf:enable +//#if defined (DOUBLE_SUPPORT) +__kernel void icvContourMoments(int contour_total, + __global float* reader_oclmat_data, + __global double* dst_a00, + __global double* dst_a10, + __global double* dst_a01, + __global double* dst_a20, + __global double* dst_a11, + __global double* dst_a02, + __global double* dst_a30, + __global double* dst_a21, + __global double* dst_a12, + __global double* dst_a03) +{ + double xi_1, yi_1, xi_12, yi_12, xi, yi, xi2, yi2, dxy, xii_1, yii_1; + int idx = get_global_id(0); + + xi_1 = *(reader_oclmat_data + (get_global_id(0) << 1)); + yi_1 = *(reader_oclmat_data + (get_global_id(0) << 1) + 1); + xi_12 = xi_1 * xi_1; + yi_12 = yi_1 * yi_1; + + if(idx == contour_total - 1) + { + xi = *(reader_oclmat_data); + yi = *(reader_oclmat_data + 1); + } + else + { + xi = *(reader_oclmat_data + (idx + 1) * 2); + yi = *(reader_oclmat_data + (idx + 1) * 2 + 1); + } + + xi2 = xi * xi; + yi2 = yi * yi; + dxy = xi_1 * yi - xi * yi_1; + xii_1 = xi_1 + xi; + yii_1 = yi_1 + yi; + + dst_a00[idx] = dxy; + dst_a10[idx] = dxy * xii_1; + dst_a01[idx] = dxy * yii_1; + dst_a20[idx] = dxy * (xi_1 * xii_1 + xi2); + dst_a11[idx] = dxy * (xi_1 * (yii_1 + yi_1) + xi * (yii_1 + yi)); + dst_a02[idx] = dxy * (yi_1 * yii_1 + yi2); + dst_a30[idx] = dxy * xii_1 * (xi_12 + xi2); + dst_a03[idx] = dxy * yii_1 * (yi_12 + yi2); + dst_a21[idx] = + dxy * (xi_12 * (3 * yi_1 + yi) + 2 * xi * xi_1 * yii_1 + + xi2 * (yi_1 + 3 * yi)); + dst_a12[idx] = + dxy * (yi_12 * (3 * xi_1 + xi) + 2 * yi * yi_1 * xii_1 + + yi2 * (xi_1 + 3 * xi)); +} +//#endif + +//#if defined (DOUBLE_SUPPORT) +__kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height, + __global double* dst_m00, + __global double* dst_m10, + __global double* dst_m01, + __global double* dst_m20, + __global double* dst_m11, + __global double* dst_m02, + __global double* dst_m30, + __global double* dst_m21, + __global double* dst_m12, + __global double* dst_m03, + int dst_cols, int dst_step, int type, int depth, int cn, int coi, int binary, int TILE_SIZE) +{ + uchar tmp_coi[16]; // get the coi data + uchar16 tmp[16]; + int VLEN_C = 16; // vector length of uchar + + int gidy = get_global_id(0); + int gidx = get_global_id(1); + int wgidy = get_group_id(0); + int wgidx = get_group_id(1); + int lidy = get_local_id(0); + int lidx = get_local_id(1); + int y = wgidy*TILE_SIZE; // vector length of uchar + int x = wgidx*TILE_SIZE; // vector length of uchar + int kcn = (cn==2)?2:4; + int rstep = min(src_step, TILE_SIZE); + tileSize_height = min(TILE_SIZE, src_rows - y); + tileSize_width = min(TILE_SIZE, src_cols - x); + + if( tileSize_width < TILE_SIZE ) + for(int i = tileSize_width; i < rstep; i++ ) + *((__global uchar*)src_data+(y+lidy)*src_step+x+i) = 0; + if( coi > 0 ) //channel of interest + for(int i = 0; i < tileSize_width; i += VLEN_C) + { + for(int j=0; j= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height) + { + m[9][lidy-bheight] = ((int)py) * sy; // m03 + m[8][lidy-bheight] = ((int)x1.s0) * sy; // m12 + m[7][lidy-bheight] = ((int)x2.s0) * lidy; // m21 + m[6][lidy-bheight] = x3.s0; // m30 + m[5][lidy-bheight] = x0.s0 * sy; // m02 + m[4][lidy-bheight] = x1.s0 * lidy; // m11 + m[3][lidy-bheight] = x2.s0; // m20 + m[2][lidy-bheight] = py; // m01 + m[1][lidy-bheight] = x1.s0; // m10 + m[0][lidy-bheight] = x0.s0; // m00 + } + else if(lidy < bheight) + { + lm[9] = ((int)py) * sy; // m03 + lm[8] = ((int)x1.s0) * sy; // m12 + lm[7] = ((int)x2.s0) * lidy; // m21 + lm[6] = x3.s0; // m30 + lm[5] = x0.s0 * sy; // m02 + lm[4] = x1.s0 * lidy; // m11 + lm[3] = x2.s0; // m20 + lm[2] = py; // m01 + lm[1] = x1.s0; // m10 + lm[0] = x0.s0; // m00 + } + barrier(CLK_LOCAL_MEM_FENCE); + for( int j = bheight; j >= 1; j = j/2 ) + { + if(lidy < j) + for( int i = 0; i < 10; i++ ) + lm[i] = lm[i] + m[i][lidy]; + barrier(CLK_LOCAL_MEM_FENCE); + if(lidy >= j/2&&lidy < j) + for( int i = 0; i < 10; i++ ) + m[i][lidy-j/2] = lm[i]; + barrier(CLK_LOCAL_MEM_FENCE); + } + if(lidy == 0&&lidx == 0) + { + for( int mt = 0; mt < 10; mt++ ) + mom[mt] = (double)lm[mt]; + if(binary) + { + double s = 1./255; + for( int mt = 0; mt < 10; mt++ ) + mom[mt] *= s; + } + double xm = x * mom[0], ym = y * mom[0]; + + // accumulate moments computed in each tile + + // + m00 ( = m00' ) + dst_m00[wgidy*dst_cols+wgidx] = mom[0]; + + // + m10 ( = m10' + x*m00' ) + dst_m10[wgidy*dst_cols+wgidx] = mom[1] + xm; + + // + m01 ( = m01' + y*m00' ) + dst_m01[wgidy*dst_cols+wgidx] = mom[2] + ym; + + // + m20 ( = m20' + 2*x*m10' + x*x*m00' ) + dst_m20[wgidy*dst_cols+wgidx] = mom[3] + x * (mom[1] * 2 + xm); + + // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' ) + dst_m11[wgidy*dst_cols+wgidx] = mom[4] + x * (mom[2] + ym) + y * mom[1]; + + // + m02 ( = m02' + 2*y*m01' + y*y*m00' ) + dst_m02[wgidy*dst_cols+wgidx] = mom[5] + y * (mom[2] * 2 + ym); + + // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' ) + dst_m30[wgidy*dst_cols+wgidx] = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm)); + + // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20') + dst_m21[wgidy*dst_cols+wgidx] = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3]; + + // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02') + dst_m12[wgidy*dst_cols+wgidx] = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5]; + + // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' ) + dst_m03[wgidy*dst_cols+wgidx] = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym)); + } +} +//#endif +//#if defined (DOUBLE_SUPPORT) +__kernel void dst_sum(int src_rows, int src_cols, int tile_height, int tile_width, int TILE_SIZE, __global double* sum, __global double* dst_m00, + __global double* dst_m10, + __global double* dst_m01, + __global double* dst_m20, + __global double* dst_m11, + __global double* dst_m02, + __global double* dst_m30, + __global double* dst_m21, + __global double* dst_m12, + __global double* dst_m03) +{ + int gidy = get_global_id(0); + int gidx = get_global_id(1); + int block_y = src_rows/tile_height; + int block_x = src_cols/tile_width; + int block_num; + + if(src_rows > TILE_SIZE && src_rows % TILE_SIZE != 0) + block_y ++; + if(src_cols > TILE_SIZE && src_cols % TILE_SIZE != 0) + block_x ++; + block_num = block_y * block_x; + __local double dst_sum[10][128]; + if(gidy<128-block_num) + for(int i=0; i<10; i++) + dst_sum[i][gidy+block_num]=0; + barrier(CLK_LOCAL_MEM_FENCE); + if(gidy0; lsize>>=1) + { + if(gidy TILE_SIZE && tileSize_width < TILE_SIZE) + for(int i=tileSize_width; i < rstep; i++ ) + *((__global ushort*)src_data+(y+lidy)*src_step/2+x+i) = 0; + if( coi > 0 ) + for(int i=0; i < tileSize_width; i+=VLEN_US) + { + for(int j=0; j= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height) + { + m[9][lidy-bheight] = ((long)py) * sy; // m03 + m[8][lidy-bheight] = ((long)x1.s0) * sy; // m12 + m[7][lidy-bheight] = ((long)x2.s0) * lidy; // m21 + m[6][lidy-bheight] = x3.s0; // m30 + m[5][lidy-bheight] = x0.s0 * sy; // m02 + m[4][lidy-bheight] = x1.s0 * lidy; // m11 + m[3][lidy-bheight] = x2.s0; // m20 + m[2][lidy-bheight] = py; // m01 + m[1][lidy-bheight] = x1.s0; // m10 + m[0][lidy-bheight] = x0.s0; // m00 + } + else if(lidy < bheight) + { + lm[9] = ((long)py) * sy; // m03 + lm[8] = ((long)x1.s0) * sy; // m12 + lm[7] = ((long)x2.s0) * lidy; // m21 + lm[6] = x3.s0; // m30 + lm[5] = x0.s0 * sy; // m02 + lm[4] = x1.s0 * lidy; // m11 + lm[3] = x2.s0; // m20 + lm[2] = py; // m01 + lm[1] = x1.s0; // m10 + lm[0] = x0.s0; // m00 + } + barrier(CLK_LOCAL_MEM_FENCE); + for( int j = TILE_SIZE/2; j >= 1; j = j/2 ) + { + if(lidy < j) + for( int i = 0; i < 10; i++ ) + lm[i] = lm[i] + m[i][lidy]; + barrier(CLK_LOCAL_MEM_FENCE); + if(lidy >= j/2&&lidy < j) + for( int i = 0; i < 10; i++ ) + m[i][lidy-j/2] = lm[i]; + barrier(CLK_LOCAL_MEM_FENCE); + } + if(lidy == 0&&lidx == 0) + { + for(int mt = 0; mt < 10; mt++ ) + mom[mt] = (double)lm[mt]; + + if(binary) + { + double s = 1./255; + for( int mt = 0; mt < 10; mt++ ) + mom[mt] *= s; + } + + double xm = x *mom[0], ym = y * mom[0]; + + // accumulate moments computed in each tile + + // + m00 ( = m00' ) + dst_m00[wgidy*dst_cols+wgidx] = mom[0]; + + // + m10 ( = m10' + x*m00' ) + dst_m10[wgidy*dst_cols+wgidx] = mom[1] + xm; + + // + m01 ( = m01' + y*m00' ) + dst_m01[wgidy*dst_cols+wgidx] = mom[2] + ym; + + // + m20 ( = m20' + 2*x*m10' + x*x*m00' ) + dst_m20[wgidy*dst_cols+wgidx] = mom[3] + x * (mom[1] * 2 + xm); + + // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' ) + dst_m11[wgidy*dst_cols+wgidx] = mom[4] + x * (mom[2] + ym) + y * mom[1]; + + // + m02 ( = m02' + 2*y*m01' + y*y*m00' ) + dst_m02[wgidy*dst_cols+wgidx] = mom[5] + y * (mom[2] * 2 + ym); + + // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' ) + dst_m30[wgidy*dst_cols+wgidx] = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm)); + + // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20') + dst_m21[wgidy*dst_cols+wgidx] = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3]; + + // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02') + dst_m12[wgidy*dst_cols+wgidx] = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5]; + + // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' ) + dst_m03[wgidy*dst_cols+wgidx] = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym)); + } +} +//#endif +//#if defined (DOUBLE_SUPPORT) +__kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height, + __global double* dst_m00, + __global double* dst_m10, + __global double* dst_m01, + __global double* dst_m20, + __global double* dst_m11, + __global double* dst_m02, + __global double* dst_m30, + __global double* dst_m21, + __global double* dst_m12, + __global double* dst_m03, + int dst_cols, int dst_step, + int type, int depth, int cn, int coi, int binary, const int TILE_SIZE) +{ + short tmp_coi[8]; // get the coi data + short8 tmp[32]; + int VLEN_S =8; // vector length of short + int gidy = get_global_id(0); + int gidx = get_global_id(1); + int wgidy = get_group_id(0); + int wgidx = get_group_id(1); + int lidy = get_local_id(0); + int lidx = get_local_id(1); + int y = wgidy*TILE_SIZE; // real Y index of pixel + int x = wgidx*TILE_SIZE; // real X index of pixel + int kcn = (cn==2)?2:4; + int rstep = min(src_step/2, TILE_SIZE); + tileSize_height = min(TILE_SIZE, src_rows - y); + tileSize_width = min(TILE_SIZE, src_cols -x); + if(tileSize_width < TILE_SIZE) + for(int i = tileSize_width; i < rstep; i++ ) + *((__global short*)src_data+(y+lidy)*src_step/2+x+i) = 0; + if( coi > 0 ) + for(int i=0; i < tileSize_width; i+=VLEN_S) + { + for(int j=0; j= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height) + { + m[9][lidy-bheight] = ((long)py) * sy; // m03 + m[8][lidy-bheight] = ((long)x1.s0) * sy; // m12 + m[7][lidy-bheight] = ((long)x2.s0) * lidy; // m21 + m[6][lidy-bheight] = x3.s0; // m30 + m[5][lidy-bheight] = x0.s0 * sy; // m02 + m[4][lidy-bheight] = x1.s0 * lidy; // m11 + m[3][lidy-bheight] = x2.s0; // m20 + m[2][lidy-bheight] = py; // m01 + m[1][lidy-bheight] = x1.s0; // m10 + m[0][lidy-bheight] = x0.s0; // m00 + } + else if(lidy < bheight) + { + lm[9] = ((long)py) * sy; // m03 + lm[8] = ((long)(x1.s0)) * sy; // m12 + lm[7] = ((long)(x2.s0)) * lidy; // m21 + lm[6] = x3.s0; // m30 + lm[5] = x0.s0 * sy; // m02 + lm[4] = x1.s0 * lidy; // m11 + lm[3] = x2.s0; // m20 + lm[2] = py; // m01 + lm[1] = x1.s0; // m10 + lm[0] = x0.s0; // m00 + } + barrier(CLK_LOCAL_MEM_FENCE); + for( int j = TILE_SIZE/2; j >=1; j = j/2 ) + { + if(lidy < j) + for( int i = 0; i < 10; i++ ) + lm[i] = lm[i] + m[i][lidy]; + barrier(CLK_LOCAL_MEM_FENCE); + if(lidy >= j/2&&lidy < j) + for( int i = 0; i < 10; i++ ) + m[i][lidy-j/2] = lm[i]; + barrier(CLK_LOCAL_MEM_FENCE); + } + if(lidy ==0 &&lidx ==0) + { + for(int mt = 0; mt < 10; mt++ ) + mom[mt] = (double)lm[mt]; + + if(binary) + { + double s = 1./255; + for( int mt = 0; mt < 10; mt++ ) + mom[mt] *= s; + } + + double xm = x * mom[0], ym = y*mom[0]; + + // accumulate moments computed in each tile + + // + m00 ( = m00' ) + dst_m00[wgidy*dst_cols+wgidx] = mom[0]; + + // + m10 ( = m10' + x*m00' ) + dst_m10[wgidy*dst_cols+wgidx] = mom[1] + xm; + + // + m01 ( = m01' + y*m00' ) + dst_m01[wgidy*dst_cols+wgidx] = mom[2] + ym; + + // + m20 ( = m20' + 2*x*m10' + x*x*m00' ) + dst_m20[wgidy*dst_cols+wgidx] = mom[3] + x * (mom[1] * 2 + xm); + + // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' ) + dst_m11[wgidy*dst_cols+wgidx] = mom[4] + x * (mom[2] + ym) + y * mom[1]; + + // + m02 ( = m02' + 2*y*m01' + y*y*m00' ) + dst_m02[wgidy*dst_cols+wgidx] = mom[5] + y * (mom[2] * 2 + ym); + + // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' ) + dst_m30[wgidy*dst_cols+wgidx] = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm)); + + // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20') + dst_m21[wgidy*dst_cols+wgidx] = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3]; + + // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02') + dst_m12[wgidy*dst_cols+wgidx] = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5]; + + // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' ) + dst_m03[wgidy*dst_cols+wgidx] = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym)); + } +} +//#endif +//#if defined (DOUBLE_SUPPORT) +__kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height, + __global double* dst_m00, + __global double* dst_m10, + __global double* dst_m01, + __global double* dst_m20, + __global double* dst_m11, + __global double* dst_m02, + __global double* dst_m30, + __global double* dst_m21, + __global double* dst_m12, + __global double* dst_m03, + int dst_cols, int dst_step, + int type, int depth, int cn, int coi, int binary, const int TILE_SIZE) +{ + float tmp_coi[4]; // get the coi data + float4 tmp[64] ; + int VLEN_F = 4; // vector length of float + int gidy = get_global_id(0); + int gidx = get_global_id(1); + int wgidy = get_group_id(0); + int wgidx = get_group_id(1); + int lidy = get_local_id(0); + int lidx = get_local_id(1); + int y = wgidy*TILE_SIZE; // real Y index of pixel + int x = wgidx*TILE_SIZE; // real X index of pixel + int kcn = (cn==2)?2:4; + int rstep = min(src_step/4, TILE_SIZE); + tileSize_height = min(TILE_SIZE, src_rows - y); + tileSize_width = min(TILE_SIZE, src_cols -x); + if(tileSize_width < TILE_SIZE) + for(int i = tileSize_width; i < rstep; i++ ) + *((__global float*)src_data+(y+lidy)*src_step/4+x+i) = 0; + if( coi > 0 ) + for(int i=0; i < tileSize_width; i+=VLEN_F) + { + for(int j=0; j<4; j++) + tmp_coi[j] = *(src_data+(y+lidy)*src_step/4+(x+i+j)*kcn+coi-1); + tmp[i/VLEN_F] = (float4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]); + } + else + for(int i=0; i < tileSize_width; i+=VLEN_F) + tmp[i/VLEN_F] = (float4)(*(src_data+(y+lidy)*src_step/4+x+i),*(src_data+(y+lidy)*src_step/4+x+i+1),*(src_data+(y+lidy)*src_step/4+x+i+2),*(src_data+(y+lidy)*src_step/4+x+i+3)); + float4 zero = (float4)(0); + float4 full = (float4)(255); + if( binary ) + for(int i=0; i < tileSize_width; i+=4) + tmp[i/VLEN_F] = (tmp[i/VLEN_F]!=zero)?full:zero; + double mom[10]; + __local double m[10][128]; + if(lidy == 0) + for(int i = 0; i < 10; i ++) + for(int j = 0; j < 128; j ++) + m[i][j] = 0; + barrier(CLK_LOCAL_MEM_FENCE); + double lm[10] = {0}; + double4 x0 = (double4)(0); + double4 x1 = (double4)(0); + double4 x2 = (double4)(0); + double4 x3 = (double4)(0); + for( int xt = 0 ; xt < tileSize_width; xt+=VLEN_F ) + { + double4 v_xt = (double4)(xt, xt+1, xt+2, xt+3); + double4 p = convert_double4(tmp[xt/VLEN_F]); + double4 xp = v_xt * p, xxp = xp * v_xt; + x0 += p; + x1 += xp; + x2 += xxp; + x3 += xxp * v_xt; + } + x0.s0 += x0.s1 + x0.s2 + x0.s3; + x1.s0 += x1.s1 + x1.s2 + x1.s3; + x2.s0 += x2.s1 + x2.s2 + x2.s3; + x3.s0 += x3.s1 + x3.s2 + x3.s3; +/* + double py = lidy * x0.s0, sy = lidy*lidy; + int bheight = min(tileSize_height, TILE_SIZE/2); + if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height) + { + m[9][lidy-bheight] = ((double)py) * sy; // m03 + m[8][lidy-bheight] = ((double)x1.s0) * sy; // m12 + m[7][lidy-bheight] = ((double)x2.s0) * lidy; // m21 + m[6][lidy-bheight] = x3.s0; // m30 + m[5][lidy-bheight] = x0.s0 * sy; // m02 + m[4][lidy-bheight] = x1.s0 * lidy; // m11 + m[3][lidy-bheight] = x2.s0; // m20 + m[2][lidy-bheight] = py; // m01 + m[1][lidy-bheight] = x1.s0; // m10 + m[0][lidy-bheight] = x0.s0; // m00 + } + else if(lidy < bheight) + { + lm[9] = ((double)py) * sy; // m03 + lm[8] = ((double)x1.s0) * sy; // m12 + lm[7] = ((double)x2.s0) * lidy; // m21 + lm[6] = x3.s0; // m30 + lm[5] = x0.s0 * sy; // m02 + lm[4] = x1.s0 * lidy; // m11 + lm[3] = x2.s0; // m20 + lm[2] = py; // m01 + lm[1] = x1.s0; // m10 + lm[0] = x0.s0; // m00 + } + barrier(CLK_LOCAL_MEM_FENCE); + for( int j = TILE_SIZE/2; j >= 1; j = j/2 ) + { + if(lidy < j) + for( int i = 0; i < 10; i++ ) + lm[i] = lm[i] + m[i][lidy]; + barrier(CLK_LOCAL_MEM_FENCE); + if(lidy >= j/2&&lidy < j) + for( int i = 0; i < 10; i++ ) + m[i][lidy-j/2] = lm[i]; + barrier(CLK_LOCAL_MEM_FENCE); + } + if(lidy == 0&&lidx == 0) + { + for(int mt = 0; mt < 10; mt++ ) + mom[mt] = (double)lm[mt]; + + if(binary) + { + double s = 1./255; + for( int mt = 0; mt < 10; mt++ ) + mom[mt] *= s; + } + + double xm = x * mom[0], ym = y * mom[0]; + + // accumulate moments computed in each tile + + // + m00 ( = m00' ) + dst_m00[wgidy*dst_cols+wgidx]= mom[0]; + + // + m10 ( = m10' + x*m00' ) + dst_m10[wgidy*dst_cols+wgidx] = mom[1] + xm; + + // + m01 ( = m01' + y*m00' ) + dst_m01[wgidy*dst_cols+wgidx] = mom[2] + ym; + + // + m20 ( = m20' + 2*x*m10' + x*x*m00' ) + dst_m20[wgidy*dst_cols+wgidx] = mom[3] + x * (mom[1] * 2 + xm); + + // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' ) + dst_m11[wgidy*dst_cols+wgidx] = mom[4] + x * (mom[2] + ym) + y * mom[1]; + + // + m02 ( = m02' + 2*y*m01' + y*y*m00' ) + dst_m02[wgidy*dst_cols+wgidx]= mom[5] + y * (mom[2] * 2 + ym); + + // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' ) + dst_m30[wgidy*dst_cols+wgidx]= mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm)); + + // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20') + dst_m21[wgidy*dst_cols+wgidx] = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3]; + + // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02') + dst_m12[wgidy*dst_cols+wgidx] = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5]; + + // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' ) + dst_m03[wgidy*dst_cols+wgidx]= mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym)); + }*/ +} +//#endif +//#if defined (DOUBLE_SUPPORT) +__kernel void CvMoments_D6(__global double* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height, + __global double* dst_m00, + __global double* dst_m10, + __global double* dst_m01, + __global double* dst_m20, + __global double* dst_m11, + __global double* dst_m02, + __global double* dst_m30, + __global double* dst_m21, + __global double* dst_m12, + __global double* dst_m03, + int dst_cols, int dst_step, + int type, int depth, int cn, int coi, int binary, const int TILE_SIZE) +{ + double tmp_coi[4]; // get the coi data + double4 tmp[64]; + int VLEN_D = 4; // length of vetor + int gidy = get_global_id(0); + int gidx = get_global_id(1); + int wgidy = get_group_id(0); + int wgidx = get_group_id(1); + int lidy = get_local_id(0); + int lidx = get_local_id(1); + int y = wgidy*TILE_SIZE; // real Y index of pixel + int x = wgidx*TILE_SIZE; // real X index of pixel + int kcn = (cn==2)?2:4; + int rstep = min(src_step/8, TILE_SIZE); + tileSize_height = min(TILE_SIZE, src_rows - y); + tileSize_width = min(TILE_SIZE, src_cols - x); + + if(tileSize_width < TILE_SIZE) + for(int i = tileSize_width; i < rstep; i++ ) + *((__global double*)src_data+(y+lidy)*src_step/8+x+i) = 0; + if( coi > 0 ) + for(int i=0; i < tileSize_width; i+=VLEN_D) + { + for(int j=0; j<4; j++) + tmp_coi[j] = *(src_data+(y+lidy)*src_step/8+(x+i+j)*kcn+coi-1); + tmp[i/VLEN_D] = (double4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]); + } + else + for(int i=0; i < tileSize_width; i+=VLEN_D) + tmp[i/VLEN_D] = (double4)(*(src_data+(y+lidy)*src_step/8+x+i),*(src_data+(y+lidy)*src_step/8+x+i+1),*(src_data+(y+lidy)*src_step/8+x+i+2),*(src_data+(y+lidy)*src_step/8+x+i+3)); + double4 zero = (double4)(0); + double4 full = (double4)(255); + if( binary ) + for(int i=0; i < tileSize_width; i+=VLEN_D) + tmp[i/VLEN_D] = (tmp[i/VLEN_D]!=zero)?full:zero; + double mom[10]; + __local double m[10][128]; + if(lidy == 0) + for(int i=0; i<10; i++) + for(int j=0; j<128; j++) + m[i][j]=0; + barrier(CLK_LOCAL_MEM_FENCE); + double lm[10] = {0}; + double4 x0 = (double4)(0); + double4 x1 = (double4)(0); + double4 x2 = (double4)(0); + double4 x3 = (double4)(0); + for( int xt = 0 ; xt < tileSize_width; xt+=VLEN_D ) + { + double4 v_xt = (double4)(xt, xt+1, xt+2, xt+3); + double4 p = tmp[xt/VLEN_D]; + double4 xp = v_xt * p, xxp = xp * v_xt; + x0 += p; + x1 += xp; + x2 += xxp; + x3 += xxp *v_xt; + } + x0.s0 += x0.s1 + x0.s2 + x0.s3; + x1.s0 += x1.s1 + x1.s2 + x1.s3; + x2.s0 += x2.s1 + x2.s2 + x2.s3; + x3.s0 += x3.s1 + x3.s2 + x3.s3; + + double py = lidy * x0.s0, sy = lidy*lidy; + int bheight = min(tileSize_height, TILE_SIZE/2); + if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height) + { + m[9][lidy-bheight] = ((double)py) * sy; // m03 + m[8][lidy-bheight] = ((double)x1.s0) * sy; // m12 + m[7][lidy-bheight] = ((double)x2.s0) * lidy; // m21 + m[6][lidy-bheight] = x3.s0; // m30 + m[5][lidy-bheight] = x0.s0 * sy; // m02 + m[4][lidy-bheight] = x1.s0 * lidy; // m11 + m[3][lidy-bheight] = x2.s0; // m20 + m[2][lidy-bheight] = py; // m01 + m[1][lidy-bheight] = x1.s0; // m10 + m[0][lidy-bheight] = x0.s0; // m00 + } + + else if(lidy < bheight) + { + lm[9] = ((double)py) * sy; // m03 + lm[8] = ((double)x1.s0) * sy; // m12 + lm[7] = ((double)x2.s0) * lidy; // m21 + lm[6] = x3.s0; // m30 + lm[5] = x0.s0 * sy; // m02 + lm[4] = x1.s0 * lidy; // m11 + lm[3] = x2.s0; // m20 + lm[2] = py; // m01 + lm[1] = x1.s0; // m10 + lm[0] = x0.s0; // m00 + } + barrier(CLK_LOCAL_MEM_FENCE); + for( int j = TILE_SIZE/2; j >= 1; j = j/2 ) + { + if(lidy < j) + for( int i = 0; i < 10; i++ ) + lm[i] = lm[i] + m[i][lidy]; + barrier(CLK_LOCAL_MEM_FENCE); + if(lidy >= j/2&&lidy < j) + for( int i = 0; i < 10; i++ ) + m[i][lidy-j/2] = lm[i]; + barrier(CLK_LOCAL_MEM_FENCE); + } + if(lidy == 0&&lidx == 0) + { + for( int mt = 0; mt < 10; mt++ ) + mom[mt] = (double)lm[mt]; + if(binary) + { + double s = 1./255; + for( int mt = 0; mt < 10; mt++ ) + mom[mt] *= s; + } + + double xm = x * mom[0], ym = y * mom[0]; + + // accumulate moments computed in each tile + + // + m00 ( = m00' ) + dst_m00[wgidy*dst_cols+wgidx] = mom[0]; + + // + m10 ( = m10' + x*m00' ) + dst_m10[wgidy*dst_cols+wgidx] = mom[1] + xm; + + // + m01 ( = m01' + y*m00' ) + dst_m01[wgidy*dst_cols+wgidx] = mom[2] + ym; + + // + m20 ( = m20' + 2*x*m10' + x*x*m00' ) + dst_m20[wgidy*dst_cols+wgidx] = mom[3] + x * (mom[1] * 2 + xm); + + // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' ) + dst_m11[wgidy*dst_cols+wgidx] = mom[4] + x * (mom[2] + ym) + y * mom[1]; + + // + m02 ( = m02' + 2*y*m01' + y*y*m00' ) + dst_m02[wgidy*dst_cols+wgidx] = mom[5] + y * (mom[2] * 2 + ym); + + // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' ) + dst_m30[wgidy*dst_cols+wgidx] = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm)); + + // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20') + dst_m21[wgidy*dst_cols+wgidx] = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3]; + + // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02') + dst_m12[wgidy*dst_cols+wgidx] = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5]; + + // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' ) + dst_m03[wgidy*dst_cols+wgidx] = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym)); + } +} +//#endif diff --git a/modules/ocl/src/moments.cpp b/modules/ocl/src/moments.cpp new file mode 100644 index 0000000000..6979433ab3 --- /dev/null +++ b/modules/ocl/src/moments.cpp @@ -0,0 +1,380 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Sen Liu, sen@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#include "precomp.hpp" +#include +namespace cv +{ +namespace ocl +{ +extern const char *moments; + +// The function calculates center of gravity and the central second order moments +static void icvCompleteMomentState( CvMoments* moments ) +{ + double cx = 0, cy = 0; + double mu20, mu11, mu02; + + assert( moments != 0 ); + moments->inv_sqrt_m00 = 0; + + if( fabs(moments->m00) > DBL_EPSILON ) + { + double inv_m00 = 1. / moments->m00; + cx = moments->m10 * inv_m00; + cy = moments->m01 * inv_m00; + moments->inv_sqrt_m00 = std::sqrt( fabs(inv_m00) ); + } + + // mu20 = m20 - m10*cx + mu20 = moments->m20 - moments->m10 * cx; + // mu11 = m11 - m10*cy + mu11 = moments->m11 - moments->m10 * cy; + // mu02 = m02 - m01*cy + mu02 = moments->m02 - moments->m01 * cy; + + moments->mu20 = mu20; + moments->mu11 = mu11; + moments->mu02 = mu02; + + // mu30 = m30 - cx*(3*mu20 + cx*m10) + moments->mu30 = moments->m30 - cx * (3 * mu20 + cx * moments->m10); + mu11 += mu11; + // mu21 = m21 - cx*(2*mu11 + cx*m01) - cy*mu20 + moments->mu21 = moments->m21 - cx * (mu11 + cx * moments->m01) - cy * mu20; + // mu12 = m12 - cy*(2*mu11 + cy*m10) - cx*mu02 + moments->mu12 = moments->m12 - cy * (mu11 + cy * moments->m10) - cx * mu02; + // mu03 = m03 - cy*(3*mu02 + cy*m01) + moments->mu03 = moments->m03 - cy * (3 * mu02 + cy * moments->m01); +} + + +static void icvContourMoments( CvSeq* contour, CvMoments* mom ) +{ + if( contour->total ) + { + CvSeqReader reader; + int lpt = contour->total; + double a00, a10, a01, a20, a11, a02, a30, a21, a12, a03; + int dst_type = cv::ocl::Context::getContext()->impl->double_support ? CV_64FC1 : CV_32FC1; + + cvStartReadSeq( contour, &reader, 0 ); + + cv::ocl::oclMat dst_a00(1,lpt,dst_type); + cv::ocl::oclMat dst_a10(1,lpt,dst_type); + cv::ocl::oclMat dst_a01(1,lpt,dst_type); + cv::ocl::oclMat dst_a20(1,lpt,dst_type); + cv::ocl::oclMat dst_a11(1,lpt,dst_type); + cv::ocl::oclMat dst_a02(1,lpt,dst_type); + cv::ocl::oclMat dst_a30(1,lpt,dst_type); + cv::ocl::oclMat dst_a21(1,lpt,dst_type); + cv::ocl::oclMat dst_a12(1,lpt,dst_type); + cv::ocl::oclMat dst_a03(1,lpt,dst_type); + size_t reader_size = lpt << 1; + cv::Mat reader_mat(1,reader_size,CV_32FC1); + + bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2; + + if( is_float ) + { + for(size_t i = 0; i < reader_size; ++i) + { + reader_mat.at(0, i++) = ((CvPoint2D32f*)(reader.ptr))->x; + reader_mat.at(0, i) = ((CvPoint2D32f*)(reader.ptr))->y; + CV_NEXT_SEQ_ELEM( contour->elem_size, reader ); + } + } + else + { + for(size_t i = 0; i < reader_size; ++i) + { + reader_mat.at(0, i++) = ((CvPoint*)(reader.ptr))->x; + reader_mat.at(0, i) = ((CvPoint*)(reader.ptr))->y; + CV_NEXT_SEQ_ELEM( contour->elem_size, reader ); + } + } + + cv::ocl::oclMat reader_oclmat(reader_mat); + int llength = std::min(lpt,128); + size_t localThreads[3] = { llength, 1, 1}; + size_t globalThreads[3] = { lpt, 1, 1}; + vector > args; + args.push_back( make_pair( sizeof(cl_int) , (void *)&contour->total )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&reader_oclmat.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a00.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a10.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a01.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a20.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a11.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a02.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a30.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a21.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a12.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a03.data )); + openCLExecuteKernel(dst_a00.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1); + + cv::Mat dst(dst_a00); + cv::Scalar s = cv::sum(dst); + a00 = s[0]; + dst = dst_a10; + s = cv::sum(dst); + a10 = s[0];//dstsum[1]; + dst = dst_a01; + s = cv::sum(dst); + a01 = s[0];//dstsum[2]; + dst = dst_a20; + s = cv::sum(dst); + a20 = s[0];//dstsum[3]; + dst = dst_a11; + s = cv::sum(dst); + a11 = s[0];//dstsum[4]; + dst = dst_a02; + s = cv::sum(dst); + a02 = s[0];//dstsum[5]; + dst = dst_a30; + s = cv::sum(dst); + a30 = s[0];//dstsum[6]; + dst = dst_a21; + s = cv::sum(dst); + a21 = s[0];//dstsum[7]; + dst = dst_a12; + s = cv::sum(dst); + a12 = s[0];//dstsum[8]; + dst = dst_a03; + s = cv::sum(dst); + a03 = s[0];//dstsum[9]; + + double db1_2, db1_6, db1_12, db1_24, db1_20, db1_60; + if( fabs(a00) > FLT_EPSILON ) + { + if( a00 > 0 ) + { + db1_2 = 0.5; + db1_6 = 0.16666666666666666666666666666667; + db1_12 = 0.083333333333333333333333333333333; + db1_24 = 0.041666666666666666666666666666667; + db1_20 = 0.05; + db1_60 = 0.016666666666666666666666666666667; + } + else + { + db1_2 = -0.5; + db1_6 = -0.16666666666666666666666666666667; + db1_12 = -0.083333333333333333333333333333333; + db1_24 = -0.041666666666666666666666666666667; + db1_20 = -0.05; + db1_60 = -0.016666666666666666666666666666667; + } + + // spatial moments + mom->m00 = a00 * db1_2; + mom->m10 = a10 * db1_6; + mom->m01 = a01 * db1_6; + mom->m20 = a20 * db1_12; + mom->m11 = a11 * db1_24; + mom->m02 = a02 * db1_12; + mom->m30 = a30 * db1_20; + mom->m21 = a21 * db1_60; + mom->m12 = a12 * db1_60; + mom->m03 = a03 * db1_20; + + icvCompleteMomentState( mom ); + } + } +} + +static void ocl_cvMoments( const void* array, CvMoments* mom, int binary ) +{ + const int TILE_SIZE = 256; + int type, depth, cn, coi = 0; + CvMat stub, *mat = (CvMat*)array; + CvContour contourHeader; + CvSeq* contour = 0; + CvSeqBlock block; + if( CV_IS_SEQ( array )) + { + contour = (CvSeq*)array; + if( !CV_IS_SEQ_POINT_SET( contour )) + CV_Error( CV_StsBadArg, "The passed sequence is not a valid contour" ); + } + + if( !moments ) + CV_Error( CV_StsNullPtr, "" ); + + memset( mom, 0, sizeof(*mom)); + + if( !contour ) + { + + mat = cvGetMat( mat, &stub, &coi ); + type = CV_MAT_TYPE( mat->type ); + + if( type == CV_32SC2 || type == CV_32FC2 ) + { + contour = cvPointSeqFromMat( + CV_SEQ_KIND_CURVE | CV_SEQ_FLAG_CLOSED, + mat, &contourHeader, &block ); + } + } + if( contour ) + { + icvContourMoments( contour, mom ); + return; + } + + type = CV_MAT_TYPE( mat->type ); + depth = CV_MAT_DEPTH( type ); + cn = CV_MAT_CN( type ); + + cv::Size size = cvGetMatSize( mat ); + if( cn > 1 && coi == 0 ) + CV_Error( CV_StsBadArg, "Invalid image type" ); + + if( size.width <= 0 || size.height <= 0 ) + return; + + cv::Mat src0(mat); + cv::ocl::oclMat src(src0); + cv::Size tileSize; + int blockx,blocky; + if(size.width%TILE_SIZE == 0) + blockx = size.width/TILE_SIZE; + else + blockx = size.width/TILE_SIZE + 1; + if(size.height%TILE_SIZE == 0) + blocky = size.height/TILE_SIZE; + else + blocky = size.height/TILE_SIZE + 1; + cv::ocl::oclMat dst_m00(blocky, blockx, CV_64FC1); + cv::ocl::oclMat dst_m10(blocky, blockx, CV_64FC1); + cv::ocl::oclMat dst_m01(blocky, blockx, CV_64FC1); + cv::ocl::oclMat dst_m20(blocky, blockx, CV_64FC1); + cv::ocl::oclMat dst_m11(blocky, blockx, CV_64FC1); + cv::ocl::oclMat dst_m02(blocky, blockx, CV_64FC1); + cv::ocl::oclMat dst_m30(blocky, blockx, CV_64FC1); + cv::ocl::oclMat dst_m21(blocky, blockx, CV_64FC1); + cv::ocl::oclMat dst_m12(blocky, blockx, CV_64FC1); + cv::ocl::oclMat dst_m03(blocky, blockx, CV_64FC1); + cl_mem sum = openCLCreateBuffer(src.clCxt,CL_MEM_READ_WRITE,10*sizeof(double)); + int tile_width = std::min(size.width,TILE_SIZE); + int tile_height = std::min(size.height,TILE_SIZE); + size_t localThreads[3] = { tile_height, 1, 1}; + size_t globalThreads[3] = { size.height, blockx, 1}; + vector > args,args_sum; + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.width )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.height )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m00.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m10.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m01.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m20.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m11.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m02.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m30.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m21.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m12.data )); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m03.data )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m00.cols )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m00.step )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&type )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&depth )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&cn )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&coi )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&binary )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE )); + openCLExecuteKernel(dst_m00.clCxt, &moments, "CvMoments", globalThreads, localThreads, args, -1, depth); + + size_t localThreadss[3] = { 128, 1, 1}; + size_t globalThreadss[3] = { 128, 1, 1}; + args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); + args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); + args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&tile_height )); + args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&tile_width )); + args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE )); + args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&sum )); + args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m00.data )); + args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m10.data )); + args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m01.data )); + args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m20.data )); + args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m11.data )); + args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m02.data )); + args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m30.data )); + args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m21.data )); + args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m12.data )); + args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m03.data )); + openCLExecuteKernel(dst_m00.clCxt, &moments, "dst_sum", globalThreadss, localThreadss, args_sum, -1, -1); + double* dstsum = new double[10]; + memset(dstsum,0,10*sizeof(double)); + openCLReadBuffer(dst_m00.clCxt,sum,(void *)dstsum,10*sizeof(double)); + mom->m00 = dstsum[0]; + mom->m10 = dstsum[1]; + mom->m01 = dstsum[2]; + mom->m20 = dstsum[3]; + mom->m11 = dstsum[4]; + mom->m02 = dstsum[5]; + mom->m30 = dstsum[6]; + mom->m21 = dstsum[7]; + mom->m12 = dstsum[8]; + mom->m03 = dstsum[9]; + + icvCompleteMomentState( mom ); +} + +Moments ocl_moments( InputArray _array, bool binaryImage ) +{ + CvMoments om; + Mat arr = _array.getMat(); + CvMat c_array = arr; + ocl_cvMoments(&c_array, &om, binaryImage); + return om; +} + +} + +} + diff --git a/modules/ocl/test/test_moments.cpp b/modules/ocl/test/test_moments.cpp new file mode 100644 index 0000000000..715ad8963a --- /dev/null +++ b/modules/ocl/test/test_moments.cpp @@ -0,0 +1,72 @@ +#include "precomp.hpp" +#include +#include "opencv2/imgproc/imgproc_c.h" + +#ifdef HAVE_OPENCL + +using namespace cv; +using namespace cv::ocl; +using namespace cvtest; +using namespace testing; +using namespace std; +extern string workdir; +PARAM_TEST_CASE(MomentsTestBase, MatType, bool) +{ + int type; + cv::Mat mat1; + bool test_contours; + + virtual void SetUp() + { + type = GET_PARAM(0); + test_contours = GET_PARAM(1); + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size(10*MWIDTH, 10*MHEIGHT); + mat1 = randomMat(rng, size, type, 5, 16, false); + } + + void Compare(Moments& cpu, Moments& gpu) + { + Mat gpu_dst, cpu_dst; + HuMoments(cpu, cpu_dst); + HuMoments(gpu, gpu_dst); + EXPECT_MAT_NEAR(gpu_dst,cpu_dst, .5, ""); + } + +}; +struct ocl_Moments : MomentsTestBase {}; + +TEST_P(ocl_Moments, Mat) +{ + bool binaryImage = 0; + SetUp(); + + for(int j = 0; j < LOOP_TIMES; j++) + { + if(test_contours) + { + Mat src = imread( workdir + "../cpp/pic3.png", 1 ); + Mat src_gray, canny_output; + cvtColor( src, src_gray, CV_BGR2GRAY ); + vector > contours; + vector hierarchy; + Canny( src_gray, canny_output, 100, 200, 3 ); + findContours( canny_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) ); + for( size_t i = 0; i < contours.size(); i++ ) + { + Moments m = moments( contours[i], false ); + Moments dm = ocl::ocl_moments( contours[i], false ); + Compare(m, dm); + } + } + cv::_InputArray _array(mat1); + cv::Moments CvMom = cv::moments(_array, binaryImage); + cv::Moments oclMom = cv::ocl::ocl_moments(_array, binaryImage); + + Compare(CvMom, oclMom); + + } +} +INSTANTIATE_TEST_CASE_P(Moments, ocl_Moments, Combine( + Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_64FC1), Values(true,false))); +#endif // HAVE_OPENCL