/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors // Peng Xiao, pengxiao@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #ifdef OP_SOBEL #if cn != 3 #define loadpix(addr) convertToIntT(*(__global const ucharT *)(addr)) #define storepix(val, addr) *(__global shortT *)(addr) = convertToShortT(val) #define shortSize (int)sizeof(shortT) #else #define loadpix(addr) convertToIntT(vload3(0, (__global const uchar *)(addr))) #define storepix(val, addr) vstore3(convertToShortT(val), 0, (__global short *)(addr)) #define shortSize (int)sizeof(short) * cn #endif // Smoothing perpendicular to the derivative direction with a triangle filter // only support 3x3 Sobel kernel // h (-1) = 1, h (0) = 2, h (1) = 1 // h'(-1) = -1, h'(0) = 0, h'(1) = 1 // thus sobel 2D operator can be calculated as: // h'(x, y) = h'(x)h(y) for x direction // // src input 8bit single channel image data // dx_buf output dx buffer // dy_buf output dy buffer __kernel void calcSobelRowPass(__global const uchar * src, int src_step, int src_offset, int rows, int cols, __global uchar * dx_buf, int dx_buf_step, int dx_buf_offset, __global uchar * dy_buf, int dy_buf_step, int dy_buf_offset) { int gidx = get_global_id(0); int gidy = get_global_id(1); int lidx = get_local_id(0); int lidy = get_local_id(1); __local intT smem[16][18]; smem[lidy][lidx + 1] = loadpix(src + mad24(src_step, min(gidy, rows - 1), mad24(gidx, cn, src_offset))); if (lidx == 0) { smem[lidy][0] = loadpix(src + mad24(src_step, min(gidy, rows - 1), mad24(max(gidx - 1, 0), cn, src_offset))); smem[lidy][17] = loadpix(src + mad24(src_step, min(gidy, rows - 1), mad24(min(gidx + 16, cols - 1), cn, src_offset))); } barrier(CLK_LOCAL_MEM_FENCE); if (gidy < rows && gidx < cols) { storepix(smem[lidy][lidx + 2] - smem[lidy][lidx], dx_buf + mad24(gidy, dx_buf_step, mad24(gidx, shortSize, dx_buf_offset))); storepix(mad24(2, smem[lidy][lidx + 1], smem[lidy][lidx] + smem[lidy][lidx + 2]), dy_buf + mad24(gidy, dy_buf_step, mad24(gidx, shortSize, dy_buf_offset))); } } #elif defined OP_MAG_BUF || defined OP_MAG inline intT calc(shortT x, shortT y) { #ifdef L2GRAD intT intx = convertToIntT(x), inty = convertToIntT(y); return intx * intx + inty * inty; #else return convertToIntT( (x >= (shortT)(0) ? x : -x) + (y >= (shortT)(0) ? y : -y) ); #endif } #ifdef OP_MAG // calculate the magnitude of the filter pass combining both x and y directions // This is the non-buffered version(non-3x3 sobel) // // dx_buf dx buffer, calculated from calcSobelRowPass // dy_buf dy buffer, calculated from calcSobelRowPass // dx direvitive in x direction output // dy direvitive in y direction output // mag magnitude direvitive of xy output __kernel void calcMagnitude(__global const uchar * dxptr, int dx_step, int dx_offset, __global const uchar * dyptr, int dy_step, int dy_offset, __global uchar * magptr, int mag_step, int mag_offset, int rows, int cols) { int x = get_global_id(0); int y = get_global_id(1); if (y < rows && x < cols) { int dx_index = mad24(dx_step, y, mad24(x, (int)sizeof(short) * cn, dx_offset)); int dy_index = mad24(dy_step, y, mad24(x, (int)sizeof(short) * cn, dy_offset)); int mag_index = mad24(mag_step, y + 1, mad24(x + 1, (int)sizeof(int), mag_offset)); __global short * dx = (__global short *)(dxptr + dx_index); __global short * dy = (__global short *)(dyptr + dy_index); __global int * mag = (__global int *)(magptr + mag_index); int cmag = calc(dx[0], dy[0]); #if cn > 1 short cx = dx[0], cy = dy[0]; int pmag; #pragma unroll for (int i = 1; i < cn; ++i) { pmag = calc(dx[i], dy[i]); if (pmag > cmag) cmag = pmag, cx = dx[i], cy = dy[i]; } dx[0] = cx, dy[0] = cy; #endif mag[0] = cmag; } } #elif defined OP_MAG_BUF #if cn != 3 #define loadpix(addr) *(__global const shortT *)(addr) #define shortSize (int)sizeof(shortT) #else #define loadpix(addr) vload3(0, (__global const short *)(addr)) #define shortSize (int)sizeof(short)*cn #endif // calculate the magnitude of the filter pass combining both x and y directions // This is the buffered version(3x3 sobel) // // dx_buf dx buffer, calculated from calcSobelRowPass // dy_buf dy buffer, calculated from calcSobelRowPass // dx direvitive in x direction output // dy direvitive in y direction output // mag magnitude direvitive of xy output __kernel void calcMagnitude_buf(__global const uchar * dx_buf, int dx_buf_step, int dx_buf_offset, __global const uchar * dy_buf, int dy_buf_step, int dy_buf_offset, __global uchar * dx, int dx_step, int dx_offset, __global uchar * dy, int dy_step, int dy_offset, __global uchar * mag, int mag_step, int mag_offset, int rows, int cols) { int gidx = get_global_id(0); int gidy = get_global_id(1); int lidx = get_local_id(0); int lidy = get_local_id(1); __local shortT sdx[18][16]; __local shortT sdy[18][16]; sdx[lidy + 1][lidx] = loadpix(dx_buf + mad24(min(gidy, rows - 1), dx_buf_step, mad24(gidx, shortSize, dx_buf_offset))); sdy[lidy + 1][lidx] = loadpix(dy_buf + mad24(min(gidy, rows - 1), dy_buf_step, mad24(gidx, shortSize, dy_buf_offset))); if (lidy == 0) { sdx[0][lidx] = loadpix(dx_buf + mad24(clamp(gidy - 1, 0, rows - 1), dx_buf_step, mad24(gidx, shortSize, dx_buf_offset))); sdx[17][lidx] = loadpix(dx_buf + mad24(min(gidy + 16, rows - 1), dx_buf_step, mad24(gidx, shortSize, dx_buf_offset))); sdy[0][lidx] = loadpix(dy_buf + mad24(clamp(gidy - 1, 0, rows - 1), dy_buf_step, mad24(gidx, shortSize, dy_buf_offset))); sdy[17][lidx] = loadpix(dy_buf + mad24(min(gidy + 16, rows - 1), dy_buf_step, mad24(gidx, shortSize, dy_buf_offset))); } barrier(CLK_LOCAL_MEM_FENCE); if (gidx < cols && gidy < rows) { shortT x = sdx[lidy + 1][lidx] * (shortT)(2) + sdx[lidy][lidx] + sdx[lidy + 2][lidx]; shortT y = -sdy[lidy][lidx] + sdy[lidy + 2][lidx]; #if cn == 1 *(__global short *)(dx + mad24(gidy, dx_step, mad24(gidx, shortSize, dx_offset))) = x; *(__global short *)(dy + mad24(gidy, dy_step, mad24(gidx, shortSize, dy_offset))) = y; *(__global int *)(mag + mad24(gidy + 1, mag_step, mad24(gidx + 1, (int)sizeof(int), mag_offset))) = calc(x, y); #elif cn == 3 intT magv = calc(x, y); short cx = x.x, cy = y.x; int cmag = magv.x; if (cmag < magv.y) cx = x.y, cy = y.y, cmag = magv.y; if (cmag < magv.z) cx = x.z, cy = y.z, cmag = magv.z; *(__global short *)(dx + mad24(gidy, dx_step, mad24(gidx, shortSize, dx_offset))) = cx; *(__global short *)(dy + mad24(gidy, dy_step, mad24(gidx, shortSize, dy_offset))) = cy; *(__global int *)(mag + mad24(gidy + 1, mag_step, mad24(gidx + 1, (int)sizeof(int), mag_offset))) = cmag; #endif } } #endif #elif defined OP_MAP ////////////////////////////////////////////////////////////////////////////////////////// // 0.4142135623730950488016887242097 is tan(22.5) #define CANNY_SHIFT 15 #define TG22 (int)(0.4142135623730950488016887242097f*(1< low_thresh) { short xs = *(__global const short *)(dx + mad24(gidy, dx_step, mad24(gidx, (int)sizeof(short) * cn, dx_offset))); short ys = *(__global const short *)(dy + mad24(gidy, dy_step, mad24(gidx, (int)sizeof(short) * cn, dy_offset))); int x = abs(xs), y = abs(ys); int tg22x = x * TG22; y <<= CANNY_SHIFT; if (y < tg22x) { if (m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2]) edge_type = 1 + (int)(m > high_thresh); } else { int tg67x = tg22x + (x << (1 + CANNY_SHIFT)); if (y > tg67x) { if (m > smem[lidy][lidx + 1]&& m >= smem[lidy + 2][lidx + 1]) edge_type = 1 + (int)(m > high_thresh); } else { int s = (xs ^ ys) < 0 ? -1 : 1; if (m > smem[lidy][lidx + 1 - s]&& m > smem[lidy + 2][lidx + 1 + s]) edge_type = 1 + (int)(m > high_thresh); } } } *(__global int *)(map + mad24(map_step, gidy + 1, mad24(gidx + 1, (int)sizeof(int), + map_offset))) = edge_type; } } #undef CANNY_SHIFT #undef TG22 #elif defined OP_HYST_LOCAL struct PtrStepSz { __global uchar * ptr; int step, rows, cols; }; inline int get(struct PtrStepSz data, int y, int x) { return *(__global int *)(data.ptr + mad24(data.step, y + 1, (int)sizeof(int) * (x + 1))); } inline void set(struct PtrStepSz data, int y, int x, int value) { *(__global int *)(data.ptr + mad24(data.step, y + 1, (int)sizeof(int) * (x + 1))) = value; } // perform Hysteresis for pixel whose edge type is 1 // // If candidate pixel (edge type is 1) has a neighbour pixel (in 3x3 area) with type 2, it is believed to be part of an edge and // marked as edge. Each thread will iterate for 16 times to connect local edges. // Candidate pixel being identified as edge will then be tested if there is nearby potiential edge points. If there is, counter will // be incremented by 1 and the point location is stored. These potiential candidates will be processed further in next kernel. // // map raw edge type results calculated from calcMap. // stack the potiential edge points found in this kernel call // counter the number of potiential edge points __kernel void edgesHysteresisLocal(__global uchar * map_ptr, int map_step, int map_offset, __global ushort2 * st, __global unsigned int * counter, int rows, int cols) { struct PtrStepSz map = { map_ptr + map_offset, map_step, rows + 1, cols + 1 }; __local int smem[18][18]; int2 blockIdx = (int2)(get_group_id(0), get_group_id(1)); int2 blockDim = (int2)(get_local_size(0), get_local_size(1)); int2 threadIdx = (int2)(get_local_id(0), get_local_id(1)); const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; smem[threadIdx.y + 1][threadIdx.x + 1] = x < map.cols && y < map.rows ? get(map, y, x) : 0; if (threadIdx.y == 0) smem[0][threadIdx.x + 1] = x < map.cols ? get(map, y - 1, x) : 0; if (threadIdx.y == blockDim.y - 1) smem[blockDim.y + 1][threadIdx.x + 1] = y + 1 < map.rows ? get(map, y + 1, x) : 0; if (threadIdx.x == 0) smem[threadIdx.y + 1][0] = y < map.rows ? get(map, y, x - 1) : 0; if (threadIdx.x == blockDim.x - 1) smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols && y < map.rows ? get(map, y, x + 1) : 0; if (threadIdx.x == 0 && threadIdx.y == 0) smem[0][0] = y > 0 && x > 0 ? get(map, y - 1, x - 1) : 0; if (threadIdx.x == blockDim.x - 1 && threadIdx.y == 0) smem[0][blockDim.x + 1] = y > 0 && x + 1 < map.cols ? get(map, y - 1, x + 1) : 0; if (threadIdx.x == 0 && threadIdx.y == blockDim.y - 1) smem[blockDim.y + 1][0] = y + 1 < map.rows && x > 0 ? get(map, y + 1, x - 1) : 0; if (threadIdx.x == blockDim.x - 1 && threadIdx.y == blockDim.y - 1) smem[blockDim.y + 1][blockDim.x + 1] = y + 1 < map.rows && x + 1 < map.cols ? get(map, y + 1, x + 1) : 0; barrier(CLK_LOCAL_MEM_FENCE); if (x >= cols || y >= rows) return; int n; #pragma unroll for (int k = 0; k < 16; ++k) { n = 0; if (smem[threadIdx.y + 1][threadIdx.x + 1] == 1) { n += smem[threadIdx.y ][threadIdx.x ] == 2; n += smem[threadIdx.y ][threadIdx.x + 1] == 2; n += smem[threadIdx.y ][threadIdx.x + 2] == 2; n += smem[threadIdx.y + 1][threadIdx.x ] == 2; n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2; n += smem[threadIdx.y + 2][threadIdx.x ] == 2; n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2; n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2; } if (n > 0) smem[threadIdx.y + 1][threadIdx.x + 1] = 2; } const int e = smem[threadIdx.y + 1][threadIdx.x + 1]; set(map, y, x, e); n = 0; if (e == 2) { n += smem[threadIdx.y ][threadIdx.x ] == 1; n += smem[threadIdx.y ][threadIdx.x + 1] == 1; n += smem[threadIdx.y ][threadIdx.x + 2] == 1; n += smem[threadIdx.y + 1][threadIdx.x ] == 1; n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1; n += smem[threadIdx.y + 2][threadIdx.x ] == 1; n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1; n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1; } if (n > 0) { const int ind = atomic_inc(counter); st[ind] = (ushort2)(x + 1, y + 1); } } #elif defined OP_HYST_GLOBAL __constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1}; __constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1}; #define stack_size 512 #define map_index mad24(map_step, pos.y, pos.x * (int)sizeof(int)) __kernel void edgesHysteresisGlobal(__global uchar * map, int map_step, int map_offset, __global ushort2 * st1, __global ushort2 * st2, __global int * counter, int rows, int cols, int count) { map += map_offset; int lidx = get_local_id(0); int grp_idx = get_group_id(0); int grp_idy = get_group_id(1); __local unsigned int s_counter, s_ind; __local ushort2 s_st[stack_size]; if (lidx == 0) s_counter = 0; barrier(CLK_LOCAL_MEM_FENCE); int ind = mad24(grp_idy, (int)get_local_size(0), grp_idx); if (ind < count) { ushort2 pos = st1[ind]; if (lidx < 8) { pos.x += c_dx[lidx]; pos.y += c_dy[lidx]; if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows && *(__global int *)(map + map_index) == 1) { *(__global int *)(map + map_index) = 2; ind = atomic_inc(&s_counter); s_st[ind] = pos; } } barrier(CLK_LOCAL_MEM_FENCE); while (s_counter > 0 && s_counter <= stack_size - get_local_size(0)) { const int subTaskIdx = lidx >> 3; const int portion = min(s_counter, (uint)(get_local_size(0)>> 3)); if (subTaskIdx < portion) pos = s_st[s_counter - 1 - subTaskIdx]; barrier(CLK_LOCAL_MEM_FENCE); if (lidx == 0) s_counter -= portion; barrier(CLK_LOCAL_MEM_FENCE); if (subTaskIdx < portion) { pos.x += c_dx[lidx & 7]; pos.y += c_dy[lidx & 7]; if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows && *(__global int *)(map + map_index) == 1) { *(__global int *)(map + map_index) = 2; ind = atomic_inc(&s_counter); s_st[ind] = pos; } } barrier(CLK_LOCAL_MEM_FENCE); } if (s_counter > 0) { if (lidx == 0) { ind = atomic_add(counter, s_counter); s_ind = ind - s_counter; } barrier(CLK_LOCAL_MEM_FENCE); ind = s_ind; for (int i = lidx; i < (int)s_counter; i += get_local_size(0)) st2[ind + i] = s_st[i]; } } } #undef map_index #undef stack_size #elif defined OP_EDGES // Get the edge result. egde type of value 2 will be marked as an edge point and set to 255. Otherwise 0. // map edge type mappings // dst edge output __kernel void getEdges(__global const uchar * mapptr, int map_step, int map_offset, __global uchar * dst, int dst_step, int dst_offset, int rows, int cols) { int x = get_global_id(0); int y = get_global_id(1); if (y < rows && x < cols) { int map_index = mad24(map_step, y + 1, mad24(x + 1, (int)sizeof(int), map_offset)); int dst_index = mad24(dst_step, y, x + dst_offset); __global const int * map = (__global const int *)(mapptr + map_index); dst[dst_index] = (uchar)(-(map[0] >> 1)); } } #endif