/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors // Zhang Ying, zhangying913@gmail.com // Niko Li, newlife20080214@gmail.com // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other GpuMaterials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ // resize kernel // Currently, CV_8UC1 CV_8UC4 CV_32FC1 and CV_32FC4 are supported. // We shall support other types later if necessary. #if defined DOUBLE_SUPPORT #pragma OPENCL EXTENSION cl_khr_fp64:enable #define F double #else #define F float #endif #define INTER_RESIZE_COEF_BITS 11 #define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS) #define CAST_BITS (INTER_RESIZE_COEF_BITS << 1) #define CAST_SCALE (1.0f/(1<= (l) ? (x):((x)+1)) __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restrict src, int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel, int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify ) { int gx = get_global_id(0); int dy = get_global_id(1); float4 sx, u, xf; int4 x, DX; gx = (gx<<2) - (dstoffset_in_pixel&3); DX = (int4)(gx, gx+1, gx+2, gx+3); sx = (convert_float4(DX) + 0.5f) * ifx - 0.5f; xf = floor(sx); x = convert_int4(xf); u = sx - xf; float sy = ((dy+0.5f) * ify - 0.5f); int y = floor(sy); float v = sy - y; u = x < 0 ? 0 : u; u = (x >= src_cols) ? 0 : u; x = x < 0 ? 0 : x; x = (x >= src_cols) ? src_cols-1 : x; y<0 ? y=0,v=0 : y; y>=src_rows ? 
y=src_rows-1,v=0 : y; int4 U, U1; int V, V1; float4 utmp1, utmp2; float vtmp; float4 scale_vec = INTER_RESIZE_COEF_SCALE; utmp1 = u * scale_vec; utmp2 = scale_vec - utmp1; U = convert_int4(rint(utmp1)); U1 = convert_int4(rint(utmp2)); vtmp = v * INTER_RESIZE_COEF_SCALE; V = rint(vtmp); V1= rint(INTER_RESIZE_COEF_SCALE - vtmp); int y_ = INC(y,src_rows); int4 x_; x_ = ((x+1 >= src_cols) != 0) ? x : x+1; int4 val1, val2, val; int4 sdata1, sdata2, sdata3, sdata4; int4 pos1 = mad24((int4)y, (int4)srcstep_in_pixel, x+(int4)srcoffset_in_pixel); int4 pos2 = mad24((int4)y, (int4)srcstep_in_pixel, x_+(int4)srcoffset_in_pixel); int4 pos3 = mad24((int4)y_, (int4)srcstep_in_pixel, x+(int4)srcoffset_in_pixel); int4 pos4 = mad24((int4)y_, (int4)srcstep_in_pixel, x_+(int4)srcoffset_in_pixel); sdata1.s0 = src[pos1.s0]; sdata1.s1 = src[pos1.s1]; sdata1.s2 = src[pos1.s2]; sdata1.s3 = src[pos1.s3]; sdata2.s0 = src[pos2.s0]; sdata2.s1 = src[pos2.s1]; sdata2.s2 = src[pos2.s2]; sdata2.s3 = src[pos2.s3]; sdata3.s0 = src[pos3.s0]; sdata3.s1 = src[pos3.s1]; sdata3.s2 = src[pos3.s2]; sdata3.s3 = src[pos3.s3]; sdata4.s0 = src[pos4.s0]; sdata4.s1 = src[pos4.s1]; sdata4.s2 = src[pos4.s2]; sdata4.s3 = src[pos4.s3]; val1 = mul24(U1 , sdata1) + mul24(U , sdata2); val2 = mul24(U1 , sdata3) + mul24(U , sdata4); val = mul24((int4)V1 , val1) + mul24((int4)V , val2); val = ((val + (1<<(CAST_BITS-1))) >> CAST_BITS); pos4 = mad24(dy, dststep_in_pixel, gx+dstoffset_in_pixel); pos4.y++; pos4.z+=2; pos4.w+=3; uchar4 uval = convert_uchar4_sat(val); int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dstoffset_in_pixel&3)==0); if(con) { *(__global uchar4*)(dst + pos4.x)=uval; } else { if(gx >= 0 && gx < dst_cols && dy >= 0 && dy < dst_rows) { dst[pos4.x]=uval.x; } if(gx+1 >= 0 && gx+1 < dst_cols && dy >= 0 && dy < dst_rows) { dst[pos4.y]=uval.y; } if(gx+2 >= 0 && gx+2 < dst_cols && dy >= 0 && dy < dst_rows) { dst[pos4.z]=uval.z; } if(gx+3 >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows) 
{ dst[pos4.w]=uval.w; } } } __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src, int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel, int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify ) { int dx = get_global_id(0); int dy = get_global_id(1); float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f); int x = floor(sx), y = floor(sy); float u = sx - x, v = sy - y; x<0 ? x=0,u=0 : x,u; x>=src_cols ? x=src_cols-1,u=0 : x,u; y<0 ? y=0,v=0 : y,v; y>=src_rows ? y=src_rows-1,v=0 : y,v; u = u * INTER_RESIZE_COEF_SCALE; v = v * INTER_RESIZE_COEF_SCALE; int U = rint(u); int V = rint(v); int U1= rint(INTER_RESIZE_COEF_SCALE - u); int V1= rint(INTER_RESIZE_COEF_SCALE - v); int y_ = INC(y,src_rows); int x_ = INC(x,src_cols); int4 srcpos; srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel); srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel); srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel); srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel); int4 data0 = convert_int4(src[srcpos.x]); int4 data1 = convert_int4(src[srcpos.y]); int4 data2 = convert_int4(src[srcpos.z]); int4 data3 = convert_int4(src[srcpos.w]); int4 val = mul24((int4)mul24(U1, V1) , data0) + mul24((int4)mul24(U, V1) , data1) +mul24((int4)mul24(U1, V) , data2)+mul24((int4)mul24(U, V) , data3); int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel); uchar4 uval = convert_uchar4((val + (1<<(CAST_BITS-1)))>>CAST_BITS); if(dx>=0 && dx=0 && dy=src_cols ? x=src_cols-1,u=0 : x,u; y<0 ? y=0,v=0 : y,v; y>=src_rows ? 
y=src_rows-1,v=0 : y,v; int y_ = INC(y,src_rows); int x_ = INC(x,src_cols); float u1 = 1.f-u; float v1 = 1.f-v; int4 srcpos; srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel); srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel); srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel); srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel); float data0 = src[srcpos.x]; float data1 = src[srcpos.y]; float data2 = src[srcpos.z]; float data3 = src[srcpos.w]; float val1 = u1 * data0 + u * data1 ; float val2 = u1 * data2 + u * data3; float val = v1 * val1 + v * val2; int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel); if(dx>=0 && dx=0 && dy=src_cols ? x=src_cols-1,u=0 : x; y<0 ? y=0,v=0 : y; y>=src_rows ? y=src_rows-1,v=0 : y; int y_ = INC(y,src_rows); int x_ = INC(x,src_cols); float u1 = 1.f-u; float v1 = 1.f-v; int4 srcpos; srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel); srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel); srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel); srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel); float4 s_data1, s_data2, s_data3, s_data4; s_data1 = src[srcpos.x]; s_data2 = src[srcpos.y]; s_data3 = src[srcpos.z]; s_data4 = src[srcpos.w]; float4 val = u1 * v1 * s_data1 + u * v1 * s_data2 +u1 * v *s_data3 + u * v *s_data4; int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel); if(dx>=0 && dx=0 && dy= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dstoffset_in_pixel&3)==0); if(con) { *(__global uchar4*)(dst + pos.x)=val; } else { if(gx >= 0 && gx < dst_cols && dy >= 0 && dy < dst_rows) { dst[pos.x]=val.x; } if(gx+1 >= 0 && gx+1 < dst_cols && dy >= 0 && dy < dst_rows) { dst[pos.y]=val.y; } if(gx+2 >= 0 && gx+2 < dst_cols && dy >= 0 && dy < dst_rows) { dst[pos.z]=val.z; } if(gx+3 >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows) { dst[pos.w]=val.w; } } } __kernel void resizeNN_C4_D0(__global uchar4 * dst, __global uchar4 * src, int 
dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel, int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify ) { int dx = get_global_id(0); int dy = get_global_id(1); F s1 = dx*ifx; F s2 = dy*ify; int sx = fmin((float)floor(s1), (float)src_cols-1); int sy = fmin((float)floor(s2), (float)src_rows-1); int dpos = mad24(dy, dststep_in_pixel, dx + dstoffset_in_pixel); int spos = mad24(sy, srcstep_in_pixel, sx + srcoffset_in_pixel); if(dx>=0 && dx=0 && dy=0 && dx=0 && dy=0 && dx=0 && dy