/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
//    Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics  : enable

// Image read mode: unnormalized coordinates, clamp to edge, bilinear filtering
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

// Atomic add for 32-bit floating point, emulated with a compare-and-swap loop
// on the integer representation of the value.
inline void atomic_addf(volatile __global float *source, const float operand)
{
    union
    {
        unsigned int intVal;
        float floatVal;
    } newVal;
    union
    {
        unsigned int intVal;
        float floatVal;
    } prevVal;
    do
    {
        prevVal.floatVal = *source;
        newVal.floatVal  = prevVal.floatVal + operand;
    }
    while (atomic_cmpxchg((volatile __global unsigned int *)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);
}

// Fill a 2D float image with a constant value.
__kernel void memsetKernel(
    float val,
    __global float * image,
    int width,
    int height,
    int step,   // in elements
    int offset
    )
{
    if(get_global_id(0) >= width || get_global_id(1) >= height)
    {
        return;
    }
    image += offset;
    image[get_global_id(0) + get_global_id(1) * step] = val;
}

// Divide each accumulated pixel by its accumulated weight (normalization factor).
__kernel void normalizeKernel(
    __global float * buffer,
    int width,
    int height,
    int step,
    int f_offset,
    int d_offset
    )
{
    __global float * factors = buffer + f_offset;
    __global float * dst     = buffer + d_offset;

    int j = get_global_id(0);
    int i = get_global_id(1);

    if(j >= width || i >= height)
    {
        return;
    }

    float scale    = factors[step * i + j];
    float invScale = (scale == 0.0f) ? 1.0f : (1.0f / scale);

    dst[step * i + j] *= invScale;
}

// Forward-warp the source image along the flow field (u, v): each source pixel
// is scattered into the four destination pixels it overlaps, and the matching
// bilinear weights are accumulated into the normalization buffer.
__kernel void forwardWarpKernel(
    __global const float * src,
    __global float * buffer,
    __global const float * u,
    __global const float * v,
    const int w,
    const int h,
    const int flow_stride,
    const int image_stride,
    const int factor_offset,
    const int dst_offset,
    const float time_scale
    )
{
    int j = get_global_id(0);
    int i = get_global_id(1);

    if (i >= h || j >= w)
        return;

    volatile __global float * normalization_factor = (volatile __global float *) buffer + factor_offset;
    volatile __global float * dst = (volatile __global float *) buffer + dst_offset;

    int flow_row_offset  = i * flow_stride;
    int image_row_offset = i * image_stride;

    // bottom left corner of a target pixel
    float cx = u[flow_row_offset + j] * time_scale + (float)j + 1.0f;
    float cy = v[flow_row_offset + j] * time_scale + (float)i + 1.0f;

    // pixel containing bottom left corner
    float px;
    float py;
    float dx = modf(cx, &px);
    float dy = modf(cy, &py);

    // target pixel integer coords
    int tx;
    int ty;
    tx = (int) px;
    ty = (int) py;
    float value = src[image_row_offset + j];
    float weight;

    // fill pixel containing bottom right corner
    if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
    {
        weight = dx * dy;
        atomic_addf(dst + ty * image_stride + tx, value * weight);
        atomic_addf(normalization_factor + ty * image_stride + tx, weight);
    }

    // fill pixel containing bottom left corner
    tx -= 1;
    if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
    {
        weight = (1.0f - dx) * dy;
        atomic_addf(dst + ty * image_stride + tx, value * weight);
        atomic_addf(normalization_factor + ty * image_stride + tx, weight);
    }

    // fill pixel containing upper left corner
    ty -= 1;
    if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
    {
        weight = (1.0f - dx) * (1.0f - dy);
        atomic_addf(dst + ty * image_stride + tx, value * weight);
        atomic_addf(normalization_factor + ty * image_stride + tx, weight);
    }

    // fill pixel containing upper right corner
    tx += 1;
    if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
    {
        weight = dx * (1.0f - dy);
        atomic_addf(dst + ty * image_stride + tx, value * weight);
        atomic_addf(normalization_factor + ty * image_stride + tx, weight);
    }
}

// Plane offsets within the packed intermediate buffer; each plane holds
// h * step floats.
enum
{
    O0_OS = 0,
    O1_OS,
    U_OS,
    V_OS,
    UR_OS,
    VR_OS
};

// Blend the two source frames at interpolation position theta, sampling each
// texture at flow-compensated coordinates; a pixel visible on only one frame
// is taken from that frame alone.
__kernel void blendFramesKernel(
    image2d_t tex_src0,
    image2d_t tex_src1,
    __global float * buffer,
    __global float * out,
    int w,
    int h,
    int step,
    float theta
    )
{
    __global float * u  = buffer + h * step * U_OS;
    __global float * v  = buffer + h * step * V_OS;
    __global float * ur = buffer + h * step * UR_OS;
    __global float * vr = buffer + h * step * VR_OS;
    __global float * o0 = buffer + h * step * O0_OS;
    __global float * o1 = buffer + h * step * O1_OS;

    int ix = get_global_id(0);
    int iy = get_global_id(1);

    if(ix >= w || iy >= h)
        return;

    int pos = ix + step * iy;

    float _u  = u[pos];
    float _v  = v[pos];
    float _ur = ur[pos];
    float _vr = vr[pos];
    float x = (float)ix + 0.5f;
    float y = (float)iy + 0.5f;
    bool b0 = o0[pos] > 1e-4f;
    bool b1 = o1[pos] > 1e-4f;

    float2 coord0 = (float2)(x - _u * theta,          y - _v * theta);
    float2 coord1 = (float2)(x + _u * (1.0f - theta), y + _v * (1.0f - theta));

    if (b0 && b1)
    {
        // pixel is visible on both frames
        out[pos] = read_imagef(tex_src0, sampler, coord0).x * (1.0f - theta) +
                   read_imagef(tex_src1, sampler, coord1).x * theta;
    }
    else if (b0)
    {
        // visible on the first frame only
        out[pos] = read_imagef(tex_src0, sampler, coord0).x;
    }
    else
    {
        // visible on the second frame only (or on neither): fall back to the second frame
        out[pos] = read_imagef(tex_src1, sampler, coord1).x;
    }
}
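/*
 * Illustrative host-side sketch (not part of the original kernels): one way memsetKernel
 * above could be dispatched from C using the standard OpenCL host API. The names `queue`,
 * `program`, and `img_buf`, and the 640x480 image size, are assumptions made only for this
 * example; the surrounding library normally drives these kernels through its own wrappers.
 *
 *     cl_int err = CL_SUCCESS;
 *     cl_kernel k = clCreateKernel(program, "memsetKernel", &err);
 *
 *     float val  = 0.0f;
 *     int width  = 640, height = 480;
 *     int step   = width;   // row stride in elements, matching the kernel's convention
 *     int offset = 0;
 *
 *     clSetKernelArg(k, 0, sizeof(float),  &val);
 *     clSetKernelArg(k, 1, sizeof(cl_mem), &img_buf);
 *     clSetKernelArg(k, 2, sizeof(int),    &width);
 *     clSetKernelArg(k, 3, sizeof(int),    &height);
 *     clSetKernelArg(k, 4, sizeof(int),    &step);
 *     clSetKernelArg(k, 5, sizeof(int),    &offset);
 *
 *     size_t global[2] = { (size_t)width, (size_t)height };   // one work-item per pixel
 *     clEnqueueNDRangeKernel(queue, k, 2, NULL, global, NULL, 0, NULL, NULL);
 *     clReleaseKernel(k);
 */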