opencv/modules/ocl/src/opencl/interpolate_frames.cl

253 lines
7.7 KiB
Common Lisp
Raw Normal View History

2012-10-17 07:18:30 +08:00
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
// Image read mode
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
// atomic add for 32bit floating point
inline void atomic_addf(volatile __global float *source, const float operand) {
union {
unsigned int intVal;
float floatVal;
} newVal;
union {
unsigned int intVal;
float floatVal;
} prevVal;
do {
prevVal.floatVal = *source;
newVal.floatVal = prevVal.floatVal + operand;
} while (atomic_cmpxchg((volatile __global unsigned int *)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);
}
__kernel void memsetKernel(
float val,
__global float * image,
int width,
int height,
int step, // in element
int offset
)
{
if(get_global_id(0) >= width || get_global_id(1) >= height)
{
return;
}
image += offset;
image[get_global_id(0) + get_global_id(1) * step] = val;
}
__kernel void normalizeKernel(
__global float * buffer,
int width,
int height,
int step,
int f_offset,
int d_offset
)
{
__global float * factors = buffer + f_offset;
__global float * dst = buffer + d_offset;
int j = get_global_id(0);
int i = get_global_id(1);
if(j >= width || i >= height)
{
return;
}
float scale = factors[step * i + j];
float invScale = (scale == 0.0f) ? 1.0f : (1.0f / scale);
dst[step * i + j] *= invScale;
}
__kernel void forwardWarpKernel(
__global const float * src,
__global float * buffer,
__global const float * u,
__global const float * v,
const int w,
const int h,
const int flow_stride,
const int image_stride,
const int factor_offset,
const int dst_offset,
const float time_scale
)
{
int j = get_global_id(0);
int i = get_global_id(1);
if (i >= h || j >= w) return;
volatile __global float * normalization_factor = (volatile __global float *) buffer + factor_offset;
volatile __global float * dst = (volatile __global float *)buffer + dst_offset;
int flow_row_offset = i * flow_stride;
int image_row_offset = i * image_stride;
//bottom left corner of a target pixel
float cx = u[flow_row_offset + j] * time_scale + (float)j + 1.0f;
float cy = v[flow_row_offset + j] * time_scale + (float)i + 1.0f;
// pixel containing bottom left corner
float px;
float py;
float dx = modf(cx, &px);
float dy = modf(cy, &py);
// target pixel integer coords
int tx;
int ty;
tx = (int) px;
ty = (int) py;
float value = src[image_row_offset + j];
float weight;
// fill pixel containing bottom right corner
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
{
weight = dx * dy;
atomic_addf(dst + ty * image_stride + tx, value * weight);
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
}
// fill pixel containing bottom left corner
tx -= 1;
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
{
weight = (1.0f - dx) * dy;
atomic_addf(dst + ty * image_stride + tx, value * weight);
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
}
// fill pixel containing upper left corner
ty -= 1;
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
{
weight = (1.0f - dx) * (1.0f - dy);
atomic_addf(dst + ty * image_stride + tx, value * weight);
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
}
// fill pixel containing upper right corner
tx += 1;
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
{
weight = dx * (1.0f - dy);
atomic_addf(dst + ty * image_stride + tx, value * weight);
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
}
}
// define buffer offsets
enum
{
O0_OS = 0,
O1_OS,
U_OS,
V_OS,
UR_OS,
VR_OS
};
__kernel void blendFramesKernel(
image2d_t tex_src0,
image2d_t tex_src1,
__global float * buffer,
__global float * out,
int w,
int h,
int step,
float theta
)
{
__global float * u = buffer + h * step * U_OS;
__global float * v = buffer + h * step * V_OS;
__global float * ur = buffer + h * step * UR_OS;
__global float * vr = buffer + h * step * VR_OS;
__global float * o0 = buffer + h * step * O0_OS;
__global float * o1 = buffer + h * step * O1_OS;
int ix = get_global_id(0);
int iy = get_global_id(1);
if(ix >= w || iy >= h) return;
int pos = ix + step * iy;
float _u = u[pos];
float _v = v[pos];
float _ur = ur[pos];
float _vr = vr[pos];
float x = (float)ix + 0.5f;
float y = (float)iy + 0.5f;
bool b0 = o0[pos] > 1e-4f;
bool b1 = o1[pos] > 1e-4f;
float2 coord0 = (float2)(x - _u * theta, y - _v * theta);
float2 coord1 = (float2)(x + _u * (1.0f - theta), y + _v * (1.0f - theta));
if (b0 && b1)
{
// pixel is visible on both frames
out[pos] = read_imagef(tex_src0, sampler, coord0).x * (1.0f - theta) +
read_imagef(tex_src1, sampler, coord1).x * theta;
}
else if (b0)
{
// visible on the first frame only
out[pos] = read_imagef(tex_src0, sampler, coord0).x;
}
else
{
// visible on the second frame only
out[pos] = read_imagef(tex_src1, sampler, coord1).x;
}
}