2012-10-17 07:18:30 +08:00
|
|
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
|
|
//
|
|
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
|
|
// If you do not agree to this license, do not download, install,
|
|
|
|
// copy or use the software.
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// License Agreement
|
|
|
|
// For Open Source Computer Vision Library
|
|
|
|
//
|
|
|
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
|
|
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
|
|
|
// Third party copyrights are property of their respective owners.
|
|
|
|
//
|
|
|
|
// @Authors
|
|
|
|
// Peng Xiao, pengxiao@multicorewareinc.com
|
|
|
|
//
|
|
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
|
|
// are permitted provided that the following conditions are met:
|
|
|
|
//
|
|
|
|
// * Redistribution's of source code must retain the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer.
|
|
|
|
//
|
|
|
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
|
|
// and/or other oclMaterials provided with the distribution.
|
|
|
|
//
|
|
|
|
// * The name of the copyright holders may not be used to endorse or promote products
|
|
|
|
// derived from this software without specific prior written permission.
|
|
|
|
//
|
|
|
|
// This software is provided by the copyright holders and contributors as is and
|
|
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
|
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
|
|
//
|
|
|
|
//M*/
|
|
|
|
|
|
|
|
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
|
|
|
|
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
|
|
|
|
|
|
|
|
// Image read mode
|
|
|
|
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
|
|
|
|
|
|
|
|
// atomic add for 32bit floating point
|
|
|
|
inline void atomic_addf(volatile __global float *source, const float operand) {
|
|
|
|
union {
|
|
|
|
unsigned int intVal;
|
|
|
|
float floatVal;
|
|
|
|
} newVal;
|
|
|
|
union {
|
|
|
|
unsigned int intVal;
|
|
|
|
float floatVal;
|
|
|
|
} prevVal;
|
|
|
|
do {
|
|
|
|
prevVal.floatVal = *source;
|
|
|
|
newVal.floatVal = prevVal.floatVal + operand;
|
|
|
|
} while (atomic_cmpxchg((volatile __global unsigned int *)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);
|
|
|
|
}
|
|
|
|
|
|
|
|
__kernel void memsetKernel(
|
|
|
|
float val,
|
|
|
|
__global float * image,
|
|
|
|
int width,
|
|
|
|
int height,
|
|
|
|
int step, // in element
|
|
|
|
int offset
|
|
|
|
)
|
|
|
|
{
|
|
|
|
if(get_global_id(0) >= width || get_global_id(1) >= height)
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
image += offset;
|
|
|
|
image[get_global_id(0) + get_global_id(1) * step] = val;
|
|
|
|
}
|
|
|
|
|
|
|
|
__kernel void normalizeKernel(
|
|
|
|
__global float * buffer,
|
|
|
|
int width,
|
|
|
|
int height,
|
|
|
|
int step,
|
|
|
|
int f_offset,
|
|
|
|
int d_offset
|
|
|
|
)
|
|
|
|
{
|
|
|
|
__global float * factors = buffer + f_offset;
|
|
|
|
__global float * dst = buffer + d_offset;
|
|
|
|
|
|
|
|
int j = get_global_id(0);
|
|
|
|
int i = get_global_id(1);
|
|
|
|
|
|
|
|
if(j >= width || i >= height)
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
float scale = factors[step * i + j];
|
|
|
|
float invScale = (scale == 0.0f) ? 1.0f : (1.0f / scale);
|
|
|
|
|
|
|
|
dst[step * i + j] *= invScale;
|
|
|
|
}
|
|
|
|
|
|
|
|
__kernel void forwardWarpKernel(
|
|
|
|
__global const float * src,
|
|
|
|
__global float * buffer,
|
|
|
|
__global const float * u,
|
|
|
|
__global const float * v,
|
|
|
|
const int w,
|
|
|
|
const int h,
|
|
|
|
const int flow_stride,
|
|
|
|
const int image_stride,
|
|
|
|
const int factor_offset,
|
|
|
|
const int dst_offset,
|
|
|
|
const float time_scale
|
|
|
|
)
|
|
|
|
{
|
|
|
|
int j = get_global_id(0);
|
|
|
|
int i = get_global_id(1);
|
|
|
|
|
|
|
|
if (i >= h || j >= w) return;
|
|
|
|
|
|
|
|
volatile __global float * normalization_factor = (volatile __global float *) buffer + factor_offset;
|
|
|
|
volatile __global float * dst = (volatile __global float *)buffer + dst_offset;
|
|
|
|
|
|
|
|
int flow_row_offset = i * flow_stride;
|
|
|
|
int image_row_offset = i * image_stride;
|
|
|
|
|
|
|
|
//bottom left corner of a target pixel
|
|
|
|
float cx = u[flow_row_offset + j] * time_scale + (float)j + 1.0f;
|
|
|
|
float cy = v[flow_row_offset + j] * time_scale + (float)i + 1.0f;
|
|
|
|
// pixel containing bottom left corner
|
|
|
|
float px;
|
|
|
|
float py;
|
|
|
|
float dx = modf(cx, &px);
|
|
|
|
float dy = modf(cy, &py);
|
|
|
|
// target pixel integer coords
|
|
|
|
int tx;
|
|
|
|
int ty;
|
|
|
|
tx = (int) px;
|
|
|
|
ty = (int) py;
|
|
|
|
float value = src[image_row_offset + j];
|
|
|
|
float weight;
|
|
|
|
// fill pixel containing bottom right corner
|
|
|
|
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
|
|
|
{
|
|
|
|
weight = dx * dy;
|
|
|
|
atomic_addf(dst + ty * image_stride + tx, value * weight);
|
|
|
|
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
|
|
|
|
}
|
|
|
|
|
|
|
|
// fill pixel containing bottom left corner
|
|
|
|
tx -= 1;
|
|
|
|
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
|
|
|
{
|
|
|
|
weight = (1.0f - dx) * dy;
|
|
|
|
atomic_addf(dst + ty * image_stride + tx, value * weight);
|
|
|
|
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
|
|
|
|
}
|
|
|
|
|
|
|
|
// fill pixel containing upper left corner
|
|
|
|
ty -= 1;
|
|
|
|
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
|
|
|
{
|
|
|
|
weight = (1.0f - dx) * (1.0f - dy);
|
|
|
|
atomic_addf(dst + ty * image_stride + tx, value * weight);
|
|
|
|
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
|
|
|
|
}
|
|
|
|
|
|
|
|
// fill pixel containing upper right corner
|
|
|
|
tx += 1;
|
|
|
|
if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))
|
|
|
|
{
|
|
|
|
weight = dx * (1.0f - dy);
|
|
|
|
atomic_addf(dst + ty * image_stride + tx, value * weight);
|
|
|
|
atomic_addf(normalization_factor + ty * image_stride + tx, weight);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// define buffer offsets
|
|
|
|
enum
|
|
|
|
{
|
|
|
|
O0_OS = 0,
|
|
|
|
O1_OS,
|
|
|
|
U_OS,
|
|
|
|
V_OS,
|
|
|
|
UR_OS,
|
|
|
|
VR_OS
|
|
|
|
};
|
|
|
|
|
|
|
|
__kernel void blendFramesKernel(
|
|
|
|
image2d_t tex_src0,
|
|
|
|
image2d_t tex_src1,
|
|
|
|
__global float * buffer,
|
|
|
|
__global float * out,
|
|
|
|
int w,
|
|
|
|
int h,
|
|
|
|
int step,
|
|
|
|
float theta
|
|
|
|
)
|
|
|
|
{
|
|
|
|
__global float * u = buffer + h * step * U_OS;
|
|
|
|
__global float * v = buffer + h * step * V_OS;
|
|
|
|
__global float * ur = buffer + h * step * UR_OS;
|
|
|
|
__global float * vr = buffer + h * step * VR_OS;
|
|
|
|
__global float * o0 = buffer + h * step * O0_OS;
|
|
|
|
__global float * o1 = buffer + h * step * O1_OS;
|
|
|
|
|
|
|
|
int ix = get_global_id(0);
|
|
|
|
int iy = get_global_id(1);
|
|
|
|
|
|
|
|
if(ix >= w || iy >= h) return;
|
|
|
|
|
|
|
|
int pos = ix + step * iy;
|
|
|
|
|
|
|
|
float _u = u[pos];
|
|
|
|
float _v = v[pos];
|
|
|
|
|
|
|
|
float _ur = ur[pos];
|
|
|
|
float _vr = vr[pos];
|
|
|
|
|
|
|
|
float x = (float)ix + 0.5f;
|
|
|
|
float y = (float)iy + 0.5f;
|
|
|
|
bool b0 = o0[pos] > 1e-4f;
|
|
|
|
bool b1 = o1[pos] > 1e-4f;
|
|
|
|
|
|
|
|
float2 coord0 = (float2)(x - _u * theta, y - _v * theta);
|
|
|
|
float2 coord1 = (float2)(x + _u * (1.0f - theta), y + _v * (1.0f - theta));
|
|
|
|
|
|
|
|
if (b0 && b1)
|
|
|
|
{
|
|
|
|
// pixel is visible on both frames
|
|
|
|
out[pos] = read_imagef(tex_src0, sampler, coord0).x * (1.0f - theta) +
|
|
|
|
read_imagef(tex_src1, sampler, coord1).x * theta;
|
|
|
|
}
|
|
|
|
else if (b0)
|
|
|
|
{
|
|
|
|
// visible on the first frame only
|
|
|
|
out[pos] = read_imagef(tex_src0, sampler, coord0).x;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// visible on the second frame only
|
|
|
|
out[pos] = read_imagef(tex_src1, sampler, coord1).x;
|
|
|
|
}
|
|
|
|
}
|