mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 14:13:15 +08:00
add PyrLK to ocl module
This commit is contained in:
parent
0e9405e591
commit
78e89890b0
@ -460,6 +460,18 @@ namespace cv
|
||||
a.swap(b);
|
||||
}
|
||||
|
||||
inline void ensureSizeIsEnough(int rows, int cols, int type, oclMat& m)
|
||||
{
|
||||
if (m.type() == type && m.rows >= rows && m.cols >= cols)
|
||||
m = m(Rect(0, 0, cols, rows));
|
||||
else
|
||||
m.create(rows, cols, type);
|
||||
}
|
||||
|
||||
inline void ensureSizeIsEnough(Size size, int type, oclMat& m)
|
||||
{
|
||||
ensureSizeIsEnough(size.height, size.width, type, m);
|
||||
}
|
||||
} /* end of namespace ocl */
|
||||
|
||||
} /* end of namespace cv */
|
||||
|
@ -1125,6 +1125,66 @@ namespace cv
|
||||
explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
|
||||
};
|
||||
|
||||
/////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
|
||||
class CV_EXPORTS PyrLKOpticalFlow
|
||||
{
|
||||
public:
|
||||
PyrLKOpticalFlow()
|
||||
{
|
||||
winSize = Size(21, 21);
|
||||
maxLevel = 3;
|
||||
iters = 30;
|
||||
derivLambda = 0.5;
|
||||
useInitialFlow = false;
|
||||
minEigThreshold = 1e-4f;
|
||||
getMinEigenVals = false;
|
||||
isDeviceArch11_ = false;
|
||||
}
|
||||
|
||||
void sparse(const oclMat& prevImg, const oclMat& nextImg, const oclMat& prevPts, oclMat& nextPts,
|
||||
oclMat& status, oclMat* err = 0);
|
||||
|
||||
void dense(const oclMat& prevImg, const oclMat& nextImg, oclMat& u, oclMat& v, oclMat* err = 0);
|
||||
|
||||
Size winSize;
|
||||
int maxLevel;
|
||||
int iters;
|
||||
double derivLambda;
|
||||
bool useInitialFlow;
|
||||
float minEigThreshold;
|
||||
bool getMinEigenVals;
|
||||
|
||||
void releaseMemory()
|
||||
{
|
||||
dx_calcBuf_.release();
|
||||
dy_calcBuf_.release();
|
||||
|
||||
prevPyr_.clear();
|
||||
nextPyr_.clear();
|
||||
|
||||
dx_buf_.release();
|
||||
dy_buf_.release();
|
||||
}
|
||||
|
||||
private:
|
||||
void calcSharrDeriv(const oclMat& src, oclMat& dx, oclMat& dy);
|
||||
|
||||
void buildImagePyramid(const oclMat& img0, vector<oclMat>& pyr, bool withBorder);
|
||||
|
||||
oclMat dx_calcBuf_;
|
||||
oclMat dy_calcBuf_;
|
||||
|
||||
vector<oclMat> prevPyr_;
|
||||
vector<oclMat> nextPyr_;
|
||||
|
||||
oclMat dx_buf_;
|
||||
oclMat dy_buf_;
|
||||
|
||||
oclMat uPyr_[2];
|
||||
oclMat vPyr_[2];
|
||||
|
||||
bool isDeviceArch11_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
831
modules/ocl/src/kernels/pyrlk.cl
Normal file
831
modules/ocl/src/kernels/pyrlk.cl
Normal file
@ -0,0 +1,831 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
|
||||
__kernel void calcSharrDeriv_vertical_C1_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
if (y < rows && x < cols * cn)
|
||||
{
|
||||
const uchar src_val0 = (src + (y > 0 ? y-1 : rows > 1 ? 1 : 0) * srcStep)[x];
|
||||
const uchar src_val1 = (src + y * srcStep)[x];
|
||||
const uchar src_val2 = (src + (y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0) * srcStep)[x];
|
||||
|
||||
((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10;
|
||||
((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void calcSharrDeriv_vertical_C4_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
if (y < rows && x < cols * cn)
|
||||
{
|
||||
const uchar src_val0 = (src + (y > 0 ? y - 1 : 1) * srcStep)[x];
|
||||
const uchar src_val1 = (src + y * srcStep)[x];
|
||||
const uchar src_val2 = (src + (y < rows - 1 ? y + 1 : rows - 2) * srcStep)[x];
|
||||
|
||||
((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10;
|
||||
((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void calcSharrDeriv_horizontal_C1_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
const int colsn = cols * cn;
|
||||
|
||||
if (y < rows && x < colsn)
|
||||
{
|
||||
__global const short* dx_buf_row = dx_buf + y * dx_bufStep;
|
||||
__global const short* dy_buf_row = dy_buf + y * dy_bufStep;
|
||||
|
||||
const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn;
|
||||
const int xl = x - cn >= 0 ? x - cn : cn + x;
|
||||
|
||||
((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl];
|
||||
((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void calcSharrDeriv_horizontal_C4_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
const int colsn = cols * cn;
|
||||
|
||||
if (y < rows && x < colsn)
|
||||
{
|
||||
__global const short* dx_buf_row = dx_buf + y * dx_bufStep;
|
||||
__global const short* dy_buf_row = dy_buf + y * dy_bufStep;
|
||||
|
||||
const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn;
|
||||
const int xl = x - cn >= 0 ? x - cn : cn + x;
|
||||
|
||||
((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl];
|
||||
((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10;
|
||||
}
|
||||
}
|
||||
|
||||
#define W_BITS 14
|
||||
#define W_BITS1 14
|
||||
|
||||
#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
|
||||
int linearFilter_uchar(__global const uchar* src, int srcStep, int cn, float2 pt, int x, int y)
|
||||
{
|
||||
int2 ipt;
|
||||
ipt.x = convert_int_sat_rtn(pt.x);
|
||||
ipt.y = convert_int_sat_rtn(pt.y);
|
||||
|
||||
float a = pt.x - ipt.x;
|
||||
float b = pt.y - ipt.y;
|
||||
|
||||
int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS));
|
||||
int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS));
|
||||
int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS));
|
||||
int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
|
||||
|
||||
__global const uchar* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn;
|
||||
__global const uchar* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn;
|
||||
|
||||
return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1 - 5);
|
||||
}
|
||||
|
||||
int linearFilter_short(__global const short* src, int srcStep, int cn, float2 pt, int x, int y)
|
||||
{
|
||||
int2 ipt;
|
||||
ipt.x = convert_int_sat_rtn(pt.x);
|
||||
ipt.y = convert_int_sat_rtn(pt.y);
|
||||
|
||||
float a = pt.x - ipt.x;
|
||||
float b = pt.y - ipt.y;
|
||||
|
||||
int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS));
|
||||
int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS));
|
||||
int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS));
|
||||
int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
|
||||
|
||||
__global const short* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn;
|
||||
__global const short* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn;
|
||||
|
||||
return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1);
|
||||
}
|
||||
|
||||
float linearFilter_float(__global const float* src, int srcStep, int cn, float2 pt, float x, float y)
|
||||
{
|
||||
int2 ipt;
|
||||
ipt.x = convert_int_sat_rtn(pt.x);
|
||||
ipt.y = convert_int_sat_rtn(pt.y);
|
||||
|
||||
float a = pt.x - ipt.x;
|
||||
float b = pt.y - ipt.y;
|
||||
|
||||
float iw00 = ((1.0f - a) * (1.0f - b) * (1 << W_BITS));
|
||||
float iw01 = (a * (1.0f - b) * (1 << W_BITS));
|
||||
float iw10 = ((1.0f - a) * b * (1 << W_BITS));
|
||||
float iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
|
||||
|
||||
__global const float* src_row = src + (int)(ipt.y + y) * srcStep / 4 + ipt.x * cn;
|
||||
__global const float* src_row1 = src + (int)(ipt.y + y + 1) * srcStep / 4 + ipt.x * cn;
|
||||
|
||||
return src_row[(int)x] * iw00 + src_row[(int)x + cn] * iw01 + src_row1[(int)x] * iw10 + src_row1[(int)x + cn] * iw11, W_BITS1 - 5;
|
||||
}
|
||||
|
||||
void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
smem3[tid] = val3;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 128];
|
||||
smem2[tid] = val2 += smem2[tid + 128];
|
||||
smem3[tid] = val3 += smem3[tid + 128];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 64];
|
||||
smem2[tid] = val2 += smem2[tid + 64];
|
||||
smem3[tid] = val3 += smem3[tid + 64];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
volatile __local float* vmem1 = smem1;
|
||||
volatile __local float* vmem2 = smem2;
|
||||
volatile __local float* vmem3 = smem3;
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 32];
|
||||
vmem2[tid] = val2 += vmem2[tid + 32];
|
||||
vmem3[tid] = val3 += vmem3[tid + 32];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 16];
|
||||
vmem2[tid] = val2 += vmem2[tid + 16];
|
||||
vmem3[tid] = val3 += vmem3[tid + 16];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 8];
|
||||
vmem2[tid] = val2 += vmem2[tid + 8];
|
||||
vmem3[tid] = val3 += vmem3[tid + 8];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 4];
|
||||
vmem2[tid] = val2 += vmem2[tid + 4];
|
||||
vmem3[tid] = val3 += vmem3[tid + 4];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 2];
|
||||
vmem2[tid] = val2 += vmem2[tid + 2];
|
||||
vmem3[tid] = val3 += vmem3[tid + 2];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 1];
|
||||
vmem2[tid] = val2 += vmem2[tid + 1];
|
||||
vmem3[tid] = val3 += vmem3[tid + 1];
|
||||
}
|
||||
}
|
||||
|
||||
void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 128];
|
||||
smem2[tid] = val2 += smem2[tid + 128];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 64];
|
||||
smem2[tid] = val2 += smem2[tid + 64];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
volatile __local float* vmem1 = smem1;
|
||||
volatile __local float* vmem2 = smem2;
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 32];
|
||||
vmem2[tid] = val2 += vmem2[tid + 32];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 16];
|
||||
vmem2[tid] = val2 += vmem2[tid + 16];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 8];
|
||||
vmem2[tid] = val2 += vmem2[tid + 8];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 4];
|
||||
vmem2[tid] = val2 += vmem2[tid + 4];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 2];
|
||||
vmem2[tid] = val2 += vmem2[tid + 2];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 1];
|
||||
vmem2[tid] = val2 += vmem2[tid + 1];
|
||||
}
|
||||
}
|
||||
|
||||
void reduce1(float val1, __local float* smem1, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 128];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 64];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
volatile __local float* vmem1 = smem1;
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 32];
|
||||
vmem1[tid] = val1 += vmem1[tid + 16];
|
||||
vmem1[tid] = val1 += vmem1[tid + 8];
|
||||
vmem1[tid] = val1 += vmem1[tid + 4];
|
||||
vmem1[tid] = val1 += vmem1[tid + 2];
|
||||
vmem1[tid] = val1 += vmem1[tid + 1];
|
||||
}
|
||||
}
|
||||
|
||||
#define SCALE (1.0f / (1 << 20))
|
||||
|
||||
// Image read mode
|
||||
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
|
||||
|
||||
__kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
|
||||
__global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status/*, __global float* err*/, const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr, char GET_MIN_EIGENVALS)
|
||||
{
|
||||
__local float smem1[256];
|
||||
__local float smem2[256];
|
||||
__local float smem3[256];
|
||||
|
||||
int c_halfWin_x = (c_winSize_x - 1) / 2;
|
||||
int c_halfWin_y = (c_winSize_y - 1) / 2;
|
||||
|
||||
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
|
||||
|
||||
float2 prevPt = prevPts[get_group_id(0)];
|
||||
prevPt.x *= (1.0f / (1 << level));
|
||||
prevPt.y *= (1.0f / (1 << level));
|
||||
|
||||
if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
|
||||
{
|
||||
if (level == 0 && tid == 0)
|
||||
{
|
||||
status[get_group_id(0)] = 0;
|
||||
|
||||
//if (calcErr)
|
||||
// err[get_group_id(0)] = 0;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
prevPt.x -= c_halfWin_x;
|
||||
prevPt.y -= c_halfWin_y;
|
||||
|
||||
// extract the patch from the first image, compute covariation matrix of derivatives
|
||||
|
||||
float A11 = 0;
|
||||
float A12 = 0;
|
||||
float A22 = 0;
|
||||
|
||||
float I_patch[21][21];
|
||||
float dIdx_patch[21][21];
|
||||
float dIdy_patch[21][21];
|
||||
|
||||
for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i)
|
||||
{
|
||||
for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j)
|
||||
{
|
||||
float x = (prevPt.x + xBase + 0.5f);
|
||||
float y = (prevPt.y + yBase + 0.5f);
|
||||
|
||||
I_patch[i][j] = read_imagef(I, sampler, (float2)(x, y)).x;
|
||||
|
||||
float dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
|
||||
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x);
|
||||
|
||||
float dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
|
||||
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x);
|
||||
|
||||
dIdx_patch[i][j] = dIdx;
|
||||
dIdy_patch[i][j] = dIdy;
|
||||
|
||||
A11 += dIdx * dIdx;
|
||||
A12 += dIdx * dIdy;
|
||||
A22 += dIdy * dIdy;
|
||||
}
|
||||
}
|
||||
|
||||
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
A11 = smem1[0];
|
||||
A12 = smem2[0];
|
||||
A22 = smem3[0];
|
||||
|
||||
float D = A11 * A22 - A12 * A12;
|
||||
|
||||
//if (calcErr && GET_MIN_EIGENVALS && tid == 0)
|
||||
// err[get_group_id(0)] = minEig;
|
||||
|
||||
if (D < 1.192092896e-07f)
|
||||
{
|
||||
if (level == 0 && tid == 0)
|
||||
status[get_group_id(0)] = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
D = 1.f / D;
|
||||
|
||||
A11 *= D;
|
||||
A12 *= D;
|
||||
A22 *= D;
|
||||
|
||||
float2 nextPt = nextPts[get_group_id(0)];
|
||||
nextPt.x *= 2.0f;
|
||||
nextPt.y *= 2.0f;
|
||||
|
||||
nextPt.x -= c_halfWin_x;
|
||||
nextPt.y -= c_halfWin_y;
|
||||
|
||||
for (int k = 0; k < c_iters; ++k)
|
||||
{
|
||||
if (nextPt.x < -c_halfWin_x || nextPt.x >= cols || nextPt.y < -c_halfWin_y || nextPt.y >= rows)
|
||||
{
|
||||
if (tid == 0 && level == 0)
|
||||
status[get_group_id(0)] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
float b1 = 0;
|
||||
float b2 = 0;
|
||||
|
||||
for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i)
|
||||
{
|
||||
for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j)
|
||||
{
|
||||
float a = (nextPt.x + x + 0.5f);
|
||||
float b = (nextPt.y + y + 0.5f);
|
||||
|
||||
float I_val = I_patch[i][j];
|
||||
float J_val = read_imagef(J, sampler, (float2)(a, b)).x;
|
||||
|
||||
float diff = (J_val - I_val) * 32.0f;
|
||||
|
||||
b1 += diff * dIdx_patch[i][j];
|
||||
b2 += diff * dIdy_patch[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
reduce2(b1, b2, smem1, smem2, tid);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
b1 = smem1[0];
|
||||
b2 = smem2[0];
|
||||
|
||||
float2 delta;
|
||||
delta.x = A12 * b2 - A22 * b1;
|
||||
delta.y = A12 * b1 - A11 * b2;
|
||||
|
||||
nextPt.x += delta.x;
|
||||
nextPt.y += delta.y;
|
||||
|
||||
if (fabs(delta.x) < 0.01f && fabs(delta.y) < 0.01f)
|
||||
break;
|
||||
}
|
||||
|
||||
float errval = 0.0f;
|
||||
if (calcErr)
|
||||
{
|
||||
for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i)
|
||||
{
|
||||
for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j)
|
||||
{
|
||||
float a = (nextPt.x + x + 0.5f);
|
||||
float b = (nextPt.y + y + 0.5f);
|
||||
|
||||
float I_val = I_patch[i][j];
|
||||
float J_val = read_imagef(J, sampler, (float2)(a, b)).x;
|
||||
|
||||
float diff = J_val - I_val;
|
||||
|
||||
errval += fabs((float)diff);
|
||||
}
|
||||
}
|
||||
|
||||
reduce1(errval, smem1, tid);
|
||||
}
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
nextPt.x += c_halfWin_x;
|
||||
nextPt.y += c_halfWin_y;
|
||||
|
||||
nextPts[get_group_id(0)] = nextPt;
|
||||
|
||||
//if (calcErr && !GET_MIN_EIGENVALS)
|
||||
// err[get_group_id(0)] = errval;
|
||||
}
|
||||
}
|
||||
__kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
|
||||
__global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status/*, __global float* err*/, const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr, char GET_MIN_EIGENVALS)
|
||||
{
|
||||
__local float smem1[256];
|
||||
__local float smem2[256];
|
||||
__local float smem3[256];
|
||||
|
||||
int c_halfWin_x = (c_winSize_x - 1) / 2;
|
||||
int c_halfWin_y = (c_winSize_y - 1) / 2;
|
||||
|
||||
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
|
||||
|
||||
float2 prevPt = prevPts[get_group_id(0)];
|
||||
prevPt.x *= (1.0f / (1 << level));
|
||||
prevPt.y *= (1.0f / (1 << level));
|
||||
|
||||
if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
|
||||
{
|
||||
if (level == 0 && tid == 0)
|
||||
{
|
||||
status[get_group_id(0)] = 0;
|
||||
|
||||
//if (calcErr)
|
||||
// err[get_group_id(0)] = 0;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
prevPt.x -= c_halfWin_x;
|
||||
prevPt.y -= c_halfWin_y;
|
||||
|
||||
// extract the patch from the first image, compute covariation matrix of derivatives
|
||||
|
||||
float A11 = 0;
|
||||
float A12 = 0;
|
||||
float A22 = 0;
|
||||
|
||||
float4 I_patch[21][21];
|
||||
float4 dIdx_patch[21][21];
|
||||
float4 dIdy_patch[21][21];
|
||||
|
||||
for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i)
|
||||
{
|
||||
for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j)
|
||||
{
|
||||
float x = (prevPt.x + xBase + 0.5f);
|
||||
float y = (prevPt.y + yBase + 0.5f);
|
||||
|
||||
I_patch[i][j] = read_imagef(I, sampler, (float2)(x, y)).x;
|
||||
|
||||
float4 dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
|
||||
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)).x + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x);
|
||||
|
||||
float4 dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)).x -
|
||||
(3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)).x + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)).x + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)).x);
|
||||
|
||||
dIdx_patch[i][j] = dIdx;
|
||||
dIdy_patch[i][j] = dIdy;
|
||||
|
||||
A11 += (dIdx * dIdx).x + (dIdx * dIdx).y + (dIdx * dIdx).z;
|
||||
A12 += (dIdx * dIdy).x + (dIdx * dIdy).y + (dIdx * dIdy).z;
|
||||
A22 += (dIdy * dIdy).x + (dIdy * dIdy).y + (dIdy * dIdy).z;
|
||||
}
|
||||
}
|
||||
|
||||
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
A11 = smem1[0];
|
||||
A12 = smem2[0];
|
||||
A22 = smem3[0];
|
||||
|
||||
float D = A11 * A22 - A12 * A12;
|
||||
|
||||
//if (calcErr && GET_MIN_EIGENVALS && tid == 0)
|
||||
// err[get_group_id(0)] = minEig;
|
||||
|
||||
if (D < 1.192092896e-07f)
|
||||
{
|
||||
if (level == 0 && tid == 0)
|
||||
status[get_group_id(0)] = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
D = 1.f / D;
|
||||
|
||||
A11 *= D;
|
||||
A12 *= D;
|
||||
A22 *= D;
|
||||
|
||||
float2 nextPt = nextPts[get_group_id(0)];
|
||||
nextPt.x *= 2.0f;
|
||||
nextPt.y *= 2.0f;
|
||||
|
||||
nextPt.x -= c_halfWin_x;
|
||||
nextPt.y -= c_halfWin_y;
|
||||
|
||||
for (int k = 0; k < c_iters; ++k)
|
||||
{
|
||||
if (nextPt.x < -c_halfWin_x || nextPt.x >= cols || nextPt.y < -c_halfWin_y || nextPt.y >= rows)
|
||||
{
|
||||
if (tid == 0 && level == 0)
|
||||
status[get_group_id(0)] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
float b1 = 0;
|
||||
float b2 = 0;
|
||||
|
||||
for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i)
|
||||
{
|
||||
for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j)
|
||||
{
|
||||
float a = (nextPt.x + x + 0.5f);
|
||||
float b = (nextPt.y + y + 0.5f);
|
||||
|
||||
float4 I_val = I_patch[i][j];
|
||||
float4 J_val = read_imagef(J, sampler, (float2)(a, b)).x;
|
||||
|
||||
float4 diff = (J_val - I_val) * 32.0f;
|
||||
|
||||
b1 += (diff * dIdx_patch[i][j]).x + (diff * dIdx_patch[i][j]).y + (diff * dIdx_patch[i][j]).z;
|
||||
b2 += (diff * dIdy_patch[i][j]).x + (diff * dIdy_patch[i][j]).y + (diff * dIdy_patch[i][j]).z;
|
||||
}
|
||||
}
|
||||
|
||||
reduce2(b1, b2, smem1, smem2, tid);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
b1 = smem1[0];
|
||||
b2 = smem2[0];
|
||||
|
||||
float2 delta;
|
||||
delta.x = A12 * b2 - A22 * b1;
|
||||
delta.y = A12 * b1 - A11 * b2;
|
||||
|
||||
nextPt.x += delta.x;
|
||||
nextPt.y += delta.y;
|
||||
|
||||
if (fabs(delta.x) < 0.01f && fabs(delta.y) < 0.01f)
|
||||
break;
|
||||
}
|
||||
|
||||
float errval = 0.0f;
|
||||
if (calcErr)
|
||||
{
|
||||
for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i)
|
||||
{
|
||||
for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j)
|
||||
{
|
||||
float a = (nextPt.x + x + 0.5f);
|
||||
float b = (nextPt.y + y + 0.5f);
|
||||
|
||||
float4 I_val = I_patch[i][j];
|
||||
float4 J_val = read_imagef(J, sampler, (float2)(a, b)).x;
|
||||
|
||||
float4 diff = J_val - I_val;
|
||||
|
||||
errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z);
|
||||
}
|
||||
}
|
||||
|
||||
reduce1(errval, smem1, tid);
|
||||
}
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
nextPt.x += c_halfWin_x;
|
||||
nextPt.y += c_halfWin_y;
|
||||
|
||||
nextPts[get_group_id(0)] = nextPt;
|
||||
|
||||
//if (calcErr && !GET_MIN_EIGENVALS)
|
||||
// err[get_group_id(0)] = errval;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void lkDense_C1_D0(image2d_t I, image2d_t J, __global float* u, int uStep, __global float* v, int vStep, __global const float* prevU, int prevUStep, __global const float* prevV, int prevVStep,
|
||||
const int rows, const int cols, /*__global float* err, int errStep, int cn,*/ int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
|
||||
{
|
||||
int c_halfWin_x = (c_winSize_x - 1) / 2;
|
||||
int c_halfWin_y = (c_winSize_y - 1) / 2;
|
||||
|
||||
const int patchWidth = get_local_size(0) + 2 * c_halfWin_x;
|
||||
const int patchHeight = get_local_size(1) + 2 * c_halfWin_y;
|
||||
|
||||
__local int smem[8192];
|
||||
|
||||
__local int* I_patch = smem;
|
||||
__local int* dIdx_patch = I_patch + patchWidth * patchHeight;
|
||||
__local int* dIdy_patch = dIdx_patch + patchWidth * patchHeight;
|
||||
|
||||
const int xBase = get_group_id(0) * get_local_size(0);
|
||||
const int yBase = get_group_id(1) * get_local_size(1);
|
||||
|
||||
sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
|
||||
|
||||
for (int i = get_local_id(1); i < patchHeight; i += get_local_size(1))
|
||||
{
|
||||
for (int j = get_local_id(0); j < patchWidth; j += get_local_size(0))
|
||||
{
|
||||
float x = xBase - c_halfWin_x + j + 0.5f;
|
||||
float y = yBase - c_halfWin_y + i + 0.5f;
|
||||
|
||||
I_patch[i * patchWidth + j] = read_imagei(I, sampleri, (float2)(x, y)).x;
|
||||
|
||||
// Sharr Deriv
|
||||
|
||||
dIdx_patch[i * patchWidth + j] = 3 * read_imagei(I, sampleri, (float2)(x+1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x+1, y)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y+1)).x -
|
||||
(3 * read_imagei(I, sampleri, (float2)(x-1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x-1, y)).x + 3 * read_imagei(I, sampleri, (float2)(x-1, y+1)).x);
|
||||
|
||||
dIdy_patch[i * patchWidth + j] = 3 * read_imagei(I, sampleri, (float2)(x-1, y+1)).x + 10 * read_imagei(I, sampleri, (float2)(x, y+1)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y+1)).x -
|
||||
(3 * read_imagei(I, sampleri, (float2)(x-1, y-1)).x + 10 * read_imagei(I, sampleri, (float2)(x, y-1)).x + 3 * read_imagei(I, sampleri, (float2)(x+1, y-1)).x);
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
// extract the patch from the first image, compute covariation matrix of derivatives
|
||||
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
if (x >= cols || y >= rows)
|
||||
return;
|
||||
|
||||
int A11i = 0;
|
||||
int A12i = 0;
|
||||
int A22i = 0;
|
||||
|
||||
for (int i = 0; i < c_winSize_y; ++i)
|
||||
{
|
||||
for (int j = 0; j < c_winSize_x; ++j)
|
||||
{
|
||||
int dIdx = dIdx_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)];
|
||||
int dIdy = dIdy_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)];
|
||||
|
||||
A11i += dIdx * dIdx;
|
||||
A12i += dIdx * dIdy;
|
||||
A22i += dIdy * dIdy;
|
||||
}
|
||||
}
|
||||
|
||||
float A11 = A11i;
|
||||
float A12 = A12i;
|
||||
float A22 = A22i;
|
||||
|
||||
float D = A11 * A22 - A12 * A12;
|
||||
|
||||
//if (calcErr && GET_MIN_EIGENVALS)
|
||||
// (err + y * errStep)[x] = minEig;
|
||||
|
||||
if (D < 1.192092896e-07f)
|
||||
{
|
||||
//if (calcErr)
|
||||
// err(y, x) = 3.402823466e+38f;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
D = 1.f / D;
|
||||
|
||||
A11 *= D;
|
||||
A12 *= D;
|
||||
A22 *= D;
|
||||
|
||||
float2 nextPt;
|
||||
nextPt.x = x + prevU[y/2 * prevUStep / 4 + x/2] * 2.0f;
|
||||
nextPt.y = y + prevV[y/2 * prevVStep / 4 + x/2] * 2.0f;
|
||||
|
||||
for (int k = 0; k < c_iters; ++k)
|
||||
{
|
||||
if (nextPt.x < 0 || nextPt.x >= cols || nextPt.y < 0 || nextPt.y >= rows)
|
||||
{
|
||||
//if (calcErr)
|
||||
// err(y, x) = 3.402823466e+38f;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
int b1 = 0;
|
||||
int b2 = 0;
|
||||
|
||||
for (int i = 0; i < c_winSize_y; ++i)
|
||||
{
|
||||
for (int j = 0; j < c_winSize_x; ++j)
|
||||
{
|
||||
int iI = I_patch[(get_local_id(1) + i) * patchWidth + get_local_id(0) + j];
|
||||
int iJ = read_imagei(J, sampler, (float2)(nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f)).x;
|
||||
|
||||
int diff = (iJ - iI) * 32;
|
||||
|
||||
int dIdx = dIdx_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)];
|
||||
int dIdy = dIdy_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)];
|
||||
|
||||
b1 += diff * dIdx;
|
||||
b2 += diff * dIdy;
|
||||
}
|
||||
}
|
||||
|
||||
float2 delta;
|
||||
delta.x = A12 * b2 - A22 * b1;
|
||||
delta.y = A12 * b1 - A11 * b2;
|
||||
|
||||
nextPt.x += delta.x;
|
||||
nextPt.y += delta.y;
|
||||
|
||||
if (fabs(delta.x) < 0.01f && fabs(delta.y) < 0.01f)
|
||||
break;
|
||||
}
|
||||
|
||||
u[y * uStep / 4 + x] = nextPt.x - x;
|
||||
v[y * vStep / 4 + x] = nextPt.y - y;
|
||||
|
||||
if (calcErr)
|
||||
{
|
||||
int errval = 0;
|
||||
|
||||
for (int i = 0; i < c_winSize_y; ++i)
|
||||
{
|
||||
for (int j = 0; j < c_winSize_x; ++j)
|
||||
{
|
||||
int iI = I_patch[(get_local_id(1) + i) * patchWidth + get_local_id(0) + j];
|
||||
int iJ = read_imagei(J, sampler, (float2)(nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f)).x;
|
||||
|
||||
errval += abs(iJ - iI);
|
||||
}
|
||||
}
|
||||
|
||||
//err[y * errStep / 4 + x] = static_cast<float>(errval) / (c_winSize_x * c_winSize_y);
|
||||
}
|
||||
}
|
@ -916,6 +916,7 @@ oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const
|
||||
CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels");
|
||||
|
||||
hdr.cols = new_width;
|
||||
hdr.wholecols = new_width;
|
||||
hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);
|
||||
|
||||
return hdr;
|
||||
|
597
modules/ocl/src/pyrlk.cpp
Normal file
597
modules/ocl/src/pyrlk.cpp
Normal file
@ -0,0 +1,597 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other GpuMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or bpied warranties, including, but not limited to, the bpied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
|
||||
#if !defined (HAVE_OPENCL)
|
||||
|
||||
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat&, const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat*) { }
|
||||
void cv::ocl::PyrLKOpticalFlow::dense(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat*) { }
|
||||
|
||||
#else /* !defined (HAVE_OPENCL) */
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *pyrlk;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
struct dim3
|
||||
{
|
||||
unsigned int x, y, z;
|
||||
};
|
||||
|
||||
struct float2
|
||||
{
|
||||
float x, y;
|
||||
};
|
||||
|
||||
struct int2
|
||||
{
|
||||
int x, y;
|
||||
};
|
||||
|
||||
void calcSharrDeriv_run(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, oclMat& dIdx, oclMat& dIdy, int cn)
|
||||
{
|
||||
Context *clCxt = src.clCxt;
|
||||
|
||||
string kernelName = "calcSharrDeriv_vertical";
|
||||
|
||||
size_t localThreads[3] = { 32, 8, 1 };
|
||||
size_t globalThreads[3] = { src.cols, src.rows, 1};
|
||||
|
||||
vector<pair<size_t , const void *> > args;
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cn ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step ));
|
||||
|
||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
|
||||
|
||||
kernelName = "calcSharrDeriv_horizontal";
|
||||
|
||||
vector<pair<size_t , const void *> > args2;
|
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&src.rows ));
|
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&src.cols ));
|
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&cn ));
|
||||
args2.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data ));
|
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step ));
|
||||
args2.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data ));
|
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step ));
|
||||
args2.push_back( make_pair( sizeof(cl_mem), (void *)&dIdx.data ));
|
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&dIdx.step ));
|
||||
args2.push_back( make_pair( sizeof(cl_mem), (void *)&dIdy.data ));
|
||||
args2.push_back( make_pair( sizeof(cl_int), (void *)&dIdy.step ));
|
||||
|
||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args2, src.channels(), src.depth());
|
||||
}
|
||||
|
||||
|
||||
void cv::ocl::PyrLKOpticalFlow::calcSharrDeriv(const oclMat& src, oclMat& dIdx, oclMat& dIdy)
|
||||
{
|
||||
CV_Assert(src.rows > 1 && src.cols > 1);
|
||||
CV_Assert(src.depth() == CV_8U);
|
||||
|
||||
const int cn = src.channels();
|
||||
|
||||
ensureSizeIsEnough(src.size(), CV_MAKETYPE(CV_16S, cn), dx_calcBuf_);
|
||||
ensureSizeIsEnough(src.size(), CV_MAKETYPE(CV_16S, cn), dy_calcBuf_);
|
||||
|
||||
calcSharrDeriv_run(src, dx_calcBuf_, dy_calcBuf_, dIdx, dIdy, cn);
|
||||
}
|
||||
|
||||
void cv::ocl::PyrLKOpticalFlow::buildImagePyramid(const oclMat& img0, vector<oclMat>& pyr, bool withBorder)
|
||||
{
|
||||
pyr.resize(maxLevel + 1);
|
||||
|
||||
Size sz = img0.size();
|
||||
|
||||
Mat img0Temp;
|
||||
img0.download(img0Temp);
|
||||
|
||||
Mat pyrTemp;
|
||||
oclMat o;
|
||||
|
||||
for (int level = 0; level <= maxLevel; ++level)
|
||||
{
|
||||
oclMat temp;
|
||||
|
||||
if (withBorder)
|
||||
{
|
||||
temp.create(sz.height + winSize.height * 2, sz.width + winSize.width * 2, img0.type());
|
||||
}
|
||||
else
|
||||
{
|
||||
ensureSizeIsEnough(sz, img0.type(), pyr[level]);
|
||||
}
|
||||
|
||||
if (level == 0)
|
||||
pyr[level] = img0Temp;
|
||||
else
|
||||
pyrDown(pyr[level - 1], pyr[level]);
|
||||
|
||||
if (withBorder)
|
||||
copyMakeBorder(pyr[level], temp, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_REFLECT_101);
|
||||
|
||||
sz = Size((sz.width + 1) / 2, (sz.height + 1) / 2);
|
||||
|
||||
if (sz.width <= winSize.width || sz.height <= winSize.height)
|
||||
{
|
||||
maxLevel = level;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void calcPatchSize(cv::Size winSize, int cn, dim3& block, dim3& patch, bool isDeviceArch11)
|
||||
{
|
||||
winSize.width *= cn;
|
||||
|
||||
if (winSize.width > 32 && winSize.width > 2 * winSize.height)
|
||||
{
|
||||
block.x = isDeviceArch11 ? 16 : 32;
|
||||
block.y = 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
block.x = 16;
|
||||
block.y = isDeviceArch11 ? 8 : 16;
|
||||
}
|
||||
|
||||
patch.x = (winSize.width + block.x - 1) / block.x;
|
||||
patch.y = (winSize.height + block.y - 1) / block.y;
|
||||
|
||||
block.z = patch.z = 1;
|
||||
}
|
||||
}
|
||||
|
||||
struct MultiplyScalar
|
||||
{
|
||||
MultiplyScalar(double val_, double scale_) : val(val_), scale(scale_) {}
|
||||
double operator ()(double a) const
|
||||
{
|
||||
return (scale * a * val);
|
||||
}
|
||||
const double val;
|
||||
const double scale;
|
||||
};
|
||||
|
||||
void callF(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
|
||||
{
|
||||
Mat srcTemp;
|
||||
Mat dstTemp;
|
||||
src.download(srcTemp);
|
||||
dst.download(dstTemp);
|
||||
|
||||
int i;
|
||||
int j;
|
||||
int k;
|
||||
for(i = 0; i < srcTemp.rows; i++)
|
||||
{
|
||||
for(j = 0; j < srcTemp.cols; j++)
|
||||
{
|
||||
for(k = 0; k < srcTemp.channels(); k++)
|
||||
{
|
||||
((float*)dstTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k] = (float)op(((float*)srcTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dst = dstTemp;
|
||||
}
|
||||
|
||||
static inline bool isAligned(const unsigned char* ptr, size_t size)
|
||||
{
|
||||
return reinterpret_cast<size_t>(ptr) % size == 0;
|
||||
}
|
||||
|
||||
static inline bool isAligned(size_t step, size_t size)
|
||||
{
|
||||
return step % size == 0;
|
||||
}
|
||||
|
||||
void callT(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
|
||||
{
|
||||
if (!isAligned(src.data, 4 * sizeof(double)) || !isAligned(src.step, 4 * sizeof(double)) ||
|
||||
!isAligned(dst.data, 4 * sizeof(double)) || !isAligned(dst.step, 4 * sizeof(double)))
|
||||
{
|
||||
callF(src, dst, op, mask);
|
||||
return;
|
||||
}
|
||||
|
||||
Mat srcTemp;
|
||||
Mat dstTemp;
|
||||
src.download(srcTemp);
|
||||
dst.download(dstTemp);
|
||||
|
||||
int x_shifted;
|
||||
|
||||
int i;
|
||||
int j;
|
||||
for(i = 0; i < srcTemp.rows; i++)
|
||||
{
|
||||
const double* srcRow = (const double*)srcTemp.data + i * srcTemp.rows;
|
||||
double* dstRow = (double*)dstTemp.data + i * dstTemp.rows;;
|
||||
|
||||
for(j = 0; j < srcTemp.cols; j++)
|
||||
{
|
||||
x_shifted = j * 4;
|
||||
|
||||
if(x_shifted + 4 - 1 < srcTemp.cols)
|
||||
{
|
||||
dstRow[x_shifted ] = op(srcRow[x_shifted ]);
|
||||
dstRow[x_shifted + 1] = op(srcRow[x_shifted + 1]);
|
||||
dstRow[x_shifted + 2] = op(srcRow[x_shifted + 2]);
|
||||
dstRow[x_shifted + 3] = op(srcRow[x_shifted + 3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int real_x = x_shifted; real_x < srcTemp.cols; ++real_x)
|
||||
{
|
||||
((float*)dstTemp.data)[i * srcTemp.rows + real_x] = op(((float*)srcTemp.data)[i * srcTemp.rows + real_x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void multiply(const oclMat& src1, double val, oclMat& dst, double scale = 1.0f);
|
||||
void multiply(const oclMat& src1, double val, oclMat& dst, double scale)
|
||||
{
|
||||
MultiplyScalar op(val, scale);
|
||||
//if(src1.channels() == 1 && dst.channels() == 1)
|
||||
//{
|
||||
// callT(src1, dst, op, 0);
|
||||
//}
|
||||
//else
|
||||
//{
|
||||
callF(src1, dst, op, 0);
|
||||
//}
|
||||
}
|
||||
|
||||
cl_mem bindTexture(const oclMat& mat, int depth, int channels)
|
||||
{
|
||||
cl_mem texture;
|
||||
cl_image_format format;
|
||||
int err;
|
||||
if(depth == 0)
|
||||
{
|
||||
format.image_channel_data_type = CL_UNSIGNED_INT8;
|
||||
}
|
||||
else if(depth == 5)
|
||||
{
|
||||
format.image_channel_data_type = CL_FLOAT;
|
||||
}
|
||||
if(channels == 1)
|
||||
{
|
||||
format.image_channel_order = CL_R;
|
||||
}
|
||||
else if(channels == 3)
|
||||
{
|
||||
format.image_channel_order = CL_RGB;
|
||||
}
|
||||
else if(channels == 4)
|
||||
{
|
||||
format.image_channel_order = CL_RGBA;
|
||||
}
|
||||
#if CL_VERSION_1_2
|
||||
cl_image_desc desc;
|
||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||
desc.image_width = mat.cols;
|
||||
desc.image_height = mat.rows;
|
||||
desc.image_depth = NULL;
|
||||
desc.image_array_size = 1;
|
||||
desc.image_row_pitch = 0;
|
||||
desc.image_slice_pitch= 0;
|
||||
desc.buffer = NULL;
|
||||
desc.num_mip_levels = 0;
|
||||
desc.num_samples = 0;
|
||||
texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
|
||||
#else
|
||||
texture = clCreateImage2D(
|
||||
mat.clCxt->impl->clContext,
|
||||
CL_MEM_READ_WRITE,
|
||||
&format,
|
||||
mat.cols,
|
||||
mat.rows,
|
||||
0,
|
||||
NULL,
|
||||
&err);
|
||||
#endif
|
||||
size_t origin[] = { 0, 0, 0 };
|
||||
size_t region[] = { mat.cols, mat.rows, 1 };
|
||||
clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture, 0, origin, region, 0, NULL, 0);
|
||||
openCLSafeCall(err);
|
||||
|
||||
return texture;
|
||||
}
|
||||
|
||||
void lkSparse_run(oclMat& I, oclMat& J,
|
||||
const oclMat& prevPts, oclMat& nextPts, oclMat& status, oclMat* err, bool GET_MIN_EIGENVALS, int ptcount,
|
||||
int level, dim3 block, dim3 patch, Size winSize, int iters)
|
||||
{
|
||||
Context *clCxt = I.clCxt;
|
||||
|
||||
string kernelName = "lkSparse";
|
||||
|
||||
size_t localThreads[3] = { 16, 16, 1 };
|
||||
size_t globalThreads[3] = { 16 * ptcount, 16, 1};
|
||||
|
||||
int cn = I.channels();
|
||||
|
||||
bool calcErr;
|
||||
if (err)
|
||||
{
|
||||
calcErr = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
calcErr = false;
|
||||
}
|
||||
calcErr = true;
|
||||
|
||||
cl_mem ITex = bindTexture(I, I.depth(), cn);
|
||||
cl_mem JTex = bindTexture(J, J.depth(), cn);
|
||||
|
||||
vector<pair<size_t , const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&ITex ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&JTex ));
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&prevPts.step ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&nextPts.step ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&status.data ));
|
||||
//args.push_back( make_pair( sizeof(cl_mem), (void *)&(err->data) ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&level ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&patch.x ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&patch.y ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cn ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
|
||||
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
|
||||
args.push_back( make_pair( sizeof(cl_char), (void *)&GET_MIN_EIGENVALS ));
|
||||
|
||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.channels(), I.depth());
|
||||
}
|
||||
|
||||
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& nextImg, const oclMat& prevPts, oclMat& nextPts, oclMat& status, oclMat* err)
|
||||
{
|
||||
if (prevPts.empty())
|
||||
{
|
||||
nextPts.release();
|
||||
status.release();
|
||||
if (err) err->release();
|
||||
return;
|
||||
}
|
||||
|
||||
derivLambda = std::min(std::max(derivLambda, 0.0), 1.0);
|
||||
|
||||
iters = std::min(std::max(iters, 0), 100);
|
||||
|
||||
const int cn = prevImg.channels();
|
||||
|
||||
dim3 block, patch;
|
||||
calcPatchSize(winSize, cn, block, patch, isDeviceArch11_);
|
||||
|
||||
CV_Assert(derivLambda >= 0);
|
||||
CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2);
|
||||
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
|
||||
CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6);
|
||||
CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2);
|
||||
|
||||
if (useInitialFlow)
|
||||
CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2);
|
||||
else
|
||||
ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);
|
||||
|
||||
oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
|
||||
oclMat temp2 = nextPts.reshape(1);
|
||||
//oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f));
|
||||
//ocl::multiply(temp1, scalar, temp2);
|
||||
::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);
|
||||
|
||||
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
|
||||
status.setTo(Scalar::all(1));
|
||||
|
||||
if (err)
|
||||
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
|
||||
|
||||
// build the image pyramids.
|
||||
|
||||
prevPyr_.resize(maxLevel + 1);
|
||||
nextPyr_.resize(maxLevel + 1);
|
||||
|
||||
if (cn == 1 || cn == 4)
|
||||
{
|
||||
prevImg.convertTo(prevPyr_[0], CV_32F);
|
||||
nextImg.convertTo(nextPyr_[0], CV_32F);
|
||||
}
|
||||
else
|
||||
{
|
||||
oclMat buf_;
|
||||
cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
|
||||
buf_.convertTo(prevPyr_[0], CV_32F);
|
||||
|
||||
cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
|
||||
buf_.convertTo(nextPyr_[0], CV_32F);
|
||||
}
|
||||
|
||||
for (int level = 1; level <= maxLevel; ++level)
|
||||
{
|
||||
pyrDown(prevPyr_[level - 1], prevPyr_[level]);
|
||||
pyrDown(nextPyr_[level - 1], nextPyr_[level]);
|
||||
}
|
||||
|
||||
// dI/dx ~ Ix, dI/dy ~ Iy
|
||||
|
||||
for (int level = maxLevel; level >= 0; level--)
|
||||
{
|
||||
lkSparse_run(prevPyr_[level], nextPyr_[level],
|
||||
prevPts, nextPts, status, level == 0 && err ? err : 0, getMinEigenVals, prevPts.cols,
|
||||
level, block, patch, winSize, iters);
|
||||
}
|
||||
}
|
||||
|
||||
void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v,
|
||||
oclMat& prevU, oclMat& prevV, oclMat* err, Size winSize, int iters)
|
||||
{
|
||||
Context *clCxt = I.clCxt;
|
||||
|
||||
string kernelName = "lkDense";
|
||||
|
||||
size_t localThreads[3] = { 16, 16, 1 };
|
||||
size_t globalThreads[3] = { I.cols, I.rows, 1};
|
||||
|
||||
int cn = I.channels();
|
||||
|
||||
bool calcErr;
|
||||
if (err)
|
||||
{
|
||||
calcErr = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
calcErr = false;
|
||||
}
|
||||
|
||||
cl_mem ITex = bindTexture(I, I.depth(), cn);
|
||||
cl_mem JTex = bindTexture(J, J.depth(), cn);
|
||||
|
||||
int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2};
|
||||
const int patchWidth = 16 + 2 * halfWin.x;
|
||||
const int patchHeight = 16 + 2 * halfWin.y;
|
||||
size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int);
|
||||
|
||||
vector<pair<size_t , const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&ITex ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&JTex ));
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&u.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&u.step ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&v.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&v.step ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&prevU.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&prevU.step ));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&prevV.data ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&prevV.step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols ));
|
||||
//args.push_back( make_pair( sizeof(cl_mem), (void *)&(*err).data ));
|
||||
//args.push_back( make_pair( sizeof(cl_int), (void *)&(*err).step ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height ));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
|
||||
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
|
||||
|
||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.channels(), I.depth());
|
||||
}
|
||||
|
||||
void cv::ocl::PyrLKOpticalFlow::dense(const oclMat& prevImg, const oclMat& nextImg, oclMat& u, oclMat& v, oclMat* err)
|
||||
{
|
||||
CV_Assert(prevImg.type() == CV_8UC1);
|
||||
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
|
||||
CV_Assert(maxLevel >= 0);
|
||||
CV_Assert(winSize.width > 2 && winSize.height > 2);
|
||||
|
||||
if (err)
|
||||
err->create(prevImg.size(), CV_32FC1);
|
||||
|
||||
prevPyr_.resize(maxLevel + 1);
|
||||
nextPyr_.resize(maxLevel + 1);
|
||||
|
||||
prevPyr_[0] = prevImg;
|
||||
nextImg.convertTo(nextPyr_[0], CV_32F);
|
||||
|
||||
for (int level = 1; level <= maxLevel; ++level)
|
||||
{
|
||||
pyrDown(prevPyr_[level - 1], prevPyr_[level]);
|
||||
pyrDown(nextPyr_[level - 1], nextPyr_[level]);
|
||||
}
|
||||
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
|
||||
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
|
||||
uPyr_[1].setTo(Scalar::all(0));
|
||||
vPyr_[1].setTo(Scalar::all(0));
|
||||
|
||||
Size winSize2i(winSize.width, winSize.height);
|
||||
|
||||
int idx = 0;
|
||||
|
||||
for (int level = maxLevel; level >= 0; level--)
|
||||
{
|
||||
int idx2 = (idx + 1) & 1;
|
||||
|
||||
lkDense_run(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
|
||||
level == 0 ? err : 0, winSize2i, iters);
|
||||
|
||||
if (level > 0)
|
||||
idx = idx2;
|
||||
}
|
||||
|
||||
uPyr_[idx].copyTo(u);
|
||||
vPyr_[idx].copyTo(v);
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
167
modules/ocl/test/test_pyrlk.cpp
Normal file
167
modules/ocl/test/test_pyrlk.cpp
Normal file
@ -0,0 +1,167 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// Intel License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include <iomanip>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace cvtest;
|
||||
using namespace testing;
|
||||
using namespace std;
|
||||
|
||||
//#define DUMP
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// BroxOpticalFlow
|
||||
|
||||
#define BROX_OPTICAL_FLOW_DUMP_FILE "opticalflow/brox_optical_flow.bin"
|
||||
#define BROX_OPTICAL_FLOW_DUMP_FILE_CC20 "opticalflow/brox_optical_flow_cc20.bin"
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// PyrLKOpticalFlow
|
||||
|
||||
//IMPLEMENT_PARAM_CLASS(UseGray, bool)
|
||||
|
||||
PARAM_TEST_CASE(Sparse, bool, bool)
|
||||
{
|
||||
bool useGray;
|
||||
bool UseSmart;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
UseSmart = GET_PARAM(0);
|
||||
useGray = GET_PARAM(0);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(Sparse, Mat)
|
||||
{
|
||||
cv::Mat frame0 = readImage("../../../samples/gpu/rubberwhale1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
|
||||
ASSERT_FALSE(frame0.empty());
|
||||
|
||||
cv::Mat frame1 = readImage("../../../samples/gpu/rubberwhale2.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
|
||||
ASSERT_FALSE(frame1.empty());
|
||||
|
||||
cv::Mat gray_frame;
|
||||
if (useGray)
|
||||
gray_frame = frame0;
|
||||
else
|
||||
cv::cvtColor(frame0, gray_frame, cv::COLOR_BGR2GRAY);
|
||||
|
||||
std::vector<cv::Point2f> pts;
|
||||
cv::goodFeaturesToTrack(gray_frame, pts, 1000, 0.01, 0.0);
|
||||
|
||||
cv::ocl::oclMat d_pts;
|
||||
cv::Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void*)&pts[0]);
|
||||
d_pts.upload(pts_mat);
|
||||
|
||||
cv::ocl::PyrLKOpticalFlow pyrLK;
|
||||
|
||||
cv::ocl::oclMat oclFrame0;
|
||||
cv::ocl::oclMat oclFrame1;
|
||||
cv::ocl::oclMat d_nextPts;
|
||||
cv::ocl::oclMat d_status;
|
||||
cv::ocl::oclMat d_err;
|
||||
|
||||
oclFrame0 = frame0;
|
||||
oclFrame1 = frame1;
|
||||
|
||||
pyrLK.sparse(oclFrame0, oclFrame1, d_pts, d_nextPts, d_status, &d_err);
|
||||
|
||||
std::vector<cv::Point2f> nextPts(d_nextPts.cols);
|
||||
cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void*)&nextPts[0]);
|
||||
d_nextPts.download(nextPts_mat);
|
||||
|
||||
std::vector<unsigned char> status(d_status.cols);
|
||||
cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void*)&status[0]);
|
||||
d_status.download(status_mat);
|
||||
|
||||
std::vector<float> err(d_err.cols);
|
||||
cv::Mat err_mat(1, d_err.cols, CV_32FC1, (void*)&err[0]);
|
||||
d_err.download(err_mat);
|
||||
|
||||
std::vector<cv::Point2f> nextPts_gold;
|
||||
std::vector<unsigned char> status_gold;
|
||||
std::vector<float> err_gold;
|
||||
cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts_gold, status_gold, err_gold);
|
||||
|
||||
ASSERT_EQ(nextPts_gold.size(), nextPts.size());
|
||||
ASSERT_EQ(status_gold.size(), status.size());
|
||||
|
||||
size_t mistmatch = 0;
|
||||
for (size_t i = 0; i < nextPts.size(); ++i)
|
||||
{
|
||||
if (status[i] != status_gold[i])
|
||||
{
|
||||
++mistmatch;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (status[i])
|
||||
{
|
||||
cv::Point2i a = nextPts[i];
|
||||
cv::Point2i b = nextPts_gold[i];
|
||||
|
||||
bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1;
|
||||
//float errdiff = std::abs(err[i] - err_gold[i]);
|
||||
float errdiff = 0.0f;
|
||||
|
||||
if (!eq || errdiff > 1e-1)
|
||||
++mistmatch;
|
||||
}
|
||||
}
|
||||
|
||||
double bad_ratio = static_cast<double>(mistmatch) / (nextPts.size() * 2);
|
||||
|
||||
ASSERT_LE(bad_ratio, 0.05f);
|
||||
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Video, Sparse, Combine(
|
||||
Values(false, true),
|
||||
Values(false)));
|
||||
|
||||
#endif // HAVE_OPENCL
|
||||
|
Loading…
Reference in New Issue
Block a user