Merge pull request #8863 from LukeZheZhu:pyrlk_small_winsize

This commit is contained in:
Alexander Alekhin 2017-06-08 20:15:04 +00:00
commit ea93bcc347
3 changed files with 54 additions and 24 deletions

View File

@@ -849,7 +849,7 @@ namespace
return false; return false;
if (maxLevel < 0 || winSize.width <= 2 || winSize.height <= 2) if (maxLevel < 0 || winSize.width <= 2 || winSize.height <= 2)
return false; return false;
if (winSize.width < 16 || winSize.height < 16 || if (winSize.width < 8 || winSize.height < 8 ||
winSize.width > 24 || winSize.height > 24) winSize.width > 24 || winSize.height > 24)
return false; return false;
calcPatchSize(); calcPatchSize();
@@ -967,11 +967,17 @@ namespace
size_t globalThreads[3] = { 8 * (size_t)ptcount, 8}; size_t globalThreads[3] = { 8 * (size_t)ptcount, 8};
char calcErr = (0 == level) ? 1 : 0; char calcErr = (0 == level) ? 1 : 0;
int wsx = 1, wsy = 1;
if(winSize.width < 16)
wsx = 0;
if(winSize.height < 16)
wsy = 0;
cv::String build_options; cv::String build_options;
if (isDeviceCPU()) if (isDeviceCPU())
build_options = " -D CPU"; build_options = " -D CPU";
else else
build_options = cv::format("-D WAVE_SIZE=%d", waveSize); build_options = cv::format("-D WAVE_SIZE=%d -D WSX=%d -D WSY=%d",
waveSize, wsx, wsy);
ocl::Kernel kernel; ocl::Kernel kernel;
if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options)) if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options))

View File

@@ -258,9 +258,9 @@ inline void GetPatch(image2d_t J, float x, float y,
*b2 = mad(diff, *Dy, *b2); *b2 = mad(diff, *Dy, *b2);
} }
inline void GetError(image2d_t J, const float x, const float y, const float* Pch, float* errval) inline void GetError(image2d_t J, const float x, const float y, const float* Pch, float* errval, float w)
{ {
float diff = (((read_imagef(J, sampler, (float2)(x,y)).x * 16384) + 256) / 512) - (((*Pch * 16384) + 256) /512); float diff = ((((read_imagef(J, sampler, (float2)(x,y)).x * 16384) + 256) / 512) - (((*Pch * 16384) + 256) /512)) * w;
*errval += fabs(diff); *errval += fabs(diff);
} }
@@ -310,10 +310,34 @@ __kernel void lkSparse(image2d_t I, image2d_t J,
int xsize=get_local_size(0); int xsize=get_local_size(0);
int ysize=get_local_size(1); int ysize=get_local_size(1);
int k; int k;
#ifdef CPU
float wx0 = 1.0f;
float wy0 = 1.0f;
int xBase = mad24(xsize, 2, xid); int xBase = mad24(xsize, 2, xid);
int yBase = mad24(ysize, 2, yid); int yBase = mad24(ysize, 2, yid);
float wx = (xBase < c_winSize_x) ? 1 : 0; float wx1 = (xBase < c_winSize_x) ? 1 : 0;
float wy = (yBase < c_winSize_y) ? 1 : 0; float wy1 = (yBase < c_winSize_y) ? 1 : 0;
#else
#if WSX == 1
float wx0 = 1.0f;
int xBase = mad24(xsize, 2, xid);
float wx1 = (xBase < c_winSize_x) ? 1 : 0;
#else
int xBase = mad24(xsize, 1, xid);
float wx0 = (xBase < c_winSize_x) ? 1 : 0;
float wx1 = 0.0f;
#endif
#if WSY == 1
float wy0 = 1.0f;
int yBase = mad24(ysize, 2, yid);
float wy1 = (yBase < c_winSize_y) ? 1 : 0;
#else
int yBase = mad24(ysize, 1, yid);
float wy0 = (yBase < c_winSize_y) ? 1 : 0;
float wy1 = 0.0f;
#endif
#endif
float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1); float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
@@ -354,39 +378,39 @@ __kernel void lkSparse(image2d_t I, image2d_t J,
SetPatch(IPatchLocal, 0, 1, SetPatch(IPatchLocal, 0, 1,
&I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1], &I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],
&A11, &A12, &A22,1); &A11, &A12, &A22,wx0);
SetPatch(IPatchLocal, 0, 2, SetPatch(IPatchLocal, 0, 2,
&I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2], &I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],
&A11, &A12, &A22,wx); &A11, &A12, &A22,wx1);
} }
{ {
SetPatch(IPatchLocal, 1, 0, SetPatch(IPatchLocal, 1, 0,
&I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0], &I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],
&A11, &A12, &A22,1); &A11, &A12, &A22,wy0);
SetPatch(IPatchLocal, 1,1, SetPatch(IPatchLocal, 1,1,
&I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1], &I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],
&A11, &A12, &A22,1); &A11, &A12, &A22,wx0*wy0);
SetPatch(IPatchLocal, 1,2, SetPatch(IPatchLocal, 1,2,
&I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2], &I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],
&A11, &A12, &A22,wx); &A11, &A12, &A22,wx1*wy0);
} }
{ {
SetPatch(IPatchLocal, 2,0, SetPatch(IPatchLocal, 2,0,
&I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0], &I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],
&A11, &A12, &A22,wy); &A11, &A12, &A22,wy1);
SetPatch(IPatchLocal, 2,1, SetPatch(IPatchLocal, 2,1,
&I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1], &I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],
&A11, &A12, &A22,wy); &A11, &A12, &A22,wx0*wy1);
SetPatch(IPatchLocal, 2,2, SetPatch(IPatchLocal, 2,2,
&I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2], &I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
&A11, &A12, &A22,wx*wy); &A11, &A12, &A22,wx1*wy1);
} }
@@ -496,24 +520,24 @@ __kernel void lkSparse(image2d_t I, image2d_t J,
if (calcErr) if (calcErr)
{ {
{ {
GetError(J, loc0.x, loc0.y, &I_patch[0][0], &D); GetError(J, loc0.x, loc0.y, &I_patch[0][0], &D, 1);
GetError(J, loc1.x, loc0.y, &I_patch[0][1], &D); GetError(J, loc1.x, loc0.y, &I_patch[0][1], &D, wx0);
} }
{ {
GetError(J, loc0.x, loc1.y, &I_patch[1][0], &D); GetError(J, loc0.x, loc1.y, &I_patch[1][0], &D, wy0);
GetError(J, loc1.x, loc1.y, &I_patch[1][1], &D); GetError(J, loc1.x, loc1.y, &I_patch[1][1], &D, wx0*wy0);
} }
if(xBase < c_winSize_x) if(xBase < c_winSize_x)
{ {
GetError(J, loc2.x, loc0.y, &I_patch[0][2], &D); GetError(J, loc2.x, loc0.y, &I_patch[0][2], &D, wx1);
GetError(J, loc2.x, loc1.y, &I_patch[1][2], &D); GetError(J, loc2.x, loc1.y, &I_patch[1][2], &D, wx1*wy0);
} }
if(yBase < c_winSize_y) if(yBase < c_winSize_y)
{ {
GetError(J, loc0.x, loc2.y, &I_patch[2][0], &D); GetError(J, loc0.x, loc2.y, &I_patch[2][0], &D, wy1);
GetError(J, loc1.x, loc2.y, &I_patch[2][1], &D); GetError(J, loc1.x, loc2.y, &I_patch[2][1], &D, wx0*wy1);
if(xBase < c_winSize_x) if(xBase < c_winSize_x)
GetError(J, loc2.x, loc2.y, &I_patch[2][2], &D); GetError(J, loc2.x, loc2.y, &I_patch[2][2], &D, wx1*wy1);
} }
reduce1(D, smem1, tid); reduce1(D, smem1, tid);

View File

@@ -144,7 +144,7 @@ OCL_TEST_P(PyrLKOpticalFlow, Mat)
OCL_INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlow, OCL_INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlow,
Combine( Combine(
Values(21, 25), Values(11, 15, 21, 25),
Values(3, 5) Values(3, 5)
) )
); );