diff --git a/modules/cudaoptflow/src/cuda/pyrlk.cu b/modules/cudaoptflow/src/cuda/pyrlk.cu index 2f2865057f..e3cca57f3a 100644 --- a/modules/cudaoptflow/src/cuda/pyrlk.cu +++ b/modules/cudaoptflow/src/cuda/pyrlk.cu @@ -51,6 +51,8 @@ #include "opencv2/core/cuda/filters.hpp" #include "opencv2/core/cuda/border_interpolate.hpp" +#include + using namespace cv::cuda; using namespace cv::cuda::device; @@ -923,15 +925,15 @@ namespace pyrlk float x = xBase - c_halfWin_x + j + 0.5f; float y = yBase - c_halfWin_y + i + 0.5f; - I_patch[i * patchWidth + j] = tex2D(tex_Ib, x, y); + I_patch[i * patchWidth + j] = tex2D(tex_If, x, y); // Sharr Deriv - dIdx_patch[i * patchWidth + j] = 3 * tex2D(tex_Ib, x+1, y-1) + 10 * tex2D(tex_Ib, x+1, y) + 3 * tex2D(tex_Ib, x+1, y+1) - - (3 * tex2D(tex_Ib, x-1, y-1) + 10 * tex2D(tex_Ib, x-1, y) + 3 * tex2D(tex_Ib, x-1, y+1)); + dIdx_patch[i * patchWidth + j] = 3 * tex2D(tex_If, x+1, y-1) + 10 * tex2D(tex_If, x+1, y) + 3 * tex2D(tex_If, x+1, y+1) - + (3 * tex2D(tex_If, x-1, y-1) + 10 * tex2D(tex_If, x-1, y) + 3 * tex2D(tex_If, x-1, y+1)); - dIdy_patch[i * patchWidth + j] = 3 * tex2D(tex_Ib, x-1, y+1) + 10 * tex2D(tex_Ib, x, y+1) + 3 * tex2D(tex_Ib, x+1, y+1) - - (3 * tex2D(tex_Ib, x-1, y-1) + 10 * tex2D(tex_Ib, x, y-1) + 3 * tex2D(tex_Ib, x+1, y-1)); + dIdy_patch[i * patchWidth + j] = 3 * tex2D(tex_If, x-1, y+1) + 10 * tex2D(tex_If, x, y+1) + 3 * tex2D(tex_If, x+1, y+1) - + (3 * tex2D(tex_If, x-1, y-1) + 10 * tex2D(tex_If, x, y-1) + 3 * tex2D(tex_If, x+1, y-1)); } } @@ -943,6 +945,7 @@ namespace pyrlk if (x >= cols || y >= rows) return; + int A11i = 0; int A12i = 0; int A22i = 0; @@ -970,7 +973,6 @@ namespace pyrlk { if (calcErr) err(y, x) = numeric_limits::max(); - return; } @@ -1014,6 +1016,7 @@ namespace pyrlk } } + float2 delta; delta.x = A12 * b2 - A22 * b1; delta.y = A12 * b1 - A11 * b2; @@ -1083,11 +1086,11 @@ namespace pyrlk funcs[patch.y - 1][patch.x - 1](I, J, I.rows, I.cols, prevPts, nextPts, status, err, ptcount, level, block, stream); } - static void dense(PtrStepSzb I, PtrStepSz J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, PtrStepSzf err, int2 winSize, cudaStream_t stream) + static void dense(PtrStepSz I, PtrStepSz J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, PtrStepSzf err, int2 winSize, cudaStream_t stream) { dim3 block(16, 16); dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y)); - Tex_I<1, uchar>::bindTexture_(I); + Tex_I<1, T>::bindTexture_(I); Tex_J<1, T>::bindTexture_(J); int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2); diff --git a/modules/cudaoptflow/src/pyrlk.cpp b/modules/cudaoptflow/src/pyrlk.cpp index c7f706087b..d1704473c7 100644 --- a/modules/cudaoptflow/src/pyrlk.cpp +++ b/modules/cudaoptflow/src/pyrlk.cpp @@ -61,7 +61,7 @@ namespace pyrlk static void sparse(PtrStepSz::vec_type> I, PtrStepSz::vec_type> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount, int level, dim3 block, dim3 patch, cudaStream_t stream); - static void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, + static void dense(PtrStepSzf I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, PtrStepSzf err, int2 winSize, cudaStream_t stream); }; @@ -236,7 +236,9 @@ namespace prevPyr_.resize(maxLevel_ + 1); nextPyr_.resize(maxLevel_ + 1); - prevPyr_[0] = prevImg; + //prevPyr_[0] = prevImg; + + prevImg.convertTo(prevPyr_[0], CV_32F, stream); nextImg.convertTo(nextPyr_[0], CV_32F, stream); for (int level = 1; level <= maxLevel_; ++level)