diff --git a/modules/core/include/opencv2/core/gpumat.hpp b/modules/core/include/opencv2/core/gpumat.hpp index d1fd1501ee..1ce35aabf3 100644 --- a/modules/core/include/opencv2/core/gpumat.hpp +++ b/modules/core/include/opencv2/core/gpumat.hpp @@ -214,6 +214,8 @@ namespace cv { namespace gpu CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m); CV_EXPORTS void ensureSizeIsEnough(Size size, int type, GpuMat& m); + CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat); + //////////////////////////////////////////////////////////////////////// // Error handling @@ -459,6 +461,13 @@ namespace cv { namespace gpu else m.create(rows, cols, type); } + + inline GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat) + { + if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols) + return mat(Rect(0, 0, cols, rows)); + return mat = GpuMat(rows, cols, type); + } }} #endif // __cplusplus diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 39836bea39..20ce90239f 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1819,6 +1819,70 @@ private: vector vPyr_; }; + +class CV_EXPORTS FarnebackOpticalFlow +{ +public: + FarnebackOpticalFlow() + { + numLevels = 5; + pyrScale = 0.5; + fastPyramids = false; + winSize = 13; + numIters = 10; + polyN = 5; + polySigma = 1.1; + flags = 0; + } + + int numLevels; + double pyrScale; + bool fastPyramids; + int winSize; + int numIters; + int polyN; + double polySigma; + int flags; + + void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null()); + + void releaseMemory() + { + frames_[0].release(); + frames_[1].release(); + I_[0].release(); + I_[1].release(); + M_.release(); + bufM_.release(); + R_[0].release(); + R_[1].release(); + tmp_[0].release(); + tmp_[1].release(); + pyramid0_.clear(); + pyramid1_.clear(); + } + +private: + void prepareGaussian( + int n, double sigma, float *g, float *xg, float *xxg, + double &ig11, double &ig03, double &ig33, double &ig55); + + void setPolynomialExpansionConsts(int n, double sigma); + + void updateFlow_boxFilter( + const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy, + GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]); + + void updateFlow_gaussianBlur( + const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy, + GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]); + + GpuMat frames_[2]; + GpuMat I_[2], M_, bufM_, R_[2], tmp_[2]; + std::vector pyramid0_, pyramid1_; +}; + + //! Interpolate frames (images) using provided optical flow (displacement field). //! frame0 - frame 0 (32-bit floating point images, single channel) //! frame1 - frame 1 (the same type and size) diff --git a/modules/gpu/src/cudastream.cpp b/modules/gpu/src/cudastream.cpp index 11ccda4452..569b460eab 100644 --- a/modules/gpu/src/cudastream.cpp +++ b/modules/gpu/src/cudastream.cpp @@ -81,6 +81,7 @@ namespace cv { namespace gpu struct Stream::Impl { + static cudaStream_t getStream(const Impl* impl) { return impl ? impl->stream : 0; } cudaStream_t stream; int ref_counter; }; @@ -95,7 +96,10 @@ namespace }; } -CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl ? stream.impl->stream : 0; }; +CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) +{ + return Stream::Impl::getStream(stream.impl); +}; void cv::gpu::Stream::create() { @@ -143,7 +147,7 @@ Stream& cv::gpu::Stream::operator=(const Stream& stream) bool cv::gpu::Stream::queryIfComplete() { - cudaError_t err = cudaStreamQuery( impl->stream ); + cudaError_t err = cudaStreamQuery( Impl::getStream(impl) ); if (err == cudaErrorNotReady || err == cudaSuccess) return err == cudaSuccess; @@ -152,19 +156,19 @@ bool cv::gpu::Stream::queryIfComplete() return false; } -void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); } +void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( Impl::getStream(impl) ) ); } void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst) { // if not -> allocation will be done, but after that dst will not point to page locked memory CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() ); - devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); + devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost); } -void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); } +void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost); } -void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); } -void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); } -void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); } +void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst){ devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice); } +void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice); } +void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToDevice); } void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s) { @@ -173,7 +177,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s) if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0) { - cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, impl->stream) ); + cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) ); return; } if (src.depth() == CV_8U) @@ -183,12 +187,12 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s) if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3])) { int val = saturate_cast(s[0]); - cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, impl->stream) ); + cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) ); return; } } - setTo(src, s, impl->stream); + setTo(src, s, Impl::getStream(impl)); } void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask) @@ -198,7 +202,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask) CV_Assert(mask.type() == CV_8UC1); - setTo(src, val, mask, impl->stream); + setTo(src, val, mask, Impl::getStream(impl)); } void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta) @@ -226,7 +230,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, psrc = &(temp = src); dst.create( src.size(), rtype ); - convertTo(src, dst, alpha, beta, impl->stream); + convertTo(src, dst, alpha, beta, Impl::getStream(impl)); } cv::gpu::Stream::operator bool() const diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp index b8cf9cf366..d796bc355e 100644 --- a/modules/gpu/src/precomp.hpp +++ b/modules/gpu/src/precomp.hpp @@ -65,6 +65,7 @@ #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/calib3d/calib3d.hpp" #include "opencv2/core/internal.hpp" +#include "opencv2/video/video.hpp" #define OPENCV_GPU_UNUSED(x) (void)x diff --git a/modules/gpu/test/test_precomp.hpp b/modules/gpu/test/test_precomp.hpp index 8d9cc43f59..8826457162 100644 --- a/modules/gpu/test/test_precomp.hpp +++ b/modules/gpu/test/test_precomp.hpp @@ -44,6 +44,7 @@ #include #include +#include #include #include #include diff --git a/modules/gpu/test/test_video.cpp b/modules/gpu/test/test_video.cpp index b29036c2b0..9cda85111d 100644 --- a/modules/gpu/test/test_video.cpp +++ b/modules/gpu/test/test_video.cpp @@ -423,3 +423,74 @@ TEST_P(PyrLKOpticalFlowSparse, Accuracy) INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, Combine(ALL_DEVICES, Bool())); #endif // HAVE_CUDA + + +PARAM_TEST_CASE(FarnebackOpticalFlowTest, cv::gpu::DeviceInfo, double, int, int, bool) +{ + Mat frame0, frame1; + + double pyrScale; + int polyN; + double polySigma; + int flags; + bool useInitFlow; + + virtual void SetUp() + { + frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); ASSERT_FALSE(frame1.empty()); + + cv::gpu::setDevice(GET_PARAM(0).deviceID()); + + pyrScale = GET_PARAM(1); + polyN = GET_PARAM(2); + polySigma = polyN <= 5 ? 1.1 : 1.5; + flags = GET_PARAM(3); + useInitFlow = GET_PARAM(4); + } +}; + +TEST_P(FarnebackOpticalFlowTest, Accuracy) +{ + using namespace cv; + + gpu::FarnebackOpticalFlow calc; + calc.pyrScale = pyrScale; + calc.polyN = polyN; + calc.polySigma = polySigma; + calc.flags = flags; + + gpu::GpuMat d_flowx, d_flowy; + calc(gpu::GpuMat(frame0), gpu::GpuMat(frame1), d_flowx, d_flowy); + + Mat flow; + if (useInitFlow) + { + Mat flowxy[] = {(Mat)d_flowx, (Mat)d_flowy}; + merge(flowxy, 2, flow); + } + + if (useInitFlow) + { + calc.flags |= OPTFLOW_USE_INITIAL_FLOW; + calc(gpu::GpuMat(frame0), gpu::GpuMat(frame1), d_flowx, d_flowy); + } + + calcOpticalFlowFarneback( + frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, + calc.numIters, calc.polyN, calc.polySigma, calc.flags); + + std::vector flowxy; split(flow, flowxy); + /*std::cout << checkSimilarity(flowxy[0], (Mat)d_flowx) << " " + << checkSimilarity(flowxy[1], (Mat)d_flowy) << std::endl;*/ + EXPECT_LT(checkSimilarity(flowxy[0], (Mat)d_flowx), 0.1); + EXPECT_LT(checkSimilarity(flowxy[1], (Mat)d_flowy), 0.1); +} + +INSTANTIATE_TEST_CASE_P(Video, FarnebackOpticalFlowTest, + Combine(ALL_DEVICES, + Values(0.3, 0.5, 0.8), + Values(5, 7), + Values(0, (int)cv::OPTFLOW_FARNEBACK_GAUSSIAN), + Values(false, true))); diff --git a/samples/gpu/basketball1.png b/samples/gpu/basketball1.png new file mode 100644 index 0000000000..53b2dbaad1 Binary files /dev/null and b/samples/gpu/basketball1.png differ diff --git a/samples/gpu/basketball2.png b/samples/gpu/basketball2.png new file mode 100644 index 0000000000..1d069b965c Binary files /dev/null and b/samples/gpu/basketball2.png differ diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index 57583c4052..1a99fa8b31 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -1183,3 +1183,36 @@ TEST(PyrLKOpticalFlow) GPU_OFF; } } + + +TEST(FarnebackOpticalFlow) +{ + const string names[] = {"rubberwhale", "basketball"}; + for (size_t i = 0; i < sizeof(names)/sizeof(*names); ++i) { + for (int fastPyramids = 0; fastPyramids < 2; ++fastPyramids) { + for (int useGaussianBlur = 0; useGaussianBlur < 2; ++useGaussianBlur) { + + SUBTEST << "dataset=" << names[i] << ", fastPyramids=" << fastPyramids << ", useGaussianBlur=" << useGaussianBlur; + Mat frame0 = imread(abspath(names[i] + "1.png"), IMREAD_GRAYSCALE); + Mat frame1 = imread(abspath(names[i] + "2.png"), IMREAD_GRAYSCALE); + if (frame0.empty()) throw runtime_error("can't open " + names[i] + "1.png"); + if (frame1.empty()) throw runtime_error("can't open " + names[i] + "2.png"); + + gpu::FarnebackOpticalFlow calc; + calc.fastPyramids = fastPyramids; + calc.flags |= useGaussianBlur ? OPTFLOW_FARNEBACK_GAUSSIAN : 0; + + gpu::GpuMat d_frame0(frame0), d_frame1(frame1), d_flowx, d_flowy; + calc(d_frame0, d_frame1, d_flowx, d_flowy); + GPU_ON; + calc(d_frame0, d_frame1, d_flowx, d_flowy); + GPU_OFF; + + Mat flow; + calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags); + CPU_ON; + calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags); + CPU_OFF; + + }}} +}