diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt index 0c9f709b15..26bf624c66 100644 --- a/modules/gpu/CMakeLists.txt +++ b/modules/gpu/CMakeLists.txt @@ -21,7 +21,7 @@ source_group("Src\\Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs}) source_group("Device" FILES ${lib_device_hdrs}) source_group("Device\\Detail" FILES ${lib_device_hdrs_detail}) -if (HAVE_CUDA) +if(HAVE_CUDA) file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp" "src/nvidia/*.h*") file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu") set(ncv_files ${ncv_srcs} ${ncv_cuda}) @@ -104,3 +104,7 @@ ocv_add_accuracy_tests(FILES "Include" ${test_hdrs} FILES "Src" ${test_srcs} ${nvidia}) ocv_add_perf_tests() + +if(HAVE_CUDA) + add_subdirectory(perf4au) +endif() diff --git a/modules/gpu/doc/data_structures.rst b/modules/gpu/doc/data_structures.rst index 68e702a793..1291cf9bb6 100644 --- a/modules/gpu/doc/data_structures.rst +++ b/modules/gpu/doc/data_structures.rst @@ -271,41 +271,37 @@ This class encapsulates a queue of asynchronous calls. Some functions have overl class CV_EXPORTS Stream { public: - Stream(); - ~Stream(); + Stream(); + ~Stream(); - Stream(const Stream&); - Stream& operator=(const Stream&); + Stream(const Stream&); + Stream& operator=(const Stream&); - bool queryIfComplete(); - void waitForCompletion(); + bool queryIfComplete(); + void waitForCompletion(); - //! downloads asynchronously. - // Warning! cv::Mat must point to page locked memory - (i.e. to CudaMem data or to its subMat) - void enqueueDownload(const GpuMat& src, CudaMem& dst); - void enqueueDownload(const GpuMat& src, Mat& dst); + void enqueueDownload(const GpuMat& src, CudaMem& dst); + void enqueueDownload(const GpuMat& src, Mat& dst); - //! uploads asynchronously. - // Warning! cv::Mat must point to page locked memory - (i.e. to CudaMem data or to its ROI) - void enqueueUpload(const CudaMem& src, GpuMat& dst); - void enqueueUpload(const Mat& src, GpuMat& dst); + void enqueueUpload(const CudaMem& src, GpuMat& dst); + void enqueueUpload(const Mat& src, GpuMat& dst); - void enqueueCopy(const GpuMat& src, GpuMat& dst); + void enqueueCopy(const GpuMat& src, GpuMat& dst); - void enqueueMemSet(const GpuMat& src, Scalar val); - void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask); + void enqueueMemSet(const GpuMat& src, Scalar val); + void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask); - // converts matrix type, ex from float to uchar depending on type - void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, - double a = 1, double b = 0); + void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, + double a = 1, double b = 0); + + typedef void (*StreamCallback)(Stream& stream, int status, void* userData); + void enqueueHostCallback(StreamCallback callback, void* userData); }; gpu::Stream::queryIfComplete --------------------------------- +---------------------------- Returns ``true`` if the current stream queue is finished. Otherwise, it returns false. .. ocv:function:: bool gpu::Stream::queryIfComplete() @@ -313,13 +309,73 @@ Returns ``true`` if the current stream queue is finished. Otherwise, it returns gpu::Stream::waitForCompletion ----------------------------------- +------------------------------ Blocks the current CPU thread until all operations in the stream are complete. .. ocv:function:: void gpu::Stream::waitForCompletion() +gpu::Stream::enqueueDownload +---------------------------- +Copies data from device to host. + +.. ocv:function:: void gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) + +.. ocv:function:: void gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst) + +.. note:: ``cv::Mat`` must point to page locked memory (i.e. to ``CudaMem`` data or to its subMat) or must be registered with :ocv:func:`gpu::registerPageLocked` . + + + +gpu::Stream::enqueueUpload +-------------------------- +Copies data from host to device. + +.. ocv:function:: void gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst) + +.. ocv:function:: void gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) + +.. note:: ``cv::Mat`` must point to page locked memory (i.e. to ``CudaMem`` data or to its subMat) or must be registered with :ocv:func:`gpu::registerPageLocked` . + + + +gpu::Stream::enqueueCopy +------------------------ +Copies data from device to device. + +.. ocv:function:: void gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) + + + +gpu::Stream::enqueueMemSet +-------------------------- +Initializes or sets device memory to a value. + +.. ocv:function:: void gpu::Stream::enqueueMemSet(const GpuMat& src, Scalar val) + +.. ocv:function:: void gpu::Stream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask) + + + +gpu::Stream::enqueueConvert +--------------------------- +Converts matrix type, ex from float to uchar depending on type. + +.. ocv:function:: void gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a = 1, double b = 0) + + + +gpu::Stream::enqueueHostCallback +-------------------------------- +Adds a callback to be called on the host after all currently enqueued items in the stream have completed. + +.. ocv:function:: void gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData) + +.. note:: Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization that may depend on outstanding device work or other callbacks that are not mandated to run earlier. Callbacks without a mandated order (in independent streams) execute in undefined order and may be serialized. + + + gpu::StreamAccessor ------------------- .. ocv:struct:: gpu::StreamAccessor diff --git a/modules/gpu/doc/matrix_reductions.rst b/modules/gpu/doc/matrix_reductions.rst index 538267eb7a..e9229f8a81 100644 --- a/modules/gpu/doc/matrix_reductions.rst +++ b/modules/gpu/doc/matrix_reductions.rst @@ -32,6 +32,8 @@ Returns the norm of a matrix (or difference of two matrices). .. ocv:function:: double gpu::norm(const GpuMat& src1, int normType, GpuMat& buf) +.. ocv:function:: double gpu::norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf) + .. ocv:function:: double gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2) :param src1: Source matrix. Any matrices except 64F are supported. @@ -40,6 +42,8 @@ Returns the norm of a matrix (or difference of two matrices). :param normType: Norm type. ``NORM_L1`` , ``NORM_L2`` , and ``NORM_INF`` are supported for now. + :param mask: optional operation mask; it must have the same size as ``src1`` and ``CV_8UC1`` type. + :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically. .. seealso:: :ocv:func:`norm` @@ -54,8 +58,12 @@ Returns the sum of matrix elements. .. ocv:function:: Scalar gpu::sum(const GpuMat& src, GpuMat& buf) +.. ocv:function:: Scalar gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) + :param src: Source image of any depth except for ``CV_64F`` . + :param mask: optional operation mask; it must have the same size as ``src1`` and ``CV_8UC1`` type. + :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically. .. seealso:: :ocv:func:`sum` @@ -70,8 +78,12 @@ Returns the sum of absolute values for matrix elements. .. ocv:function:: Scalar gpu::absSum(const GpuMat& src, GpuMat& buf) +.. ocv:function:: Scalar gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) + :param src: Source image of any depth except for ``CV_64F`` . + :param mask: optional operation mask; it must have the same size as ``src1`` and ``CV_8UC1`` type. + :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically. @@ -84,8 +96,12 @@ Returns the squared sum of matrix elements. .. ocv:function:: Scalar gpu::sqrSum(const GpuMat& src, GpuMat& buf) +.. ocv:function:: Scalar gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) + :param src: Source image of any depth except for ``CV_64F`` . + :param mask: optional operation mask; it must have the same size as ``src1`` and ``CV_8UC1`` type. + :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically. diff --git a/modules/gpu/doc/operations_on_matrices.rst b/modules/gpu/doc/operations_on_matrices.rst index 7f586a1b02..d1762f442a 100644 --- a/modules/gpu/doc/operations_on_matrices.rst +++ b/modules/gpu/doc/operations_on_matrices.rst @@ -242,3 +242,33 @@ Converts polar coordinates into Cartesian. :param stream: Stream for the asynchronous version. .. seealso:: :ocv:func:`polarToCart` + + + +gpu::normalize +-------------- +Normalizes the norm or value range of an array. + +.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat()) + +.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf) + + :param src: input array. + + :param dst: output array of the same size as ``src`` . + + :param alpha: norm value to normalize to or the lower range boundary in case of the range normalization. + + :param beta: upper range boundary in case of the range normalization; it is not used for the norm normalization. + + :param normType: normalization type (see the details below). + + :param dtype: when negative, the output array has the same type as ``src``; otherwise, it has the same number of channels as ``src`` and the depth ``=CV_MAT_DEPTH(dtype)``. + + :param mask: optional operation mask. + + :param norm_buf: Optional buffer to avoid extra memory allocations. It is resized automatically. + + :param cvt_buf: Optional buffer to avoid extra memory allocations. It is resized automatically. + +.. seealso:: :ocv:func:`normalize` diff --git a/modules/gpu/doc/per_element_operations.rst b/modules/gpu/doc/per_element_operations.rst index a59875e646..2670ba3233 100644 --- a/modules/gpu/doc/per_element_operations.rst +++ b/modules/gpu/doc/per_element_operations.rst @@ -276,6 +276,8 @@ Compares elements of two matrices. .. ocv:function:: void gpu::compare( const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream=Stream::Null() ) +.. ocv:function:: void gpu::compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null()) + :param a: First source matrix. :param b: Second source matrix with the same size and type as ``a`` . diff --git a/modules/gpu/include/opencv2/gpu/device/warp.hpp b/modules/gpu/include/opencv2/gpu/device/warp.hpp index d4b0b8d8f7..0f1dc794ab 100644 --- a/modules/gpu/include/opencv2/gpu/device/warp.hpp +++ b/modules/gpu/include/opencv2/gpu/device/warp.hpp @@ -97,6 +97,25 @@ namespace cv { namespace gpu { namespace device return out; } + template + static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op) + { + const unsigned int lane = laneId(); + + if (lane < 16) + { + T partial = ptr[lane]; + + ptr[lane] = partial = op(partial, ptr[lane + 16]); + ptr[lane] = partial = op(partial, ptr[lane + 8]); + ptr[lane] = partial = op(partial, ptr[lane + 4]); + ptr[lane] = partial = op(partial, ptr[lane + 2]); + ptr[lane] = partial = op(partial, ptr[lane + 1]); + } + + return *ptr; + } + template static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value) { @@ -109,4 +128,4 @@ namespace cv { namespace gpu { namespace device }; }}} // namespace cv { namespace gpu { namespace device -#endif /* __OPENCV_GPU_DEVICE_WARP_HPP__ */ \ No newline at end of file +#endif /* __OPENCV_GPU_DEVICE_WARP_HPP__ */ diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 60cff99f6c..7cc57e49af 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -145,43 +145,49 @@ public: ~Stream(); Stream(const Stream&); - Stream& operator=(const Stream&); + Stream& operator =(const Stream&); bool queryIfComplete(); void waitForCompletion(); - //! downloads asynchronously. + //! downloads asynchronously // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its subMat) void enqueueDownload(const GpuMat& src, CudaMem& dst); void enqueueDownload(const GpuMat& src, Mat& dst); - //! uploads asynchronously. + //! uploads asynchronously // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its ROI) void enqueueUpload(const CudaMem& src, GpuMat& dst); void enqueueUpload(const Mat& src, GpuMat& dst); + //! copy asynchronously void enqueueCopy(const GpuMat& src, GpuMat& dst); + //! memory set asynchronously void enqueueMemSet(GpuMat& src, Scalar val); void enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask); - // converts matrix type, ex from float to uchar depending on type - void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a = 1, double b = 0); + //! converts matrix type, ex from float to uchar depending on type + void enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double a = 1, double b = 0); + + //! adds a callback to be called on the host after all currently enqueued items in the stream have completed + typedef void (*StreamCallback)(Stream& stream, int status, void* userData); + void enqueueHostCallback(StreamCallback callback, void* userData); static Stream& Null(); operator bool() const; private: + struct Impl; + + explicit Stream(Impl* impl); void create(); void release(); - struct Impl; Impl *impl; friend struct StreamAccessor; - - explicit Stream(Impl* impl); }; @@ -459,6 +465,12 @@ CV_EXPORTS void cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, //! supports only floating-point source CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees = false, Stream& stream = Stream::Null()); +//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values +CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, + int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat()); +CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double a, double b, + int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf); + //////////////////////////// Per-element operations //////////////////////////////////// @@ -527,6 +539,7 @@ CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream //! compares elements of two arrays (c = a b) CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); +CV_EXPORTS void compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); //! performs per-elements bit-wise inversion CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); @@ -854,6 +867,11 @@ CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, float rho, float th CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096); CV_EXPORTS void HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray()); +//! HoughLinesP + +//! finds line segments in the black-n-white image using probabalistic Hough transform +CV_EXPORTS void HoughLinesP(const GpuMat& image, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int minLineLength, int maxLineGap, int maxLines = 4096); + //! HoughCircles struct HoughCirclesBuf @@ -912,11 +930,8 @@ CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuM //! supports NORM_INF, NORM_L1, NORM_L2 //! supports all matrices except 64F CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2); - -//! computes norm of array -//! supports NORM_INF, NORM_L1, NORM_L2 -//! supports all matrices except 64F CV_EXPORTS double norm(const GpuMat& src1, int normType, GpuMat& buf); +CV_EXPORTS double norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf); //! computes norm of the difference between two arrays //! supports NORM_INF, NORM_L1, NORM_L2 @@ -926,45 +941,33 @@ CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM //! computes sum of array elements //! supports only single channel images CV_EXPORTS Scalar sum(const GpuMat& src); - -//! computes sum of array elements -//! supports only single channel images CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf); +CV_EXPORTS Scalar sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); //! computes sum of array elements absolute values //! supports only single channel images CV_EXPORTS Scalar absSum(const GpuMat& src); - -//! computes sum of array elements absolute values -//! supports only single channel images CV_EXPORTS Scalar absSum(const GpuMat& src, GpuMat& buf); +CV_EXPORTS Scalar absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); //! computes squared sum of array elements //! supports only single channel images CV_EXPORTS Scalar sqrSum(const GpuMat& src); - -//! computes squared sum of array elements -//! supports only single channel images CV_EXPORTS Scalar sqrSum(const GpuMat& src, GpuMat& buf); +CV_EXPORTS Scalar sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); //! finds global minimum and maximum array elements and returns their values CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat()); - -//! finds global minimum and maximum array elements and returns their values CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf); //! finds global minimum and maximum array elements and returns their values with locations CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, const GpuMat& mask=GpuMat()); - -//! finds global minimum and maximum array elements and returns their values with locations CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf); //! counts non-zero array elements CV_EXPORTS int countNonZero(const GpuMat& src); - -//! counts non-zero array elements CV_EXPORTS int countNonZero(const GpuMat& src, GpuMat& buf); //! reduces a matrix to a vector @@ -1982,6 +1985,113 @@ private: }; +// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method +// +// see reference: +// [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". +// [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". +class CV_EXPORTS OpticalFlowDual_TVL1_GPU +{ +public: + OpticalFlowDual_TVL1_GPU(); + + void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy); + + void collectGarbage(); + + /** + * Time step of the numerical scheme. + */ + double tau; + + /** + * Weight parameter for the data term, attachment parameter. + * This is the most relevant parameter, which determines the smoothness of the output. + * The smaller this parameter is, the smoother the solutions we obtain. + * It depends on the range of motions of the images, so its value should be adapted to each image sequence. + */ + double lambda; + + /** + * Weight parameter for (u - v)^2, tightness parameter. + * It serves as a link between the attachment and the regularization terms. + * In theory, it should have a small value in order to maintain both parts in correspondence. + * The method is stable for a large range of values of this parameter. + */ + double theta; + + /** + * Number of scales used to create the pyramid of images. + */ + int nscales; + + /** + * Number of warpings per scale. + * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale. + * This is a parameter that assures the stability of the method. + * It also affects the running time, so it is a compromise between speed and accuracy. + */ + int warps; + + /** + * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time. + * A small value will yield more accurate solutions at the expense of a slower convergence. + */ + double epsilon; + + /** + * Stopping criterion iterations number used in the numerical scheme. + */ + int iterations; + + bool useInitialFlow; + +private: + void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2); + + std::vector I0s; + std::vector I1s; + std::vector u1s; + std::vector u2s; + + GpuMat I1x_buf; + GpuMat I1y_buf; + + GpuMat I1w_buf; + GpuMat I1wx_buf; + GpuMat I1wy_buf; + + GpuMat grad_buf; + GpuMat rho_c_buf; + + GpuMat p11_buf; + GpuMat p12_buf; + GpuMat p21_buf; + GpuMat p22_buf; + + GpuMat diff_buf; + GpuMat norm_buf; +}; + + +//! Calculates optical flow for 2 images using block matching algorithm */ +CV_EXPORTS void calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, + Size block_size, Size shift_size, Size max_range, bool use_previous, + GpuMat& velx, GpuMat& vely, GpuMat& buf, + Stream& stream = Stream::Null()); + +class CV_EXPORTS FastOpticalFlowBM +{ +public: + void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window = 21, int block_window = 7, Stream& s = Stream::Null()); + +private: + GpuMat buffer; + GpuMat extended_I0; + GpuMat extended_I1; +}; + + //! Interpolate frames (images) using provided optical flow (displacement field). //! frame0 - frame 0 (32-bit floating point images, single channel) //! frame1 - frame 1 (the same type and size) diff --git a/modules/gpu/perf/perf_core.cpp b/modules/gpu/perf/perf_core.cpp index cfd572dc16..b97c4999cd 100644 --- a/modules/gpu/perf/perf_core.cpp +++ b/modules/gpu/perf/perf_core.cpp @@ -647,6 +647,39 @@ PERF_TEST_P(Sz_Depth_Code, Core_CompareMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITH } } +////////////////////////////////////////////////////////////////////// +// CompareScalar + +PERF_TEST_P(Sz_Depth_Code, Core_CompareScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH, ALL_CMP_CODES)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int cmp_code = GET_PARAM(2); + + cv::Mat src(size, depth); + fillRandom(src); + + cv::Scalar s = cv::Scalar::all(100); + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_src(src); + cv::gpu::GpuMat d_dst; + + TEST_CYCLE() cv::gpu::compare(d_src, s, d_dst, cmp_code); + + GPU_SANITY_CHECK(d_dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::compare(src, s, dst, cmp_code); + + CPU_SANITY_CHECK(dst); + } +} + ////////////////////////////////////////////////////////////////////// // BitwiseNot @@ -1598,7 +1631,7 @@ PERF_TEST_P(Sz_Depth_Norm, Core_Norm, Combine( cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat d_buf; - TEST_CYCLE() dst = cv::gpu::norm(d_src, normType, d_buf); + TEST_CYCLE() dst = cv::gpu::norm(d_src, normType, cv::gpu::GpuMat(), d_buf); } else { @@ -1668,7 +1701,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_Sum, Combine( cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat d_buf; - TEST_CYCLE() dst = cv::gpu::sum(d_src, d_buf); + TEST_CYCLE() dst = cv::gpu::sum(d_src, cv::gpu::GpuMat(), d_buf); } else { @@ -1703,7 +1736,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, Combine( cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat d_buf; - TEST_CYCLE() dst = cv::gpu::absSum(d_src, d_buf); + TEST_CYCLE() dst = cv::gpu::absSum(d_src, cv::gpu::GpuMat(), d_buf); SANITY_CHECK(dst, 1e-6); } @@ -1737,7 +1770,7 @@ PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, Combine( cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat d_buf; - TEST_CYCLE() dst = cv::gpu::sqrSum(d_src, d_buf); + TEST_CYCLE() dst = cv::gpu::sqrSum(d_src, cv::gpu::GpuMat(), d_buf); SANITY_CHECK(dst, 1e-6); } @@ -1893,4 +1926,48 @@ PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, Combine( } } +////////////////////////////////////////////////////////////////////// +// Normalize + +DEF_PARAM_TEST(Sz_Depth_NormType, cv::Size, MatDepth, NormType); + +PERF_TEST_P(Sz_Depth_NormType, Core_Normalize, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(NormType(cv::NORM_INF), + NormType(cv::NORM_L1), + NormType(cv::NORM_L2), + NormType(cv::NORM_MINMAX)) + )) +{ + cv::Size size = GET_PARAM(0); + int type = GET_PARAM(1); + int norm_type = GET_PARAM(2); + + double alpha = 1; + double beta = 0; + + cv::Mat src(size, type); + fillRandom(src); + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_src(src); + cv::gpu::GpuMat d_dst; + cv::gpu::GpuMat d_norm_buf, d_cvt_buf; + + TEST_CYCLE() cv::gpu::normalize(d_src, d_dst, alpha, beta, norm_type, type, cv::gpu::GpuMat(), d_norm_buf, d_cvt_buf); + + GPU_SANITY_CHECK(d_dst, 1); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::normalize(src, dst, alpha, beta, norm_type, type); + + CPU_SANITY_CHECK(dst, 1); + } +} + } // namespace diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index 3f399cd5fe..e3d488ec94 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -1706,6 +1706,16 @@ PERF_TEST_P(Sz_Depth_Cn, ImgProc_ImagePyramidGetLayer, Combine(GPU_TYPICAL_MAT_S } namespace { + struct Vec4iComparator + { + bool operator()(const cv::Vec4i& a, const cv::Vec4i b) const + { + if (a[0] != b[0]) return a[0] < b[0]; + else if(a[1] != b[1]) return a[1] < b[1]; + else if(a[2] != b[2]) return a[2] < b[2]; + else return a[3] < b[3]; + } + }; struct Vec3fComparator { bool operator()(const cv::Vec3f& a, const cv::Vec3f b) const @@ -1784,6 +1794,62 @@ PERF_TEST_P(Sz, ImgProc_HoughLines, GPU_TYPICAL_MAT_SIZES) } } +////////////////////////////////////////////////////////////////////// +// HoughLinesP + +DEF_PARAM_TEST_1(Image, std::string); + +PERF_TEST_P(Image, ImgProc_HoughLinesP, testing::Values("cv/shared/pic5.png", "stitching/a1.png")) +{ + declare.time(30.0); + + std::string fileName = getDataPath(GetParam()); + + const float rho = 1.0f; + const float theta = static_cast(CV_PI / 180.0); + const int threshold = 100; + const int minLineLenght = 50; + const int maxLineGap = 5; + + cv::Mat image = cv::imread(fileName, cv::IMREAD_GRAYSCALE); + + cv::Mat mask; + cv::Canny(image, mask, 50, 100); + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_mask(mask); + cv::gpu::GpuMat d_lines; + cv::gpu::HoughLinesBuf d_buf; + + cv::gpu::HoughLinesP(d_mask, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap); + + TEST_CYCLE() + { + cv::gpu::HoughLinesP(d_mask, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap); + } + + cv::Mat h_lines(d_lines); + cv::Vec4i* begin = h_lines.ptr(); + cv::Vec4i* end = h_lines.ptr() + h_lines.cols; + std::sort(begin, end, Vec4iComparator()); + SANITY_CHECK(h_lines); + } + else + { + std::vector lines; + cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap); + + TEST_CYCLE() + { + cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap); + } + + std::sort(lines.begin(), lines.end(), Vec4iComparator()); + SANITY_CHECK(lines); + } +} + ////////////////////////////////////////////////////////////////////// // HoughCircles diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp index b18cb17dfb..83213a1613 100644 --- a/modules/gpu/perf/perf_video.cpp +++ b/modules/gpu/perf/perf_video.cpp @@ -394,6 +394,173 @@ PERF_TEST_P(ImagePair, Video_FarnebackOpticalFlow, } } +////////////////////////////////////////////////////// +// OpticalFlowDual_TVL1 + +PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1, + Values(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png"))) +{ + declare.time(20); + + cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + + cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_frame0(frame0); + cv::gpu::GpuMat d_frame1(frame1); + cv::gpu::GpuMat d_flowx; + cv::gpu::GpuMat d_flowy; + + cv::gpu::OpticalFlowDual_TVL1_GPU d_alg; + + d_alg(d_frame0, d_frame1, d_flowx, d_flowy); + + TEST_CYCLE() + { + d_alg(d_frame0, d_frame1, d_flowx, d_flowy); + } + + GPU_SANITY_CHECK(d_flowx); + GPU_SANITY_CHECK(d_flowy); + } + else + { + cv::Mat flow; + + cv::Ptr alg = cv::createOptFlow_DualTVL1(); + + alg->calc(frame0, frame1, flow); + + TEST_CYCLE() + { + alg->calc(frame0, frame1, flow); + } + + CPU_SANITY_CHECK(flow); + } +} + +////////////////////////////////////////////////////// +// OpticalFlowBM + +void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr, + cv::Size bSize, cv::Size shiftSize, cv::Size maxRange, int usePrevious, + cv::Mat& velx, cv::Mat& vely) +{ + cv::Size sz((curr.cols - bSize.width + shiftSize.width)/shiftSize.width, (curr.rows - bSize.height + shiftSize.height)/shiftSize.height); + + velx.create(sz, CV_32FC1); + vely.create(sz, CV_32FC1); + + CvMat cvprev = prev; + CvMat cvcurr = curr; + + CvMat cvvelx = velx; + CvMat cvvely = vely; + + cvCalcOpticalFlowBM(&cvprev, &cvcurr, bSize, shiftSize, maxRange, usePrevious, &cvvelx, &cvvely); +} + +PERF_TEST_P(ImagePair, Video_OpticalFlowBM, + Values(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png"))) +{ + declare.time(400); + + cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + + cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + + cv::Size block_size(16, 16); + cv::Size shift_size(1, 1); + cv::Size max_range(16, 16); + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_frame0(frame0); + cv::gpu::GpuMat d_frame1(frame1); + cv::gpu::GpuMat d_velx, d_vely, buf; + + cv::gpu::calcOpticalFlowBM(d_frame0, d_frame1, block_size, shift_size, max_range, false, d_velx, d_vely, buf); + + TEST_CYCLE() + { + cv::gpu::calcOpticalFlowBM(d_frame0, d_frame1, block_size, shift_size, max_range, false, d_velx, d_vely, buf); + } + + GPU_SANITY_CHECK(d_velx); + GPU_SANITY_CHECK(d_vely); + } + else + { + cv::Mat velx, vely; + + calcOpticalFlowBM(frame0, frame1, block_size, shift_size, max_range, false, velx, vely); + + TEST_CYCLE() + { + calcOpticalFlowBM(frame0, frame1, block_size, shift_size, max_range, false, velx, vely); + } + + CPU_SANITY_CHECK(velx); + CPU_SANITY_CHECK(vely); + } +} + +PERF_TEST_P(ImagePair, Video_FastOpticalFlowBM, + Values(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png"))) +{ + declare.time(400); + + cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + + cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + + cv::Size block_size(16, 16); + cv::Size shift_size(1, 1); + cv::Size max_range(16, 16); + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_frame0(frame0); + cv::gpu::GpuMat d_frame1(frame1); + cv::gpu::GpuMat d_velx, d_vely; + + cv::gpu::FastOpticalFlowBM fastBM; + + fastBM(d_frame0, d_frame1, d_velx, d_vely, max_range.width, block_size.width); + + TEST_CYCLE() + { + fastBM(d_frame0, d_frame1, d_velx, d_vely, max_range.width, block_size.width); + } + + GPU_SANITY_CHECK(d_velx); + GPU_SANITY_CHECK(d_vely); + } + else + { + cv::Mat velx, vely; + + calcOpticalFlowBM(frame0, frame1, block_size, shift_size, max_range, false, velx, vely); + + TEST_CYCLE() + { + calcOpticalFlowBM(frame0, frame1, block_size, shift_size, max_range, false, velx, vely); + } + + CPU_SANITY_CHECK(velx); + CPU_SANITY_CHECK(vely); + } +} + ////////////////////////////////////////////////////// // FGDStatModel diff --git a/modules/gpu/perf/utility.hpp b/modules/gpu/perf/utility.hpp index 09b84f53aa..6782b93768 100644 --- a/modules/gpu/perf/utility.hpp +++ b/modules/gpu/perf/utility.hpp @@ -17,7 +17,7 @@ CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONS CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA) #define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all()) -CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING) +CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING, cv::NORM_MINMAX) const int Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4; CV_ENUM(MatCn, Gray, TwoChannel, BGR, BGRA) diff --git a/modules/gpu/perf4au/CMakeLists.txt b/modules/gpu/perf4au/CMakeLists.txt new file mode 100644 index 0000000000..7452203826 --- /dev/null +++ b/modules/gpu/perf4au/CMakeLists.txt @@ -0,0 +1,28 @@ +set(PERF4AU_REQUIRED_DEPS opencv_core opencv_imgproc opencv_highgui opencv_video opencv_legacy opencv_gpu opencv_ts) + +ocv_check_dependencies(${PERF4AU_REQUIRED_DEPS}) + +set(the_target gpu_perf4au) +project(${the_target}) + +ocv_include_modules(${PERF4AU_REQUIRED_DEPS}) + +if(CMAKE_COMPILER_IS_GNUCXX AND NOT ENABLE_NOISY_WARNINGS) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function") +endif() + +file(GLOB srcs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp *.h *.hpp) +add_executable(${the_target} ${srcs}) + +target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${PERF4AU_REQUIRED_DEPS}) + +if(ENABLE_SOLUTION_FOLDERS) + set_target_properties(${the_target} PROPERTIES FOLDER "tests performance") +endif() + +if(WIN32) + if(MSVC AND NOT BUILD_SHARED_LIBS) + set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG") + endif() +endif() + diff --git a/modules/gpu/perf4au/im1_1280x800.jpg b/modules/gpu/perf4au/im1_1280x800.jpg new file mode 100644 index 0000000000..bdbbd4aee9 Binary files /dev/null and b/modules/gpu/perf4au/im1_1280x800.jpg differ diff --git a/modules/gpu/perf4au/im2_1280x800.jpg b/modules/gpu/perf4au/im2_1280x800.jpg new file mode 100644 index 0000000000..ae49640a95 Binary files /dev/null and b/modules/gpu/perf4au/im2_1280x800.jpg differ diff --git a/modules/gpu/perf4au/main.cpp b/modules/gpu/perf4au/main.cpp new file mode 100644 index 0000000000..80d97ea806 --- /dev/null +++ b/modules/gpu/perf4au/main.cpp @@ -0,0 +1,490 @@ +#include +#ifdef HAVE_CVCONFIG_H +#include "cvconfig.h" +#endif +#include "opencv2/core/core.hpp" +#include "opencv2/gpu/gpu.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/video/video.hpp" +#include "opencv2/legacy/legacy.hpp" +#include "opencv2/ts/ts.hpp" +#include "opencv2/ts/ts_perf.hpp" + +static void printOsInfo() +{ +#if defined _WIN32 +# if defined _WIN64 + printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x64.\n[----------]\n"); fflush(stdout); +# else + printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x32.\n[----------]\n"); fflush(stdout); +# endif +#elif defined linux +# if defined _LP64 + printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x64.\n[----------]\n"); fflush(stdout); +# else + printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x32.\n[----------]\n"); fflush(stdout); +# endif +#elif defined __APPLE__ +# if defined _LP64 + printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x64.\n[----------]\n"); fflush(stdout); +# else + printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x32.\n[----------]\n"); fflush(stdout); +# endif +#endif +} + +static void printCudaInfo() +{ + const int deviceCount = cv::gpu::getCudaEnabledDeviceCount(); + + printf("[----------]\n"); fflush(stdout); + printf("[ GPU INFO ] \tCUDA device count:: %d.\n", deviceCount); fflush(stdout); + printf("[----------]\n"); fflush(stdout); + + for (int i = 0; i < deviceCount; ++i) + { + cv::gpu::DeviceInfo info(i); + + printf("[----------]\n"); fflush(stdout); + printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()); fflush(stdout); + printf("[ ] \tCompute capability: %d.%d\n", info.majorVersion(), info.minorVersion()); fflush(stdout); + printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()); fflush(stdout); + printf("[ ] \tTotal memory: %d Mb\n", static_cast(static_cast(info.totalMemory() / 1024.0) / 1024.0)); fflush(stdout); + printf("[ ] \tFree memory: %d Mb\n", static_cast(static_cast(info.freeMemory() / 1024.0) / 1024.0)); fflush(stdout); + if (!info.isCompatible()) + printf("[ GPU INFO ] \tThis device is NOT compatible with current GPU module build\n"); + printf("[----------]\n"); fflush(stdout); + } +} + +int main(int argc, char* argv[]) +{ + printOsInfo(); + printCudaInfo(); + + perf::Regression::Init("nv_perf_test"); + perf::TestBase::Init(argc, argv); + testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} + +#define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name +#define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name + +////////////////////////////////////////////////////////// +// HoughLinesP + +DEF_PARAM_TEST_1(Image, std::string); + +PERF_TEST_P(Image, HoughLinesP, testing::Values(std::string("im1_1280x800.jpg"))) +{ + declare.time(30.0); + + std::string fileName = GetParam(); + + const float rho = 1.f; + const float theta = 1.f; + const int threshold = 40; + const int minLineLenght = 20; + const int maxLineGap = 5; + + cv::Mat image = cv::imread(fileName, cv::IMREAD_GRAYSCALE); + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_image(image); + cv::gpu::GpuMat d_lines; + cv::gpu::HoughLinesBuf d_buf; + + cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap); + + TEST_CYCLE() + { + cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap); + } + } + else + { + cv::Mat mask; + cv::Canny(image, mask, 50, 100); + + std::vector lines; + cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap); + + TEST_CYCLE() + { + cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap); + } + } + + SANITY_CHECK(0); +} + +////////////////////////////////////////////////////////// +// GoodFeaturesToTrack + +DEF_PARAM_TEST(Image_Depth, std::string, perf::MatDepth); + +PERF_TEST_P(Image_Depth, GoodFeaturesToTrack, + testing::Combine( + testing::Values(std::string("im1_1280x800.jpg")), + testing::Values(CV_8U, CV_16U) + )) +{ + declare.time(60); + + const std::string fileName = std::tr1::get<0>(GetParam()); + const int depth = std::tr1::get<1>(GetParam()); + + const int maxCorners = 5000; + const double qualityLevel = 0.05; + const int minDistance = 5; + const int blockSize = 3; + const bool useHarrisDetector = true; + const double k = 0.05; + + cv::Mat src = cv::imread(fileName, cv::IMREAD_GRAYSCALE); + if (src.empty()) + FAIL() << "Unable to load source image [" << fileName << "]"; + + if (depth != CV_8U) + src.convertTo(src, depth); + + cv::Mat mask(src.size(), CV_8UC1, cv::Scalar::all(1)); + mask(cv::Rect(0, 0, 100, 100)).setTo(cv::Scalar::all(0)); + + if (PERF_RUN_GPU()) + { + cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, k); + + cv::gpu::GpuMat d_src(src); + cv::gpu::GpuMat d_mask(mask); + cv::gpu::GpuMat d_pts; + + d_detector(d_src, d_pts, d_mask); + + TEST_CYCLE() + { + d_detector(d_src, d_pts, d_mask); + } + } + else + { + if (depth != CV_8U) + FAIL() << "Unsupported depth"; + + cv::Mat pts; + + cv::goodFeaturesToTrack(src, pts, maxCorners, qualityLevel, minDistance, mask, blockSize, useHarrisDetector, k); + + TEST_CYCLE() + { + cv::goodFeaturesToTrack(src, pts, maxCorners, qualityLevel, minDistance, mask, blockSize, useHarrisDetector, k); + } + } + + SANITY_CHECK(0); +} + +////////////////////////////////////////////////////////// +// OpticalFlowPyrLKSparse + +typedef std::pair string_pair; + +DEF_PARAM_TEST(ImagePair_Depth_GraySource, string_pair, perf::MatDepth, bool); + +PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(CV_8U, CV_16U), + testing::Bool() + )) +{ + declare.time(60); + + const string_pair fileNames = std::tr1::get<0>(GetParam()); + const int depth = std::tr1::get<1>(GetParam()); + const bool graySource = std::tr1::get<2>(GetParam()); + + // PyrLK params + const cv::Size winSize(15, 15); + const int maxLevel = 5; + const cv::TermCriteria criteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 30, 0.01); + + // GoodFeaturesToTrack params + const int maxCorners = 5000; + const double qualityLevel = 0.05; + const int minDistance = 5; + const int blockSize = 3; + const bool useHarrisDetector = true; + const double k = 0.05; + + cv::Mat src1 = cv::imread(fileNames.first, graySource ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); + if (src1.empty()) + FAIL() << "Unable to load source image [" << fileNames.first << "]"; + + cv::Mat src2 = cv::imread(fileNames.second, graySource ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); + if (src2.empty()) + FAIL() << "Unable to load source image [" << fileNames.second << "]"; + + cv::Mat gray_src; + if (graySource) + gray_src = src1; + else + cv::cvtColor(src1, gray_src, cv::COLOR_BGR2GRAY); + + cv::Mat pts; + cv::goodFeaturesToTrack(gray_src, pts, maxCorners, qualityLevel, minDistance, cv::noArray(), blockSize, useHarrisDetector, k); + + if (depth != CV_8U) + { + src1.convertTo(src1, depth); + src2.convertTo(src2, depth); + } + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_src1(src1); + cv::gpu::GpuMat d_src2(src2); + cv::gpu::GpuMat d_pts(pts.reshape(2, 1)); + cv::gpu::GpuMat d_nextPts; + cv::gpu::GpuMat d_status; + + cv::gpu::PyrLKOpticalFlow d_pyrLK; + d_pyrLK.winSize = winSize; + d_pyrLK.maxLevel = maxLevel; + d_pyrLK.iters = criteria.maxCount; + d_pyrLK.useInitialFlow = false; + + d_pyrLK.sparse(d_src1, d_src2, d_pts, d_nextPts, d_status); + + TEST_CYCLE() + { + d_pyrLK.sparse(d_src1, d_src2, d_pts, d_nextPts, d_status); + } + } + else + { + if (depth != CV_8U) + FAIL() << "Unsupported depth"; + + cv::Mat nextPts; + cv::Mat status; + + cv::calcOpticalFlowPyrLK(src1, src2, pts, nextPts, status, cv::noArray(), winSize, maxLevel, criteria); + + TEST_CYCLE() + { + cv::calcOpticalFlowPyrLK(src1, src2, pts, nextPts, status, cv::noArray(), winSize, maxLevel, criteria); + } + } + + SANITY_CHECK(0); +} + +////////////////////////////////////////////////////////// +// OpticalFlowFarneback + +DEF_PARAM_TEST(ImagePair_Depth, string_pair, perf::MatDepth); + +PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(CV_8U, CV_16U) + )) +{ + declare.time(500); + + const string_pair fileNames = std::tr1::get<0>(GetParam()); + const int depth = std::tr1::get<1>(GetParam()); + + const double pyrScale = 0.5; + const int numLevels = 6; + const int winSize = 7; + const int numIters = 15; + const int polyN = 7; + const double polySigma = 1.5; + const int flags = cv::OPTFLOW_USE_INITIAL_FLOW; + + cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE); + if (src1.empty()) + FAIL() << "Unable to load source image [" << fileNames.first << "]"; + + cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE); + if (src2.empty()) + FAIL() << "Unable to load source image [" << fileNames.second << "]"; + + if (depth != CV_8U) + { + src1.convertTo(src1, depth); + src2.convertTo(src2, depth); + } + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_src1(src1); + cv::gpu::GpuMat d_src2(src2); + cv::gpu::GpuMat d_u(src1.size(), CV_32FC1, cv::Scalar::all(0)); + cv::gpu::GpuMat d_v(src1.size(), CV_32FC1, cv::Scalar::all(0)); + + cv::gpu::FarnebackOpticalFlow d_farneback; + d_farneback.pyrScale = pyrScale; + d_farneback.numLevels = numLevels; + d_farneback.winSize = winSize; + d_farneback.numIters = numIters; + d_farneback.polyN = polyN; + d_farneback.polySigma = polySigma; + d_farneback.flags = flags; + + d_farneback(d_src1, d_src2, d_u, d_v); + + TEST_CYCLE_N(10) + { + d_farneback(d_src1, d_src2, d_u, d_v); + } + } + else + { + if (depth != CV_8U) + FAIL() << "Unsupported depth"; + + cv::Mat flow(src1.size(), CV_32FC2, cv::Scalar::all(0)); + + cv::calcOpticalFlowFarneback(src1, src2, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags); + + TEST_CYCLE_N(10) + { + cv::calcOpticalFlowFarneback(src1, src2, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags); + } + } + + SANITY_CHECK(0); +} + +////////////////////////////////////////////////////////// +// OpticalFlowBM + +void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr, + cv::Size bSize, cv::Size shiftSize, cv::Size maxRange, int usePrevious, + cv::Mat& velx, cv::Mat& vely) +{ + cv::Size sz((curr.cols - bSize.width + shiftSize.width)/shiftSize.width, (curr.rows - bSize.height + shiftSize.height)/shiftSize.height); + + velx.create(sz, CV_32FC1); + vely.create(sz, CV_32FC1); + + CvMat cvprev = prev; + CvMat cvcurr = curr; + + CvMat cvvelx = velx; + CvMat cvvely = vely; + + cvCalcOpticalFlowBM(&cvprev, &cvcurr, bSize, shiftSize, maxRange, usePrevious, &cvvelx, &cvvely); +} + +DEF_PARAM_TEST(ImagePair_BlockSize_ShiftSize_MaxRange, string_pair, cv::Size, cv::Size, cv::Size); + +PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(cv::Size(16, 16)), + testing::Values(cv::Size(2, 2)), + testing::Values(cv::Size(16, 16)) + )) +{ + declare.time(3000); + + const string_pair fileNames = std::tr1::get<0>(GetParam()); + const cv::Size block_size = std::tr1::get<1>(GetParam()); + const cv::Size shift_size = std::tr1::get<2>(GetParam()); + const cv::Size max_range = std::tr1::get<3>(GetParam()); + + cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE); + if (src1.empty()) + FAIL() << "Unable to load source image [" << fileNames.first << "]"; + + cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE); + if (src2.empty()) + FAIL() << "Unable to load source image [" << fileNames.second << "]"; + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_src1(src1); + cv::gpu::GpuMat d_src2(src2); + cv::gpu::GpuMat d_velx, d_vely, buf; + + cv::gpu::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf); + + TEST_CYCLE_N(10) + { + cv::gpu::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf); + } + } + else + { + cv::Mat velx, vely; + + calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely); + + TEST_CYCLE_N(10) + { + calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely); + } + } + + SANITY_CHECK(0); +} + +PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(cv::Size(16, 16)), + testing::Values(cv::Size(1, 1)), + testing::Values(cv::Size(16, 16)) + )) +{ + declare.time(3000); + + const string_pair fileNames = std::tr1::get<0>(GetParam()); + const cv::Size block_size = std::tr1::get<1>(GetParam()); + const cv::Size shift_size = std::tr1::get<2>(GetParam()); + const cv::Size max_range = std::tr1::get<3>(GetParam()); + + cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE); + if (src1.empty()) + FAIL() << "Unable to load source image [" << fileNames.first << "]"; + + cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE); + if (src2.empty()) + FAIL() << "Unable to load source image [" << fileNames.second << "]"; + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_src1(src1); + cv::gpu::GpuMat d_src2(src2); + cv::gpu::GpuMat d_velx, d_vely; + + cv::gpu::FastOpticalFlowBM fastBM; + + fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width); + + TEST_CYCLE_N(10) + { + fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width); + } + } + else + { + cv::Mat velx, vely; + + calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely); + + TEST_CYCLE_N(10) + { + calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely); + } + } + + SANITY_CHECK(0); +} diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp index 242febded9..7e0aaab680 100644 --- a/modules/gpu/src/arithm.cpp +++ b/modules/gpu/src/arithm.cpp @@ -59,6 +59,8 @@ void cv::gpu::magnitudeSqr(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { thr void cv::gpu::phase(const GpuMat&, const GpuMat&, GpuMat&, bool, Stream&) { throw_nogpu(); } void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_nogpu(); } void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_nogpu(); } +void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&) { throw_nogpu(); } +void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); } #else /* !defined (HAVE_CUDA) */ @@ -529,4 +531,47 @@ void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream)); } +//////////////////////////////////////////////////////////////////////// +// normalize + +void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask) +{ + GpuMat norm_buf; + GpuMat cvt_buf; + normalize(src, dst, a, b, norm_type, dtype, mask, norm_buf, cvt_buf); +} + +void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf) +{ + double scale = 1, shift = 0; + if (norm_type == NORM_MINMAX) + { + double smin = 0, smax = 0; + double dmin = std::min(a, b), dmax = std::max(a, b); + minMax(src, &smin, &smax, mask, norm_buf); + scale = (dmax - dmin) * (smax - smin > numeric_limits::epsilon() ? 1.0 / (smax - smin) : 0.0); + shift = dmin - smin * scale; + } + else if (norm_type == NORM_L2 || norm_type == NORM_L1 || norm_type == NORM_INF) + { + scale = norm(src, norm_type, mask, norm_buf); + scale = scale > numeric_limits::epsilon() ? a / scale : 0.0; + shift = 0; + } + else + { + CV_Error(CV_StsBadArg, "Unknown/unsupported norm type"); + } + + if (mask.empty()) + { + src.convertTo(dst, dtype, scale, shift); + } + else + { + src.convertTo(cvt_buf, dtype, scale, shift); + cvt_buf.copyTo(dst, mask); + } +} + #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index 4b52cc7dd3..27fb61ff70 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -1954,6 +1954,226 @@ namespace arithm template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); } +////////////////////////////////////////////////////////////////////////////////////// +// cmpScalar + +namespace arithm +{ +#define TYPE_VEC(type, cn) typename TypeVec::vec_type + + template struct CmpScalar; + template + struct CmpScalar : unary_function + { + const T val; + + __host__ explicit CmpScalar(T val_) : val(val_) {} + + __device__ __forceinline__ uchar operator()(T src) const + { + Cmp op; + return op(src, val); + } + }; + template + struct CmpScalar : unary_function + { + const TYPE_VEC(T, 2) val; + + __host__ explicit CmpScalar(TYPE_VEC(T, 2) val_) : val(val_) {} + + __device__ __forceinline__ TYPE_VEC(uchar, 2) operator()(const TYPE_VEC(T, 2) & src) const + { + Cmp op; + return VecTraits::make(op(src.x, val.x), op(src.y, val.y)); + } + }; + template + struct CmpScalar : unary_function + { + const TYPE_VEC(T, 3) val; + + __host__ explicit CmpScalar(TYPE_VEC(T, 3) val_) : val(val_) {} + + __device__ __forceinline__ TYPE_VEC(uchar, 3) operator()(const TYPE_VEC(T, 3) & src) const + { + Cmp op; + return VecTraits::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z)); + } + }; + template + struct CmpScalar : unary_function + { + const TYPE_VEC(T, 4) val; + + __host__ explicit CmpScalar(TYPE_VEC(T, 4) val_) : val(val_) {} + + __device__ __forceinline__ TYPE_VEC(uchar, 4) operator()(const TYPE_VEC(T, 4) & src) const + { + Cmp op; + return VecTraits::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z), op(src.w, val.w)); + } + }; + +#undef TYPE_VEC +} + +namespace cv { namespace gpu { namespace device +{ + template struct TransformFunctorTraits< arithm::CmpScalar > : arithm::ArithmFuncTraits + { + }; +}}} + +namespace arithm +{ + template