From e746b3e8ae5e982e2ae71b8167b4478bf722e82e Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov
Date: Mon, 1 Aug 2011 08:15:31 +0000
Subject: [PATCH] added buffered versions of pyrDown and pyrUp

added stream support to downsample, upsample, pyrUp and pyrDown
---
 modules/gpu/include/opencv2/gpu/gpu.hpp | 415 +++++++------
 modules/gpu/src/cuda/imgproc.cu         |  64 +-
 modules/gpu/src/imgproc_gpu.cpp         |  90 ++-
 modules/gpu/test/test_features2d.cpp    | 775 ------------------------
 samples/gpu/performance/tests.cpp       |  48 ++
 5 files changed, 377 insertions(+), 1015 deletions(-)

diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 393611e7fe..0d7b3f8cef 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -441,6 +441,191 @@ namespace cv
         explicit Stream(Impl* impl);
     };
+
+
+    //////////////////////////////// Filter Engine ////////////////////////////////
+
+    /*!
+    The Base Class for 1D or Row-wise Filters
+
+    This is the base class for linear or non-linear filters that process 1D data.
+    In particular, such filters are used for the "horizontal" filtering parts in separable filters.
+    */
+    class CV_EXPORTS BaseRowFilter_GPU
+    {
+    public:
+        BaseRowFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
+        virtual ~BaseRowFilter_GPU() {}
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
+        int ksize, anchor;
+    };
+
+    /*!
+    The Base Class for Column-wise Filters
+
+    This is the base class for linear or non-linear filters that process columns of 2D arrays.
+    Such filters are used for the "vertical" filtering parts in separable filters.
+    */
+    class CV_EXPORTS BaseColumnFilter_GPU
+    {
+    public:
+        BaseColumnFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
+        virtual ~BaseColumnFilter_GPU() {}
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
+        int ksize, anchor;
+    };
+
+    /*!
+    The Base Class for Non-Separable 2D Filters.
+
+    This is the base class for linear or non-linear 2D filters.
+    */
+    class CV_EXPORTS BaseFilter_GPU
+    {
+    public:
+        BaseFilter_GPU(const Size& ksize_, const Point& anchor_) : ksize(ksize_), anchor(anchor_) {}
+        virtual ~BaseFilter_GPU() {}
+        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
+        Size ksize;
+        Point anchor;
+    };
+
+    /*!
+    The Base Class for Filter Engine.
+
+    The class can be used to apply an arbitrary filtering operation to an image.
+    It contains all the necessary intermediate buffers.
+    */
+    class CV_EXPORTS FilterEngine_GPU
+    {
+    public:
+        virtual ~FilterEngine_GPU() {}
+
+        virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) = 0;
+    };
+
+    //! returns the non-separable filter engine with the specified filter
+    CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU>& filter2D, int srcType, int dstType);
+
+    //! returns the separable filter engine with the specified filters
+    CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
+        const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType);
+
+    //! returns horizontal 1D box filter
+    //! supports only CV_8UC1 source type and CV_32FC1 sum type
+    CV_EXPORTS Ptr<BaseRowFilter_GPU> getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor = -1);
+
+    //! returns vertical 1D box filter
+    //! supports only CV_8UC1 sum type and CV_32FC1 dst type
+    CV_EXPORTS Ptr<BaseColumnFilter_GPU> getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor = -1);
+
+    //! returns 2D box filter
+    //! supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
+    CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1, -1));
+
+    //! returns box filter engine
+    CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size& ksize,
+        const Point& anchor = Point(-1,-1));
+
+    //! returns 2D morphological filter
+    //! only MORPH_ERODE and MORPH_DILATE are supported
+    //! supports CV_8UC1 and CV_8UC4 types
+    //! kernel must have CV_8UC1 type, one row, and cols == ksize.width * ksize.height
+    CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize,
+        Point anchor=Point(-1,-1));
+
+    //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
+    CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel,
+        const Point& anchor = Point(-1,-1), int iterations = 1);
+
+    //! returns 2D filter with the specified kernel
+    //! supports CV_8UC1 and CV_8UC4 types
+    CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Size& ksize,
+        Point anchor = Point(-1, -1));
+
+    //! returns the non-separable linear filter engine
+    CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel,
+        const Point& anchor = Point(-1,-1));
+
+    //! returns the primitive row filter with the specified kernel.
+    //! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source types.
+    //! there are two versions of the algorithm: NPP and OpenCV.
+    //! the NPP version is used when srcType == CV_8UC1 or srcType == CV_8UC4 and bufType == srcType,
+    //! otherwise the OpenCV version is used.
+    //! NPP supports only the BORDER_CONSTANT border type.
+    //! the OpenCV version supports only CV_32F buffer depth and
+    //! the BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
+    CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel,
+        int anchor = -1, int borderType = BORDER_CONSTANT);
+
+    //! returns the primitive column filter with the specified kernel.
+    //! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 dst types.
+    //! there are two versions of the algorithm: NPP and OpenCV.
+    //! the NPP version is used when dstType == CV_8UC1 or dstType == CV_8UC4 and bufType == dstType,
+    //! otherwise the OpenCV version is used.
+    //! NPP supports only the BORDER_CONSTANT border type.
+    //! the OpenCV version supports only CV_32F buffer depth and
+    //! the BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
+    CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel,
+        int anchor = -1, int borderType = BORDER_CONSTANT);
+
+    //! returns the separable linear filter engine
+    CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
+        const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
+        int columnBorderType = -1);
+
+    //! returns filter engine for the generalized Sobel operator
+    CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize,
+        int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+
+    //! returns the Gaussian filter engine
+    CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0,
+        int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+
+    //! returns maximum filter
+    CV_EXPORTS Ptr<BaseFilter_GPU> getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
+
+    //! returns minimum filter
+    CV_EXPORTS Ptr<BaseFilter_GPU> getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
+
+    //! smooths the image using the normalized box filter
+    //! supports CV_8UC1, CV_8UC4 types
+    CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null());
+
+    //! a synonym for normalized box filter
+    static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null()) { boxFilter(src, dst, -1, ksize, anchor, stream); }
+
+    //! erodes the image (applies the local minimum operator)
+    CV_EXPORTS void erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
+
+    //! dilates the image (applies the local maximum operator)
+    CV_EXPORTS void dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
+
+    //! applies an advanced morphological operation to the image
+    CV_EXPORTS void morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
+
+    //! applies non-separable 2D linear filter to the image
+    CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), Stream& stream = Stream::Null());
+
+    //! applies separable 2D linear filter to the image
+    CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
+        Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+    //! applies generalized Sobel operator to the image
+    CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1,
+        int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+    //! applies the vertical or horizontal Scharr operator to the image
+    CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1,
+        int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+    //! smooths the image using a Gaussian filter
+    CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0,
+        int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+    //! applies Laplacian operator to the image
+    //! supports only ksize = 1 and ksize = 3
+    CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, Stream& stream = Stream::Null());

     ////////////////////////////// Arithmetics ///////////////////////////////////
@@ -739,16 +924,54 @@
     CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method);

     //! downsamples image
-    CV_EXPORTS void downsample(const GpuMat& src, GpuMat& dst);
+    CV_EXPORTS void downsample(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());

     //! upsamples image
-    CV_EXPORTS void upsample(const GpuMat& src, GpuMat &dst);
+    CV_EXPORTS void upsample(const GpuMat& src, GpuMat &dst, Stream& stream = Stream::Null());

     //! smooths the source image and downsamples it
-    CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst);
+    CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+    struct CV_EXPORTS PyrDownBuf;
+
+    CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, PyrDownBuf& buf, Stream& stream = Stream::Null());
+
+    struct CV_EXPORTS PyrDownBuf
+    {
+        PyrDownBuf() : image_type(-1) {}
+        PyrDownBuf(Size image_size, int image_type_) : image_type(-1) { create(image_size, image_type_); }
+        void create(Size image_size, int image_type_);
+
+    private:
+        friend void pyrDown(const GpuMat&, GpuMat&, PyrDownBuf&, Stream& stream);
+
+        static Mat ker;
+        GpuMat buf;
+        Ptr<FilterEngine_GPU> filter;
+        int image_type;
+    };

     //! upsamples the source image and then smooths it
-    CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst);
+    CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+    struct CV_EXPORTS PyrUpBuf;
+
+    CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, PyrUpBuf& buf, Stream& stream = Stream::Null());
+
+    struct CV_EXPORTS PyrUpBuf
+    {
+        PyrUpBuf() : image_type(-1) {}
+        PyrUpBuf(Size image_size, int image_type_) : image_type(-1) { create(image_size, image_type_); }
+        void create(Size image_size, int image_type_);
+
+    private:
+        friend void pyrUp(const GpuMat&, GpuMat&, PyrUpBuf&, Stream& stream);
+
+        static Mat ker;
+        GpuMat buf;
+        Ptr<FilterEngine_GPU> filter;
+        int image_type;
+    };

     //! performs linear blending of two images
     //! to avoid accuracy errors sum of weights shouldn't be very close to zero
@@ -835,190 +1058,6 @@
         int num_iters=100, float max_dist=8.0, int min_inlier_count=100, vector<int>* inliers=NULL);

-    //////////////////////////////// Filter Engine ////////////////////////////////
-
-    /*!
-    The Base Class for 1D or Row-wise Filters
-
-    This is the base class for linear or non-linear filters that process 1D data.
-    In particular, such filters are used for the "horizontal" filtering parts in separable filters.
-    */
-    class CV_EXPORTS BaseRowFilter_GPU
-    {
-    public:
-        BaseRowFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
-        virtual ~BaseRowFilter_GPU() {}
-        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
-        int ksize, anchor;
-    };
-
-    /*!
-    The Base Class for Column-wise Filters
-
-    This is the base class for linear or non-linear filters that process columns of 2D arrays.
-    Such filters are used for the "vertical" filtering parts in separable filters.
-    */
-    class CV_EXPORTS BaseColumnFilter_GPU
-    {
-    public:
-        BaseColumnFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
-        virtual ~BaseColumnFilter_GPU() {}
-        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
-        int ksize, anchor;
-    };
-
-    /*!
-    The Base Class for Non-Separable 2D Filters.
-
-    This is the base class for linear or non-linear 2D filters.
- */ - class CV_EXPORTS BaseFilter_GPU - { - public: - BaseFilter_GPU(const Size& ksize_, const Point& anchor_) : ksize(ksize_), anchor(anchor_) {} - virtual ~BaseFilter_GPU() {} - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0; - Size ksize; - Point anchor; - }; - - /*! - The Base Class for Filter Engine. - - The class can be used to apply an arbitrary filtering operation to an image. - It contains all the necessary intermediate buffers. - */ - class CV_EXPORTS FilterEngine_GPU - { - public: - virtual ~FilterEngine_GPU() {} - - virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) = 0; - }; - - //! returns the non-separable filter engine with the specified filter - CV_EXPORTS Ptr createFilter2D_GPU(const Ptr& filter2D, int srcType, int dstType); - - //! returns the separable filter engine with the specified filters - CV_EXPORTS Ptr createSeparableFilter_GPU(const Ptr& rowFilter, - const Ptr& columnFilter, int srcType, int bufType, int dstType); - - //! returns horizontal 1D box filter - //! supports only CV_8UC1 source type and CV_32FC1 sum type - CV_EXPORTS Ptr getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor = -1); - - //! returns vertical 1D box filter - //! supports only CV_8UC1 sum type and CV_32FC1 dst type - CV_EXPORTS Ptr getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor = -1); - - //! returns 2D box filter - //! supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type - CV_EXPORTS Ptr getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1, -1)); - - //! returns box filter engine - CV_EXPORTS Ptr createBoxFilter_GPU(int srcType, int dstType, const Size& ksize, - const Point& anchor = Point(-1,-1)); - - //! returns 2D morphological filter - //! only MORPH_ERODE and MORPH_DILATE are supported - //! supports CV_8UC1 and CV_8UC4 types - //! kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height - CV_EXPORTS Ptr getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize, - Point anchor=Point(-1,-1)); - - //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported. - CV_EXPORTS Ptr createMorphologyFilter_GPU(int op, int type, const Mat& kernel, - const Point& anchor = Point(-1,-1), int iterations = 1); - - //! returns 2D filter with the specified kernel - //! supports CV_8UC1 and CV_8UC4 types - CV_EXPORTS Ptr getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Size& ksize, - Point anchor = Point(-1, -1)); - - //! returns the non-separable linear filter engine - CV_EXPORTS Ptr createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, - const Point& anchor = Point(-1,-1)); - - //! returns the primitive row filter with the specified kernel. - //! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type. - //! there are two version of algorithm: NPP and OpenCV. - //! NPP calls when srcType == CV_8UC1 or srcType == CV_8UC4 and bufType == srcType, - //! otherwise calls OpenCV version. - //! NPP supports only BORDER_CONSTANT border type. - //! OpenCV version supports only CV_32F as buffer depth and - //! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types. - CV_EXPORTS Ptr getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, - int anchor = -1, int borderType = BORDER_CONSTANT); - - //! 
returns the primitive column filter with the specified kernel. - //! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 dst type. - //! there are two version of algorithm: NPP and OpenCV. - //! NPP calls when dstType == CV_8UC1 or dstType == CV_8UC4 and bufType == dstType, - //! otherwise calls OpenCV version. - //! NPP supports only BORDER_CONSTANT border type. - //! OpenCV version supports only CV_32F as buffer depth and - //! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types. - CV_EXPORTS Ptr getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, - int anchor = -1, int borderType = BORDER_CONSTANT); - - //! returns the separable linear filter engine - CV_EXPORTS Ptr createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, - const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, - int columnBorderType = -1); - - //! returns filter engine for the generalized Sobel operator - CV_EXPORTS Ptr createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1); - - //! returns the Gaussian filter engine - CV_EXPORTS Ptr createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1); - - //! returns maximum filter - CV_EXPORTS Ptr getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1)); - - //! returns minimum filter - CV_EXPORTS Ptr getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1)); - - //! smooths the image using the normalized box filter - //! supports CV_8UC1, CV_8UC4 types - CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null()); - - //! a synonym for normalized box filter - static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null()) { boxFilter(src, dst, -1, ksize, anchor, stream); } - - //! erodes the image (applies the local minimum operator) - CV_EXPORTS void erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null()); - - //! dilates the image (applies the local maximum operator) - CV_EXPORTS void dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null()); - - //! applies an advanced morphological operation to the image - CV_EXPORTS void morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null()); - - //! applies non-separable 2D linear filter to the image - CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), Stream& stream = Stream::Null()); - - //! applies separable 2D linear filter to the image - CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, - Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null()); - - //! 
applies generalized Sobel operator to the image - CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null()); - - //! applies the vertical or horizontal Scharr operator to the image - CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null()); - - //! smooths the image using Gaussian filter. - CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null()); - - //! applies Laplacian operator to the image - //! supports only ksize = 1 and ksize = 3 - CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, Stream& stream = Stream::Null()); - //////////////////////////////// Image Labeling //////////////////////////////// //!performs labeling via graph cuts diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index bb86cc8efb..def579702c 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -908,29 +908,31 @@ namespace cv { namespace gpu { namespace imgproc template - void downsampleCaller(const DevMem2D src, DevMem2D dst) + void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream) { dim3 threads(32, 8); dim3 grid(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y)); - downsampleKernel<<>>(DevMem2D_(src), DevMem2D_(dst)); + downsampleKernel<<>>(DevMem2D_(src), DevMem2D_(dst)); cudaSafeCall(cudaGetLastError()); - cudaSafeCall(cudaDeviceSynchronize()); + + if (stream == 0) + cudaSafeCall(cudaDeviceSynchronize()); } - template void downsampleCaller(const DevMem2D src, DevMem2D dst); - template void downsampleCaller(const DevMem2D src, DevMem2D dst); - template void downsampleCaller(const DevMem2D src, DevMem2D dst); - template void downsampleCaller(const DevMem2D src, DevMem2D dst); - template void downsampleCaller(const DevMem2D src, DevMem2D dst); - template void downsampleCaller(const DevMem2D src, DevMem2D dst); - template void downsampleCaller(const DevMem2D src, DevMem2D dst); - template void downsampleCaller(const DevMem2D src, DevMem2D dst); - template void downsampleCaller(const DevMem2D src, DevMem2D dst); - template void downsampleCaller(const DevMem2D src, DevMem2D dst); - template void downsampleCaller(const DevMem2D src, DevMem2D dst); - template void downsampleCaller(const DevMem2D src, DevMem2D dst); + template void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void downsampleCaller(const 
DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); ////////////////////////////////////////////////////////////////////////// @@ -952,29 +954,31 @@ namespace cv { namespace gpu { namespace imgproc template - void upsampleCaller(const DevMem2D src, DevMem2D dst) + void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream) { dim3 threads(32, 8); dim3 grid(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y)); - upsampleKernel<<>>(DevMem2D_(src), DevMem2D_(dst)); + upsampleKernel<<>>(DevMem2D_(src), DevMem2D_(dst)); cudaSafeCall(cudaGetLastError()); - cudaSafeCall(cudaDeviceSynchronize()); + + if (stream == 0) + cudaSafeCall(cudaDeviceSynchronize()); } - template void upsampleCaller(const DevMem2D src, DevMem2D dst); - template void upsampleCaller(const DevMem2D src, DevMem2D dst); - template void upsampleCaller(const DevMem2D src, DevMem2D dst); - template void upsampleCaller(const DevMem2D src, DevMem2D dst); - template void upsampleCaller(const DevMem2D src, DevMem2D dst); - template void upsampleCaller(const DevMem2D src, DevMem2D dst); - template void upsampleCaller(const DevMem2D src, DevMem2D dst); - template void upsampleCaller(const DevMem2D src, DevMem2D dst); - template void upsampleCaller(const DevMem2D src, DevMem2D dst); - template void upsampleCaller(const DevMem2D src, DevMem2D dst); - template void upsampleCaller(const DevMem2D src, DevMem2D dst); - template void upsampleCaller(const DevMem2D src, DevMem2D dst); + template void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); + template void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); ////////////////////////////////////////////////////////////////////////// diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 52e23d60b1..69ac7c9079 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -79,10 +79,14 @@ void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int) { throw_nogpu(); } void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); } void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); } void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); } -void cv::gpu::downsample(const GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::upsample(const GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::pyrDown(const GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::pyrUp(const GpuMat&, GpuMat&) { 
throw_nogpu(); } +void cv::gpu::downsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } +void cv::gpu::upsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } +void cv::gpu::pyrDown(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } +void cv::gpu::PyrDownBuf::create(Size, int) { throw_nogpu(); } +void cv::gpu::pyrDown(const GpuMat&, GpuMat&, PyrDownBuf&, Stream&) { throw_nogpu(); } +void cv::gpu::pyrUp(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } +void cv::gpu::PyrUpBuf::create(Size, int) { throw_nogpu(); } +void cv::gpu::pyrUp(const GpuMat&, GpuMat&, PyrUpBuf&, Stream&) { throw_nogpu(); } @@ -1413,15 +1417,15 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, namespace cv { namespace gpu { namespace imgproc { template - void downsampleCaller(const DevMem2D src, DevMem2D dst); + void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); }}} -void cv::gpu::downsample(const GpuMat& src, GpuMat& dst) +void cv::gpu::downsample(const GpuMat& src, GpuMat& dst, Stream& stream) { CV_Assert(src.depth() < CV_64F && src.channels() <= 4); - typedef void (*Caller)(const DevMem2D, DevMem2D); + typedef void (*Caller)(const DevMem2D, DevMem2D, cudaStream_t stream); static const Caller callers[6][4] = {{imgproc::downsampleCaller, imgproc::downsampleCaller, imgproc::downsampleCaller, imgproc::downsampleCaller}, @@ -1437,7 +1441,7 @@ void cv::gpu::downsample(const GpuMat& src, GpuMat& dst) CV_Error(CV_StsUnsupportedFormat, "bad number of channels"); dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type()); - caller(src, dst.reshape(1)); + caller(src, dst.reshape(1), StreamAccessor::getStream(stream)); } @@ -1447,15 +1451,15 @@ void cv::gpu::downsample(const GpuMat& src, GpuMat& dst) namespace cv { namespace gpu { namespace imgproc { template - void upsampleCaller(const DevMem2D src, DevMem2D dst); + void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream); }}} -void cv::gpu::upsample(const GpuMat& src, GpuMat& dst) +void cv::gpu::upsample(const GpuMat& src, GpuMat& dst, Stream& stream) { CV_Assert(src.depth() < CV_64F && src.channels() <= 4); - typedef void (*Caller)(const DevMem2D, DevMem2D); + typedef void (*Caller)(const DevMem2D, DevMem2D, cudaStream_t stream); static const Caller callers[6][5] = {{imgproc::upsampleCaller, imgproc::upsampleCaller, imgproc::upsampleCaller, imgproc::upsampleCaller}, @@ -1471,31 +1475,73 @@ void cv::gpu::upsample(const GpuMat& src, GpuMat& dst) CV_Error(CV_StsUnsupportedFormat, "bad number of channels"); dst.create(src.rows*2, src.cols*2, src.type()); - caller(src, dst.reshape(1)); + caller(src, dst.reshape(1), StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// // pyrDown -void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst) +void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream) { - Mat ker = getGaussianKernel(5, 0, std::max(CV_32F, src.depth())); - GpuMat buf; - sepFilter2D(src, buf, src.depth(), ker, ker); - downsample(buf, dst); + PyrDownBuf buf; + pyrDown(src, dst, buf, stream); +} + +cv::Mat cv::gpu::PyrDownBuf::ker; + +void cv::gpu::PyrDownBuf::create(Size image_size, int image_type_) +{ + if (ker.empty() || image_type_ != image_type) + ker = getGaussianKernel(5, 0, std::max(CV_32F, CV_MAT_DEPTH(image_type_))); + + ensureSizeIsEnough(image_size.height, image_size.width, image_type_, buf); + + if (filter.empty() || image_type_ != image_type) + { + image_type = image_type_; + filter = 
createSeparableLinearFilter_GPU(image_type, image_type, ker, ker); + } +} + +void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, PyrDownBuf& buf, Stream& stream) +{ + buf.create(src.size(), src.type()); + buf.filter->apply(src, buf.buf, Rect(0, 0, src.cols, src.rows), stream); + downsample(buf.buf, dst, stream); } ////////////////////////////////////////////////////////////////////////////// // pyrUp -void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst) +void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream) { - GpuMat buf; - upsample(src, buf); - Mat ker = getGaussianKernel(5, 0, std::max(CV_32F, src.depth())) * 2; - sepFilter2D(buf, dst, buf.depth(), ker, ker); + PyrUpBuf buf; + pyrUp(src, dst, buf, stream); +} + +cv::Mat cv::gpu::PyrUpBuf::ker; + +void cv::gpu::PyrUpBuf::create(Size image_size, int image_type_) +{ + if (ker.empty() || image_type_ != image_type) + ker = getGaussianKernel(5, 0, std::max(CV_32F, CV_MAT_DEPTH(image_type_))) * 2; + + ensureSizeIsEnough(image_size.height * 2, image_size.width * 2, image_type_, buf); + + if (filter.empty() || image_type_ != image_type) + { + image_type = image_type_; + filter = createSeparableLinearFilter_GPU(image_type, image_type, ker, ker); + } +} + +void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, PyrUpBuf& buf, Stream& stream) +{ + buf.create(src.size(), src.type()); + upsample(src, buf.buf, stream); + buf.filter->apply(buf.buf, dst, Rect(0, 0, buf.buf.cols, buf.buf.rows), stream); } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/test/test_features2d.cpp b/modules/gpu/test/test_features2d.cpp index 083a35bcfa..0479cc2fa4 100644 --- a/modules/gpu/test/test_features2d.cpp +++ b/modules/gpu/test/test_features2d.cpp @@ -560,778 +560,3 @@ INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher, testing::Combine( testing::Values(57, 64, 83, 128, 179, 256, 304))); #endif // HAVE_CUDA - - - - - - - - - - - - - - - - - -//struct CV_GpuBFMTest : CV_GpuTestBase -//{ -// void run_gpu_test(); -// -// void generateData(GpuMat& query, GpuMat& train, int dim, int depth); -// -// virtual void test(const GpuMat& query, const GpuMat& train, BruteForceMatcher_GPU_base& matcher) = 0; -// -// static const int queryDescCount = 300; // must be even number because we split train data in some cases in two -// static const int countFactor = 4; // do not change it -//}; -// -//void CV_GpuBFMTest::run_gpu_test() -//{ -// BruteForceMatcher_GPU_base::DistType dists[] = {BruteForceMatcher_GPU_base::L1Dist, BruteForceMatcher_GPU_base::L2Dist, BruteForceMatcher_GPU_base::HammingDist}; -// const char* dists_str[] = {"L1Dist", "L2Dist", "HammingDist"}; -// int dists_count = sizeof(dists) / sizeof(dists[0]); -// -// RNG rng = ts->get_rng(); -// -// int dims[] = {rng.uniform(30, 60), 64, rng.uniform(70, 110), 128, rng.uniform(130, 250), 256, rng.uniform(260, 350)}; -// int dims_count = sizeof(dims) / sizeof(dims[0]); -// -// for (int dist = 0; dist < dists_count; ++dist) -// { -// int depth_end = dists[dist] == BruteForceMatcher_GPU_base::HammingDist ? 
CV_32S : CV_32F; -// -// for (int depth = CV_8U; depth <= depth_end; ++depth) -// { -// for (int dim = 0; dim < dims_count; ++dim) -// { -// PRINT_ARGS("dist=%s depth=%s dim=%d", dists_str[dist], getTypeName(depth), dims[dim]); -// -// BruteForceMatcher_GPU_base matcher(dists[dist]); -// -// GpuMat query, train; -// generateData(query, train, dim, depth); -// -// test(query, train, matcher); -// } -// } -// } -//} -// -//void CV_GpuBFMTest::generateData(GpuMat& queryGPU, GpuMat& trainGPU, int dim, int depth) -//{ -// RNG& rng = ts->get_rng(); -// -// Mat queryBuf, trainBuf; -// -// // Generate query descriptors randomly. -// // Descriptor vector elements are integer values. -// queryBuf.create(queryDescCount, dim, CV_32SC1); -// rng.fill(queryBuf, RNG::UNIFORM, Scalar::all(0), Scalar(3)); -// queryBuf.convertTo(queryBuf, CV_32FC1); -// -// // Generate train decriptors as follows: -// // copy each query descriptor to train set countFactor times -// // and perturb some one element of the copied descriptors in -// // in ascending order. General boundaries of the perturbation -// // are (0.f, 1.f). -// trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1); -// float step = 1.f / countFactor; -// for (int qIdx = 0; qIdx < queryDescCount; qIdx++) -// { -// Mat queryDescriptor = queryBuf.row(qIdx); -// for (int c = 0; c < countFactor; c++) -// { -// int tIdx = qIdx * countFactor + c; -// Mat trainDescriptor = trainBuf.row(tIdx); -// queryDescriptor.copyTo(trainDescriptor); -// int elem = rng(dim); -// float diff = rng.uniform(step * c, step * (c + 1)); -// trainDescriptor.at(0, elem) += diff; -// } -// } -// -// Mat query, train; -// queryBuf.convertTo(query, depth); -// trainBuf.convertTo(train, depth); -// -// queryGPU.upload(query); -// trainGPU.upload(train); -//} -// -//#define GPU_BFM_TEST(test_name) -// struct CV_GpuBFM_ ##test_name ## _Test : CV_GpuBFMTest -// { -// void test(const GpuMat& query, const GpuMat& train, BruteForceMatcher_GPU_base& matcher); -// }; -// TEST(BruteForceMatcher, test_name) { CV_GpuBFM_ ##test_name ## _Test test; test.safe_run(); } -// void CV_GpuBFM_ ##test_name ## _Test::test(const GpuMat& query, const GpuMat& train, BruteForceMatcher_GPU_base& matcher) -// -///////////////////////////////////////////////////////////////////////////////////////////////////////// -//// match -// -//GPU_BFM_TEST(match) -//{ -// vector matches; -// -// matcher.match(query, train, matches); -// -// CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT); -// -// int badCount = 0; -// for (size_t i = 0; i < matches.size(); i++) -// { -// DMatch match = matches[i]; -// if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0)) -// badCount++; -// } -// -// CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT); -//} -// -//GPU_BFM_TEST(match_add) -//{ -// vector matches; -// -// // make add() twice to test such case -// matcher.add(vector(1, train.rowRange(0, train.rows/2))); -// matcher.add(vector(1, train.rowRange(train.rows/2, train.rows))); -// -// // prepare masks (make first nearest match illegal) -// vector masks(2); -// for (int mi = 0; mi < 2; mi++) -// { -// masks[mi] = GpuMat(query.rows, train.rows/2, CV_8UC1, Scalar::all(1)); -// for (int di = 0; di < queryDescCount/2; di++) -// masks[mi].col(di * countFactor).setTo(Scalar::all(0)); -// } -// -// matcher.match(query, matches, masks); -// -// CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT); -// -// int badCount = 0; -// for (size_t i = 0; i < 
matches.size(); i++) -// { -// DMatch match = matches[i]; -// int shift = matcher.isMaskSupported() ? 1 : 0; -// { -// if (i < queryDescCount / 2) -// { -// if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + shift) || (match.imgIdx != 0)) -// badCount++; -// } -// else -// { -// if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + shift) || (match.imgIdx != 1)) -// badCount++; -// } -// } -// } -// -// CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT); -//} -// -///////////////////////////////////////////////////////////////////////////////////////////////////////// -//// knnMatch -// -//GPU_BFM_TEST(knnMatch) -//{ -// const int knn = 3; -// -// vector< vector > matches; -// -// matcher.knnMatch(query, train, matches, knn); -// -// CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT); -// -// int badCount = 0; -// for (size_t i = 0; i < matches.size(); i++) -// { -// if ((int)matches[i].size() != knn) -// badCount++; -// else -// { -// int localBadCount = 0; -// for (int k = 0; k < knn; k++) -// { -// DMatch match = matches[i][k]; -// if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0)) -// localBadCount++; -// } -// badCount += localBadCount > 0 ? 1 : 0; -// } -// } -// -// CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT); -//} -// -//GPU_BFM_TEST(knnMatch_add) -//{ -// const int knn = 2; -// vector > matches; -// -// // make add() twice to test such case -// matcher.add(vector(1,train.rowRange(0, train.rows / 2))); -// matcher.add(vector(1,train.rowRange(train.rows / 2, train.rows))); -// -// // prepare masks (make first nearest match illegal) -// vector masks(2); -// for (int mi = 0; mi < 2; mi++ ) -// { -// masks[mi] = GpuMat(query.rows, train.rows / 2, CV_8UC1, Scalar::all(1)); -// for (int di = 0; di < queryDescCount / 2; di++) -// masks[mi].col(di * countFactor).setTo(Scalar::all(0)); -// } -// -// matcher.knnMatch(query, matches, knn, masks); -// -// CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT); -// -// int badCount = 0; -// int shift = matcher.isMaskSupported() ? 1 : 0; -// for (size_t i = 0; i < matches.size(); i++) -// { -// if ((int)matches[i].size() != knn) -// badCount++; -// else -// { -// int localBadCount = 0; -// for (int k = 0; k < knn; k++) -// { -// DMatch match = matches[i][k]; -// { -// if (i < queryDescCount / 2) -// { -// if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) ) -// localBadCount++; -// } -// else -// { -// if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) ) -// localBadCount++; -// } -// } -// } -// badCount += localBadCount > 0 ? 
1 : 0; -// } -// } -// -// CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT); -//} -// -///////////////////////////////////////////////////////////////////////////////////////////////////////// -//// radiusMatch -// -//GPU_BFM_TEST(radiusMatch) -//{ -// CHECK_RETURN(support(GLOBAL_ATOMICS), TS::SKIPPED); -// -// const float radius = 1.f / countFactor; -// -// vector< vector > matches; -// -// matcher.radiusMatch(query, train, matches, radius); -// -// CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT); -// -// int badCount = 0; -// for (size_t i = 0; i < matches.size(); i++) -// { -// if ((int)matches[i].size() != 1) -// badCount++; -// else -// { -// DMatch match = matches[i][0]; -// if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0)) -// badCount++; -// } -// } -// -// CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT); -//} -// -//GPU_BFM_TEST(radiusMatch_add) -//{ -// CHECK_RETURN(support(GLOBAL_ATOMICS), TS::SKIPPED); -// -// int n = 3; -// const float radius = 1.f / countFactor * n; -// vector< vector > matches; -// -// // make add() twice to test such case -// matcher.add(vector(1,train.rowRange(0, train.rows / 2))); -// matcher.add(vector(1,train.rowRange(train.rows / 2, train.rows))); -// -// // prepare masks (make first nearest match illegal) -// vector masks(2); -// for (int mi = 0; mi < 2; mi++) -// { -// masks[mi] = GpuMat(query.rows, train.rows / 2, CV_8UC1, Scalar::all(1)); -// for (int di = 0; di < queryDescCount / 2; di++) -// masks[mi].col(di * countFactor).setTo(Scalar::all(0)); -// } -// -// matcher.radiusMatch(query, matches, radius, masks); -// -// CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT); -// -// int badCount = 0; -// int shift = matcher.isMaskSupported() ? 1 : 0; -// int needMatchCount = matcher.isMaskSupported() ? n-1 : n; -// for (size_t i = 0; i < matches.size(); i++) -// { -// if ((int)matches[i].size() != needMatchCount) -// badCount++; -// else -// { -// int localBadCount = 0; -// for (int k = 0; k < needMatchCount; k++) -// { -// DMatch match = matches[i][k]; -// { -// if (i < queryDescCount / 2) -// { -// if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) ) -// localBadCount++; -// } -// else -// { -// if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) ) -// localBadCount++; -// } -// } -// } -// badCount += localBadCount > 0 ? 
1 : 0; -// } -// } -// -// CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT); -//} -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -////struct CV_GpuBruteForceMatcherTest : CV_GpuTestBase -////{ -//// void run_gpu_test(); -//// -//// void emptyDataTest(); -//// void dataTest(int dim); -//// -//// void generateData(GpuMat& query, GpuMat& train, int dim); -//// -//// void matchTest(const GpuMat& query, const GpuMat& train); -//// void knnMatchTest(const GpuMat& query, const GpuMat& train); -//// void radiusMatchTest(const GpuMat& query, const GpuMat& train); -//// -//// BruteForceMatcher_GPU< L2 > dmatcher; -//// -//// static const int queryDescCount = 300; // must be even number because we split train data in some cases in two -//// static const int countFactor = 4; // do not change it -////}; -//// -////void CV_GpuBruteForceMatcherTest::emptyDataTest() -////{ -//// GpuMat queryDescriptors, trainDescriptors, mask; -//// vector trainDescriptorCollection, masks; -//// vector matches; -//// vector< vector > vmatches; -//// -//// try -//// { -//// dmatcher.match(queryDescriptors, trainDescriptors, matches, mask); -//// } -//// catch(...) -//// { -//// PRINTLN("match() on empty descriptors must not generate exception (1)"); -//// ts->set_failed_test_info(TS::FAIL_EXCEPTION); -//// } -//// -//// try -//// { -//// dmatcher.knnMatch(queryDescriptors, trainDescriptors, vmatches, 2, mask); -//// } -//// catch(...) -//// { -//// PRINTLN("knnMatch() on empty descriptors must not generate exception (1)"); -//// ts->set_failed_test_info(TS::FAIL_EXCEPTION); -//// } -//// -//// try -//// { -//// dmatcher.radiusMatch(queryDescriptors, trainDescriptors, vmatches, 10.f, mask); -//// } -//// catch(...) -//// { -//// PRINTLN("radiusMatch() on empty descriptors must not generate exception (1)"); -//// ts->set_failed_test_info(TS::FAIL_EXCEPTION); -//// } -//// -//// try -//// { -//// dmatcher.add(trainDescriptorCollection); -//// } -//// catch(...) -//// { -//// PRINTLN("add() on empty descriptors must not generate exception"); -//// ts->set_failed_test_info(TS::FAIL_EXCEPTION); -//// } -//// -//// try -//// { -//// dmatcher.match(queryDescriptors, matches, masks); -//// } -//// catch(...) -//// { -//// PRINTLN("match() on empty descriptors must not generate exception (2)"); -//// ts->set_failed_test_info(TS::FAIL_EXCEPTION); -//// } -//// -//// try -//// { -//// dmatcher.knnMatch(queryDescriptors, vmatches, 2, masks); -//// } -//// catch(...) -//// { -//// PRINTLN("knnMatch() on empty descriptors must not generate exception (2)"); -//// ts->set_failed_test_info(TS::FAIL_EXCEPTION); -//// } -//// -//// try -//// { -//// dmatcher.radiusMatch( queryDescriptors, vmatches, 10.f, masks ); -//// } -//// catch(...) -//// { -//// PRINTLN("radiusMatch() on empty descriptors must not generate exception (2)"); -//// ts->set_failed_test_info(TS::FAIL_EXCEPTION); -//// } -//// -////} -//// -////void CV_GpuBruteForceMatcherTest::generateData(GpuMat& queryGPU, GpuMat& trainGPU, int dim) -////{ -//// Mat query, train; -//// RNG& rng = ts->get_rng(); -//// -//// // Generate query descriptors randomly. -//// // Descriptor vector elements are integer values. 
-//// Mat buf(queryDescCount, dim, CV_32SC1); -//// rng.fill(buf, RNG::UNIFORM, Scalar::all(0), Scalar(3)); -//// buf.convertTo(query, CV_32FC1); -//// -//// // Generate train decriptors as follows: -//// // copy each query descriptor to train set countFactor times -//// // and perturb some one element of the copied descriptors in -//// // in ascending order. General boundaries of the perturbation -//// // are (0.f, 1.f). -//// train.create( query.rows*countFactor, query.cols, CV_32FC1 ); -//// float step = 1.f / countFactor; -//// for (int qIdx = 0; qIdx < query.rows; qIdx++) -//// { -//// Mat queryDescriptor = query.row(qIdx); -//// for (int c = 0; c < countFactor; c++) -//// { -//// int tIdx = qIdx * countFactor + c; -//// Mat trainDescriptor = train.row(tIdx); -//// queryDescriptor.copyTo(trainDescriptor); -//// int elem = rng(dim); -//// float diff = rng.uniform(step * c, step * (c + 1)); -//// trainDescriptor.at(0, elem) += diff; -//// } -//// } -//// -//// queryGPU.upload(query); -//// trainGPU.upload(train); -////} -//// -////void CV_GpuBruteForceMatcherTest::matchTest(const GpuMat& query, const GpuMat& train) -////{ -//// dmatcher.clear(); -//// -//// // test const version of match() -//// { -//// vector matches; -//// dmatcher.match(query, train, matches); -//// -//// CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT); -//// -//// int badCount = 0; -//// for (size_t i = 0; i < matches.size(); i++) -//// { -//// DMatch match = matches[i]; -//// if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0)) -//// badCount++; -//// } -//// -//// CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT); -//// } -//// -//// // test version of match() with add() -//// { -//// vector matches; -//// -//// // make add() twice to test such case -//// dmatcher.add(vector(1, train.rowRange(0, train.rows/2))); -//// dmatcher.add(vector(1, train.rowRange(train.rows/2, train.rows))); -//// -//// // prepare masks (make first nearest match illegal) -//// vector masks(2); -//// for (int mi = 0; mi < 2; mi++) -//// { -//// masks[mi] = GpuMat(query.rows, train.rows/2, CV_8UC1, Scalar::all(1)); -//// for (int di = 0; di < queryDescCount/2; di++) -//// masks[mi].col(di * countFactor).setTo(Scalar::all(0)); -//// } -//// -//// dmatcher.match(query, matches, masks); -//// -//// CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT); -//// -//// int badCount = 0; -//// for (size_t i = 0; i < matches.size(); i++) -//// { -//// DMatch match = matches[i]; -//// int shift = dmatcher.isMaskSupported() ? 
1 : 0; -//// { -//// if (i < queryDescCount / 2) -//// { -//// if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + shift) || (match.imgIdx != 0)) -//// badCount++; -//// } -//// else -//// { -//// if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + shift) || (match.imgIdx != 1)) -//// badCount++; -//// } -//// } -//// } -//// -//// CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT); -//// } -////} -//// -////void CV_GpuBruteForceMatcherTest::knnMatchTest(const GpuMat& query, const GpuMat& train) -////{ -//// dmatcher.clear(); -//// -//// // test const version of knnMatch() -//// { -//// const int knn = 3; -//// -//// vector< vector > matches; -//// dmatcher.knnMatch(query, train, matches, knn); -//// -//// CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT); -//// -//// int badCount = 0; -//// for (size_t i = 0; i < matches.size(); i++) -//// { -//// if ((int)matches[i].size() != knn) -//// badCount++; -//// else -//// { -//// int localBadCount = 0; -//// for (int k = 0; k < knn; k++) -//// { -//// DMatch match = matches[i][k]; -//// if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0)) -//// localBadCount++; -//// } -//// badCount += localBadCount > 0 ? 1 : 0; -//// } -//// } -//// -//// CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT); -//// } -//// -//// // test version of knnMatch() with add() -//// { -//// const int knn = 2; -//// vector > matches; -//// -//// // make add() twice to test such case -//// dmatcher.add(vector(1,train.rowRange(0, train.rows / 2))); -//// dmatcher.add(vector(1,train.rowRange(train.rows / 2, train.rows))); -//// -//// // prepare masks (make first nearest match illegal) -//// vector masks(2); -//// for (int mi = 0; mi < 2; mi++ ) -//// { -//// masks[mi] = GpuMat(query.rows, train.rows / 2, CV_8UC1, Scalar::all(1)); -//// for (int di = 0; di < queryDescCount / 2; di++) -//// masks[mi].col(di * countFactor).setTo(Scalar::all(0)); -//// } -//// -//// dmatcher.knnMatch(query, matches, knn, masks); -//// -//// CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT); -//// -//// int badCount = 0; -//// int shift = dmatcher.isMaskSupported() ? 1 : 0; -//// for (size_t i = 0; i < matches.size(); i++) -//// { -//// if ((int)matches[i].size() != knn) -//// badCount++; -//// else -//// { -//// int localBadCount = 0; -//// for (int k = 0; k < knn; k++) -//// { -//// DMatch match = matches[i][k]; -//// { -//// if (i < queryDescCount / 2) -//// { -//// if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) ) -//// localBadCount++; -//// } -//// else -//// { -//// if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) ) -//// localBadCount++; -//// } -//// } -//// } -//// badCount += localBadCount > 0 ? 
1 : 0; -//// } -//// } -//// -//// CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT); -//// } -////} -//// -////void CV_GpuBruteForceMatcherTest::radiusMatchTest(const GpuMat& query, const GpuMat& train) -////{ -//// CHECK_RETURN(support(GLOBAL_ATOMICS), TS::SKIPPED); -//// -//// dmatcher.clear(); -//// -//// // test const version of match() -//// { -//// const float radius = 1.f / countFactor; -//// -//// vector< vector > matches; -//// dmatcher.radiusMatch(query, train, matches, radius); -//// -//// CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT); -//// -//// int badCount = 0; -//// for (size_t i = 0; i < matches.size(); i++) -//// { -//// if ((int)matches[i].size() != 1) -//// badCount++; -//// else -//// { -//// DMatch match = matches[i][0]; -//// if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0)) -//// badCount++; -//// } -//// } -//// -//// CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT); -//// } -//// -//// // test version of match() with add() -//// { -//// int n = 3; -//// const float radius = 1.f / countFactor * n; -//// vector< vector > matches; -//// -//// // make add() twice to test such case -//// dmatcher.add(vector(1,train.rowRange(0, train.rows / 2))); -//// dmatcher.add(vector(1,train.rowRange(train.rows / 2, train.rows))); -//// -//// // prepare masks (make first nearest match illegal) -//// vector masks(2); -//// for (int mi = 0; mi < 2; mi++) -//// { -//// masks[mi] = GpuMat(query.rows, train.rows / 2, CV_8UC1, Scalar::all(1)); -//// for (int di = 0; di < queryDescCount / 2; di++) -//// masks[mi].col(di * countFactor).setTo(Scalar::all(0)); -//// } -//// -//// dmatcher.radiusMatch(query, matches, radius, masks); -//// -//// CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT); -//// -//// int badCount = 0; -//// int shift = dmatcher.isMaskSupported() ? 1 : 0; -//// int needMatchCount = dmatcher.isMaskSupported() ? n-1 : n; -//// for (size_t i = 0; i < matches.size(); i++) -//// { -//// if ((int)matches[i].size() != needMatchCount) -//// badCount++; -//// else -//// { -//// int localBadCount = 0; -//// for (int k = 0; k < needMatchCount; k++) -//// { -//// DMatch match = matches[i][k]; -//// { -//// if (i < queryDescCount / 2) -//// { -//// if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) ) -//// localBadCount++; -//// } -//// else -//// { -//// if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) ) -//// localBadCount++; -//// } -//// } -//// } -//// badCount += localBadCount > 0 ? 
1 : 0;
-////            }
-////        }
-////
-////        CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
-////    }
-////}
-////
-////void CV_GpuBruteForceMatcherTest::dataTest(int dim)
-////{
-////    GpuMat query, train;
-////    generateData(query, train, dim);
-////
-////    matchTest(query, train);
-////    knnMatchTest(query, train);
-////    radiusMatchTest(query, train);
-////
-////    dmatcher.clear();
-////}
-////
-////void CV_GpuBruteForceMatcherTest::run_gpu_test()
-////{
-////    emptyDataTest();
-////
-////    dataTest(50);
-////    dataTest(64);
-////    dataTest(100);
-////    dataTest(128);
-////    dataTest(200);
-////    dataTest(256);
-////    dataTest(300);
-////}
-////
-////TEST(BruteForceMatcher, accuracy) { CV_GpuBruteForceMatcherTest test; test.safe_run(); }
diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp
index 48def0cffe..630708b4ee 100644
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -866,3 +866,51 @@ TEST(GaussianBlur)
         GPU_OFF;
     }
 }
+
+TEST(pyrDown)
+{
+    gpu::PyrDownBuf buf;
+
+    for (int size = 4000; size >= 1000; size -= 1000)
+    {
+        SUBTEST << "size " << size;
+
+        Mat src; gen(src, size, size, CV_16SC3, 0, 256);
+        Mat dst(Size(src.cols / 2, src.rows / 2), src.type());
+
+        CPU_ON;
+        pyrDown(src, dst);
+        CPU_OFF;
+
+        gpu::GpuMat d_src(src);
+        gpu::GpuMat d_dst(Size(src.cols / 2, src.rows / 2), src.type());
+
+        GPU_ON;
+        gpu::pyrDown(d_src, d_dst, buf);
+        GPU_OFF;
+    }
+}
+
+TEST(pyrUp)
+{
+    gpu::PyrUpBuf buf;
+
+    for (int size = 4000; size >= 1000; size -= 1000)
+    {
+        SUBTEST << "size " << size;
+
+        Mat src; gen(src, size, size, CV_16SC3, 0, 256);
+        Mat dst(Size(src.cols * 2, src.rows * 2), src.type());
+
+        CPU_ON;
+        pyrUp(src, dst);
+        CPU_OFF;
+
+        gpu::GpuMat d_src(src);
+        gpu::GpuMat d_dst(Size(src.cols * 2, src.rows * 2), src.type());
+
+        GPU_ON;
+        gpu::pyrUp(d_src, d_dst, buf);
+        GPU_OFF;
+    }
+}
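--
The buffered entry points above are meant for repeated calls: PyrDownBuf holds
the intermediate image and the separable Gaussian filter engine so they are
allocated once, and the Stream parameter lets the filtering and the downsample
kernel run asynchronously. A minimal usage sketch under those assumptions
follows; buildPyramid is a hypothetical helper written for illustration, not
something added by this patch.

    #include <vector>
    #include <opencv2/gpu/gpu.hpp>

    using namespace cv;

    // Build a levels-deep Gaussian pyramid, reusing one PyrDownBuf and
    // enqueueing all work on a single stream.
    void buildPyramid(const gpu::GpuMat& src, std::vector<gpu::GpuMat>& pyr, int levels)
    {
        gpu::Stream stream;
        gpu::PyrDownBuf buf(src.size(), src.type()); // buffer + filter created once

        pyr.resize(levels);
        pyr[0] = src;
        for (int i = 1; i < levels; ++i)
            gpu::pyrDown(pyr[i - 1], pyr[i], buf, stream); // asynchronous on the stream

        stream.waitForCompletion(); // wait for all levels before using them
    }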