From 49fa536c6259761e4e259fddecb55646f276e669 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov
Date: Mon, 4 Oct 2010 11:42:40 +0000
Subject: [PATCH] added Sobel, GaussianBlur, Canny to gpu module. minor fix of
 matrix_operations.cpp.

---
 modules/gpu/include/opencv2/gpu/gpu.hpp |   9 +
 modules/gpu/src/filtering_npp.cpp       | 184 ++++++++++
 modules/gpu/src/imgproc_gpu.cpp         |  30 ++
 modules/gpu/src/matrix_operations.cpp   | 429 ++++++++++++++----------
 tests/gpu/src/gputest_main.cpp          |   3 +
 tests/gpu/src/imgproc_gpu.cpp           | 112 +++++++
 6 files changed, 589 insertions(+), 178 deletions(-)

diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 61a202c6a1..d8dc5bf6ff 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -533,9 +533,18 @@ namespace cv
         //! applies an advanced morphological operation to the image
         CV_EXPORTS void morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations);
 
+        //! 1D mask Window Sum for 8 bit images
         CV_EXPORTS void sumWindowColumn(const GpuMat& src, GpuMat& dst, int ksize, int anchor = -1);
         CV_EXPORTS void sumWindowRow(const GpuMat& src, GpuMat& dst, int ksize, int anchor = -1);
 
+        //! applies generalized Sobel operator to the image
+        CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1);
+
+        //! smooths the image using Gaussian filter
+        CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0);
+
+        //! applies Canny edge detector and produces the edge map
+        CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize = 3);
 
         //////////////////////////////// Image Labeling ////////////////////////////////
 
diff --git a/modules/gpu/src/filtering_npp.cpp b/modules/gpu/src/filtering_npp.cpp
index 4a9fd888ee..1100db72cc 100644
--- a/modules/gpu/src/filtering_npp.cpp
+++ b/modules/gpu/src/filtering_npp.cpp
@@ -54,6 +54,8 @@ void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }
 void cv::gpu::boxFilter(const GpuMat&, GpuMat&, Size, Point) { throw_nogpu(); }
 void cv::gpu::sumWindowColumn(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }
 void cv::gpu::sumWindowRow(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }
+void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double) { throw_nogpu(); }
+void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double) { throw_nogpu(); }
 
 #else
 
@@ -237,4 +239,186 @@ void cv::gpu::sumWindowRow(const GpuMat& src, GpuMat& dst, int ksize, int anchor)
     sumWindowCaller(nppiSumWindowRow_8u32f_C1R, src, dst, ksize, anchor);
 }
 
+////////////////////////////////////////////////////////////////////////
+// Filter Engine
+
+namespace
+{
+    typedef NppStatus (*nppFilter1D_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oROI,
+                                       const Npp32s* pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);
+    typedef NppStatus (*nppFilter2D_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI,
+                                       const Npp32s* pKernel, NppiSize oKernelSize, NppiPoint oAnchor, Npp32s nDivisor);
+
+    void applyRowFilter(const GpuMat& src, GpuMat& dst, const GpuMat& rowKernel, Npp32s anchor = -1, Npp32s nDivisor = 1)
+    {
+        static const nppFilter1D_t nppFilter1D_callers[] = {nppiFilterRow_8u_C1R, nppiFilterRow_8u_C4R};
+
+        CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
+
+        int kRowSize = rowKernel.cols;
+
+        dst.create(src.size(), src.type());
+        dst = Scalar();
+
+        NppiSize oROI;
+        oROI.width = src.cols - kRowSize + 1;
+        oROI.height = src.rows;
+
+        if (anchor < 0)
+            anchor = kRowSize >> 1;
+
+        GpuMat srcROI = src.colRange(kRowSize-1, oROI.width);
+        GpuMat dstROI = dst.colRange(kRowSize-1, oROI.width);
+
+        nppFilter1D_callers[src.channels() >> 2](srcROI.ptr<Npp8u>(), srcROI.step, dstROI.ptr<Npp8u>(), dstROI.step, oROI,
+            rowKernel.ptr<Npp32s>(), kRowSize, anchor, nDivisor);
+    }
+
+    void applyColumnFilter(const GpuMat& src, GpuMat& dst, const GpuMat& columnKernel, Npp32s anchor = -1, Npp32s nDivisor = 1)
+    {
+        static const nppFilter1D_t nppFilter1D_callers[] = {nppiFilterColumn_8u_C1R, nppiFilterColumn_8u_C4R};
+
+        CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
+
+        int kColSize = columnKernel.cols;
+
+        dst.create(src.size(), src.type());
+        dst = Scalar();
+
+        NppiSize oROI;
+        oROI.width = src.cols;
+        oROI.height = src.rows - kColSize + 1;
+
+        if (anchor < 0)
+            anchor = kColSize >> 1;
+
+        GpuMat srcROI = src.rowRange(kColSize-1, oROI.height);
+        GpuMat dstROI = dst.rowRange(kColSize-1, oROI.height);
+
+        nppFilter1D_callers[src.channels() >> 2](srcROI.ptr<Npp8u>(), srcROI.step, dstROI.ptr<Npp8u>(), dstROI.step, oROI,
+            columnKernel.ptr<Npp32s>(), kColSize, anchor, nDivisor);
+    }
+
+    inline void applySeparableFilter(const GpuMat& src, GpuMat& dst, const GpuMat& rowKernel, const GpuMat& columnKernel,
+                                     const cv::Point& anchor = cv::Point(-1, -1), Npp32s nDivisor = 1)
+    {
+        GpuMat dstBuf;
+        applyRowFilter(src, dstBuf, rowKernel, anchor.x, nDivisor);
+        applyColumnFilter(dstBuf, dst, columnKernel, anchor.y, nDivisor);
+    }
+
+    void makeNppKernel(Mat kernel, GpuMat& dst)
+    {
+        kernel.convertTo(kernel, CV_32S);
+        kernel = kernel.t();
+        int ksize = kernel.cols;
+        for (int i = 0; i < ksize / 2; ++i)
+        {
+            std::swap(kernel.at<int>(0, i), kernel.at<int>(0, ksize - 1 - i));
+        }
+        dst.upload(kernel);
+    }
+
+    void applyFilter2D(const GpuMat& src, GpuMat& dst, const GpuMat& kernel, cv::Point anchor = cv::Point(-1, -1), Npp32s nDivisor = 1)
+    {
+        static const nppFilter2D_t nppFilter2D_callers[] = {nppiFilter_8u_C1R, nppiFilter_8u_C4R};
+
+        CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
+
+        dst.create(src.size(), src.type());
+        dst = Scalar();
+
+        NppiSize oROI;
+        oROI.width = src.cols - kernel.cols + 1;
+        oROI.height = src.rows - kernel.rows + 1;
+
+        if (anchor.x < 0)
+            anchor.x = kernel.cols >> 1;
+        if (anchor.y < 0)
+            anchor.y = kernel.rows >> 1;
+
+        GpuMat srcROI = src(Range(kernel.rows-1, oROI.height), Range(kernel.cols-1, oROI.width));
+        GpuMat dstROI = dst(Range(kernel.rows-1, oROI.height), Range(kernel.cols-1, oROI.width));
+
+        NppiSize oKernelSize;
+        oKernelSize.height = kernel.rows;
+        oKernelSize.width = kernel.cols;
+        NppiPoint oAnchor;
+        oAnchor.x = anchor.x;
+        oAnchor.y = anchor.y;
+
+        nppFilter2D_callers[src.channels() >> 2](srcROI.ptr<Npp8u>(), srcROI.step, dstROI.ptr<Npp8u>(), dstROI.step, oROI,
+            kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////
+// Sobel
+
+void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale)
+{
+    Mat kx, ky;
+    getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
+
+    if (scale != 1)
+    {
+        // usually the smoothing part is the slowest to compute,
+        // so try to scale it instead of the faster differentiating part
+        if (dx == 0)
+            kx *= scale;
+        else
+            ky *= scale;
+    }
+
+    GpuMat rowKernel; makeNppKernel(kx, rowKernel);
+    GpuMat columnKernel; makeNppKernel(ky, columnKernel);
+
+    applySeparableFilter(src, dst, rowKernel, columnKernel);
+}
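For context, a minimal usage sketch of the new cv::gpu::Sobel entry point (not part of the patch; assumes a CUDA-capable device, and the input file name is a placeholder):

    #include <opencv2/opencv.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::Mat img = cv::imread("input.png", 0);     // load as CV_8UC1
        cv::gpu::GpuMat d_src(img), d_dst;            // upload to the device
        cv::gpu::Sobel(d_src, d_dst, -1, 1, 0, 3);    // dx = 1, dy = 0, 3x3 kernel
        cv::Mat result;
        d_dst.download(result);                       // copy the result back to host
        return 0;
    }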
+
+////////////////////////////////////////////////////////////////////////
+// GaussianBlur
+
+void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2)
+{
+    if (ksize.width == 1 && ksize.height == 1)
+    {
+        src.copyTo(dst);
+        return;
+    }
+
+    int depth = src.depth();
+    if (sigma2 <= 0)
+        sigma2 = sigma1;
+
+    // automatic detection of kernel size from sigma
+    if (ksize.width <= 0 && sigma1 > 0)
+        ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1;
+    if (ksize.height <= 0 && sigma2 > 0)
+        ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1;
+
+    CV_Assert(ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1);
+
+    sigma1 = std::max(sigma1, 0.0);
+    sigma2 = std::max(sigma2, 0.0);
+
+    const int scaleFactor = 256;
+
+    Mat kx = getGaussianKernel(ksize.width, sigma1, std::max(depth, CV_32F));
+    kx.convertTo(kx, kx.depth(), scaleFactor);
+    Mat ky;
+    if (ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON)
+        ky = kx;
+    else
+    {
+        ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F));
+        ky.convertTo(ky, ky.depth(), scaleFactor);
+    }
+
+    GpuMat rowKernel; makeNppKernel(kx, rowKernel);
+    GpuMat columnKernel; makeNppKernel(ky, columnKernel);
+
+    applySeparableFilter(src, dst, rowKernel, columnKernel, cv::Point(-1, -1), scaleFactor);
+}
+
 #endif
diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp
index 815aa8686a..a786a620b9 100644
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -62,6 +62,7 @@ void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_
 void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
 void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); }
 void cv::gpu::integral(GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int) { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -986,4 +987,33 @@ void cv::gpu::integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum)
         sum.step, sqsum.ptr<Npp32f>(), sqsum.step, sz, 0, 0.0f, h) );
 }
 
+////////////////////////////////////////////////////////////////////////
+// Canny
+
+void cv::gpu::Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize)
+{
+    CV_Assert(image.type() == CV_8UC1);
+
+    GpuMat srcDx, srcDy;
+
+    Sobel(image, srcDx, -1, 1, 0, apertureSize);
+    Sobel(image, srcDy, -1, 0, 1, apertureSize);
+
+    srcDx.convertTo(srcDx, CV_32F);
+    srcDy.convertTo(srcDy, CV_32F);
+
+    edges.create(image.size(), CV_8UC1);
+
+    NppiSize sz;
+    sz.height = image.rows;
+    sz.width = image.cols;
+
+    int bufsz;
+    nppSafeCall( nppiCannyGetBufferSize(sz, &bufsz) );
+    GpuMat buf(1, bufsz, CV_8UC1);
+
+    nppSafeCall( nppiCanny_32f8u_C1R(srcDx.ptr<Npp32f>(), srcDx.step, srcDy.ptr<Npp32f>(), srcDy.step,
+        edges.ptr<Npp8u>(), edges.step, sz, (Npp32f)threshold1, (Npp32f)threshold2, buf.ptr<Npp8u>()) );
+}
+
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp
index 7b1837dfe5..7d58619b28 100644
--- a/modules/gpu/src/matrix_operations.cpp
+++ b/modules/gpu/src/matrix_operations.cpp
@@ -124,6 +124,61 @@ void cv::gpu::GpuMat::copyTo( GpuMat& mat, const GpuMat& mask ) const
     }
 }
 
+namespace
+{
+    template<int SDEPTH> struct NPPTypeTraits;
+    template<> struct NPPTypeTraits<CV_8U>  { typedef Npp8u  npp_type; };
+    template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
+    template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
+    template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
+    template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
+
+    template<int SDEPTH, int DDEPTH> struct NppConvertFunc
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
+
+        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI);
+    };
+    template<int DDEPTH> struct NppConvertFunc<CV_32F, DDEPTH>
+    {
+        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
+
+        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode);
+    };
+
+    template<int SDEPTH, int DDEPTH, typename NppConvertFunc<SDEPTH, DDEPTH>::func_ptr func> struct NppCvt
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
+
+        static void cvt(const GpuMat& src, GpuMat& dst)
+        {
+            NppiSize sz;
+            sz.width = src.cols;
+            sz.height = src.rows;
+            nppSafeCall( func(src.ptr<src_t>(), src.step, dst.ptr<dst_t>(), dst.step, sz) );
+        }
+    };
+    template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
+    {
+        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
+
+        static void cvt(const GpuMat& src, GpuMat& dst)
+        {
+            NppiSize sz;
+            sz.width = src.cols;
+            sz.height = src.rows;
+            nppSafeCall( func(src.ptr<Npp32f>(), src.step, dst.ptr<dst_t>(), dst.step, sz, NPP_RND_NEAR) );
+        }
+    };
+
+    void convertToKernelCaller(const GpuMat& src, GpuMat& dst)
+    {
+        matrix_operations::convert_to(src, src.depth(), dst, dst.depth(), src.channels(), 1.0, 0.0);
+    }
+}
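The helpers above pair a compile-time depth-to-type mapping (NPPTypeTraits) with static tables of function pointers indexed by runtime depth; zero entries mark combinations with no NPP routine. A stripped-down, self-contained illustration of the same pattern (hypothetical names, independent of NPP):

    #include <cstdio>

    // compile-time mapping from a runtime depth code to a concrete type
    template <int DEPTH> struct TypeTraits;
    template <> struct TypeTraits<0> { typedef unsigned char  value_type; };  // like CV_8U
    template <> struct TypeTraits<2> { typedef unsigned short value_type; };  // like CV_16U

    typedef void (*caller_t)();

    template <int DEPTH> void printSize()
    {
        std::printf("%d bytes\n", (int)sizeof(typename TypeTraits<DEPTH>::value_type));
    }

    // static table indexed by the runtime depth; 0 marks unsupported combinations
    static const caller_t callers[3] = { &printSize<0>, 0, &printSize<2> };

    int main()
    {
        int depth = 2;            // known only at run time
        if (callers[depth])
            callers[depth]();     // dispatches to printSize<2>
        return 0;
    }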
 
 void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double beta ) const
 {
     bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();
@@ -133,7 +188,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
     else
         rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
 
-    int stype = type();
+    int scn = channels();
     int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);
     if( sdepth == ddepth && noScale )
     {
@@ -152,44 +207,85 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
         matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta);
     else
     {
-        NppiSize sz;
-        sz.width = cols;
-        sz.height = rows;
+        typedef void (*convert_caller_t)(const GpuMat& src, GpuMat& dst);
+        static const convert_caller_t convert_callers[8][8][4] =
+        {
+            {
+                {0,0,0,0},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::cvt},
+                {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::cvt},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {0,0,0,0}
+            },
+            {
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {0,0,0,0},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {0,0,0,0}
+            },
+            {
+                {NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::cvt},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {0,0,0,0},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {0,0,0,0}
+            },
+            {
+                {NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::cvt},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {0,0,0,0},
+                {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {0,0,0,0}
+            },
+            {
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {0,0,0,0},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {0,0,0,0}
+            },
+            {
+                {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {0,0,0,0},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {0,0,0,0}
+            },
+            {
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
+                {0,0,0,0},
+                {0,0,0,0}
+            },
+            {
+                {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}
+            }
+        };
 
-        if (stype == CV_8UC1 && ddepth == CV_16U)
-            nppSafeCall( nppiConvert_8u16u_C1R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16u>(), dst.step, sz) );
-        else if (stype == CV_16UC1 && ddepth == CV_8U)
-            nppSafeCall( nppiConvert_16u8u_C1R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );
-        else if (stype == CV_8UC4 && ddepth == CV_16U)
-            nppSafeCall( nppiConvert_8u16u_C4R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16u>(), dst.step, sz) );
-        else if (stype == CV_16UC4 && ddepth == CV_8U)
-            nppSafeCall( nppiConvert_16u8u_C4R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );
-        else if (stype == CV_8UC1 && ddepth == CV_16S)
-            nppSafeCall( nppiConvert_8u16s_C1R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16s>(), dst.step, sz) );
-        else if (stype == CV_16SC1 && ddepth == CV_8U)
-            nppSafeCall( nppiConvert_16s8u_C1R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );
-        else if (stype == CV_8UC4 && ddepth == CV_16S)
-            nppSafeCall( nppiConvert_8u16s_C4R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp16s>(), dst.step, sz) );
-        else if (stype == CV_16SC4 && ddepth == CV_8U)
-            nppSafeCall( nppiConvert_16s8u_C4R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz) );
-        else if (stype == CV_16SC1 && ddepth == CV_32F)
-            nppSafeCall( nppiConvert_16s32f_C1R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp32f>(), dst.step, sz) );
-        else if (stype == CV_32FC1 && ddepth == CV_16S)
-            nppSafeCall( nppiConvert_32f16s_C1R(psrc->ptr<Npp32f>(), psrc->step, dst.ptr<Npp16s>(), dst.step, sz, NPP_RND_NEAR) );
-        else if (stype == CV_8UC1 && ddepth == CV_32F)
-            nppSafeCall( nppiConvert_8u32f_C1R(psrc->ptr<Npp8u>(), psrc->step, dst.ptr<Npp32f>(), dst.step, sz) );
-        else if (stype == CV_32FC1 && ddepth == CV_8U)
-            nppSafeCall( nppiConvert_32f8u_C1R(psrc->ptr<Npp32f>(), psrc->step, dst.ptr<Npp8u>(), dst.step, sz, NPP_RND_NEAR) );
-        else if (stype == CV_16UC1 && ddepth == CV_32F)
-            nppSafeCall( nppiConvert_16u32f_C1R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp32f>(), dst.step, sz) );
-        else if (stype == CV_32FC1 && ddepth == CV_16U)
-            nppSafeCall( nppiConvert_32f16u_C1R(psrc->ptr<Npp32f>(), psrc->step, dst.ptr<Npp16u>(), dst.step, sz, NPP_RND_NEAR) );
-        else if (stype == CV_16UC1 && ddepth == CV_32S)
-            nppSafeCall( nppiConvert_16u32s_C1R(psrc->ptr<Npp16u>(), psrc->step, dst.ptr<Npp32s>(), dst.step, sz) );
-        else if (stype == CV_16SC1 && ddepth == CV_32S)
-            nppSafeCall( nppiConvert_16s32s_C1R(psrc->ptr<Npp16s>(), psrc->step, dst.ptr<Npp32s>(), dst.step, sz) );
-        else
-            matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), 1.0, 0.0);
+        convert_callers[sdepth][ddepth][scn-1](*psrc, dst);
     }
 }
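A usage sketch of the resulting dispatch (not part of the patch): an unscaled 8U-to-32F conversion lands on the nppiConvert_8u32f_C1R table entry, while a scaled conversion takes the custom-kernel path chosen earlier in the function.

    // hypothetical host-side snippet
    cv::gpu::GpuMat d_src(cv::Mat::ones(64, 64, CV_8UC1)), d_dst1, d_dst2;
    d_src.convertTo(d_dst1, CV_32F);             // no scaling: NPP table entry
    d_src.convertTo(d_dst2, CV_32F, 1.0 / 255);  // alpha != 1: custom kernel path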
 
@@ -199,6 +295,99 @@ GpuMat& GpuMat::operator = (const Scalar& s)
     return *this;
 }
 
+namespace
+{
+    template<int SDEPTH, int SCN> struct NppSetFunc
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+
+        typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
+    };
+    template<int SDEPTH> struct NppSetFunc<SDEPTH, 1>
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+
+        typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
+    };
+
+    template<int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+
+        static void set(GpuMat& src, const Scalar& s)
+        {
+            NppiSize sz;
+            sz.width = src.cols;
+            sz.height = src.rows;
+            Scalar_<src_t> nppS = s;
+            nppSafeCall( func(nppS.val, src.ptr<src_t>(), src.step, sz) );
+        }
+    };
+    template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+
+        static void set(GpuMat& src, const Scalar& s)
+        {
+            NppiSize sz;
+            sz.width = src.cols;
+            sz.height = src.rows;
+            Scalar_<src_t> nppS = s;
+            nppSafeCall( func(nppS[0], src.ptr<src_t>(), src.step, sz) );
+        }
+    };
+
+    void kernelSet(GpuMat& src, const Scalar& s)
+    {
+        matrix_operations::set_to_without_mask(src, src.depth(), s.val, src.channels());
+    }
+
+    template<int SDEPTH, int SCN> struct NppSetMaskFunc
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+
+        typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
+    };
+    template<int SDEPTH> struct NppSetMaskFunc<SDEPTH, 1>
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+
+        typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
+    };
+
+    template<int SDEPTH, int SCN, typename NppSetMaskFunc<SDEPTH, SCN>::func_ptr func> struct NppSetMask
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+
+        static void set(GpuMat& src, const Scalar& s, const GpuMat& mask)
+        {
+            NppiSize sz;
+            sz.width = src.cols;
+            sz.height = src.rows;
+            Scalar_<src_t> nppS = s;
+            nppSafeCall( func(nppS.val, src.ptr<src_t>(), src.step, sz, mask.ptr<Npp8u>(), mask.step) );
+        }
+    };
+    template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
+    {
+        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
+
+        static void set(GpuMat& src, const Scalar& s, const GpuMat& mask)
+        {
+            NppiSize sz;
+            sz.width = src.cols;
+            sz.height = src.rows;
+            Scalar_<src_t> nppS = s;
+            nppSafeCall( func(nppS[0], src.ptr<src_t>(), src.step, sz, mask.ptr<Npp8u>(), mask.step) );
+        }
+    };
+
+    void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask)
+    {
+        matrix_operations::set_to_with_mask(src, src.depth(), s.val, mask, src.channels());
+    }
+}
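For reference, how these helpers are reached from user code (a sketch; d_img and d_mask are hypothetical device matrices):

    cv::gpu::GpuMat d_img(480, 640, CV_8UC4);
    cv::gpu::GpuMat d_mask(480, 640, CV_8UC1, cv::Scalar(255));
    d_img = cv::Scalar::all(0);                     // dispatches to NppSet<CV_8U, 4, nppiSet_8u_C4R>
    d_img.setTo(cv::Scalar(255, 0, 0, 0), d_mask);  // dispatches to NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>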
 
 GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
 {
     CV_Assert(mask.type() == CV_8UC1);
@@ -211,151 +400,35 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
 
     if (mask.empty())
     {
-        switch (type())
+        typedef void (*set_caller_t)(GpuMat& src, const Scalar& s);
+        static const set_caller_t set_callers[8][4] =
         {
-            case CV_8UC1:
-                {
-                    Npp8u nVal = (Npp8u)s[0];
-                    nppSafeCall( nppiSet_8u_C1R(nVal, ptr<Npp8u>(), step, sz) );
-                    break;
-                }
-            case CV_8UC4:
-                {
-                    Scalar_<Npp8u> nVal = s;
-                    nppSafeCall( nppiSet_8u_C4R(nVal.val, ptr<Npp8u>(), step, sz) );
-                    break;
-                }
-            case CV_16UC1:
-                {
-                    Npp16u nVal = (Npp16u)s[0];
-                    nppSafeCall( nppiSet_16u_C1R(nVal, ptr<Npp16u>(), step, sz) );
-                    break;
-                }
-            /*case CV_16UC2:
-                {
-                    Scalar_<Npp16u> nVal = s;
-                    nppSafeCall( nppiSet_16u_C2R(nVal.val, ptr<Npp16u>(), step, sz) );
-                    break;
-                }*/
-            case CV_16UC4:
-                {
-                    Scalar_<Npp16u> nVal = s;
-                    nppSafeCall( nppiSet_16u_C4R(nVal.val, ptr<Npp16u>(), step, sz) );
-                    break;
-                }
-            case CV_16SC1:
-                {
-                    Npp16s nVal = (Npp16s)s[0];
-                    nppSafeCall( nppiSet_16s_C1R(nVal, ptr<Npp16s>(), step, sz) );
-                    break;
-                }
-            /*case CV_16SC2:
-                {
-                    Scalar_<Npp16s> nVal = s;
-                    nppSafeCall( nppiSet_16s_C2R(nVal.val, ptr<Npp16s>(), step, sz) );
-                    break;
-                }*/
-            case CV_16SC4:
-                {
-                    Scalar_<Npp16s> nVal = s;
-                    nppSafeCall( nppiSet_16s_C4R(nVal.val, ptr<Npp16s>(), step, sz) );
-                    break;
-                }
-            case CV_32SC1:
-                {
-                    Npp32s nVal = (Npp32s)s[0];
-                    nppSafeCall( nppiSet_32s_C1R(nVal, ptr<Npp32s>(), step, sz) );
-                    break;
-                }
-            case CV_32SC4:
-                {
-                    Scalar_<Npp32s> nVal = s;
-                    nppSafeCall( nppiSet_32s_C4R(nVal.val, ptr<Npp32s>(), step, sz) );
-                    break;
-                }
-            case CV_32FC1:
-                {
-                    Npp32f nVal = (Npp32f)s[0];
-                    nppSafeCall( nppiSet_32f_C1R(nVal, ptr<Npp32f>(), step, sz) );
-                    break;
-                }
-            case CV_32FC4:
-                {
-                    Scalar_<Npp32f> nVal = s;
-                    nppSafeCall( nppiSet_32f_C4R(nVal.val, ptr<Npp32f>(), step, sz) );
-                    break;
-                }
-            default:
-                matrix_operations::set_to_without_mask( *this, depth(), s.val, channels());
-        }
+            {NppSet<CV_8U, 1, nppiSet_8u_C1R>::set,kernelSet,kernelSet,NppSet<CV_8U, 4, nppiSet_8u_C4R>::set},
+            {kernelSet,kernelSet,kernelSet,kernelSet},
+            {NppSet<CV_16U, 1, nppiSet_16u_C1R>::set,kernelSet,kernelSet,NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},
+            {NppSet<CV_16S, 1, nppiSet_16s_C1R>::set,kernelSet,kernelSet,NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},
+            {NppSet<CV_32S, 1, nppiSet_32s_C1R>::set,kernelSet,kernelSet,NppSet<CV_32S, 4, nppiSet_32s_C4R>::set},
+            {NppSet<CV_32F, 1, nppiSet_32f_C1R>::set,kernelSet,kernelSet,NppSet<CV_32F, 4, nppiSet_32f_C4R>::set},
+            {kernelSet,kernelSet,kernelSet,kernelSet},
+            {0,0,0,0}
+        };
+        set_callers[depth()][channels()-1](*this, s);
     }
     else
     {
-        switch (type())
+        typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask);
+        static const set_caller_t set_callers[8][4] =
         {
-            case CV_8UC1:
-                {
-                    Npp8u nVal = (Npp8u)s[0];
-                    nppSafeCall( nppiSet_8u_C1MR(nVal, ptr<Npp8u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
-                    break;
-                }
-            case CV_8UC4:
-                {
-                    Scalar_<Npp8u> nVal = s;
-                    nppSafeCall( nppiSet_8u_C4MR(nVal.val, ptr<Npp8u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
-                    break;
-                }
-            case CV_16UC1:
-                {
-                    Npp16u nVal = (Npp16u)s[0];
-                    nppSafeCall( nppiSet_16u_C1MR(nVal, ptr<Npp16u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
-                    break;
-                }
-            case CV_16UC4:
-                {
-                    Scalar_<Npp16u> nVal = s;
-                    nppSafeCall( nppiSet_16u_C4MR(nVal.val, ptr<Npp16u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
-                    break;
-                }
-            case CV_16SC1:
-                {
-                    Npp16s nVal = (Npp16s)s[0];
-                    nppSafeCall( nppiSet_16s_C1MR(nVal, ptr<Npp16s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
-                    break;
-                }
-            case CV_16SC4:
-                {
-                    Scalar_<Npp16s> nVal = s;
-                    nppSafeCall( nppiSet_16s_C4MR(nVal.val, ptr<Npp16s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
-                    break;
-                }
-            case CV_32SC1:
-                {
-                    Npp32s nVal = (Npp32s)s[0];
-                    nppSafeCall( nppiSet_32s_C1MR(nVal, ptr<Npp32s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
-                    break;
-                }
-            case CV_32SC4:
-                {
-                    Scalar_<Npp32s> nVal = s;
-                    nppSafeCall( nppiSet_32s_C4MR(nVal.val, ptr<Npp32s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
-                    break;
-                }
-            case CV_32FC1:
-                {
-                    Npp32f nVal = (Npp32f)s[0];
-                    nppSafeCall( nppiSet_32f_C1MR(nVal, ptr<Npp32f>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
-                    break;
-                }
-            case CV_32FC4:
-                {
-                    Scalar_<Npp32f> nVal = s;
-                    nppSafeCall( nppiSet_32f_C4MR(nVal.val, ptr<Npp32f>(), step, sz, mask.ptr<Npp8u>(), mask.step) );
-                    break;
-                }
-            default:
-                matrix_operations::set_to_with_mask( *this, depth(), s.val, mask, channels());
-        }
+            {NppSetMask<CV_8U, 1, nppiSet_8u_C1MR>::set,kernelSetMask,kernelSetMask,NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>::set},
+            {kernelSetMask,kernelSetMask,kernelSetMask,kernelSetMask},
+            {NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::set,kernelSetMask,kernelSetMask,NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::set},
+            {NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::set,kernelSetMask,kernelSetMask,NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::set},
+            {NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::set,kernelSetMask,kernelSetMask,NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::set},
+            {NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::set,kernelSetMask,kernelSetMask,NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::set},
+            {kernelSetMask,kernelSetMask,kernelSetMask,kernelSetMask},
+            {0,0,0,0}
+        };
+        set_callers[depth()][channels()-1](*this, s, mask);
     }
 
     return *this;
diff --git a/tests/gpu/src/gputest_main.cpp b/tests/gpu/src/gputest_main.cpp
index 0833ffe335..5012c4b419 100644
--- a/tests/gpu/src/gputest_main.cpp
+++ b/tests/gpu/src/gputest_main.cpp
@@ -61,6 +61,9 @@ const char* blacklist[] =
     //"GPU-NppImageLog",          // different precision
     //"GPU-NppImageMagnitude",    // different precision
     //"GPU-NppImageSumWindow",    // different border interpolation
+    //"GPU-NppImageSobel",        // ???
+ //"GPU-NppImageGaussianBlur", // different border interpolation + "GPU-NppImageCanny", // NPP_TEXTURE_BIND_ERROR 0 }; diff --git a/tests/gpu/src/imgproc_gpu.cpp b/tests/gpu/src/imgproc_gpu.cpp index ce6cebc98f..7b07312c11 100644 --- a/tests/gpu/src/imgproc_gpu.cpp +++ b/tests/gpu/src/imgproc_gpu.cpp @@ -492,6 +492,115 @@ struct CV_GpuNppImageSumWindowTest : public CV_GpuImageProcTest } }; +//////////////////////////////////////////////////////////////////////////////// +// Sobel +struct CV_GpuNppImageSobelTest : public CV_GpuImageProcTest +{ + CV_GpuNppImageSobelTest() : CV_GpuImageProcTest( "GPU-NppImageSobel", "Sobel" ) {} + + int test(const Mat& img) + { + if (img.type() != CV_8UC1 && img.type() != CV_8UC4) + { + ts->printf(CvTS::LOG, "\nUnsupported type\n"); + return CvTS::OK; + } + + int ksizes[] = {3, 5, 7}; + int ksizes_num = sizeof(ksizes) / sizeof(int); + + int dx = 1, dy = 0; + + int test_res = CvTS::OK; + + for (int i = 0; i < ksizes_num; ++i) + { + ts->printf(CvTS::LOG, "\nksize = %d\n", ksizes[i]); + + Mat cpudst; + cv::Sobel(img, cpudst, -1, dx, dy, ksizes[i]); + + GpuMat gpu1(img); + GpuMat gpudst; + cv::gpu::Sobel(gpu1, gpudst, -1, dx, dy, ksizes[i]); + + if (CheckNorm(cpudst, gpudst) != CvTS::OK) + test_res = CvTS::FAIL_GENERIC; + } + + return test_res; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +// GaussianBlur +struct CV_GpuNppImageGaussianBlurTest : public CV_GpuImageProcTest +{ + CV_GpuNppImageGaussianBlurTest() : CV_GpuImageProcTest( "GPU-NppImageGaussianBlur", "GaussianBlur" ) {} + + int test(const Mat& img) + { + if (img.type() != CV_8UC1 && img.type() != CV_8UC4) + { + ts->printf(CvTS::LOG, "\nUnsupported type\n"); + return CvTS::OK; + } + + int ksizes[] = {3, 5, 7}; + int ksizes_num = sizeof(ksizes) / sizeof(int); + + int test_res = CvTS::OK; + + const double sigma1 = 3.0; + + for (int i = 0; i < ksizes_num; ++i) + { + for (int j = 0; j < ksizes_num; ++j) + { + ts->printf(CvTS::LOG, "\nksize = (%dx%d)\n", ksizes[i], ksizes[j]); + + Mat cpudst; + cv::GaussianBlur(img, cpudst, cv::Size(ksizes[i], ksizes[j]), sigma1); + + GpuMat gpu1(img); + GpuMat gpudst; + cv::gpu::GaussianBlur(gpu1, gpudst, cv::Size(ksizes[i], ksizes[j]), sigma1); + if (CheckNorm(cpudst, gpudst) != CvTS::OK) + test_res = CvTS::FAIL_GENERIC; + } + } + + return test_res; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +// Canny +struct CV_GpuNppImageCannyTest : public CV_GpuImageProcTest +{ + CV_GpuNppImageCannyTest() : CV_GpuImageProcTest( "GPU-NppImageCanny", "Canny" ) {} + + int test(const Mat& img) + { + if (img.type() != CV_8UC1) + { + ts->printf(CvTS::LOG, "\nUnsupported type\n"); + return CvTS::OK; + } + + const double threshold1 = 1.0, threshold2 = 10.0; + + Mat cpudst; + cv::Canny(img, cpudst, threshold1, threshold2); + + GpuMat gpu1(img); + GpuMat gpudst; + cv::gpu::Canny(gpu1, gpudst, threshold1, threshold2); + + return CheckNorm(cpudst, gpudst); + } +}; + //////////////////////////////////////////////////////////////////////////////// // cvtColor class CV_GpuCvtColorTest : public CvTest @@ -598,4 +707,7 @@ CV_GpuNppImageWarpPerspectiveTest CV_GpuNppImageWarpPerspective_test; CV_GpuNppImageIntegralTest CV_GpuNppImageIntegral_test; CV_GpuNppImageBlurTest CV_GpuNppImageBlur_test; CV_GpuNppImageSumWindowTest CV_GpuNppImageSumWindow_test; +CV_GpuNppImageSobelTest CV_GpuNppImageSobel_test; +CV_GpuNppImageGaussianBlurTest CV_GpuNppImageGaussianBlur_test; +CV_GpuNppImageCannyTest 
+CV_GpuNppImageCannyTest CV_GpuNppImageCanny_test;
 CV_GpuCvtColorTest CV_GpuCvtColor_test;
\ No newline at end of file
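
For completeness, an end-to-end usage sketch combining the new functions, mirroring the tests above (not part of the patch; the file name is a placeholder, and note that the Canny test is blacklisted above due to NPP_TEXTURE_BIND_ERROR on some setups):

    #include <opencv2/opencv.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::Mat img = cv::imread("input.png", 0);       // CV_8UC1 input
        cv::gpu::GpuMat d_src(img), d_blur, d_edges;

        cv::gpu::GaussianBlur(d_src, d_blur, cv::Size(5, 5), 3.0);
        cv::gpu::Canny(d_blur, d_edges, 1.0, 10.0);     // thresholds as in the test

        cv::Mat edges;
        d_edges.download(edges);                        // copy the edge map back to host
        return 0;
    }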