diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index b18d4730da..90acd9d2fa 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -826,6 +826,32 @@ struct CV_EXPORTS CannyBuf
     Ptr<FilterEngine_GPU> filterDX, filterDY;
 };
 
+class CV_EXPORTS ImagePyramid
+{
+public:
+    inline ImagePyramid() : nLayers_(0) {}
+    inline ImagePyramid(const GpuMat& img, int nLayers, Stream& stream = Stream::Null())
+    {
+        build(img, nLayers, stream);
+    }
+
+    void build(const GpuMat& img, int nLayers, Stream& stream = Stream::Null());
+
+    void getLayer(GpuMat& outImg, Size outRoi, Stream& stream = Stream::Null()) const;
+
+    inline void release()
+    {
+        layer0_.release();
+        pyramid_.clear();
+        nLayers_ = 0;
+    }
+
+private:
+    GpuMat layer0_;
+    std::vector<GpuMat> pyramid_;
+    int nLayers_;
+};
+
 ////////////////////////////// Matrix reductions //////////////////////////////
 
 //! computes mean value and standard deviation of all or selected array elements
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp
index 6515f0a8c9..a7b39dd1c9 100644
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -101,6 +101,8 @@ void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, do
 cv::gpu::CannyBuf::CannyBuf(const GpuMat&, const GpuMat&) { throw_nogpu(); }
 void cv::gpu::CannyBuf::create(const Size&, int) { throw_nogpu(); }
 void cv::gpu::CannyBuf::release() { throw_nogpu(); }
+void cv::gpu::ImagePyramid::build(const GpuMat&, int, Stream&) { throw_nogpu(); }
+void cv::gpu::ImagePyramid::getLayer(GpuMat&, Size, Stream&) const { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -2017,6 +2019,137 @@ void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& d
     CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
 }
 
+
+//////////////////////////////////////////////////////////////////////////////
+// ImagePyramid
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace pyramid
+    {
+        template <typename T> void kernelDownsampleX2_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template <typename T> void kernelInterpolateFrom1_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+    }
+}}}
+
+void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stream)
+{
+#ifdef _WIN32
+    using namespace cv::gpu::device::pyramid;
+
+    typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+
+    static const func_t funcs[7][4] =
+    {
+        {kernelDownsampleX2_gpu<uchar1>, /*kernelDownsampleX2_gpu<uchar2>*/ 0, kernelDownsampleX2_gpu<uchar3>, kernelDownsampleX2_gpu<uchar4>},
+        {/*kernelDownsampleX2_gpu<char1>*/ 0, /*kernelDownsampleX2_gpu<char2>*/ 0, /*kernelDownsampleX2_gpu<char3>*/ 0, /*kernelDownsampleX2_gpu<char4>*/ 0},
+        {kernelDownsampleX2_gpu<ushort1>, /*kernelDownsampleX2_gpu<ushort2>*/ 0, kernelDownsampleX2_gpu<ushort3>, kernelDownsampleX2_gpu<ushort4>},
+        {/*kernelDownsampleX2_gpu<short1>*/ 0, /*kernelDownsampleX2_gpu<short2>*/ 0, /*kernelDownsampleX2_gpu<short3>*/ 0, /*kernelDownsampleX2_gpu<short4>*/ 0},
+        {/*kernelDownsampleX2_gpu<int1>*/ 0, /*kernelDownsampleX2_gpu<int2>*/ 0, /*kernelDownsampleX2_gpu<int3>*/ 0, /*kernelDownsampleX2_gpu<int4>*/ 0},
+        {kernelDownsampleX2_gpu<float1>, /*kernelDownsampleX2_gpu<float2>*/ 0, kernelDownsampleX2_gpu<float3>, kernelDownsampleX2_gpu<float4>},
+        {/*kernelDownsampleX2_gpu<double1>*/ 0, /*kernelDownsampleX2_gpu<double2>*/ 0, /*kernelDownsampleX2_gpu<double3>*/ 0, /*kernelDownsampleX2_gpu<double4>*/ 0}
+    };
+
+    CV_Assert(img.channels() == 1 || img.channels() == 3 || img.channels() == 4);
+    CV_Assert(img.depth() == CV_8U || img.depth() == CV_16U || img.depth() == CV_32F);
+
+    layer0_ = img;
+    Size szLastLayer = img.size();
+    nLayers_ = 1;
+
+    if (numLayers <= 0)
+        numLayers = 255; //it will cut-off when any of the dimensions goes 1
+
+    pyramid_.resize(numLayers);
+
+    for (int i = 0; i < numLayers - 1; ++i)
+    {
+        Size szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
+
+        if (szCurLayer.width == 0 || szCurLayer.height == 0)
+            break;
+
+        ensureSizeIsEnough(szCurLayer, img.type(), pyramid_[i]);
+        nLayers_++;
+
+        const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];
+
+        func_t func = funcs[img.depth()][img.channels() - 1];
+        CV_Assert(func != 0);
+
+        func(prevLayer, pyramid_[i], StreamAccessor::getStream(stream));
+
+        szLastLayer = szCurLayer;
+    }
+#else
+    throw_nogpu();
+#endif
+}
+
+void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream) const
+{
+#ifdef _WIN32
+    using namespace cv::gpu::device::pyramid;
+
+    typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+
+    static const func_t funcs[7][4] =
+    {
+        {kernelInterpolateFrom1_gpu<uchar1>, /*kernelInterpolateFrom1_gpu<uchar2>*/ 0, kernelInterpolateFrom1_gpu<uchar3>, kernelInterpolateFrom1_gpu<uchar4>},
+        {/*kernelInterpolateFrom1_gpu<char1>*/ 0, /*kernelInterpolateFrom1_gpu<char2>*/ 0, /*kernelInterpolateFrom1_gpu<char3>*/ 0, /*kernelInterpolateFrom1_gpu<char4>*/ 0},
+        {kernelInterpolateFrom1_gpu<ushort1>, /*kernelInterpolateFrom1_gpu<ushort2>*/ 0, kernelInterpolateFrom1_gpu<ushort3>, kernelInterpolateFrom1_gpu<ushort4>},
+        {/*kernelInterpolateFrom1_gpu<short1>*/ 0, /*kernelInterpolateFrom1_gpu<short2>*/ 0, /*kernelInterpolateFrom1_gpu<short3>*/ 0, /*kernelInterpolateFrom1_gpu<short4>*/ 0},
+        {/*kernelInterpolateFrom1_gpu<int1>*/ 0, /*kernelInterpolateFrom1_gpu<int2>*/ 0, /*kernelInterpolateFrom1_gpu<int3>*/ 0, /*kernelInterpolateFrom1_gpu<int4>*/ 0},
+        {kernelInterpolateFrom1_gpu<float1>, /*kernelInterpolateFrom1_gpu<float2>*/ 0, kernelInterpolateFrom1_gpu<float3>, kernelInterpolateFrom1_gpu<float4>},
+        {/*kernelInterpolateFrom1_gpu<double1>*/ 0, /*kernelInterpolateFrom1_gpu<double2>*/ 0, /*kernelInterpolateFrom1_gpu<double3>*/ 0, /*kernelInterpolateFrom1_gpu<double4>*/ 0}
+    };
+
+    CV_Assert(outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0);
+
+    ensureSizeIsEnough(outRoi, layer0_.type(), outImg);
+
+    if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
+    {
+        if (stream)
+            stream.enqueueCopy(layer0_, outImg);
+        else
+            layer0_.copyTo(outImg);
+    }
+
+    float lastScale = 1.0f;
+    float curScale;
+    GpuMat lastLayer = layer0_;
+    GpuMat curLayer;
+
+    for (int i = 0; i < nLayers_ - 1; ++i)
+    {
+        curScale = lastScale * 0.5f;
+        curLayer = pyramid_[i];
+
+        if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
+        {
+            if (stream)
+                stream.enqueueCopy(curLayer, outImg);
+            else
+                curLayer.copyTo(outImg);
+        }
+
+        if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
+            break;
+
+        lastScale = curScale;
+        lastLayer = curLayer;
+    }
+
+    func_t func = funcs[outImg.depth()][outImg.channels() - 1];
+    CV_Assert(func != 0);
+
+    func(lastLayer, outImg, StreamAccessor::getStream(stream));
+#else
+    throw_nogpu();
+#endif
+}
+
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpu/src/nvidia/core/NCVPyramid.cu b/modules/gpu/src/nvidia/core/NCVPyramid.cu
index 5a23677983..a75f3b0024 100644
--- a/modules/gpu/src/nvidia/core/NCVPyramid.cu
+++ b/modules/gpu/src/nvidia/core/NCVPyramid.cu
@@ -46,6 +46,7 @@
 #include "NCVAlg.hpp"
 #include "NCVPyramid.hpp"
 #include "NCVPixelOperations.hpp"
+#include "opencv2/gpu/device/common.hpp"
 
 #ifdef _WIN32
 
@@ -234,6 +235,39 @@ __global__ void kernelDownsampleX2(T *d_src,
     }
 }
+namespace cv { namespace gpu { namespace device
+{
+    namespace pyramid
+    {
+        template <typename T> void kernelDownsampleX2_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream)
+        {
+            dim3 bDim(16, 8);
+            dim3 gDim(divUp(src.cols, bDim.x), divUp(src.rows, bDim.y));
+
+            kernelDownsampleX2<<<gDim, bDim, 0, stream>>>((T*)src.data, src.step, (T*)dst.data, dst.step, NcvSize32u(dst.cols, dst.rows));
+
+            cudaSafeCall( cudaGetLastError() );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        template void kernelDownsampleX2_gpu<uchar1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template void kernelDownsampleX2_gpu<uchar3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template void kernelDownsampleX2_gpu<uchar4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+
+        template void kernelDownsampleX2_gpu<ushort1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template void kernelDownsampleX2_gpu<ushort3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template void kernelDownsampleX2_gpu<ushort4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+
+        template void kernelDownsampleX2_gpu<float1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template void kernelDownsampleX2_gpu<float3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template void kernelDownsampleX2_gpu<float4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+    }
+}}}
+
+
+
 template<class T>
 __global__ void kernelInterpolateFrom1(T *d_srcTop,
@@ -275,6 +309,37 @@ __global__ void kernelInterpolateFrom1(T *d_srcTop,
         d_dst_line[j] = outPix;
     }
 }
+namespace cv { namespace gpu { namespace device
+{
+    namespace pyramid
+    {
+        template <typename T> void kernelInterpolateFrom1_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream)
+        {
+            dim3 bDim(16, 8);
+            dim3 gDim(divUp(dst.cols, bDim.x), divUp(dst.rows, bDim.y));
+
+            kernelInterpolateFrom1<<<gDim, bDim, 0, stream>>>((T*) src.data, src.step, NcvSize32u(src.cols, src.rows),
+                (T*) dst.data, dst.step, NcvSize32u(dst.cols, dst.rows));
+
+            cudaSafeCall( cudaGetLastError() );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        template void kernelInterpolateFrom1_gpu<uchar1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template void kernelInterpolateFrom1_gpu<uchar3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template void kernelInterpolateFrom1_gpu<uchar4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+
+        template void kernelInterpolateFrom1_gpu<ushort1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template void kernelInterpolateFrom1_gpu<ushort3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template void kernelInterpolateFrom1_gpu<ushort4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+
+        template void kernelInterpolateFrom1_gpu<float1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template void kernelInterpolateFrom1_gpu<float3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+        template void kernelInterpolateFrom1_gpu<float4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
+    }
+}}}
 
 template
diff --git a/modules/gpu/src/nvidia/core/NCVPyramid.hpp b/modules/gpu/src/nvidia/core/NCVPyramid.hpp
index 1885b17235..b19ccd28bd 100644
--- a/modules/gpu/src/nvidia/core/NCVPyramid.hpp
+++ b/modules/gpu/src/nvidia/core/NCVPyramid.hpp
@@ -46,7 +46,7 @@
 #include <memory>
 #include <vector>
 #include "NCV.hpp"
-
+
 #ifdef _WIN32
 
 template
@@ -92,8 +92,8 @@ private:
     const NCVMatrix<T> *layer0;
     NCVMatrixStack<T> pyramid;
     Ncv32u nLayers;
-};
-
+};
+
 #endif //_WIN32
 
 #endif //_ncvpyramid_hpp_
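
Usage sketch (not part of the patch): a minimal host-side example of how the ImagePyramid class introduced above might be driven, assuming an OpenCV 2.x build with the gpu module and CUDA available (and, per the #ifdef guards in the patch, a _WIN32 build). The input file name and layer count are arbitrary placeholders.

// Hypothetical example, not included in the patch.
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    // Any CV_8U/CV_16U/CV_32F image with 1, 3 or 4 channels satisfies the
    // asserts in ImagePyramid::build(); "input.png" is just a placeholder.
    cv::Mat host = cv::imread("input.png");
    cv::gpu::GpuMat img(host);                 // upload to the device

    // Build a pyramid of up to 5 layers (including the original image);
    // a non-positive count lets build() keep halving until a dimension hits 1.
    cv::gpu::ImagePyramid pyr(img, 5);

    // Request an arbitrary output resolution: getLayer() starts from the
    // closest stored layer and interpolates it to the requested ROI size.
    cv::gpu::GpuMat smaller;
    pyr.getLayer(smaller, cv::Size(img.cols / 2, img.rows / 2));

    cv::Mat result;
    smaller.download(result);                  // copy back to the host
    cv::imwrite("half.png", result);

    pyr.release();
    return 0;
}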