From 3f62e78592df96b941e0f36d9917147d2d58deb9 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 26 Aug 2013 10:48:10 +0400 Subject: [PATCH] used new device layer for cv::gpu::copyMakeBorder --- modules/cudaarithm/src/core.cpp | 109 ------------ .../cudaarithm/src/cuda/copy_make_border.cu | 166 ++++++++++-------- 2 files changed, 95 insertions(+), 180 deletions(-) diff --git a/modules/cudaarithm/src/core.cpp b/modules/cudaarithm/src/core.cpp index 49cd57f268..535485f08f 100644 --- a/modules/cudaarithm/src/core.cpp +++ b/modules/cudaarithm/src/core.cpp @@ -343,113 +343,4 @@ Ptr cv::cuda::createLookUpTable(InputArray lut) return makePtr(lut); } -//////////////////////////////////////////////////////////////////////// -// copyMakeBorder - -namespace cv { namespace cuda { namespace device -{ - namespace imgproc - { - template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream); - } -}}} - -namespace -{ - template void copyMakeBorder_caller(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream) - { - using namespace ::cv::cuda::device::imgproc; - - Scalar_ val(saturate_cast(value[0]), saturate_cast(value[1]), saturate_cast(value[2]), saturate_cast(value[3])); - - copyMakeBorder_gpu(src, dst, top, left, borderType, val.val, stream); - } -} - -#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__ > 4 -typedef Npp32s __attribute__((__may_alias__)) Npp32s_a; -#else -typedef Npp32s Npp32s_a; -#endif - -void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bottom, int left, int right, int borderType, Scalar value, Stream& _stream) -{ - GpuMat src = _src.getGpuMat(); - - CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 ); - CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP ); - - _dst.create(src.rows + top + bottom, src.cols + left + right, src.type()); - GpuMat dst = _dst.getGpuMat(); - - cudaStream_t stream = StreamAccessor::getStream(_stream); - - if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1)) - { - NppiSize srcsz; - srcsz.width = src.cols; - srcsz.height = src.rows; - - NppiSize dstsz; - dstsz.width = dst.cols; - dstsz.height = dst.rows; - - NppStreamHandler h(stream); - - switch (src.type()) - { - case CV_8UC1: - { - Npp8u nVal = saturate_cast(value[0]); - nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr(), static_cast(src.step), srcsz, - dst.ptr(), static_cast(dst.step), dstsz, top, left, nVal) ); - break; - } - case CV_8UC4: - { - Npp8u nVal[] = {saturate_cast(value[0]), saturate_cast(value[1]), saturate_cast(value[2]), saturate_cast(value[3])}; - nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr(), static_cast(src.step), srcsz, - dst.ptr(), static_cast(dst.step), dstsz, top, left, nVal) ); - break; - } - case CV_32SC1: - { - Npp32s nVal = saturate_cast(value[0]); - nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr(), static_cast(src.step), srcsz, - dst.ptr(), static_cast(dst.step), dstsz, top, left, nVal) ); - break; - } - case CV_32FC1: - { - Npp32f val = saturate_cast(value[0]); - Npp32s nVal = *(reinterpret_cast(&val)); - nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr(), static_cast(src.step), srcsz, - dst.ptr(), static_cast(dst.step), dstsz, top, left, nVal) ); - break; - } - } - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - else - { - typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream); - static const caller_t callers[6][4] = - { - { copyMakeBorder_caller , copyMakeBorder_caller , copyMakeBorder_caller , copyMakeBorder_caller}, - {0/*copyMakeBorder_caller*/, 0/*copyMakeBorder_caller*/ , 0/*copyMakeBorder_caller*/, 0/*copyMakeBorder_caller*/}, - { copyMakeBorder_caller , 0/*copyMakeBorder_caller*/, copyMakeBorder_caller , copyMakeBorder_caller}, - { copyMakeBorder_caller , 0/*copyMakeBorder_caller*/ , copyMakeBorder_caller , copyMakeBorder_caller}, - {0/*copyMakeBorder_caller*/, 0/*copyMakeBorder_caller*/ , 0/*copyMakeBorder_caller*/, 0/*copyMakeBorder_caller*/}, - { copyMakeBorder_caller , 0/*copyMakeBorder_caller*/ , copyMakeBorder_caller , copyMakeBorder_caller} - }; - - caller_t func = callers[src.depth()][src.channels() - 1]; - CV_Assert(func != 0); - - func(src, dst, top, left, borderType, value, stream); - } -} - #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudaarithm/src/cuda/copy_make_border.cu b/modules/cudaarithm/src/cuda/copy_make_border.cu index 2ec53ce08d..f7dd91f987 100644 --- a/modules/cudaarithm/src/cuda/copy_make_border.cu +++ b/modules/cudaarithm/src/cuda/copy_make_border.cu @@ -40,92 +40,116 @@ // //M*/ -#if !defined CUDA_DISABLER +#include "opencv2/opencv_modules.hpp" -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/border_interpolate.hpp" +#ifndef HAVE_OPENCV_CUDEV -namespace cv { namespace cuda { namespace device +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudaarithm.hpp" +#include "opencv2/cudev.hpp" + +using namespace cv::cudev; + +namespace { - namespace imgproc + struct ShiftMap { - template __global__ void copyMakeBorder(const Ptr2D src, PtrStepSz dst, int top, int left) - { - const int x = blockDim.x * blockIdx.x + threadIdx.x; - const int y = blockDim.y * blockIdx.y + threadIdx.y; + typedef int2 value_type; + typedef int index_type; - if (x < dst.cols && y < dst.rows) - dst.ptr(y)[x] = src(y - top, x - left); + int top; + int left; + + __device__ __forceinline__ int2 operator ()(int y, int x) const + { + return make_int2(x - left, y - top); } + }; - template