used new device layer for cv::gpu::copyMakeBorder

This commit is contained in:
Vladislav Vinogradov 2013-08-26 10:48:10 +04:00
parent 7b3bbcea71
commit 3f62e78592
2 changed files with 95 additions and 180 deletions

View File

@ -343,113 +343,4 @@ Ptr<LookUpTable> cv::cuda::createLookUpTable(InputArray lut)
return makePtr<LookUpTableImpl>(lut);
}
////////////////////////////////////////////////////////////////////////
// copyMakeBorder
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
template <typename T, int cn> void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);
}
}}}
namespace
{
template <typename T, int cn> void copyMakeBorder_caller(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
{
using namespace ::cv::cuda::device::imgproc;
Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));
copyMakeBorder_gpu<T, cn>(src, dst, top, left, borderType, val.val, stream);
}
}
#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__ > 4
typedef Npp32s __attribute__((__may_alias__)) Npp32s_a;
#else
typedef Npp32s Npp32s_a;
#endif
void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bottom, int left, int right, int borderType, Scalar value, Stream& _stream)
{
GpuMat src = _src.getGpuMat();
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
_dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
GpuMat dst = _dst.getGpuMat();
cudaStream_t stream = StreamAccessor::getStream(_stream);
if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))
{
NppiSize srcsz;
srcsz.width = src.cols;
srcsz.height = src.rows;
NppiSize dstsz;
dstsz.width = dst.cols;
dstsz.height = dst.rows;
NppStreamHandler h(stream);
switch (src.type())
{
case CV_8UC1:
{
Npp8u nVal = saturate_cast<Npp8u>(value[0]);
nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
break;
}
case CV_8UC4:
{
Npp8u nVal[] = {saturate_cast<Npp8u>(value[0]), saturate_cast<Npp8u>(value[1]), saturate_cast<Npp8u>(value[2]), saturate_cast<Npp8u>(value[3])};
nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
break;
}
case CV_32SC1:
{
Npp32s nVal = saturate_cast<Npp32s>(value[0]);
nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
break;
}
case CV_32FC1:
{
Npp32f val = saturate_cast<Npp32f>(value[0]);
Npp32s nVal = *(reinterpret_cast<Npp32s_a*>(&val));
nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
break;
}
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
else
{
typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);
static const caller_t callers[6][4] =
{
{ copyMakeBorder_caller<uchar, 1> , copyMakeBorder_caller<uchar, 2> , copyMakeBorder_caller<uchar, 3> , copyMakeBorder_caller<uchar, 4>},
{0/*copyMakeBorder_caller<schar, 1>*/, 0/*copyMakeBorder_caller<schar, 2>*/ , 0/*copyMakeBorder_caller<schar, 3>*/, 0/*copyMakeBorder_caller<schar, 4>*/},
{ copyMakeBorder_caller<ushort, 1> , 0/*copyMakeBorder_caller<ushort, 2>*/, copyMakeBorder_caller<ushort, 3> , copyMakeBorder_caller<ushort, 4>},
{ copyMakeBorder_caller<short, 1> , 0/*copyMakeBorder_caller<short, 2>*/ , copyMakeBorder_caller<short, 3> , copyMakeBorder_caller<short, 4>},
{0/*copyMakeBorder_caller<int, 1>*/, 0/*copyMakeBorder_caller<int, 2>*/ , 0/*copyMakeBorder_caller<int, 3>*/, 0/*copyMakeBorder_caller<int , 4>*/},
{ copyMakeBorder_caller<float, 1> , 0/*copyMakeBorder_caller<float, 2>*/ , copyMakeBorder_caller<float, 3> , copyMakeBorder_caller<float ,4>}
};
caller_t func = callers[src.depth()][src.channels() - 1];
CV_Assert(func != 0);
func(src, dst, top, left, borderType, value, stream);
}
}
#endif /* !defined (HAVE_CUDA) */

View File

@ -40,92 +40,116 @@
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#ifndef HAVE_OPENCV_CUDEV
namespace cv { namespace cuda { namespace device
#error "opencv_cudev is required"
#else
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
using namespace cv::cudev;
namespace
{
namespace imgproc
struct ShiftMap
{
template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, PtrStepSz<T> dst, int top, int left)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
typedef int2 value_type;
typedef int index_type;
if (x < dst.cols && y < dst.rows)
dst.ptr(y)[x] = src(y - top, x - left);
int top;
int left;
__device__ __forceinline__ int2 operator ()(int y, int x) const
{
return make_int2(x - left, y - top);
}
};
template <template <typename> class B, typename T> struct CopyMakeBorderDispatcher
struct ShiftMapSz : ShiftMap
{
int rows, cols;
};
}
namespace cv { namespace cudev {
template <> struct PtrTraits<ShiftMapSz> : PtrTraitsBase<ShiftMapSz, ShiftMap>
{
};
}}
namespace
{
template <typename T, int cn>
void copyMakeBorderImpl(const GpuMat& src, GpuMat& dst, int top, int left, int borderMode, cv::Scalar borderValue, Stream& stream)
{
typedef typename MakeVec<T, cn>::type src_type;
cv::Scalar_<T> borderValue_ = borderValue;
const src_type brdVal = VecTraits<src_type>::make(borderValue_.val);
ShiftMapSz map;
map.top = top;
map.left = left;
map.rows = dst.rows;
map.cols = dst.cols;
switch (borderMode)
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, int top, int left,
const typename VecTraits<T>::elem_type* borderValue, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<T> brd(src.rows, src.cols, VecTraits<T>::make(borderValue));
BorderReader< PtrStep<T>, B<T> > brdSrc(src, brd);
copyMakeBorder<<<grid, block, 0, stream>>>(brdSrc, dst, top, left);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
case cv::BORDER_CONSTANT:
gridCopy(remapPtr(brdConstant(globPtr<src_type>(src), brdVal), map), globPtr<src_type>(dst), stream);
break;
case cv::BORDER_REPLICATE:
gridCopy(remapPtr(brdReplicate(globPtr<src_type>(src)), map), globPtr<src_type>(dst), stream);
break;
case cv::BORDER_REFLECT:
gridCopy(remapPtr(brdReflect(globPtr<src_type>(src)), map), globPtr<src_type>(dst), stream);
break;
case cv::BORDER_WRAP:
gridCopy(remapPtr(brdWrap(globPtr<src_type>(src)), map), globPtr<src_type>(dst), stream);
break;
case cv::BORDER_REFLECT_101:
gridCopy(remapPtr(brdReflect101(globPtr<src_type>(src)), map), globPtr<src_type>(dst), stream);
break;
};
}
}
template <typename T, int cn> void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode,
const T* borderValue, cudaStream_t stream)
{
typedef typename TypeVec<T, cn>::vec_type vec_type;
void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bottom, int left, int right, int borderType, Scalar value, Stream& stream)
{
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int top, int left, int borderMode, cv::Scalar borderValue, Stream& stream);
static const func_t funcs[6][4] =
{
{ copyMakeBorderImpl<uchar , 1> , copyMakeBorderImpl<uchar , 2> , copyMakeBorderImpl<uchar , 3> , copyMakeBorderImpl<uchar , 4> },
{0 /*copyMakeBorderImpl<schar , 1>*/, 0 /*copyMakeBorderImpl<schar , 2>*/, 0 /*copyMakeBorderImpl<schar , 3>*/, 0 /*copyMakeBorderImpl<schar , 4>*/},
{ copyMakeBorderImpl<ushort, 1> , 0 /*copyMakeBorderImpl<ushort, 2>*/, copyMakeBorderImpl<ushort, 3> , copyMakeBorderImpl<ushort, 4> },
{ copyMakeBorderImpl<short , 1> , 0 /*copyMakeBorderImpl<short , 2>*/, copyMakeBorderImpl<short , 3> , copyMakeBorderImpl<short , 4> },
{0 /*copyMakeBorderImpl<int , 1>*/, 0 /*copyMakeBorderImpl<int , 2>*/, 0 /*copyMakeBorderImpl<int , 3>*/, 0 /*copyMakeBorderImpl<int , 4>*/},
{ copyMakeBorderImpl<float , 1> , 0 /*copyMakeBorderImpl<float , 2>*/, copyMakeBorderImpl<float , 3> , copyMakeBorderImpl<float ,4> }
};
typedef void (*caller_t)(const PtrStepSz<vec_type>& src, const PtrStepSz<vec_type>& dst, int top, int left, const T* borderValue, cudaStream_t stream);
GpuMat src = _src.getGpuMat();
static const caller_t callers[5] =
{
CopyMakeBorderDispatcher<BrdConstant, vec_type>::call,
CopyMakeBorderDispatcher<BrdReplicate, vec_type>::call,
CopyMakeBorderDispatcher<BrdReflect, vec_type>::call,
CopyMakeBorderDispatcher<BrdWrap, vec_type>::call,
CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call
};
const int depth = src.depth();
const int cn = src.channels();
callers[borderMode](PtrStepSz<vec_type>(src), PtrStepSz<vec_type>(dst), top, left, borderValue, stream);
}
CV_Assert( depth <= CV_32F && cn <= 4 );
CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
template void copyMakeBorder_gpu<uchar, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<uchar, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<uchar, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<uchar, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
_dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
GpuMat dst = _dst.getGpuMat();
//template void copyMakeBorder_gpu<schar, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
const func_t func = funcs[depth][cn - 1];
template void copyMakeBorder_gpu<ushort, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<ushort, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<ushort, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<ushort, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
template void copyMakeBorder_gpu<short, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<short, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<short, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<short, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
func(src, dst, top, left, borderType, value, stream);
}
//template void copyMakeBorder_gpu<int, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<float, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<float, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<float, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<float, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace cuda { namespace cudev
#endif /* CUDA_DISABLER */
#endif