created wrappers for new NPP functions

removed void integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& stream): it fails with an NPP_NOT_IMPLEMENTED error
updated docs, accuracy and performance tests
Vladislav Vinogradov 2012-02-22 10:00:53 +00:00
parent e426dfc396
commit 2d30480982
37 changed files with 1984 additions and 566 deletions

View File

@ -48,6 +48,17 @@
#ifdef HAVE_CUDA
#include <cuda_runtime.h>
#include <npp.h>
#define CUDART_MINIMUM_REQUIRED_VERSION 4010
#define NPP_MINIMUM_REQUIRED_VERSION 4100
#if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
#error "Insufficient Cuda Runtime library version, please update it."
#endif
#if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
#error "Insufficient NPP version, please update it."
#endif
#endif
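// Sanity check on the composed key above (illustrative note, not part of the patch):
// NPP 4.1.0 gives NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD
// = 4 * 1000 + 1 * 100 + 0 = 4100, which is exactly the minimum required.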
using namespace std;
@ -460,15 +471,17 @@ namespace cv { namespace gpu
namespace
{
//////////////////////////////////////////////////////////////////////////
// Convert
template<int n> struct NPPTypeTraits;
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
template<> struct NPPTypeTraits<CV_8S> { typedef Npp8s npp_type; };
template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
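// Illustration (hypothetical helper, not in the patch): the traits map OpenCV depth
// constants to NPP scalar types at compile time, e.g. NPPTypeTraits<CV_16U>::npp_type
// is Npp16u, so one template can recover the right pointer type for any depth.
template <int DEPTH>
typename NPPTypeTraits<DEPTH>::npp_type* nppPtr(GpuMat& m)
{
    return m.ptr<typename NPPTypeTraits<DEPTH>::npp_type>();
}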
//////////////////////////////////////////////////////////////////////////
// Convert
template<int SDEPTH, int DDEPTH> struct NppConvertFunc
{
@ -494,6 +507,7 @@ namespace
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );
cudaSafeCall( cudaDeviceSynchronize() );
@ -508,6 +522,7 @@ namespace
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );
cudaSafeCall( cudaDeviceSynchronize() );
@ -529,6 +544,14 @@ namespace
typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
};
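// NPP's 8s setters differ from the other depths: the multi-channel variants take an
// Npp8s values[] array, while the C1 variant takes a single Npp8s, hence the two
// extra specializations below.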
template<int SCN> struct NppSetFunc<CV_8S, SCN>
{
typedef NppStatus (*func_ptr)(Npp8s values[], Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI);
};
template<> struct NppSetFunc<CV_8S, 1>
{
typedef NppStatus (*func_ptr)(Npp8s val, Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI);
};
template<int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet
{
@ -613,6 +636,35 @@ namespace
}
};
//////////////////////////////////////////////////////////////////////////
// CopyMasked
template<int SDEPTH> struct NppCopyMaskedFunc
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, src_t* pDst, int nDstStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
};
template<int SDEPTH, typename NppCopyMaskedFunc<SDEPTH>::func_ptr func> struct NppCopyMasked
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
static void copyMasked(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t /*stream*/)
{
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
//////////////////////////////////////////////////////////////////////////
// CudaFuncTable
class CudaFuncTable : public GpuFuncTable
{
public:
@ -631,7 +683,26 @@ namespace
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const
{
::cv::gpu::copyWithMask(src, dst, mask);
CV_Assert(src.size() == dst.size() && src.type() == dst.type());
CV_Assert(src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels()));
typedef void (*caller_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
static const caller_t callers[7][4] =
{
/* 8U */ {NppCopyMasked<CV_8U, nppiCopy_8u_C1MR>::copyMasked, cv::gpu::copyWithMask, NppCopyMasked<CV_8U, nppiCopy_8u_C3MR>::copyMasked, NppCopyMasked<CV_8U, nppiCopy_8u_C4MR>::copyMasked},
/* 8S */ {cv::gpu::copyWithMask, cv::gpu::copyWithMask, cv::gpu::copyWithMask, cv::gpu::copyWithMask},
/* 16U */ {NppCopyMasked<CV_16U, nppiCopy_16u_C1MR>::copyMasked, cv::gpu::copyWithMask, NppCopyMasked<CV_16U, nppiCopy_16u_C3MR>::copyMasked, NppCopyMasked<CV_16U, nppiCopy_16u_C4MR>::copyMasked},
/* 16S */ {NppCopyMasked<CV_16S, nppiCopy_16s_C1MR>::copyMasked, cv::gpu::copyWithMask, NppCopyMasked<CV_16S, nppiCopy_16s_C3MR>::copyMasked, NppCopyMasked<CV_16S, nppiCopy_16s_C4MR>::copyMasked},
/* 32S */ {NppCopyMasked<CV_32S, nppiCopy_32s_C1MR>::copyMasked, cv::gpu::copyWithMask, NppCopyMasked<CV_32S, nppiCopy_32s_C3MR>::copyMasked, NppCopyMasked<CV_32S, nppiCopy_32s_C4MR>::copyMasked},
/* 32F */ {NppCopyMasked<CV_32F, nppiCopy_32f_C1MR>::copyMasked, cv::gpu::copyWithMask, NppCopyMasked<CV_32F, nppiCopy_32f_C3MR>::copyMasked, NppCopyMasked<CV_32F, nppiCopy_32f_C4MR>::copyMasked},
/* 64F */ {cv::gpu::copyWithMask, cv::gpu::copyWithMask, cv::gpu::copyWithMask, cv::gpu::copyWithMask}
};
caller_t func = mask.channels() == src.channels() ? callers[src.depth()][src.channels() - 1] : cv::gpu::copyWithMask;
CV_DbgAssert(func != 0);
func(src, dst, mask, 0);
}
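// Illustrative call path (hypothetical, not in the patch): for CV_16UC1 data with a
// CV_8UC1 mask, src.copyTo(dst, mask) lands here and the table above resolves it to
// NppCopyMasked<CV_16U, nppiCopy_16u_C1MR>::copyMasked; unsupported combinations
// fall back to the cv::gpu::copyWithMask CUDA kernel.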
void convert(const GpuMat& src, GpuMat& dst) const
@ -641,65 +712,65 @@ namespace
{
{
/* 8U -> 8U */ {0, 0, 0, 0},
/* 8U -> 8S */ {::cv::gpu::convertTo, ::cv::gpu::convertTo, ::cv::gpu::convertTo, ::cv::gpu::convertTo},
/* 8U -> 16U */ {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::cvt},
/* 8U -> 16S */ {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::cvt},
/* 8U -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 8U -> 32F */ {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 8U -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}
/* 8U -> 8S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
/* 8U -> 16U */ {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::cvt},
/* 8U -> 16S */ {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::cvt},
/* 8U -> 32S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 8U -> 32F */ {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 8U -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}
},
{
/* 8S -> 8U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 8S -> 8U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 8S -> 8S */ {0,0,0,0},
/* 8S -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 8S -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 8S -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 8S -> 32F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 8S -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}
/* 8S -> 16U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 8S -> 16S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 8S -> 32S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 8S -> 32F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 8S -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}
},
{
/* 16U -> 8U */ {NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::cvt},
/* 16U -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 16U -> 8U */ {NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::cvt},
/* 16U -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 16U -> 16U */ {0,0,0,0},
/* 16U -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 16U -> 32S */ {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 16U -> 32F */ {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 16U -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}
/* 16U -> 16S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 16U -> 32S */ {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 16U -> 32F */ {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 16U -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}
},
{
/* 16S -> 8U */ {NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::cvt},
/* 16S -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 16S -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 16S -> 8U */ {NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::cvt},
/* 16S -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 16S -> 16U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 16S -> 16S */ {0,0,0,0},
/* 16S -> 32S */ {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 16S -> 32F */ {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 16S -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}
/* 16S -> 32S */ {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 16S -> 32F */ {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 16S -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}
},
{
/* 32S -> 8U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 32S -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 32S -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 32S -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 32S -> 8U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 32S -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 32S -> 16U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 32S -> 16S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 32S -> 32S */ {0,0,0,0},
/* 32S -> 32F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 32S -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}
/* 32S -> 32F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 32S -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}
},
{
/* 32F -> 8U */ {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 32F -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 32F -> 16U */ {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 32F -> 16S */ {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 32F -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 32F -> 8U */ {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 32F -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 32F -> 16U */ {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 32F -> 16S */ {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 32F -> 32S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 32F -> 32F */ {0,0,0,0},
/* 32F -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}
/* 32F -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}
},
{
/* 64F -> 8U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 64F -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 64F -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 64F -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 64F -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 64F -> 32F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
/* 64F -> 8U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 64F -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 64F -> 16U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 64F -> 16S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 64F -> 32S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 64F -> 32F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
/* 64F -> 64F */ {0,0,0,0}
}
};
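// Illustrative dispatch (hypothetical, not in the patch): a single-channel
// src.convertTo(dst, CV_16U) on CV_8U data resolves to
// NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::cvt; every cell marked
// cv::gpu::convertTo falls back to the generic CUDA kernel.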
@ -712,7 +783,7 @@ namespace
void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const
{
::cv::gpu::convertTo(src, dst, alpha, beta);
cv::gpu::convertTo(src, dst, alpha, beta);
}
void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const
@ -744,13 +815,13 @@ namespace
typedef void (*caller_t)(GpuMat& src, Scalar s);
static const caller_t callers[7][4] =
{
{NppSet<CV_8U, 1, nppiSet_8u_C1R>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSet<CV_8U, 4, nppiSet_8u_C4R>::set},
{::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo},
{NppSet<CV_16U, 1, nppiSet_16u_C1R>::set, NppSet<CV_16U, 2, nppiSet_16u_C2R>::set, ::cv::gpu::setTo, NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},
{NppSet<CV_16S, 1, nppiSet_16s_C1R>::set, NppSet<CV_16S, 2, nppiSet_16s_C2R>::set, ::cv::gpu::setTo, NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},
{NppSet<CV_32S, 1, nppiSet_32s_C1R>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSet<CV_32S, 4, nppiSet_32s_C4R>::set},
{NppSet<CV_32F, 1, nppiSet_32f_C1R>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSet<CV_32F, 4, nppiSet_32f_C4R>::set},
{::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo}
{NppSet<CV_8U, 1, nppiSet_8u_C1R>::set, cv::gpu::setTo, cv::gpu::setTo, NppSet<CV_8U, 4, nppiSet_8u_C4R>::set},
{NppSet<CV_8S, 1, nppiSet_8s_C1R>::set, NppSet<CV_8S, 2, nppiSet_8s_C2R>::set, NppSet<CV_8S, 3, nppiSet_8s_C3R>::set, NppSet<CV_8S, 4, nppiSet_8s_C4R>::set},
{NppSet<CV_16U, 1, nppiSet_16u_C1R>::set, NppSet<CV_16U, 2, nppiSet_16u_C2R>::set, cv::gpu::setTo, NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},
{NppSet<CV_16S, 1, nppiSet_16s_C1R>::set, NppSet<CV_16S, 2, nppiSet_16s_C2R>::set, cv::gpu::setTo, NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},
{NppSet<CV_32S, 1, nppiSet_32s_C1R>::set, cv::gpu::setTo, cv::gpu::setTo, NppSet<CV_32S, 4, nppiSet_32s_C4R>::set},
{NppSet<CV_32F, 1, nppiSet_32f_C1R>::set, cv::gpu::setTo, cv::gpu::setTo, NppSet<CV_32F, 4, nppiSet_32f_C4R>::set},
{cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo}
};
callers[m.depth()][m.channels() - 1](m, s);
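// Illustrative dispatch (hypothetical, not in the patch): m.setTo(Scalar(-1, 1)) on a
// CV_8SC2 matrix now resolves to NppSet<CV_8S, 2, nppiSet_8s_C2R>::set via the new
// CV_8S row, instead of falling back to cv::gpu::setTo.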
@ -761,13 +832,13 @@ namespace
static const caller_t callers[7][4] =
{
{NppSetMask<CV_8U, 1, nppiSet_8u_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>::set},
{::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo},
{NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::set},
{NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::set},
{NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::set},
{NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::set},
{::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo}
{NppSetMask<CV_8U, 1, nppiSet_8u_C1MR>::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>::set},
{cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo},
{NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::set},
{NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::set},
{NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::set},
{NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::set},
{cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo}
};
callers[m.depth()][m.channels() - 1](m, s, mask);

View File

@ -69,18 +69,14 @@ Performs a mean-shift segmentation of the source image and eliminates small segm
gpu::integral
-----------------
Computes an integral image and a squared integral image.
Computes an integral image.
.. ocv:function:: void gpu::integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null())
.. ocv:function:: void gpu::integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& stream = Stream::Null())
:param src: Source image. Only ``CV_8UC1`` images are supported for now.
:param sum: Integral image containing 32-bit unsigned integer values packed into ``CV_32SC1`` .
:param sqsum: Squared integral image of the ``CV_32FC1`` type.
:param stream: Stream for the asynchronous version.
.. seealso:: :ocv:func:`integral`
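A minimal usage sketch of the remaining overload (hypothetical sizes, not part of this commit)::

    cv::gpu::GpuMat img(480, 640, CV_8UC1), sum;
    cv::gpu::integral(img, sum);   // sum is packed as CV_32SC1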
@ -380,6 +376,22 @@ Converts an image from one color space to another.
gpu::swapChannels
-----------------
Exchanges the color channels of an image in-place.
.. ocv:function:: void gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null())
:param image: Source image. Supports only ``CV_8UC4`` type.
:param dstOrder: Integer array describing how channel values are permuted. The n-th entry of the array contains the number of the channel that is stored in the n-th channel of the output image. E.g., given an RGBA image, dstOrder = [3,2,1,0] converts it to ABGR channel order.
:param stream: Stream for the asynchronous version.
The methods support arbitrary permutations of the original channels, including replication.
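A minimal usage sketch (hypothetical sizes, not part of this commit)::

    // Reverse R and B in place: RGBA -> BGRA.
    cv::gpu::GpuMat img(480, 640, CV_8UC4);
    const int order[4] = {2, 1, 0, 3};
    cv::gpu::swapChannels(img, order);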
gpu::threshold
------------------
Applies a fixed-level threshold to each array element.
@ -489,7 +501,7 @@ Rotates an image around the origin (0,0) and then shifts it.
.. ocv:function:: void gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null())
:param src: Source image. ``CV_8UC1`` and ``CV_8UC4`` types are supported.
:param src: Source image. Supports 1-, 3- or 4-channel images with ``CV_8U`` , ``CV_16U`` or ``CV_32F`` depth.
:param dst: Destination image with the same type as ``src`` . The size is ``dsize`` .
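A minimal usage sketch (hypothetical sizes, not part of this commit)::

    // Rotate a 3-channel 16-bit image by 30 degrees around the origin.
    cv::gpu::GpuMat src(480, 640, CV_16UC3), dst;
    cv::gpu::rotate(src, dst, src.size(), 30.0);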
@ -751,6 +763,38 @@ Performs linear blending of two images.
gpu::alphaComp
-------------------
Composites two images using alpha opacity values contained in each image.
.. ocv:function:: void gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null())
:param img1: First image. Supports ``CV_8UC4`` , ``CV_16UC4`` , ``CV_32SC4`` and ``CV_32FC4`` types.
:param img2: Second image. Must have the same size and the same type as ``img1`` .
:param dst: Destination image.
:param alpha_op: Flag specifying the alpha-blending operation:
* **ALPHA_OVER**
* **ALPHA_IN**
* **ALPHA_OUT**
* **ALPHA_ATOP**
* **ALPHA_XOR**
* **ALPHA_PLUS**
* **ALPHA_OVER_PREMUL**
* **ALPHA_IN_PREMUL**
* **ALPHA_OUT_PREMUL**
* **ALPHA_ATOP_PREMUL**
* **ALPHA_XOR_PREMUL**
* **ALPHA_PLUS_PREMUL**
* **ALPHA_PREMUL**
:param stream: Stream for the asynchronous version.
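A minimal usage sketch (hypothetical sizes, not part of this commit)::

    // Standard "over" compositing of two RGBA images.
    cv::gpu::GpuMat img1(480, 640, CV_8UC4), img2(480, 640, CV_8UC4), dst;
    cv::gpu::alphaComp(img1, img2, dst, cv::gpu::ALPHA_OVER);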
gpu::Canny
-------------------
Finds edges in an image using the [Canny86]_ algorithm.

View File

@ -10,6 +10,7 @@ gpu::meanStdDev
Computes a mean value and a standard deviation of matrix elements.
.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev)
.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
:param mtx: Source matrix. ``CV_8UC1`` matrices are supported for now.
@ -17,6 +18,8 @@ Computes a mean value and a standard deviation of matrix elements.
:param stddev: Standard deviation value.
:param buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
.. seealso:: :ocv:func:`meanStdDev`
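A minimal usage sketch of the buffered overload (hypothetical sizes, not part of this commit)::

    cv::gpu::GpuMat src(480, 640, CV_8UC1), buf;
    cv::Scalar mean, stddev;
    cv::gpu::meanStdDev(src, mean, stddev, buf);   // buf is reused across calls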

View File

@ -63,7 +63,7 @@ Flips a 2D matrix around vertical, horizontal, or both axes.
.. ocv:function:: void gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream = Stream::Null())
:param src: Source matrix. Only ``CV_8UC1`` and ``CV_8UC4`` matrices are supported for now.
:param src: Source matrix. Supports 1-, 3- and 4-channel images with ``CV_8U``, ``CV_16U``, ``CV_32S`` or ``CV_32F`` depth.
:param dst: Destination matrix.

View File

@ -139,6 +139,50 @@ where ``I`` is a multi-dimensional index of array elements. In case of multi-cha
gpu::abs
------------
Computes the absolute value of each matrix element.
.. ocv:function:: void gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
:param src: Source matrix. Supports ``CV_16S`` and ``CV_32F`` depth.
:param dst: Destination matrix with the same size and type as ``src`` .
:param stream: Stream for the asynchronous version.
.. seealso:: :ocv:func:`abs`
gpu::sqr
------------
Computes the square of each matrix element.
.. ocv:function:: void gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
:param src: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth.
:param dst: Destination matrix with the same size and type as ``src`` .
:param stream: Stream for the asynchronous version.
gpu::sqrt
------------
Computes the square root of each matrix element.
.. ocv:function:: void gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
:param src: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth.
:param dst: Destination matrix with the same size and type as ``src`` .
:param stream: Stream for the asynchronous version.
.. seealso:: :ocv:func:`sqrt`
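A minimal sketch covering the three new functions above (hypothetical sizes, not part of this commit)::

    cv::gpu::GpuMat src(480, 640, CV_32FC1), a, s, r;
    cv::gpu::abs(src, a);
    cv::gpu::sqr(src, s);
    cv::gpu::sqrt(src, r);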
gpu::exp
@ -147,7 +191,7 @@ Computes an exponent of each matrix element.
.. ocv:function:: void gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
:param src: Source matrix. ``CV_32FC1`` matrixes are supported for now.
:param src: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth.
:param dst: Destination matrix with the same size and type as ``src`` .
@ -157,6 +201,22 @@ Computes an exponent of each matrix element.
gpu::log
------------
Computes the natural logarithm of the absolute value of each matrix element.
.. ocv:function:: void gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
:param src: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth.
:param dst: Destination matrix with the same size and type as ``src`` .
:param stream: Stream for the asynchronous version.
.. seealso:: :ocv:func:`log`
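A minimal sketch (hypothetical sizes, not part of this commit)::

    cv::gpu::GpuMat src(480, 640, CV_32FC1), e, l;
    cv::gpu::exp(src, e);
    cv::gpu::log(src, l);   // natural log of the absolute value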
gpu::pow
------------
Raises every matrix element to a power.
@ -181,22 +241,6 @@ The function ``pow`` raises every element of the input matrix to ``p`` :
gpu::log
------------
Computes a natural logarithm of absolute value of each matrix element.
.. ocv:function:: void gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
:param src: Source matrix. ``CV_32FC1`` matrixes are supported for now.
:param dst: Destination matrix with the same size and type as ``src`` .
:param stream: Stream for the asynchronous version.
.. seealso:: :ocv:func:`log`
gpu::absdiff
----------------
Computes per-element absolute difference of two matrices (or of a matrix and scalar).
@ -262,9 +306,10 @@ Performs a per-element bitwise inversion.
gpu::bitwise_or
-------------------
Performs a per-element bitwise disjunction of two matrices.
Performs a per-element bitwise disjunction of two matrices or of a matrix and a scalar.
.. ocv:function:: void gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null())
.. ocv:function:: void gpu::bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
:param src1: First source matrix.
@ -280,9 +325,10 @@ Performs a per-element bitwise disjunction of two matrices.
gpu::bitwise_and
--------------------
Performs a per-element bitwise conjunction of two matrices.
Performs a per-element bitwise conjunction of two matrices or of a matrix and a scalar.
.. ocv:function:: void gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null())
.. ocv:function:: void gpu::bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
:param src1: First source matrix.
@ -298,9 +344,10 @@ Performs a per-element bitwise conjunction of two matrices.
gpu::bitwise_xor
--------------------
Performs a per-element bitwise ``exclusive or`` operation of two matrices.
Performs a per-element bitwise ``exclusive or`` operation of two matrices or of a matrix and a scalar.
.. ocv:function:: void gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null())
.. ocv:function:: void gpu::bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
:param src1: First source matrix.
@ -314,6 +361,38 @@ Performs a per-element bitwise ``exclusive or`` operation of two matrices.
gpu::rshift
--------------------
Performs a pixel-by-pixel right shift of an image by a constant value.
.. ocv:function:: void gpu::rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
:param src: Source matrix. Supports 1-, 3- and 4-channel images with integer elements.
:param sc: Constant values, one per channel.
:param dst: Destination matrix with the same size and type as ``src`` .
:param stream: Stream for the asynchronous version.
gpu::lshift
--------------------
Performs a pixel-by-pixel left shift of an image by a constant value.
.. ocv:function:: void gpu::lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
:param src: Source matrix. Supports 1-, 3- and 4-channel images with ``CV_8U`` , ``CV_16U`` or ``CV_32S`` depth.
:param sc: Constant values, one per channel.
:param dst: Destination matrix with the same size and type as ``src`` .
:param stream: Stream for the asynchronous version.
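A minimal sketch covering both shifts (hypothetical sizes, not part of this commit)::

    cv::gpu::GpuMat src(480, 640, CV_32SC1), r, l;
    cv::gpu::rshift(src, cv::Scalar::all(2), r);   // per-pixel src >> 2
    cv::gpu::lshift(src, cv::Scalar::all(2), l);   // per-pixel src << 2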
gpu::min
------------
Computes the per-element minimum of two matrices (or a matrix and a scalar).

View File

@ -498,7 +498,7 @@ CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha,
CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst, Stream& stream = Stream::Null());
//! reverses the order of the rows, columns or both in a matrix
//! supports CV_8UC1, CV_8UC4 types
//! supports 1-, 3- and 4-channel images with CV_8U, CV_16U, CV_32S or CV_32F depth
CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode, Stream& stream = Stream::Null());
//! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
@ -586,20 +586,32 @@ CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& str
//! computes element-wise absolute difference of array and scalar (c = abs(a - s))
CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c, Stream& stream = Stream::Null());
//! computes absolute value of each matrix element
//! supports CV_16S and CV_32F depth
CV_EXPORTS void abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
//! computes square of each pixel in an image
//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
CV_EXPORTS void sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
//! computes square root of each pixel in an image
//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
CV_EXPORTS void sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
//! computes exponent of each matrix element (b = e**a)
//! supports only CV_32FC1 type
//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
CV_EXPORTS void log(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
//! computes power of each matrix element:
// (dst(i,j) = pow( src(i,j) , power), if src.type() is integer
// (dst(i,j) = pow(fabs(src(i,j)), power), otherwise
//! supports all, except depth == CV_64F
CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null());
//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
//! supports only CV_32FC1 type
CV_EXPORTS void log(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
//! compares elements of two arrays (c = a <cmpop> b)
CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null());
@ -608,12 +620,29 @@ CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=G
//! calculates per-element bit-wise disjunction of two arrays
CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
//! calculates per-element bit-wise disjunction of array and scalar
//! supports 1-, 3- and 4-channel images with CV_8U, CV_16U or CV_32S depth
CV_EXPORTS void bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
//! calculates per-element bit-wise conjunction of two arrays
CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
//! calculates per-element bit-wise conjunction of array and scalar
//! supports 1-, 3- and 4-channel images with CV_8U, CV_16U or CV_32S depth
CV_EXPORTS void bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
//! calculates per-element bit-wise "exclusive or" operation
CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
//! calculates per-element bit-wise "exclusive or" of array and scalar
//! supports 1-, 3- and 4-channel images with CV_8U, CV_16U or CV_32S depth
CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
//! pixel by pixel right shift of an image by a constant value
//! supports 1-, 3- and 4-channel images with integer elements
CV_EXPORTS void rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
//! pixel by pixel left shift of an image by a constant value
//! supports 1-, 3- and 4-channel images with CV_8U, CV_16U or CV_32S depth
CV_EXPORTS void lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
//! computes per-element minimum of two arrays (dst = min(src1, src2))
CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());
@ -627,6 +656,13 @@ CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream&
//! computes per-element maximum of array and scalar (dst = max(src1, src2))
CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null());
enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL,
ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL};
//! Composite two images using alpha opacity values contained in each image
//! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types
CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null());
////////////////////////////// Image processing //////////////////////////////
@ -665,6 +701,13 @@ CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat&
//! converts image from one color space to another
CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null());
//! swap channels
//! dstOrder - Integer array describing how channel values are permuted. The n-th entry
//! of the array contains the number of the channel that is stored in the n-th channel of
//! the output image. E.g., given an RGBA image, dstOrder = [3,2,1,0] converts it to ABGR
//! channel order.
CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null());
//! applies fixed threshold to the image
CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());
@ -692,9 +735,9 @@ CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat
CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
//! rotate 8bit single or four channel image
//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
//! supports CV_8UC1, CV_8UC4 types
//! rotates an image around the origin (0,0) and then shifts it
//! supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
//! supports 1-, 3- or 4-channel images with CV_8U, CV_16U or CV_32F depth
CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
@ -706,15 +749,9 @@ CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bott
//! sum will have CV_32S type, but will contain unsigned int values
//! supports only CV_8UC1 source type
CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null());
//! buffered version
CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& stream = Stream::Null());
//! computes the integral image and integral for the squared image
//! sum will have CV_32S type, sqsum - CV32F type
//! supports only CV_8UC1 source type
CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& stream = Stream::Null());
//! computes squared integral image
//! result matrix will have 64F type, but will contain 64U values
//! supports source images of 8UC1 type only
@ -859,6 +896,8 @@ private:
//! computes mean value and standard deviation of all or selected array elements
//! supports only CV_8UC1 type
CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);
//! buffered version
CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
//! computes norm of array
//! supports NORM_INF, NORM_L1, NORM_L2
@ -939,10 +978,16 @@ CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& c
//////////////////////////////// Image Labeling ////////////////////////////////
//!performs labeling via graph cuts
//!performs labeling via graph cuts of a 2D regular 4-connected graph.
CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels,
GpuMat& buf, Stream& stream = Stream::Null());
//!performs labeling via graph cuts of a 2D regular 8-connected graph.
CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight,
GpuMat& labels,
GpuMat& buf, Stream& stream = Stream::Null());
////////////////////////////////// Histograms //////////////////////////////////
//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type.

View File

@ -59,7 +59,7 @@ GPU_PERF_TEST(Flip, cv::gpu::DeviceInfo, cv::Size, perf::MatType, FlipCode)
INSTANTIATE_TEST_CASE_P(Arithm, Flip, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(CV_8UC1, CV_8UC4),
testing::Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
testing::Values((int) HORIZONTAL_AXIS, (int) VERTICAL_AXIS, (int) BOTH_AXIS)));
//////////////////////////////////////////////////////////////////////
@ -363,6 +363,33 @@ INSTANTIATE_TEST_CASE_P(Arithm, BitwiseAnd, testing::Combine(
GPU_TYPICAL_MAT_SIZES,
testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
GPU_PERF_TEST(BitwiseScalarAnd, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat src_host(size, type);
declare.in(src_host, WARMUP_RNG);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::Scalar sc = cv::Scalar(123, 123, 123, 123);
TEST_CYCLE()
{
cv::gpu::bitwise_and(src, sc, dst);
}
}
INSTANTIATE_TEST_CASE_P(Arithm, BitwiseScalarAnd, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4)));
//////////////////////////////////////////////////////////////////////
// Min
@ -411,10 +438,11 @@ GPU_PERF_TEST(MeanStdDev, cv::gpu::DeviceInfo, cv::Size)
cv::gpu::GpuMat src(src_host);
cv::Scalar mean;
cv::Scalar stddev;
cv::gpu::GpuMat buf;
TEST_CYCLE()
{
cv::gpu::meanStdDev(src, mean, stddev);
cv::gpu::meanStdDev(src, mean, stddev, buf);
}
}

View File

@ -201,7 +201,7 @@ GPU_PERF_TEST(CvtColor, cv::gpu::DeviceInfo, cv::Size, perf::MatType, CvtColorIn
declare.in(src_host, WARMUP_RNG);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst(size, CV_MAKETYPE(type, info.dcn));
cv::gpu::GpuMat dst;
TEST_CYCLE()
{
@ -218,6 +218,32 @@ INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine(
CvtColorInfo(4, 4, cv::COLOR_BGR2XYZ), CvtColorInfo(4, 4, cv::COLOR_BGR2YCrCb), CvtColorInfo(4, 4, cv::COLOR_YCrCb2BGR),
CvtColorInfo(4, 4, cv::COLOR_BGR2HSV), CvtColorInfo(4, 4, cv::COLOR_HSV2BGR))));
//////////////////////////////////////////////////////////////////////
// SwapChannels
GPU_PERF_TEST(SwapChannels, cv::gpu::DeviceInfo, cv::Size)
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::Size size = GET_PARAM(1);
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat src_host(size, CV_8UC4);
declare.in(src_host, WARMUP_RNG);
cv::gpu::GpuMat src(src_host);
const int dstOrder[] = {2, 1, 0, 3};
TEST_CYCLE()
{
cv::gpu::swapChannels(src, dstOrder);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, SwapChannels, testing::Combine(ALL_DEVICES, GPU_TYPICAL_MAT_SIZES));
//////////////////////////////////////////////////////////////////////
// Threshold
@ -457,7 +483,7 @@ GPU_PERF_TEST(Rotate, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolatio
INSTANTIATE_TEST_CASE_P(ImgProc, Rotate, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(CV_8UC1, CV_8UC4),
testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
//////////////////////////////////////////////////////////////////////
@ -519,33 +545,6 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES));
//////////////////////////////////////////////////////////////////////
// IntegralBoth
GPU_PERF_TEST(IntegralBoth, cv::gpu::DeviceInfo, cv::Size)
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::Size size = GET_PARAM(1);
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat src_host(size, CV_8UC1);
declare.in(src_host, WARMUP_RNG);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat sum, sqsum;
TEST_CYCLE()
{
cv::gpu::integral(src, sum, sqsum);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, IntegralBoth, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES));
//////////////////////////////////////////////////////////////////////
// IntegralSqr
@ -849,6 +848,39 @@ INSTANTIATE_TEST_CASE_P(ImgProc, BlendLinear, testing::Combine(
GPU_TYPICAL_MAT_SIZES,
testing::Values(CV_8UC1, CV_32FC1)));
//////////////////////////////////////////////////////////////////////
// AlphaComp
GPU_PERF_TEST(AlphaComp, cv::gpu::DeviceInfo, cv::Size, perf::MatType, AlphaOp)
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int alpha_op = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img1_host(size, type);
cv::Mat img2_host(size, type);
declare.in(img1_host, img2_host, WARMUP_RNG);
cv::gpu::GpuMat img1(img1_host);
cv::gpu::GpuMat img2(img2_host);
cv::gpu::GpuMat dst;
TEST_CYCLE()
{
cv::gpu::alphaComp(img1, img2, dst, alpha_op);
}
}
INSTANTIATE_TEST_CASE_P(ImgProc, AlphaComp, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(CV_8UC4, CV_16UC4, CV_32SC4, CV_32FC4),
testing::Values((int)cv::gpu::ALPHA_OVER, (int)cv::gpu::ALPHA_IN, (int)cv::gpu::ALPHA_OUT, (int)cv::gpu::ALPHA_ATOP, (int)cv::gpu::ALPHA_XOR, (int)cv::gpu::ALPHA_PLUS, (int)cv::gpu::ALPHA_OVER_PREMUL, (int)cv::gpu::ALPHA_IN_PREMUL, (int)cv::gpu::ALPHA_OUT_PREMUL, (int)cv::gpu::ALPHA_ATOP_PREMUL, (int)cv::gpu::ALPHA_XOR_PREMUL, (int)cv::gpu::ALPHA_PLUS_PREMUL, (int)cv::gpu::ALPHA_PREMUL)));
//////////////////////////////////////////////////////////////////////
// Canny

View File

@ -11,7 +11,7 @@ int main(int argc, char **argv)
#else
int main(int argc, char** argv)
int main()
{
printf("OpenCV was built without CUDA support\n");
return 0;

View File

@ -11,6 +11,7 @@ CV_ENUM(FlipCode, HORIZONTAL_AXIS, VERTICAL_AXIS, BOTH_AXIS)
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
CV_ENUM(MatchMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2)
CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv::gpu::ALPHA_ATOP, cv::gpu::ALPHA_XOR, cv::gpu::ALPHA_PLUS, cv::gpu::ALPHA_OVER_PREMUL, cv::gpu::ALPHA_IN_PREMUL, cv::gpu::ALPHA_OUT_PREMUL, cv::gpu::ALPHA_ATOP_PREMUL, cv::gpu::ALPHA_XOR_PREMUL, cv::gpu::ALPHA_PLUS_PREMUL, cv::gpu::ALPHA_PREMUL)
struct CvtColorInfo
{

View File

@ -52,8 +52,6 @@ void cv::gpu::gemm(const GpuMat&, const GpuMat&, double, const GpuMat&, double,
void cv::gpu::transpose(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::flip(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::exp(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::log(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::magnitude(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::magnitudeSqr(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
@ -89,9 +87,9 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
CV_Assert(src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2);
CV_Assert(src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()));
bool tr1 = flags & GEMM_1_T;
bool tr2 = flags & GEMM_2_T;
bool tr3 = flags & GEMM_3_T;
bool tr1 = (flags & GEMM_1_T) != 0;
bool tr2 = (flags & GEMM_2_T) != 0;
bool tr3 = (flags & GEMM_3_T) != 0;
Size src1Size = tr1 ? Size(src1.rows, src1.cols) : src1.size();
Size src2Size = tr2 ? Size(src2.rows, src2.cols) : src2.size();
@ -243,35 +241,66 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
////////////////////////////////////////////////////////////////////////
// flip
void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& s)
namespace
{
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
template<int DEPTH> struct NppTypeTraits;
template<> struct NppTypeTraits<CV_8U> { typedef Npp8u npp_t; };
template<> struct NppTypeTraits<CV_8S> { typedef Npp8s npp_t; };
template<> struct NppTypeTraits<CV_16U> { typedef Npp16u npp_t; };
template<> struct NppTypeTraits<CV_16S> { typedef Npp16s npp_t; };
template<> struct NppTypeTraits<CV_32S> { typedef Npp32s npp_t; };
template<> struct NppTypeTraits<CV_32F> { typedef Npp32f npp_t; };
template<> struct NppTypeTraits<CV_64F> { typedef Npp64f npp_t; };
dst.create( src.size(), src.type() );
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
cudaStream_t stream = StreamAccessor::getStream(s);
NppStreamHandler h(stream);
if (src.type() == CV_8UC1)
template <int DEPTH> struct NppMirrorFunc
{
nppSafeCall( nppiMirror_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
}
else
{
nppSafeCall( nppiMirror_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
}
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
typedef NppStatus (*func_t)(const npp_t* pSrc, int nSrcStep, npp_t* pDst, int nDstStep, NppiSize oROI, NppiAxis flip);
};
template <int DEPTH, typename NppMirrorFunc<DEPTH>::func_t func> struct NppMirror
{
typedef typename NppMirrorFunc<DEPTH>::npp_t npp_t;
static void call(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream)
{
NppStreamHandler h(stream);
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step),
dst.ptr<npp_t>(), static_cast<int>(dst.step), sz,
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}
void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream)
{
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{NppMirror<CV_8U, nppiMirror_8u_C1R>::call, 0, NppMirror<CV_8U, nppiMirror_8u_C3R>::call, NppMirror<CV_8U, nppiMirror_8u_C4R>::call},
{0,0,0,0},
{NppMirror<CV_16U, nppiMirror_16u_C1R>::call, 0, NppMirror<CV_16U, nppiMirror_16u_C3R>::call, NppMirror<CV_16U, nppiMirror_16u_C4R>::call},
{0,0,0,0},
{NppMirror<CV_32S, nppiMirror_32s_C1R>::call, 0, NppMirror<CV_32S, nppiMirror_32s_C3R>::call, NppMirror<CV_32S, nppiMirror_32s_C4R>::call},
{NppMirror<CV_32F, nppiMirror_32f_C1R>::call, 0, NppMirror<CV_32F, nppiMirror_32f_C3R>::call, NppMirror<CV_32F, nppiMirror_32f_C4R>::call}
};
CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F);
CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
dst.create(src.size(), src.type());
funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream));
}
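// Illustrative dispatch (hypothetical, not in the patch): flipping a CV_32FC3 image
// with flip(src, dst, -1) resolves to NppMirror<CV_32F, nppiMirror_32f_C3R>::call
// with NPP_BOTH_AXIS; the zero cells (CV_8S, CV_16S, 2-channel) stay unsupported.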
////////////////////////////////////////////////////////////////////////
@ -340,52 +369,6 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
// exp
void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& s)
{
CV_Assert(src.type() == CV_32FC1);
dst.create(src.size(), src.type());
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
cudaStream_t stream = StreamAccessor::getStream(s);
NppStreamHandler h(stream);
nppSafeCall( nppiExp_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
// log
void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& s)
{
CV_Assert(src.type() == CV_32FC1);
dst.create(src.size(), src.type());
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
cudaStream_t stream = StreamAccessor::getStream(s);
NppStreamHandler h(stream);
nppSafeCall( nppiLn_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
// NPP magnitude

View File

@ -48,6 +48,7 @@ using namespace cv::gpu;
#if !defined (HAVE_CUDA)
void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, Stream&) { throw_nogpu(); }
void cv::gpu::swapChannels(GpuMat&, const int[], Stream&) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */
@ -1423,4 +1424,19 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream
func(src, dst, dcn, stream);
}
void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s)
{
CV_Assert(image.type() == CV_8UC4);
cudaStream_t stream = StreamAccessor::getStream(s);
NppStreamHandler h(stream);
NppiSize sz;
sz.width = image.cols;
sz.height = image.rows;
nppSafeCall( nppiSwapChannels_8u_C4IR(image.ptr<Npp8u>(), static_cast<int>(image.step), sz, dstOrder) );
}
#endif /* !defined (HAVE_CUDA) */

View File

@ -209,7 +209,7 @@ namespace cv { namespace gpu { namespace device
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__, "bilateral_filter_caller");
}
if (stream != 0)
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}

View File

@ -1104,9 +1104,9 @@ namespace cv { namespace gpu { namespace device
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)dst, op, WithOutMask(), stream);
}
template void absdiff_gpu<uchar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
//template void absdiff_gpu<uchar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
template void absdiff_gpu<schar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
template void absdiff_gpu<ushort>(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
//template void absdiff_gpu<ushort>(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
template void absdiff_gpu<short >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
template void absdiff_gpu<int >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
//template void absdiff_gpu<float >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);

File diff suppressed because it is too large

View File

@ -50,7 +50,7 @@ using namespace cv::gpu;
Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>&, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int, GpuMat& buf) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int, GpuMat&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<BaseRowFilter_GPU> cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }
Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }
Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }

View File

@ -45,12 +45,41 @@
#if !defined (HAVE_CUDA)
void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */
namespace
{
typedef NppStatus (*init_func_t)(NppiSize oSize, NppiGraphcutState** ppState, Npp8u* pDeviceMem);
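// RAII wrapper around the NPP graphcut state: the constructor runs the supplied
// init function, and the destructor guarantees nppiGraphcutFree() is called even
// if a later NPP call throws via nppSafeCall.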
class NppiGraphcutStateHandler
{
public:
NppiGraphcutStateHandler(NppiSize sznpp, Npp8u* pDeviceMem, const init_func_t func)
{
nppSafeCall( func(sznpp, &pState, pDeviceMem) );
}
~NppiGraphcutStateHandler()
{
nppSafeCall( nppiGraphcutFree(pState) );
}
operator NppiGraphcutState*()
{
return pState;
}
private:
NppiGraphcutState* pState;
};
}
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s)
{
Size src_size = terminals.size();
CV_Assert(terminals.type() == CV_32S);
CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
CV_Assert(leftTransp.type() == CV_32S);
@ -70,30 +99,76 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
int bufsz;
nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) );
if ((size_t)bufsz > buf.cols * buf.rows * buf.elemSize())
buf.create(1, bufsz, CV_8U);
ensureSizeIsEnough(1, bufsz, CV_8U, buf);
cudaStream_t stream = StreamAccessor::getStream(s);
NppStreamHandler h(stream);
#if CUDART_VERSION > 4000
NppiGraphcutState* pState;
nppSafeCall( nppiGraphcutInitAlloc(sznpp, &pState, buf.ptr<Npp8u>()) );
NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcutInitAlloc);
nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), pState) );
nppSafeCall( nppiGraphcutFree(pState) );
#else
nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), buf.ptr<Npp8u>()) );
#endif
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight, GpuMat& labels, GpuMat& buf, Stream& s)
{
Size src_size = terminals.size();
CV_Assert(terminals.type() == CV_32S);
CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
CV_Assert(leftTransp.type() == CV_32S);
CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
CV_Assert(rightTransp.type() == CV_32S);
CV_Assert(top.size() == src_size);
CV_Assert(top.type() == CV_32S);
CV_Assert(topLeft.size() == src_size);
CV_Assert(topLeft.type() == CV_32S);
CV_Assert(topRight.size() == src_size);
CV_Assert(topRight.type() == CV_32S);
CV_Assert(bottom.size() == src_size);
CV_Assert(bottom.type() == CV_32S);
CV_Assert(bottomLeft.size() == src_size);
CV_Assert(bottomLeft.type() == CV_32S);
CV_Assert(bottomRight.size() == src_size);
CV_Assert(bottomRight.type() == CV_32S);
labels.create(src_size, CV_8U);
NppiSize sznpp;
sznpp.width = src_size.width;
sznpp.height = src_size.height;
int bufsz;
nppSafeCall( nppiGraphcut8GetSize(sznpp, &bufsz) );
ensureSizeIsEnough(1, bufsz, CV_8U, buf);
cudaStream_t stream = StreamAccessor::getStream(s);
NppStreamHandler h(stream);
NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcut8InitAlloc);
nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(),
top.ptr<Npp32s>(), topLeft.ptr<Npp32s>(), topRight.ptr<Npp32s>(),
bottom.ptr<Npp32s>(), bottomLeft.ptr<Npp32s>(), bottomRight.ptr<Npp32s>(),
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
#endif /* !defined (HAVE_CUDA) */
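For illustration, a hedged usage sketch of the 8-neighborhood overload. The sizes and edge-weight contents are placeholders, the shapes follow the CV_Assert checks above, and the call assumes the default null stream:
int rows = 480, cols = 640;  // illustrative
cv::gpu::GpuMat terminals(rows, cols, CV_32S);
cv::gpu::GpuMat top(rows, cols, CV_32S), topLeft(rows, cols, CV_32S), topRight(rows, cols, CV_32S);
cv::gpu::GpuMat bottom(rows, cols, CV_32S), bottomLeft(rows, cols, CV_32S), bottomRight(rows, cols, CV_32S);
cv::gpu::GpuMat leftTransp(cols, rows, CV_32S), rightTransp(cols, rows, CV_32S);  // transposed shapes, per the asserts
cv::gpu::GpuMat labels, buf;  // buf is grown once via ensureSizeIsEnough and can be reused across calls
cv::gpu::graphcut(terminals, leftTransp, rightTransp, top, topLeft, topRight,
                  bottom, bottomLeft, bottomRight, labels, buf);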

View File

@ -62,7 +62,6 @@ void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float,
void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_nogpu(); }
void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::integral(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::columnSum(const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_nogpu(); }
@ -91,7 +90,7 @@ void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, f
void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int, Stream&) { throw_nogpu(); }
void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); }
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&, Stream& stream) { throw_nogpu(); }
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&, Stream&) { throw_nogpu(); }
void cv::gpu::pyrDown(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
void cv::gpu::pyrUp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); }
@ -780,44 +779,78 @@ void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K,
////////////////////////////////////////////////////////////////////////
// rotate
void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& s)
{
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
namespace
{
template<int DEPTH> struct NppTypeTraits;
template<> struct NppTypeTraits<CV_8U> { typedef Npp8u npp_t; };
template<> struct NppTypeTraits<CV_8S> { typedef Npp8s npp_t; };
template<> struct NppTypeTraits<CV_16U> { typedef Npp16u npp_t; };
template<> struct NppTypeTraits<CV_16S> { typedef Npp16s npp_t; };
template<> struct NppTypeTraits<CV_32S> { typedef Npp32s npp_t; };
template<> struct NppTypeTraits<CV_32F> { typedef Npp32f npp_t; };
template<> struct NppTypeTraits<CV_64F> { typedef Npp64f npp_t; };
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
template <int DEPTH> struct NppRotateFunc
{
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
typedef NppStatus (*func_t)(const npp_t* pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI,
npp_t* pDst, int nDstStep, NppiRect oDstROI,
double nAngle, double nShiftX, double nShiftY, int eInterpolation);
};
template <int DEPTH, typename NppRotateFunc<DEPTH>::func_t func> struct NppRotate
{
typedef typename NppRotateFunc<DEPTH>::npp_t npp_t;
static void call(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream)
{
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
NppStreamHandler h(stream);
NppiSize srcsz;
srcsz.height = src.rows;
srcsz.width = src.cols;
NppiRect srcroi;
srcroi.x = srcroi.y = 0;
srcroi.height = src.rows;
srcroi.width = src.cols;
NppiRect dstroi;
dstroi.x = dstroi.y = 0;
dstroi.height = dst.rows;
dstroi.width = dst.cols;
nppSafeCall( func(src.ptr<npp_t>(), srcsz, static_cast<int>(src.step), srcroi,
dst.ptr<npp_t>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}
void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
{
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{NppRotate<CV_8U, nppiRotate_8u_C1R>::call, 0, NppRotate<CV_8U, nppiRotate_8u_C3R>::call, NppRotate<CV_8U, nppiRotate_8u_C4R>::call},
{0,0,0,0},
{NppRotate<CV_16U, nppiRotate_16u_C1R>::call, 0, NppRotate<CV_16U, nppiRotate_16u_C3R>::call, NppRotate<CV_16U, nppiRotate_16u_C4R>::call},
{0,0,0,0},
{0,0,0,0},
{NppRotate<CV_32F, nppiRotate_32f_C1R>::call, 0, NppRotate<CV_32F, nppiRotate_32f_C3R>::call, NppRotate<CV_32F, nppiRotate_32f_C4R>::call}
};
CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
dst.create(dsize, src.type());
NppiSize srcsz;
srcsz.height = src.rows;
srcsz.width = src.cols;
NppiRect srcroi;
srcroi.x = srcroi.y = 0;
srcroi.height = src.rows;
srcroi.width = src.cols;
NppiRect dstroi;
dstroi.x = dstroi.y = 0;
dstroi.height = dst.rows;
dstroi.width = dst.cols;
cudaStream_t stream = StreamAccessor::getStream(s);
NppStreamHandler h(stream);
if (src.type() == CV_8UC1)
{
nppSafeCall( nppiRotate_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
}
else
{
nppSafeCall( nppiRotate_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
}
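A brief usage sketch of the widened dispatch: any depth/channel pair with a non-null entry in the funcs table is now accepted, not just CV_8UC1/CV_8UC4. The values here are illustrative:
cv::gpu::GpuMat src(480, 640, CV_32FC3);  // e.g. 3-channel float, rejected by the old asserts
cv::gpu::GpuMat dst;
cv::gpu::rotate(src, dst, src.size(), 30.0, 0.0, 0.0, cv::INTER_CUBIC);  // default stream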
////////////////////////////////////////////////////////////////////////
@ -857,30 +890,6 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
cudaSafeCall( cudaDeviceSynchronize() );
}
void cv::gpu::integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& s)
{
CV_Assert(src.type() == CV_8UC1);
int width = src.cols + 1, height = src.rows + 1;
sum.create(height, width, CV_32S);
sqsum.create(height, width, CV_32F);
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
cudaStream_t stream = StreamAccessor::getStream(s);
NppStreamHandler h(stream);
nppSafeCall( nppiSqrIntegral_8u32s32f_C1R(const_cast<Npp8u*>(src.ptr<Npp8u>()), static_cast<int>(src.step),
sum.ptr<Npp32s>(), static_cast<int>(sum.step), sqsum.ptr<Npp32f>(), static_cast<int>(sqsum.step), sz, 0, 0.0f, height) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
//////////////////////////////////////////////////////////////////////////////
// sqrIntegral
@ -935,7 +944,6 @@ void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)
void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)
{
#if CUDART_VERSION > 4000
CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_64FC1);
dst.create(src.size(), CV_32FC1);
@ -959,31 +967,6 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
#else
CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_32FC1);
dst.create(src.size(), CV_32FC1);
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
NppiRect nppRect;
nppRect.height = rect.height;
nppRect.width = rect.width;
nppRect.x = rect.x;
nppRect.y = rect.y;
cudaStream_t stream = StreamAccessor::getStream(s);
NppStreamHandler h(stream);
nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp32f>(), static_cast<int>(sqr.step),
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
#endif
}
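Since the sum+sqsum overload of integral is gone (it failed with NPP_NOT_IMPLEMENTED, per the commit message), the two inputs rectStdDev expects on CUDA >= 4.1 are produced separately. A hedged sketch with placeholder sizes:
cv::gpu::GpuMat src(480, 640, CV_8UC1), sum, sqsum, dst;
cv::gpu::integral(src, sum);        // CV_32SC1 integral image
cv::gpu::sqrIntegral(src, sqsum);   // CV_64FC1 squared integral image
cv::gpu::rectStdDev(sum, sqsum, dst, cv::Rect(0, 0, 21, 21));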
@ -992,25 +975,19 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
namespace
{
template<int n> struct NPPTypeTraits;
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize);
typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize);
template<int SDEPTH> struct NppHistogramEvenFuncC1
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef typename NppTypeTraits<SDEPTH>::npp_t src_t;
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist,
int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer);
};
template<int SDEPTH> struct NppHistogramEvenFuncC4
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef typename NppTypeTraits<SDEPTH>::npp_t src_t;
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI,
Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer);
@ -1079,7 +1056,7 @@ namespace
template<int SDEPTH> struct NppHistogramRangeFuncC1
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef typename NppTypeTraits<SDEPTH>::npp_t src_t;
typedef Npp32s level_t;
enum {LEVEL_TYPE_CODE=CV_32SC1};
@ -1097,7 +1074,7 @@ namespace
};
template<int SDEPTH> struct NppHistogramRangeFuncC4
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef typename NppTypeTraits<SDEPTH>::npp_t src_t;
typedef Npp32s level_t;
enum {LEVEL_TYPE_CODE=CV_32SC1};

View File

@ -171,8 +171,8 @@ bool cv::gpu::DeviceInfo::supports(cv::gpu::FeatureSet) const { throw_nogpu(); r
bool cv::gpu::DeviceInfo::isCompatible() const { throw_nogpu(); return false; }
void cv::gpu::DeviceInfo::query() { throw_nogpu(); }
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_nogpu(); }
void cv::gpu::printCudaDeviceInfo(int device) { throw_nogpu(); }
void cv::gpu::printShortCudaDeviceInfo(int device) { throw_nogpu(); }
void cv::gpu::printCudaDeviceInfo(int) { throw_nogpu(); }
void cv::gpu::printShortCudaDeviceInfo(int) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */
@ -346,7 +346,6 @@ void cv::gpu::printCudaDeviceInfo(int device)
convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount);
printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
#if (CUDART_VERSION >= 4000)
// This is not available in the CUDA Runtime API, so we make the necessary calls to the driver API to support this output
int memoryClock, memBusWidth, L2CacheSize;
getCudaAttribute<int>( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev );
@ -364,7 +363,7 @@ void cv::gpu::printCudaDeviceInfo(int device)
printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n",
prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
#endif
printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem);
printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock);
printf(" Total number of registers available per block: %d\n", prop.regsPerBlock);
@ -375,11 +374,7 @@ void cv::gpu::printCudaDeviceInfo(int device)
printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch);
printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment);
#if CUDART_VERSION >= 4000
printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
#else
printf(" Concurrent copy and execution: %s\n", prop.deviceOverlap ? "Yes" : "No");
#endif
printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No");
printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No");
@ -388,10 +383,8 @@ void cv::gpu::printCudaDeviceInfo(int device)
printf(" Alignment requirement for Surfaces: %s\n", prop.surfaceAlignment ? "Yes" : "No");
printf(" Device has ECC support enabled: %s\n", prop.ECCEnabled ? "Yes" : "No");
printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No");
#if CUDART_VERSION >= 4000
printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No");
printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID );
#endif
printf(" Compute Mode:\n");
printf(" %s \n", computeMode[prop.computeMode]);
}

View File

@ -48,6 +48,7 @@ using namespace cv::gpu;
#if !defined (HAVE_CUDA)
void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&) { throw_nogpu(); }
void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&, GpuMat&) { throw_nogpu(); }
double cv::gpu::norm(const GpuMat&, int) { throw_nogpu(); return 0.0; }
double cv::gpu::norm(const GpuMat&, int, GpuMat&) { throw_nogpu(); return 0.0; }
double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_nogpu(); return 0.0; }
@ -108,6 +109,12 @@ namespace
// meanStdDev
void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
{
GpuMat buf;
meanStdDev(src, mean, stddev, buf);
}
void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat& buf)
{
CV_Assert(src.type() == CV_8UC1);
@ -117,15 +124,12 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
DeviceBuffer dbuf(2);
#if CUDART_VERSION > 4000
int bufSize;
nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
GpuMat buf(1, bufSize, CV_8UC1);
ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );
#else
nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, dbuf, (double*)dbuf + 1) );
#endif
cudaSafeCall( cudaDeviceSynchronize() );
@ -133,7 +137,6 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
dbuf.download(ptrs);
}
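The split into a forwarding overload plus a buffered one lets callers hoist the workspace out of a loop instead of paying the per-call GpuMat buf(1, bufSize, CV_8UC1) allocation the old code incurred. A hedged sketch, where frames is an illustrative std::vector<cv::gpu::GpuMat>:
cv::Scalar mean, stddev;
cv::gpu::GpuMat buf;  // grown once by ensureSizeIsEnough, reused on later iterations
for (size_t i = 0; i < frames.size(); ++i)
    cv::gpu::meanStdDev(frames[i], mean, stddev, buf);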
////////////////////////////////////////////////////////////////////////
// norm
@ -151,7 +154,7 @@ double cv::gpu::norm(const GpuMat& src, int normType, GpuMat& buf)
return absSum(src_single_channel, buf)[0];
if (normType == NORM_L2)
return sqrt(sqrSum(src_single_channel, buf)[0]);
return std::sqrt(sqrSum(src_single_channel, buf)[0]);
if (normType == NORM_INF)
{

View File

@ -228,7 +228,7 @@ void cv::gpu::createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMa
minMax(u_avg, 0, &uMax);
minMax(v_avg, 0, &vMax);
float max_flow = static_cast<float>(sqrt(uMax * uMax + vMax * vMax));
float max_flow = static_cast<float>(std::sqrt(uMax * uMax + vMax * vMax));
CreateOpticalFlowNeedleMap_gpu(u_avg, v_avg, vertex.ptr<float>(), colors.ptr<float>(), max_flow, 1.0f / u.cols, 1.0f / u.rows);

View File

@ -160,7 +160,7 @@ void cv::gpu::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double s
double ig11, ig03, ig33, ig55;
prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, ig11, ig03, ig33, ig55);
device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, static_cast<float>(ig11), static_cast<float>(ig03), static_cast<float>(ig33), static_cast<float>(ig55));
}

View File

@ -429,11 +429,11 @@ void cv::gpu::ORB_GPU::setParams(size_t n_features, const ORB::CommonParams& det
// pre-compute the end of a row in a circular patch
int half_patch_size = params_.patch_size_ / 2;
vector<int> u_max(half_patch_size + 1);
for (int v = 0; v <= half_patch_size * sqrt(2.f) / 2 + 1; ++v)
u_max[v] = cvRound(sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
// Make sure we are symmetric
for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * sqrt(2.f) / 2; --v)
for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
{
while (u_max[v_0] == u_max[v_0 + 1])
++v_0;

View File

@ -91,8 +91,8 @@
#include "nvidia/NCVHaarObjectDetection.hpp"
#include "nvidia/NCVBroxOpticalFlow.hpp"
#define CUDART_MINIMUM_REQUIRED_VERSION 4000
#define NPP_MINIMUM_REQUIRED_VERSION 4000
#define CUDART_MINIMUM_REQUIRED_VERSION 4010
#define NPP_MINIMUM_REQUIRED_VERSION 4100
#if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
#error "Insufficient Cuda Runtime library version, please update it."

View File

@ -69,10 +69,10 @@ PARAM_TEST_CASE(ArithmTestBase, cv::gpu::DeviceInfo, MatType, UseRoi)
size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
mat1 = randomMat(rng, size, type, 1, 16, false);
mat2 = randomMat(rng, size, type, 1, 16, false);
mat1 = randomMat(rng, size, type, 5, 16, false);
mat2 = randomMat(rng, size, type, 5, 16, false);
val = cv::Scalar(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0));
val = cv::Scalar(rng.uniform(1, 3), rng.uniform(1, 3), rng.uniform(1, 3), rng.uniform(1, 3));
}
};
@ -115,7 +115,8 @@ TEST_P(Add, Scalar)
INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(
ALL_DEVICES,
Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
CV_32SC1, CV_32SC2, CV_32SC3, CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4),
USE_ROI));
////////////////////////////////////////////////////////////////////////////////
@ -157,7 +158,8 @@ TEST_P(Subtract, Scalar)
INSTANTIATE_TEST_CASE_P(Arithm, Subtract, Combine(
ALL_DEVICES,
Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
CV_32SC1, CV_32SC2, CV_32SC3, CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4),
USE_ROI));
////////////////////////////////////////////////////////////////////////////////
@ -199,7 +201,8 @@ TEST_P(Multiply, Scalar)
INSTANTIATE_TEST_CASE_P(Arithm, Multiply, Combine(
ALL_DEVICES,
Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_16SC1, CV_16SC3, CV_16SC4,
CV_32SC1, CV_32SC3, CV_32FC1, CV_32FC3, CV_32FC4),
USE_ROI));
////////////////////////////////////////////////////////////////////////////////
@ -220,7 +223,7 @@ TEST_P(Divide, Array)
gpuRes.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
EXPECT_MAT_NEAR(dst_gold, dst, mat1.depth() == CV_32F ? 1e-5 : 1);
}
TEST_P(Divide, Scalar)
@ -236,12 +239,13 @@ TEST_P(Divide, Scalar)
gpuRes.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
EXPECT_MAT_NEAR(dst_gold, dst, mat1.depth() == CV_32F ? 1e-5 : 1);
}
INSTANTIATE_TEST_CASE_P(Arithm, Divide, Combine(
ALL_DEVICES,
Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_16SC1, CV_16SC3, CV_16SC4,
CV_32SC1, CV_32SC3, CV_32FC1, CV_32FC3, CV_32FC4),
USE_ROI));
////////////////////////////////////////////////////////////////////////////////
@ -312,6 +316,83 @@ INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(
Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
USE_ROI));
////////////////////////////////////////////////////////////////////////////////
// abs
struct Abs : ArithmTestBase {};
TEST_P(Abs, Array)
{
cv::Mat dst_gold = cv::abs(mat1);
cv::Mat dst;
cv::gpu::GpuMat gpuRes;
cv::gpu::abs(loadMat(mat1, useRoi), gpuRes);
gpuRes.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
INSTANTIATE_TEST_CASE_P(Arithm, Abs, Combine(
ALL_DEVICES,
Values(CV_16SC1, CV_32FC1),
USE_ROI));
////////////////////////////////////////////////////////////////////////////////
// Sqr
struct Sqr : ArithmTestBase {};
TEST_P(Sqr, Array)
{
cv::Mat dst_gold;
cv::multiply(mat1, mat1, dst_gold);
cv::Mat dst;
cv::gpu::GpuMat gpuRes;
cv::gpu::sqr(loadMat(mat1, useRoi), gpuRes);
gpuRes.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
INSTANTIATE_TEST_CASE_P(Arithm, Sqr, Combine(
ALL_DEVICES,
Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1),
USE_ROI));
////////////////////////////////////////////////////////////////////////////////
// Sqrt
struct Sqrt : ArithmTestBase {};
TEST_P(Sqrt, Array)
{
cv::Mat dst_gold;
cv::sqrt(mat1, dst_gold);
cv::Mat dst;
cv::gpu::GpuMat gpuRes;
cv::gpu::sqrt(loadMat(mat1, useRoi), gpuRes);
gpuRes.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(
ALL_DEVICES,
Values(MatType(CV_32FC1)),
USE_ROI));
////////////////////////////////////////////////////////////////////////////////
// compare
@ -513,7 +594,7 @@ TEST_P(Flip, Accuracy)
INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(
ALL_DEVICES,
Values(CV_8UC1, CV_8UC4),
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values((int)FLIP_BOTH, (int)FLIP_X, (int)FLIP_Y),
USE_ROI));
@ -1329,6 +1410,90 @@ INSTANTIATE_TEST_CASE_P(Arithm, Bitwise, Combine(
ALL_DEVICES,
ALL_TYPES));
PARAM_TEST_CASE(BitwiseScalar, cv::gpu::DeviceInfo, MatType)
{
cv::gpu::DeviceInfo devInfo;
int type;
cv::Size size;
cv::Mat mat;
cv::Scalar sc;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
type = GET_PARAM(1);
cv::gpu::setDevice(devInfo.deviceID());
cv::RNG& rng = cvtest::TS::ptr()->get_rng();
size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
mat.create(size, type);
for (int i = 0; i < mat.rows; ++i)
{
cv::Mat row(1, static_cast<int>(mat.cols * mat.elemSize()), CV_8U, (void*)mat.ptr(i));
rng.fill(row, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
}
sc = cv::Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));
}
};
TEST_P(BitwiseScalar, Or)
{
cv::Mat dst_gold;
cv::bitwise_or(mat, sc, dst_gold);
cv::Mat dst;
cv::gpu::GpuMat dev_dst;
cv::gpu::bitwise_or(loadMat(mat), sc, dev_dst);
dev_dst.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
TEST_P(BitwiseScalar, And)
{
cv::Mat dst_gold;
cv::bitwise_and(mat, sc, dst_gold);
cv::Mat dst;
cv::gpu::GpuMat dev_dst;
cv::gpu::bitwise_and(loadMat(mat), sc, dev_dst);
dev_dst.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
TEST_P(BitwiseScalar, Xor)
{
cv::Mat dst_gold;
cv::bitwise_xor(mat, sc, dst_gold);
cv::Mat dst;
cv::gpu::GpuMat dev_dst;
cv::gpu::bitwise_xor(loadMat(mat), sc, dev_dst);
dev_dst.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
INSTANTIATE_TEST_CASE_P(Arithm, BitwiseScalar, Combine(
ALL_DEVICES,
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4)));
//////////////////////////////////////////////////////////////////////////////
// addWeighted

View File

@ -2377,6 +2377,49 @@ INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, Combine(
Values(CV_8U, CV_16U, CV_32F),
USE_ROI));
///////////////////////////////////////////////////////////////////////////////////////////////////////
// swapChannels
PARAM_TEST_CASE(SwapChannels, cv::gpu::DeviceInfo, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
bool useRoi;
cv::Mat img;
cv::Mat dst_gold;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
useRoi = GET_PARAM(1);
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat imgBase = readImage("stereobm/aloe-L.png");
ASSERT_FALSE(imgBase.empty());
cv::cvtColor(imgBase, img, cv::COLOR_BGR2BGRA);
cv::cvtColor(img, dst_gold, cv::COLOR_BGRA2RGBA);
}
};
TEST_P(SwapChannels, Accuracy)
{
cv::gpu::GpuMat gpuImage = loadMat(img, useRoi);
const int dstOrder[] = {2, 1, 0, 3};
cv::gpu::swapChannels(gpuImage, dstOrder);
cv::Mat dst;
gpuImage.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
INSTANTIATE_TEST_CASE_P(ImgProc, SwapChannels, Combine(ALL_DEVICES, USE_ROI));
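As the test implies (semantics inferred from it, not from separate documentation), swapChannels works in place and dstOrder[i] selects the source channel written to output channel i:
const int dstOrder[] = {2, 1, 0, 3};        // out(R,G,B,A) <- src(B,G,R,A): BGRA -> RGBA
cv::gpu::swapChannels(gpuImage, dstOrder);  // gpuImage is modified in place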
///////////////////////////////////////////////////////////////////////////////////////////////////////
// histograms

View File

@ -136,7 +136,7 @@ int main(int argc, char** argv)
#else // HAVE_CUDA
int main(int argc, char** argv)
int main()
{
printf("OpenCV was built without CUDA support\n");
return 0;

View File

@ -146,7 +146,6 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id)
default:
return -1;
}
return -1;
}
void cv::namedWindow( const string& winname, int flags )

View File

@ -105,6 +105,8 @@ public:
GraphCutSeamFinder(int cost_type = COST_COLOR_GRAD, float terminal_cost = 10000.f,
float bad_region_penalty = 1000.f);
~GraphCutSeamFinder();
void find(const std::vector<Mat> &src, const std::vector<Point> &corners,
std::vector<Mat> &masks);

View File

@ -411,6 +411,8 @@ void GraphCutSeamFinder::Impl::findInPair(size_t first, size_t second, Rect roi)
GraphCutSeamFinder::GraphCutSeamFinder(int cost_type, float terminal_cost, float bad_region_penalty)
: impl_(new Impl(cost_type, terminal_cost, bad_region_penalty)) {}
GraphCutSeamFinder::~GraphCutSeamFinder() {}
void GraphCutSeamFinder::find(const vector<Mat> &src, const vector<Point> &corners,
vector<Mat> &masks)

View File

@ -247,7 +247,7 @@ inline int clamp(int val, int minVal, int maxVal)
return max(min(val, maxVal), minVal);
}
void PointCloudRenderer::onMouseEvent(int event, int x, int y, int flags)
void PointCloudRenderer::onMouseEvent(int event, int x, int y, int /*flags*/)
{
static int oldx = x;
static int oldy = y;

View File

@ -0,0 +1,68 @@
#include <iostream>
#include "opencv2/core/opengl_interop.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/gpu/gpu.hpp"
using namespace std;
using namespace cv;
using namespace cv::gpu;
int main()
{
cout << "This program demonstrates using alphaComp" << endl;
cout << "Press SPACE to change compositing operation" << endl;
cout << "Press ESC to exit" << endl;
namedWindow("First Image", WINDOW_NORMAL);
namedWindow("Second Image", WINDOW_NORMAL);
namedWindow("Result", WINDOW_OPENGL);
setGlDevice();
Mat src1(640, 480, CV_8UC4, Scalar::all(0));
Mat src2(640, 480, CV_8UC4, Scalar::all(0));
rectangle(src1, Rect(50, 50, 200, 200), Scalar(0, 0, 255, 128), 30);
rectangle(src2, Rect(100, 100, 200, 200), Scalar(255, 0, 0, 128), 30);
GpuMat d_src1(src1);
GpuMat d_src2(src2);
GpuMat d_res;
imshow("First Image", src1);
imshow("Second Image", src2);
int alpha_op = ALPHA_OVER;
const char* op_names[] =
{
"ALPHA_OVER", "ALPHA_IN", "ALPHA_OUT", "ALPHA_ATOP", "ALPHA_XOR", "ALPHA_PLUS", "ALPHA_OVER_PREMUL", "ALPHA_IN_PREMUL", "ALPHA_OUT_PREMUL",
"ALPHA_ATOP_PREMUL", "ALPHA_XOR_PREMUL", "ALPHA_PLUS_PREMUL", "ALPHA_PREMUL"
};
while (true)
{
cout << op_names[alpha_op] << endl;
alphaComp(d_src1, d_src2, d_res, alpha_op);
imshow("Result", d_res);
char key = static_cast<char>(waitKey());
if (key == 27)
break;
if (key == 32)
{
++alpha_op;
if (alpha_op > ALPHA_PREMUL)
alpha_op = ALPHA_OVER;
}
}
return 0;
}

View File

@ -226,7 +226,7 @@ int main(int argc, const char* argv[])
break;
case 'S':
if (currentFrame < frames.size() - 1)
if (currentFrame < static_cast<int>(frames.size()) - 1)
++currentFrame;
imshow("Interpolated frame", frames[currentFrame]);

View File

@ -26,7 +26,7 @@ void colorizeFlow(const Mat &u, const Mat &v, Mat &dst)
minMaxLoc(v, &vMin, &vMax, 0, 0);
uMin = ::abs(uMin); uMax = ::abs(uMax);
vMin = ::abs(vMin); vMax = ::abs(vMax);
float dMax = ::max(::max(uMin, uMax), ::max(vMin, vMax));
float dMax = static_cast<float>(::max(::max(uMin, uMax), ::max(vMin, vMax)));
dst.create(u.size(), CV_8UC3);
for (int y = 0; y < u.rows; ++y)
@ -111,11 +111,11 @@ int main(int argc, char **argv)
s.str("");
s << "opt. flow FPS: " << cvRound((getTickFrequency()/(tc1-tc0)));
putText(image, s.str(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255,0,255), 2.);
putText(image, s.str(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255,0,255), 2);
s.str("");
s << "total FPS: " << cvRound((getTickFrequency()/(t1-t0)));
putText(image, s.str(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255,0,255), 2.);
putText(image, s.str(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255,0,255), 2);
imshow("flow", image);

View File

@ -63,7 +63,7 @@ void TestSystem::finishCurrentSubtest()
double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
double speedup = static_cast<double>(cpu_elapsed_) / std::max((int64)1, gpu_elapsed_);
double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
speedup_total_ += speedup;
printMetrics(cpu_time, gpu_time, speedup);

View File

@ -127,8 +127,10 @@ private:
std::stringstream cur_subtest_description_;
bool cur_subtest_is_empty_;
int64 cpu_started_, cpu_elapsed_;
int64 gpu_started_, gpu_elapsed_;
int64 cpu_started_;
int64 gpu_started_;
double cpu_elapsed_;
double gpu_elapsed_;
double speedup_total_;
int num_subtests_called_;

View File

@ -1199,7 +1199,7 @@ TEST(FarnebackOpticalFlow)
if (frame1.empty()) throw runtime_error("can't open " + datasets[i] + "2.png");
gpu::FarnebackOpticalFlow calc;
calc.fastPyramids = fastPyramids;
calc.fastPyramids = fastPyramids != 0;
calc.flags |= useGaussianBlur ? OPTFLOW_FARNEBACK_GAUSSIAN : 0;
gpu::GpuMat d_frame0(frame0), d_frame1(frame1), d_flowx, d_flowy;