mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 19:20:28 +08:00
created wrappers for new NPP functions
removed void integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& stream) - it fails with NPP_NOT_IMPLEMENTED error updated docs, accuracy and performance tests
This commit is contained in:
parent
e426dfc396
commit
2d30480982
@ -48,6 +48,17 @@
|
||||
#ifdef HAVE_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <npp.h>
|
||||
|
||||
#define CUDART_MINIMUM_REQUIRED_VERSION 4010
|
||||
#define NPP_MINIMUM_REQUIRED_VERSION 4100
|
||||
|
||||
#if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
|
||||
#error "Insufficient Cuda Runtime library version, please update it."
|
||||
#endif
|
||||
|
||||
#if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
|
||||
#error "Insufficient NPP version, please update it."
|
||||
#endif
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
@ -460,15 +471,17 @@ namespace cv { namespace gpu
|
||||
|
||||
namespace
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Convert
|
||||
|
||||
template<int n> struct NPPTypeTraits;
|
||||
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_8S> { typedef Npp8s npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Convert
|
||||
|
||||
template<int SDEPTH, int DDEPTH> struct NppConvertFunc
|
||||
{
|
||||
@ -494,6 +507,7 @@ namespace
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@ -508,6 +522,7 @@ namespace
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@ -529,6 +544,14 @@ namespace
|
||||
|
||||
typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
|
||||
};
|
||||
template<int SCN> struct NppSetFunc<CV_8S, SCN>
|
||||
{
|
||||
typedef NppStatus (*func_ptr)(Npp8s values[], Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI);
|
||||
};
|
||||
template<> struct NppSetFunc<CV_8S, 1>
|
||||
{
|
||||
typedef NppStatus (*func_ptr)(Npp8s val, Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI);
|
||||
};
|
||||
|
||||
template<int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet
|
||||
{
|
||||
@ -613,6 +636,35 @@ namespace
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// CopyMasked
|
||||
|
||||
template<int SDEPTH> struct NppCopyMaskedFunc
|
||||
{
|
||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||
|
||||
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, src_t* pDst, int nDstStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
|
||||
};
|
||||
|
||||
template<int SDEPTH, typename NppCopyMaskedFunc<SDEPTH>::func_ptr func> struct NppCopyMasked
|
||||
{
|
||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||
|
||||
static void copyMasked(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t /*stream*/)
|
||||
{
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// CudaFuncTable
|
||||
|
||||
class CudaFuncTable : public GpuFuncTable
|
||||
{
|
||||
public:
|
||||
@ -631,7 +683,26 @@ namespace
|
||||
|
||||
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const
|
||||
{
|
||||
::cv::gpu::copyWithMask(src, dst, mask);
|
||||
CV_Assert(src.size() == dst.size() && src.type() == dst.type());
|
||||
CV_Assert(src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels()));
|
||||
|
||||
typedef void (*caller_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[7][4] =
|
||||
{
|
||||
/* 8U */ {NppCopyMasked<CV_8U, nppiCopy_8u_C1MR>::copyMasked, cv::gpu::copyWithMask, NppCopyMasked<CV_8U, nppiCopy_8u_C3MR>::copyMasked, NppCopyMasked<CV_8U, nppiCopy_8u_C4MR>::copyMasked},
|
||||
/* 8S */ {cv::gpu::copyWithMask, cv::gpu::copyWithMask, cv::gpu::copyWithMask, cv::gpu::copyWithMask},
|
||||
/* 16U */ {NppCopyMasked<CV_16U, nppiCopy_16u_C1MR>::copyMasked, cv::gpu::copyWithMask, NppCopyMasked<CV_16U, nppiCopy_16u_C3MR>::copyMasked, NppCopyMasked<CV_16U, nppiCopy_16u_C4MR>::copyMasked},
|
||||
/* 16S */ {NppCopyMasked<CV_16S, nppiCopy_16s_C1MR>::copyMasked, cv::gpu::copyWithMask, NppCopyMasked<CV_16S, nppiCopy_16s_C3MR>::copyMasked, NppCopyMasked<CV_16S, nppiCopy_16s_C4MR>::copyMasked},
|
||||
/* 32S */ {NppCopyMasked<CV_32S, nppiCopy_32s_C1MR>::copyMasked, cv::gpu::copyWithMask, NppCopyMasked<CV_32S, nppiCopy_32s_C3MR>::copyMasked, NppCopyMasked<CV_32S, nppiCopy_32s_C4MR>::copyMasked},
|
||||
/* 32F */ {NppCopyMasked<CV_32F, nppiCopy_32f_C1MR>::copyMasked, cv::gpu::copyWithMask, NppCopyMasked<CV_32F, nppiCopy_32f_C3MR>::copyMasked, NppCopyMasked<CV_32F, nppiCopy_32f_C4MR>::copyMasked},
|
||||
/* 64F */ {cv::gpu::copyWithMask, cv::gpu::copyWithMask, cv::gpu::copyWithMask, cv::gpu::copyWithMask}
|
||||
};
|
||||
|
||||
caller_t func = mask.channels() == src.channels() ? callers[src.depth()][src.channels()] : cv::gpu::copyWithMask;
|
||||
CV_DbgAssert(func != 0);
|
||||
|
||||
func(src, dst, mask, 0);
|
||||
}
|
||||
|
||||
void convert(const GpuMat& src, GpuMat& dst) const
|
||||
@ -641,65 +712,65 @@ namespace
|
||||
{
|
||||
{
|
||||
/* 8U -> 8U */ {0, 0, 0, 0},
|
||||
/* 8U -> 8S */ {::cv::gpu::convertTo, ::cv::gpu::convertTo, ::cv::gpu::convertTo, ::cv::gpu::convertTo},
|
||||
/* 8U -> 16U */ {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::cvt},
|
||||
/* 8U -> 16S */ {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::cvt},
|
||||
/* 8U -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 8U -> 32F */ {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 8U -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}
|
||||
/* 8U -> 8S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
|
||||
/* 8U -> 16U */ {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::cvt},
|
||||
/* 8U -> 16S */ {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::cvt},
|
||||
/* 8U -> 32S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 8U -> 32F */ {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 8U -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}
|
||||
},
|
||||
{
|
||||
/* 8S -> 8U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 8S -> 8U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 8S -> 8S */ {0,0,0,0},
|
||||
/* 8S -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 8S -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 8S -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 8S -> 32F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 8S -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}
|
||||
/* 8S -> 16U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 8S -> 16S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 8S -> 32S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 8S -> 32F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 8S -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}
|
||||
},
|
||||
{
|
||||
/* 16U -> 8U */ {NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::cvt},
|
||||
/* 16U -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 16U -> 8U */ {NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::cvt},
|
||||
/* 16U -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 16U -> 16U */ {0,0,0,0},
|
||||
/* 16U -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 16U -> 32S */ {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 16U -> 32F */ {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 16U -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}
|
||||
/* 16U -> 16S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 16U -> 32S */ {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 16U -> 32F */ {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 16U -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}
|
||||
},
|
||||
{
|
||||
/* 16S -> 8U */ {NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::cvt},
|
||||
/* 16S -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 16S -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 16S -> 8U */ {NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::cvt},
|
||||
/* 16S -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 16S -> 16U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 16S -> 16S */ {0,0,0,0},
|
||||
/* 16S -> 32S */ {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 16S -> 32F */ {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 16S -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}
|
||||
/* 16S -> 32S */ {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 16S -> 32F */ {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 16S -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}
|
||||
},
|
||||
{
|
||||
/* 32S -> 8U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 32S -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 32S -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 32S -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 32S -> 8U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 32S -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 32S -> 16U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 32S -> 16S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 32S -> 32S */ {0,0,0,0},
|
||||
/* 32S -> 32F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 32S -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}
|
||||
/* 32S -> 32F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 32S -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}
|
||||
},
|
||||
{
|
||||
/* 32F -> 8U */ {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 32F -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 32F -> 16U */ {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 32F -> 16S */ {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 32F -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 32F -> 8U */ {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 32F -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 32F -> 16U */ {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 32F -> 16S */ {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::cvt,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 32F -> 32S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 32F -> 32F */ {0,0,0,0},
|
||||
/* 32F -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}
|
||||
/* 32F -> 64F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo}
|
||||
},
|
||||
{
|
||||
/* 64F -> 8U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 64F -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 64F -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 64F -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 64F -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 64F -> 32F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},
|
||||
/* 64F -> 8U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 64F -> 8S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 64F -> 16U */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 64F -> 16S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 64F -> 32S */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 64F -> 32F */ {cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo,cv::gpu::convertTo},
|
||||
/* 64F -> 64F */ {0,0,0,0}
|
||||
}
|
||||
};
|
||||
@ -712,7 +783,7 @@ namespace
|
||||
|
||||
void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const
|
||||
{
|
||||
::cv::gpu::convertTo(src, dst, alpha, beta);
|
||||
cv::gpu::convertTo(src, dst, alpha, beta);
|
||||
}
|
||||
|
||||
void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const
|
||||
@ -744,13 +815,13 @@ namespace
|
||||
typedef void (*caller_t)(GpuMat& src, Scalar s);
|
||||
static const caller_t callers[7][4] =
|
||||
{
|
||||
{NppSet<CV_8U, 1, nppiSet_8u_C1R>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSet<CV_8U, 4, nppiSet_8u_C4R>::set},
|
||||
{::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo},
|
||||
{NppSet<CV_16U, 1, nppiSet_16u_C1R>::set, NppSet<CV_16U, 2, nppiSet_16u_C2R>::set, ::cv::gpu::setTo, NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},
|
||||
{NppSet<CV_16S, 1, nppiSet_16s_C1R>::set, NppSet<CV_16S, 2, nppiSet_16s_C2R>::set, ::cv::gpu::setTo, NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},
|
||||
{NppSet<CV_32S, 1, nppiSet_32s_C1R>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSet<CV_32S, 4, nppiSet_32s_C4R>::set},
|
||||
{NppSet<CV_32F, 1, nppiSet_32f_C1R>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSet<CV_32F, 4, nppiSet_32f_C4R>::set},
|
||||
{::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo}
|
||||
{NppSet<CV_8U, 1, nppiSet_8u_C1R>::set, cv::gpu::setTo, cv::gpu::setTo, NppSet<CV_8U, 4, nppiSet_8u_C4R>::set},
|
||||
{NppSet<CV_8S, 1, nppiSet_8s_C1R>::set, NppSet<CV_8S, 2, nppiSet_8s_C2R>::set, NppSet<CV_8S, 3, nppiSet_8s_C3R>::set, NppSet<CV_8S, 4, nppiSet_8s_C4R>::set},
|
||||
{NppSet<CV_16U, 1, nppiSet_16u_C1R>::set, NppSet<CV_16U, 2, nppiSet_16u_C2R>::set, cv::gpu::setTo, NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},
|
||||
{NppSet<CV_16S, 1, nppiSet_16s_C1R>::set, NppSet<CV_16S, 2, nppiSet_16s_C2R>::set, cv::gpu::setTo, NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},
|
||||
{NppSet<CV_32S, 1, nppiSet_32s_C1R>::set, cv::gpu::setTo, cv::gpu::setTo, NppSet<CV_32S, 4, nppiSet_32s_C4R>::set},
|
||||
{NppSet<CV_32F, 1, nppiSet_32f_C1R>::set, cv::gpu::setTo, cv::gpu::setTo, NppSet<CV_32F, 4, nppiSet_32f_C4R>::set},
|
||||
{cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo}
|
||||
};
|
||||
|
||||
callers[m.depth()][m.channels() - 1](m, s);
|
||||
@ -761,13 +832,13 @@ namespace
|
||||
|
||||
static const caller_t callers[7][4] =
|
||||
{
|
||||
{NppSetMask<CV_8U, 1, nppiSet_8u_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>::set},
|
||||
{::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo},
|
||||
{NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::set},
|
||||
{NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::set},
|
||||
{NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::set},
|
||||
{NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::set},
|
||||
{::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo}
|
||||
{NppSetMask<CV_8U, 1, nppiSet_8u_C1MR>::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>::set},
|
||||
{cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo},
|
||||
{NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::set},
|
||||
{NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::set},
|
||||
{NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::set},
|
||||
{NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::set, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::set},
|
||||
{cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo}
|
||||
};
|
||||
|
||||
callers[m.depth()][m.channels() - 1](m, s, mask);
|
||||
|
@ -69,18 +69,14 @@ Performs a mean-shift segmentation of the source image and eliminates small segm
|
||||
|
||||
gpu::integral
|
||||
-----------------
|
||||
Computes an integral image and a squared integral image.
|
||||
Computes an integral image.
|
||||
|
||||
.. ocv:function:: void gpu::integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null())
|
||||
|
||||
.. ocv:function:: void gpu::integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source image. Only ``CV_8UC1`` images are supported for now.
|
||||
|
||||
:param sum: Integral image containing 32-bit unsigned integer values packed into ``CV_32SC1`` .
|
||||
|
||||
:param sqsum: Squared integral image of the ``CV_32FC1`` type.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`integral`
|
||||
@ -380,6 +376,22 @@ Converts an image from one color space to another.
|
||||
|
||||
|
||||
|
||||
gpu::swapChannels
|
||||
-----------------
|
||||
Exchanges the color channels of an image in-place.
|
||||
|
||||
.. ocv:function:: void gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source image. Supports only ``CV_8UC4`` type.
|
||||
|
||||
:param dstOrder: Integer array describing how channel values are permutated. The n-th entry of the array contains the number of the channel that is stored in the n-th channel of the output image. E.g. Given an RGBA image, aDstOrder = [3,2,1,0] converts this to ABGR channel order.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
The methods support arbitrary permutations of the original channels, including replication.
|
||||
|
||||
|
||||
|
||||
gpu::threshold
|
||||
------------------
|
||||
Applies a fixed-level threshold to each array element.
|
||||
@ -489,7 +501,7 @@ Rotates an image around the origin (0,0) and then shifts it.
|
||||
|
||||
.. ocv:function:: void gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source image. ``CV_8UC1`` and ``CV_8UC4`` types are supported.
|
||||
:param src: Source image. Supports 1, 3 or 4 channels images with ``CV_8U`` , ``CV_16U`` or ``CV_32F`` depth.
|
||||
|
||||
:param dst: Destination image with the same type as ``src`` . The size is ``dsize`` .
|
||||
|
||||
@ -751,6 +763,38 @@ Performs linear blending of two images.
|
||||
|
||||
|
||||
|
||||
gpu::alphaComp
|
||||
-------------------
|
||||
Composites two images using alpha opacity values contained in each image.
|
||||
|
||||
.. ocv:function:: void gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null())
|
||||
|
||||
:param img1: First image. Supports ``CV_8UC4`` , ``CV_16UC4`` , ``CV_32SC4`` and ``CV_32FC4`` types.
|
||||
|
||||
:param img1: Second image. Must have the same size and the same type as ``img1`` .
|
||||
|
||||
:param dst: Destination image.
|
||||
|
||||
:param alpha_op: Flag specifying the alpha-blending operation:
|
||||
|
||||
* **ALPHA_OVER**
|
||||
* **ALPHA_IN**
|
||||
* **ALPHA_OUT**
|
||||
* **ALPHA_ATOP**
|
||||
* **ALPHA_XOR**
|
||||
* **ALPHA_PLUS**
|
||||
* **ALPHA_OVER_PREMUL**
|
||||
* **ALPHA_IN_PREMUL**
|
||||
* **ALPHA_OUT_PREMUL**
|
||||
* **ALPHA_ATOP_PREMUL**
|
||||
* **ALPHA_XOR_PREMUL**
|
||||
* **ALPHA_PLUS_PREMUL**
|
||||
* **ALPHA_PREMUL**
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
|
||||
|
||||
gpu::Canny
|
||||
-------------------
|
||||
Finds edges in an image using the [Canny86]_ algorithm.
|
||||
|
@ -10,6 +10,7 @@ gpu::meanStdDev
|
||||
Computes a mean value and a standard deviation of matrix elements.
|
||||
|
||||
.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev)
|
||||
.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
|
||||
|
||||
:param mtx: Source matrix. ``CV_8UC1`` matrices are supported for now.
|
||||
|
||||
@ -17,6 +18,8 @@ Computes a mean value and a standard deviation of matrix elements.
|
||||
|
||||
:param stddev: Standard deviation value.
|
||||
|
||||
:param buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
.. seealso:: :ocv:func:`meanStdDev`
|
||||
|
||||
|
||||
|
@ -63,7 +63,7 @@ Flips a 2D matrix around vertical, horizontal, or both axes.
|
||||
|
||||
.. ocv:function:: void gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source matrix. Only ``CV_8UC1`` and ``CV_8UC4`` matrices are supported for now.
|
||||
:param src: Source matrix. Supports 1, 3 and 4 channels images with ``CV_8U``, ``CV_16U``, ``CV_32S`` or ``CV_32F`` depth.
|
||||
|
||||
:param dst: Destination matrix.
|
||||
|
||||
|
@ -139,6 +139,50 @@ where ``I`` is a multi-dimensional index of array elements. In case of multi-cha
|
||||
|
||||
|
||||
|
||||
gpu::abs
|
||||
------------
|
||||
Computes an absolute value of each matrix element.
|
||||
|
||||
.. ocv:function:: void gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source matrix. Supports ``CV_16S`` and ``CV_32F`` depth.
|
||||
|
||||
:param dst: Destination matrix with the same size and type as ``src`` .
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`abs`
|
||||
|
||||
|
||||
|
||||
gpu::sqr
|
||||
------------
|
||||
Computes a square value of each matrix element.
|
||||
|
||||
.. ocv:function:: void gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth.
|
||||
|
||||
:param dst: Destination matrix with the same size and type as ``src`` .
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
|
||||
|
||||
gpu::sqrt
|
||||
------------
|
||||
Computes a square root of each matrix element.
|
||||
|
||||
.. ocv:function:: void gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth.
|
||||
|
||||
:param dst: Destination matrix with the same size and type as ``src`` .
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`sqrt`
|
||||
|
||||
|
||||
|
||||
gpu::exp
|
||||
@ -147,7 +191,7 @@ Computes an exponent of each matrix element.
|
||||
|
||||
.. ocv:function:: void gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source matrix. ``CV_32FC1`` matrixes are supported for now.
|
||||
:param src: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth.
|
||||
|
||||
:param dst: Destination matrix with the same size and type as ``src`` .
|
||||
|
||||
@ -157,6 +201,22 @@ Computes an exponent of each matrix element.
|
||||
|
||||
|
||||
|
||||
gpu::log
|
||||
------------
|
||||
Computes a natural logarithm of absolute value of each matrix element.
|
||||
|
||||
.. ocv:function:: void gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth.
|
||||
|
||||
:param dst: Destination matrix with the same size and type as ``src`` .
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`log`
|
||||
|
||||
|
||||
|
||||
gpu::pow
|
||||
------------
|
||||
Raises every matrix element to a power.
|
||||
@ -181,22 +241,6 @@ The function ``pow`` raises every element of the input matrix to ``p`` :
|
||||
|
||||
|
||||
|
||||
gpu::log
|
||||
------------
|
||||
Computes a natural logarithm of absolute value of each matrix element.
|
||||
|
||||
.. ocv:function:: void gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source matrix. ``CV_32FC1`` matrixes are supported for now.
|
||||
|
||||
:param dst: Destination matrix with the same size and type as ``src`` .
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`log`
|
||||
|
||||
|
||||
|
||||
gpu::absdiff
|
||||
----------------
|
||||
Computes per-element absolute difference of two matrices (or of a matrix and scalar).
|
||||
@ -262,9 +306,10 @@ Performs a per-element bitwise inversion.
|
||||
|
||||
gpu::bitwise_or
|
||||
-------------------
|
||||
Performs a per-element bitwise disjunction of two matrices.
|
||||
Performs a per-element bitwise disjunction of two matrices or of matrix and scalar.
|
||||
|
||||
.. ocv:function:: void gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null())
|
||||
.. ocv:function:: void gpu::bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src1: First source matrix.
|
||||
|
||||
@ -280,9 +325,10 @@ Performs a per-element bitwise disjunction of two matrices.
|
||||
|
||||
gpu::bitwise_and
|
||||
--------------------
|
||||
Performs a per-element bitwise conjunction of two matrices.
|
||||
Performs a per-element bitwise conjunction of two matrices or of matrix and scalar.
|
||||
|
||||
.. ocv:function:: void gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null())
|
||||
.. ocv:function:: void gpu::bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src1: First source matrix.
|
||||
|
||||
@ -298,9 +344,10 @@ Performs a per-element bitwise conjunction of two matrices.
|
||||
|
||||
gpu::bitwise_xor
|
||||
--------------------
|
||||
Performs a per-element bitwise ``exclusive or`` operation of two matrices.
|
||||
Performs a per-element bitwise ``exclusive or`` operation of two matrices of matrix and scalar.
|
||||
|
||||
.. ocv:function:: void gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null())
|
||||
.. ocv:function:: void gpu::bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src1: First source matrix.
|
||||
|
||||
@ -314,6 +361,38 @@ Performs a per-element bitwise ``exclusive or`` operation of two matrices.
|
||||
|
||||
|
||||
|
||||
gpu::rshift
|
||||
--------------------
|
||||
Performs pixel by pixel right shift of an image by a constant value.
|
||||
|
||||
.. ocv:function:: void gpu::rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source matrix. Supports 1, 3 and 4 channels images with integers elements.
|
||||
|
||||
:param sc: Constant values, one per channel.
|
||||
|
||||
:param dst: Destination matrix with the same size and type as ``src`` .
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
|
||||
|
||||
gpu::lshift
|
||||
--------------------
|
||||
Performs pixel by pixel right left of an image by a constant value.
|
||||
|
||||
.. ocv:function:: void gpu::lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source matrix. Supports 1, 3 and 4 channels images with ``CV_8U`` , ``CV_16U`` or ``CV_32S`` depth.
|
||||
|
||||
:param sc: Constant values, one per channel.
|
||||
|
||||
:param dst: Destination matrix with the same size and type as ``src`` .
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
|
||||
|
||||
gpu::min
|
||||
------------
|
||||
Computes the per-element minimum of two matrices (or a matrix and a scalar).
|
||||
|
@ -498,7 +498,7 @@ CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha,
|
||||
CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! reverses the order of the rows, columns or both in a matrix
|
||||
//! supports CV_8UC1, CV_8UC4 types
|
||||
//! supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or CV_32F depth
|
||||
CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode, Stream& stream = Stream::Null());
|
||||
|
||||
//! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
|
||||
@ -586,20 +586,32 @@ CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& str
|
||||
//! computes element-wise absolute difference of array and scalar (c = abs(a - s))
|
||||
CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes absolute value of each matrix element
|
||||
//! supports CV_16S and CV_32F depth
|
||||
CV_EXPORTS void abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes square of each pixel in an image
|
||||
//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
|
||||
CV_EXPORTS void sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes square root of each pixel in an image
|
||||
//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
|
||||
CV_EXPORTS void sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes exponent of each matrix element (b = e**a)
|
||||
//! supports only CV_32FC1 type
|
||||
//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
|
||||
CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
|
||||
//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
|
||||
CV_EXPORTS void log(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes power of each matrix element:
|
||||
// (dst(i,j) = pow( src(i,j) , power), if src.type() is integer
|
||||
// (dst(i,j) = pow(fabs(src(i,j)), power), otherwise
|
||||
//! supports all, except depth == CV_64F
|
||||
CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
|
||||
//! supports only CV_32FC1 type
|
||||
CV_EXPORTS void log(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
|
||||
|
||||
//! compares elements of two arrays (c = a <cmpop> b)
|
||||
CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null());
|
||||
|
||||
@ -608,12 +620,29 @@ CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=G
|
||||
|
||||
//! calculates per-element bit-wise disjunction of two arrays
|
||||
CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
|
||||
//! calculates per-element bit-wise disjunction of array and scalar
|
||||
//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
|
||||
CV_EXPORTS void bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! calculates per-element bit-wise conjunction of two arrays
|
||||
CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
|
||||
//! calculates per-element bit-wise conjunction of array and scalar
|
||||
//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
|
||||
CV_EXPORTS void bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! calculates per-element bit-wise "exclusive or" operation
|
||||
CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
|
||||
//! calculates per-element bit-wise "exclusive or" of array and scalar
|
||||
//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
|
||||
CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! pixel by pixel right shift of an image by a constant value
|
||||
//! supports 1, 3 and 4 channels images with integers elements
|
||||
CV_EXPORTS void rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! pixel by pixel left shift of an image by a constant value
|
||||
//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
|
||||
CV_EXPORTS void lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes per-element minimum of two arrays (dst = min(src1, src2))
|
||||
CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
@ -627,6 +656,13 @@ CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream&
|
||||
//! computes per-element maximum of array and scalar (dst = max(src1, src2))
|
||||
CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL,
|
||||
ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL};
|
||||
|
||||
//! Composite two images using alpha opacity values contained in each image
|
||||
//! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types
|
||||
CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null());
|
||||
|
||||
|
||||
////////////////////////////// Image processing //////////////////////////////
|
||||
|
||||
@ -665,6 +701,13 @@ CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat&
|
||||
//! converts image from one color space to another
|
||||
CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null());
|
||||
|
||||
//! swap channels
|
||||
//! dstOrder - Integer array describing how channel values are permutated. The n-th entry
|
||||
//! of the array contains the number of the channel that is stored in the n-th channel of
|
||||
//! the output image. E.g. Given an RGBA image, aDstOrder = [3,2,1,0] converts this to ABGR
|
||||
//! channel order.
|
||||
CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null());
|
||||
|
||||
//! applies fixed threshold to the image
|
||||
CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());
|
||||
|
||||
@ -692,9 +735,9 @@ CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat
|
||||
CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
|
||||
GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
|
||||
|
||||
//! rotate 8bit single or four channel image
|
||||
//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
|
||||
//! supports CV_8UC1, CV_8UC4 types
|
||||
//! rotates an image around the origin (0,0) and then shifts it
|
||||
//! supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
|
||||
//! supports 1, 3 or 4 channels images with CV_8U, CV_16U or CV_32F depth
|
||||
CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
|
||||
int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
|
||||
|
||||
@ -706,15 +749,9 @@ CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bott
|
||||
//! sum will have CV_32S type, but will contain unsigned int values
|
||||
//! supports only CV_8UC1 source type
|
||||
CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null());
|
||||
|
||||
//! buffered version
|
||||
CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes the integral image and integral for the squared image
|
||||
//! sum will have CV_32S type, sqsum - CV32F type
|
||||
//! supports only CV_8UC1 source type
|
||||
CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes squared integral image
|
||||
//! result matrix will have 64F type, but will contain 64U values
|
||||
//! supports source images of 8UC1 type only
|
||||
@ -859,6 +896,8 @@ private:
|
||||
//! computes mean value and standard deviation of all or selected array elements
|
||||
//! supports only CV_8UC1 type
|
||||
CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);
|
||||
//! buffered version
|
||||
CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
|
||||
|
||||
//! computes norm of array
|
||||
//! supports NORM_INF, NORM_L1, NORM_L2
|
||||
@ -939,10 +978,16 @@ CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& c
|
||||
|
||||
//////////////////////////////// Image Labeling ////////////////////////////////
|
||||
|
||||
//!performs labeling via graph cuts
|
||||
//!performs labeling via graph cuts of a 2D regular 4-connected graph.
|
||||
CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels,
|
||||
GpuMat& buf, Stream& stream = Stream::Null());
|
||||
|
||||
//!performs labeling via graph cuts of a 2D regular 8-connected graph.
|
||||
CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
|
||||
GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight,
|
||||
GpuMat& labels,
|
||||
GpuMat& buf, Stream& stream = Stream::Null());
|
||||
|
||||
////////////////////////////////// Histograms //////////////////////////////////
|
||||
|
||||
//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type.
|
||||
|
@ -59,7 +59,7 @@ GPU_PERF_TEST(Flip, cv::gpu::DeviceInfo, cv::Size, perf::MatType, FlipCode)
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Flip, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
GPU_TYPICAL_MAT_SIZES,
|
||||
testing::Values(CV_8UC1, CV_8UC4),
|
||||
testing::Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||
testing::Values((int) HORIZONTAL_AXIS, (int) VERTICAL_AXIS, (int) BOTH_AXIS)));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
@ -363,6 +363,33 @@ INSTANTIATE_TEST_CASE_P(Arithm, BitwiseAnd, testing::Combine(
|
||||
GPU_TYPICAL_MAT_SIZES,
|
||||
testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
|
||||
|
||||
GPU_PERF_TEST(BitwiseScalarAnd, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
|
||||
cv::Size size = GET_PARAM(1);
|
||||
int type = GET_PARAM(2);
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
|
||||
cv::Mat src_host(size, type);
|
||||
|
||||
declare.in(src_host, WARMUP_RNG);
|
||||
|
||||
cv::gpu::GpuMat src(src_host);
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::Scalar sc = cv::Scalar(123, 123, 123, 123);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::bitwise_and(src, sc, dst);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, BitwiseScalarAnd, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
GPU_TYPICAL_MAT_SIZES,
|
||||
testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4)));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Min
|
||||
|
||||
@ -411,10 +438,11 @@ GPU_PERF_TEST(MeanStdDev, cv::gpu::DeviceInfo, cv::Size)
|
||||
cv::gpu::GpuMat src(src_host);
|
||||
cv::Scalar mean;
|
||||
cv::Scalar stddev;
|
||||
cv::gpu::GpuMat buf;
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::meanStdDev(src, mean, stddev);
|
||||
cv::gpu::meanStdDev(src, mean, stddev, buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -201,7 +201,7 @@ GPU_PERF_TEST(CvtColor, cv::gpu::DeviceInfo, cv::Size, perf::MatType, CvtColorIn
|
||||
declare.in(src_host, WARMUP_RNG);
|
||||
|
||||
cv::gpu::GpuMat src(src_host);
|
||||
cv::gpu::GpuMat dst(size, CV_MAKETYPE(type, info.dcn));
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
@ -218,6 +218,32 @@ INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine(
|
||||
CvtColorInfo(4, 4, cv::COLOR_BGR2XYZ), CvtColorInfo(4, 4, cv::COLOR_BGR2YCrCb), CvtColorInfo(4, 4, cv::COLOR_YCrCb2BGR),
|
||||
CvtColorInfo(4, 4, cv::COLOR_BGR2HSV), CvtColorInfo(4, 4, cv::COLOR_HSV2BGR))));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// SwapChannels
|
||||
|
||||
GPU_PERF_TEST(SwapChannels, cv::gpu::DeviceInfo, cv::Size)
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
|
||||
cv::Size size = GET_PARAM(1);
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
|
||||
cv::Mat src_host(size, CV_8UC4);
|
||||
|
||||
declare.in(src_host, WARMUP_RNG);
|
||||
|
||||
cv::gpu::GpuMat src(src_host);
|
||||
|
||||
const int dstOrder[] = {2, 1, 0, 3};
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::swapChannels(src, dstOrder);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ImgProc, SwapChannels, testing::Combine(ALL_DEVICES, GPU_TYPICAL_MAT_SIZES));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Threshold
|
||||
|
||||
@ -457,7 +483,7 @@ GPU_PERF_TEST(Rotate, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolatio
|
||||
INSTANTIATE_TEST_CASE_P(ImgProc, Rotate, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
GPU_TYPICAL_MAT_SIZES,
|
||||
testing::Values(CV_8UC1, CV_8UC4),
|
||||
testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||
testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
@ -519,33 +545,6 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
GPU_TYPICAL_MAT_SIZES));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// IntegralBoth
|
||||
|
||||
GPU_PERF_TEST(IntegralBoth, cv::gpu::DeviceInfo, cv::Size)
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
|
||||
cv::Size size = GET_PARAM(1);
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
|
||||
cv::Mat src_host(size, CV_8UC1);
|
||||
|
||||
declare.in(src_host, WARMUP_RNG);
|
||||
|
||||
cv::gpu::GpuMat src(src_host);
|
||||
cv::gpu::GpuMat sum, sqsum;
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::integral(src, sum, sqsum);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ImgProc, IntegralBoth, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
GPU_TYPICAL_MAT_SIZES));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// IntegralSqr
|
||||
|
||||
@ -849,6 +848,39 @@ INSTANTIATE_TEST_CASE_P(ImgProc, BlendLinear, testing::Combine(
|
||||
GPU_TYPICAL_MAT_SIZES,
|
||||
testing::Values(CV_8UC1, CV_32FC1)));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// AlphaComp
|
||||
|
||||
GPU_PERF_TEST(AlphaComp, cv::gpu::DeviceInfo, cv::Size, perf::MatType, AlphaOp)
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
|
||||
cv::Size size = GET_PARAM(1);
|
||||
int type = GET_PARAM(2);
|
||||
int alpha_op = GET_PARAM(3);
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
|
||||
cv::Mat img1_host(size, type);
|
||||
cv::Mat img2_host(size, type);
|
||||
|
||||
declare.in(img1_host, img2_host, WARMUP_RNG);
|
||||
|
||||
cv::gpu::GpuMat img1(img1_host);
|
||||
cv::gpu::GpuMat img2(img2_host);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::alphaComp(img1, img2, dst, alpha_op);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ImgProc, AlphaComp, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
GPU_TYPICAL_MAT_SIZES,
|
||||
testing::Values(CV_8UC4, CV_16UC4, CV_32SC4, CV_32FC4),
|
||||
testing::Values((int)cv::gpu::ALPHA_OVER, (int)cv::gpu::ALPHA_IN, (int)cv::gpu::ALPHA_OUT, (int)cv::gpu::ALPHA_ATOP, (int)cv::gpu::ALPHA_XOR, (int)cv::gpu::ALPHA_PLUS, (int)cv::gpu::ALPHA_OVER_PREMUL, (int)cv::gpu::ALPHA_IN_PREMUL, (int)cv::gpu::ALPHA_OUT_PREMUL, (int)cv::gpu::ALPHA_ATOP_PREMUL, (int)cv::gpu::ALPHA_XOR_PREMUL, (int)cv::gpu::ALPHA_PLUS_PREMUL, (int)cv::gpu::ALPHA_PREMUL)));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Canny
|
||||
|
||||
|
@ -11,7 +11,7 @@ int main(int argc, char **argv)
|
||||
|
||||
#else
|
||||
|
||||
int main(int argc, char** argv)
|
||||
int main()
|
||||
{
|
||||
printf("OpenCV was built without CUDA support\n");
|
||||
return 0;
|
||||
|
@ -11,6 +11,7 @@ CV_ENUM(FlipCode, HORIZONTAL_AXIS, VERTICAL_AXIS, BOTH_AXIS)
|
||||
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
|
||||
CV_ENUM(MatchMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
|
||||
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2)
|
||||
CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv::gpu::ALPHA_ATOP, cv::gpu::ALPHA_XOR, cv::gpu::ALPHA_PLUS, cv::gpu::ALPHA_OVER_PREMUL, cv::gpu::ALPHA_IN_PREMUL, cv::gpu::ALPHA_OUT_PREMUL, cv::gpu::ALPHA_ATOP_PREMUL, cv::gpu::ALPHA_XOR_PREMUL, cv::gpu::ALPHA_PLUS_PREMUL, cv::gpu::ALPHA_PREMUL)
|
||||
|
||||
struct CvtColorInfo
|
||||
{
|
||||
|
@ -52,8 +52,6 @@ void cv::gpu::gemm(const GpuMat&, const GpuMat&, double, const GpuMat&, double,
|
||||
void cv::gpu::transpose(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::flip(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::exp(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::log(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::magnitude(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::magnitudeSqr(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
@ -89,9 +87,9 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
|
||||
CV_Assert(src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2);
|
||||
CV_Assert(src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()));
|
||||
|
||||
bool tr1 = flags & GEMM_1_T;
|
||||
bool tr2 = flags & GEMM_2_T;
|
||||
bool tr3 = flags & GEMM_3_T;
|
||||
bool tr1 = (flags & GEMM_1_T) != 0;
|
||||
bool tr2 = (flags & GEMM_2_T) != 0;
|
||||
bool tr3 = (flags & GEMM_3_T) != 0;
|
||||
|
||||
Size src1Size = tr1 ? Size(src1.rows, src1.cols) : src1.size();
|
||||
Size src2Size = tr2 ? Size(src2.rows, src2.cols) : src2.size();
|
||||
@ -243,35 +241,66 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// flip
|
||||
|
||||
void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& s)
|
||||
namespace
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
|
||||
template<int DEPTH> struct NppTypeTraits;
|
||||
template<> struct NppTypeTraits<CV_8U> { typedef Npp8u npp_t; };
|
||||
template<> struct NppTypeTraits<CV_8S> { typedef Npp8s npp_t; };
|
||||
template<> struct NppTypeTraits<CV_16U> { typedef Npp16u npp_t; };
|
||||
template<> struct NppTypeTraits<CV_16S> { typedef Npp16s npp_t; };
|
||||
template<> struct NppTypeTraits<CV_32S> { typedef Npp32s npp_t; };
|
||||
template<> struct NppTypeTraits<CV_32F> { typedef Npp32f npp_t; };
|
||||
template<> struct NppTypeTraits<CV_64F> { typedef Npp64f npp_t; };
|
||||
|
||||
dst.create( src.size(), src.type() );
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
if (src.type() == CV_8UC1)
|
||||
template <int DEPTH> struct NppMirrorFunc
|
||||
{
|
||||
nppSafeCall( nppiMirror_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
|
||||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
|
||||
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
nppSafeCall( nppiMirror_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step),
|
||||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
|
||||
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
|
||||
}
|
||||
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
typedef NppStatus (*func_t)(const npp_t* pSrc, int nSrcStep, npp_t* pDst, int nDstStep, NppiSize oROI, NppiAxis flip);
|
||||
};
|
||||
|
||||
template <int DEPTH, typename NppMirrorFunc<DEPTH>::func_t func> struct NppMirror
|
||||
{
|
||||
typedef typename NppMirrorFunc<DEPTH>::npp_t npp_t;
|
||||
|
||||
static void call(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream)
|
||||
{
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step),
|
||||
dst.ptr<npp_t>(), static_cast<int>(dst.step), sz,
|
||||
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[6][4] =
|
||||
{
|
||||
{NppMirror<CV_8U, nppiMirror_8u_C1R>::call, 0, NppMirror<CV_8U, nppiMirror_8u_C3R>::call, NppMirror<CV_8U, nppiMirror_8u_C4R>::call},
|
||||
{0,0,0,0},
|
||||
{NppMirror<CV_16U, nppiMirror_16u_C1R>::call, 0, NppMirror<CV_16U, nppiMirror_16u_C3R>::call, NppMirror<CV_16U, nppiMirror_16u_C4R>::call},
|
||||
{0,0,0,0},
|
||||
{NppMirror<CV_32S, nppiMirror_32s_C1R>::call, 0, NppMirror<CV_32S, nppiMirror_32s_C3R>::call, NppMirror<CV_32S, nppiMirror_32s_C4R>::call},
|
||||
{NppMirror<CV_32F, nppiMirror_32f_C1R>::call, 0, NppMirror<CV_32F, nppiMirror_32f_C3R>::call, NppMirror<CV_32F, nppiMirror_32f_C4R>::call}
|
||||
};
|
||||
|
||||
CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F);
|
||||
CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -340,52 +369,6 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// exp
|
||||
|
||||
void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& s)
|
||||
{
|
||||
CV_Assert(src.type() == CV_32FC1);
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
nppSafeCall( nppiExp_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// log
|
||||
|
||||
void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& s)
|
||||
{
|
||||
CV_Assert(src.type() == CV_32FC1);
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
nppSafeCall( nppiLn_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// NPP magnitide
|
||||
|
||||
|
@ -48,6 +48,7 @@ using namespace cv::gpu;
|
||||
#if !defined (HAVE_CUDA)
|
||||
|
||||
void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::swapChannels(GpuMat&, const int[], Stream&) { throw_nogpu(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
@ -1423,4 +1424,19 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream
|
||||
func(src, dst, dcn, stream);
|
||||
}
|
||||
|
||||
void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s)
|
||||
{
|
||||
CV_Assert(image.type() == CV_8UC4);
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = image.cols;
|
||||
sz.height = image.rows;
|
||||
|
||||
nppSafeCall( nppiSwapChannels_8u_C4IR(image.ptr<Npp8u>(), static_cast<int>(image.step), sz, dstOrder) );
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
||||
|
@ -209,7 +209,7 @@ namespace cv { namespace gpu { namespace device
|
||||
cv::gpu::error("Unsupported channels count", __FILE__, __LINE__, "bilateral_filter_caller");
|
||||
}
|
||||
|
||||
if (stream != 0)
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
|
@ -1104,9 +1104,9 @@ namespace cv { namespace gpu { namespace device
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void absdiff_gpu<uchar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
//template void absdiff_gpu<uchar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
template void absdiff_gpu<schar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
template void absdiff_gpu<ushort>(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
//template void absdiff_gpu<ushort>(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
template void absdiff_gpu<short >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
template void absdiff_gpu<int >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
//template void absdiff_gpu<float >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -50,7 +50,7 @@ using namespace cv::gpu;
|
||||
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>&, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int, GpuMat& buf) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int, GpuMat&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
|
||||
Ptr<BaseRowFilter_GPU> cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }
|
||||
Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }
|
||||
Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
|
||||
|
@ -45,12 +45,41 @@
|
||||
#if !defined (HAVE_CUDA)
|
||||
|
||||
void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
namespace
|
||||
{
|
||||
typedef NppStatus (*init_func_t)(NppiSize oSize, NppiGraphcutState** ppState, Npp8u* pDeviceMem);
|
||||
|
||||
class NppiGraphcutStateHandler
|
||||
{
|
||||
public:
|
||||
NppiGraphcutStateHandler(NppiSize sznpp, Npp8u* pDeviceMem, const init_func_t func)
|
||||
{
|
||||
nppSafeCall( func(sznpp, &pState, pDeviceMem) );
|
||||
}
|
||||
|
||||
~NppiGraphcutStateHandler()
|
||||
{
|
||||
nppSafeCall( nppiGraphcutFree(pState) );
|
||||
}
|
||||
|
||||
operator NppiGraphcutState*()
|
||||
{
|
||||
return pState;
|
||||
}
|
||||
|
||||
private:
|
||||
NppiGraphcutState* pState;
|
||||
};
|
||||
}
|
||||
|
||||
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s)
|
||||
{
|
||||
Size src_size = terminals.size();
|
||||
|
||||
CV_Assert(terminals.type() == CV_32S);
|
||||
CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
|
||||
CV_Assert(leftTransp.type() == CV_32S);
|
||||
@ -70,30 +99,76 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
|
||||
int bufsz;
|
||||
nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) );
|
||||
|
||||
if ((size_t)bufsz > buf.cols * buf.rows * buf.elemSize())
|
||||
buf.create(1, bufsz, CV_8U);
|
||||
ensureSizeIsEnough(1, bufsz, CV_8U, buf);
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
#if CUDART_VERSION > 4000
|
||||
NppiGraphcutState* pState;
|
||||
nppSafeCall( nppiGraphcutInitAlloc(sznpp, &pState, buf.ptr<Npp8u>()) );
|
||||
NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcutInitAlloc);
|
||||
|
||||
nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
|
||||
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), pState) );
|
||||
|
||||
nppSafeCall( nppiGraphcutFree(pState) );
|
||||
#else
|
||||
nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
|
||||
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), buf.ptr<Npp8u>()) );
|
||||
#endif
|
||||
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
|
||||
GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight, GpuMat& labels, GpuMat& buf, Stream& s)
|
||||
{
|
||||
Size src_size = terminals.size();
|
||||
|
||||
CV_Assert(terminals.type() == CV_32S);
|
||||
|
||||
CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
|
||||
CV_Assert(leftTransp.type() == CV_32S);
|
||||
|
||||
CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
|
||||
CV_Assert(rightTransp.type() == CV_32S);
|
||||
|
||||
CV_Assert(top.size() == src_size);
|
||||
CV_Assert(top.type() == CV_32S);
|
||||
|
||||
CV_Assert(topLeft.size() == src_size);
|
||||
CV_Assert(topLeft.type() == CV_32S);
|
||||
|
||||
CV_Assert(topRight.size() == src_size);
|
||||
CV_Assert(topRight.type() == CV_32S);
|
||||
|
||||
CV_Assert(bottom.size() == src_size);
|
||||
CV_Assert(bottom.type() == CV_32S);
|
||||
|
||||
CV_Assert(bottomLeft.size() == src_size);
|
||||
CV_Assert(bottomLeft.type() == CV_32S);
|
||||
|
||||
CV_Assert(bottomRight.size() == src_size);
|
||||
CV_Assert(bottomRight.type() == CV_32S);
|
||||
|
||||
labels.create(src_size, CV_8U);
|
||||
|
||||
NppiSize sznpp;
|
||||
sznpp.width = src_size.width;
|
||||
sznpp.height = src_size.height;
|
||||
|
||||
int bufsz;
|
||||
nppSafeCall( nppiGraphcut8GetSize(sznpp, &bufsz) );
|
||||
|
||||
ensureSizeIsEnough(1, bufsz, CV_8U, buf);
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcut8InitAlloc);
|
||||
|
||||
nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(),
|
||||
top.ptr<Npp32s>(), topLeft.ptr<Npp32s>(), topRight.ptr<Npp32s>(),
|
||||
bottom.ptr<Npp32s>(), bottomLeft.ptr<Npp32s>(), bottomRight.ptr<Npp32s>(),
|
||||
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
||||
|
||||
|
@ -62,7 +62,6 @@ void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float,
|
||||
void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::integral(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::columnSum(const GpuMat&, GpuMat&) { throw_nogpu(); }
|
||||
void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_nogpu(); }
|
||||
@ -91,7 +90,7 @@ void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, f
|
||||
void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); }
|
||||
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }
|
||||
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&, Stream& stream) { throw_nogpu(); }
|
||||
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::pyrDown(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::pyrUp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); }
|
||||
@ -780,44 +779,78 @@ void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K,
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// rotate
|
||||
|
||||
void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& s)
|
||||
{
|
||||
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
|
||||
namespace
|
||||
{
|
||||
template<int DEPTH> struct NppTypeTraits;
|
||||
template<> struct NppTypeTraits<CV_8U> { typedef Npp8u npp_t; };
|
||||
template<> struct NppTypeTraits<CV_8S> { typedef Npp8s npp_t; };
|
||||
template<> struct NppTypeTraits<CV_16U> { typedef Npp16u npp_t; };
|
||||
template<> struct NppTypeTraits<CV_16S> { typedef Npp16s npp_t; };
|
||||
template<> struct NppTypeTraits<CV_32S> { typedef Npp32s npp_t; };
|
||||
template<> struct NppTypeTraits<CV_32F> { typedef Npp32f npp_t; };
|
||||
template<> struct NppTypeTraits<CV_64F> { typedef Npp64f npp_t; };
|
||||
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
|
||||
template <int DEPTH> struct NppRotateFunc
|
||||
{
|
||||
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
|
||||
|
||||
typedef NppStatus (*func_t)(const npp_t* pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI,
|
||||
npp_t* pDst, int nDstStep, NppiRect oDstROI,
|
||||
double nAngle, double nShiftX, double nShiftY, int eInterpolation);
|
||||
};
|
||||
|
||||
template <int DEPTH, typename NppRotateFunc<DEPTH>::func_t func> struct NppRotate
|
||||
{
|
||||
typedef typename NppRotateFunc<DEPTH>::npp_t npp_t;
|
||||
|
||||
static void call(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream)
|
||||
{
|
||||
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.height = src.rows;
|
||||
srcsz.width = src.cols;
|
||||
NppiRect srcroi;
|
||||
srcroi.x = srcroi.y = 0;
|
||||
srcroi.height = src.rows;
|
||||
srcroi.width = src.cols;
|
||||
NppiRect dstroi;
|
||||
dstroi.x = dstroi.y = 0;
|
||||
dstroi.height = dst.rows;
|
||||
dstroi.width = dst.cols;
|
||||
|
||||
nppSafeCall( func(src.ptr<npp_t>(), srcsz, static_cast<int>(src.step), srcroi,
|
||||
dst.ptr<npp_t>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[6][4] =
|
||||
{
|
||||
{NppRotate<CV_8U, nppiRotate_8u_C1R>::call, 0, NppRotate<CV_8U, nppiRotate_8u_C3R>::call, NppRotate<CV_8U, nppiRotate_8u_C4R>::call},
|
||||
{0,0,0,0},
|
||||
{NppRotate<CV_16U, nppiRotate_16u_C1R>::call, 0, NppRotate<CV_16U, nppiRotate_16u_C3R>::call, NppRotate<CV_16U, nppiRotate_16u_C4R>::call},
|
||||
{0,0,0,0},
|
||||
{0,0,0,0},
|
||||
{NppRotate<CV_32F, nppiRotate_32f_C1R>::call, 0, NppRotate<CV_32F, nppiRotate_32f_C3R>::call, NppRotate<CV_32F, nppiRotate_32f_C4R>::call}
|
||||
};
|
||||
|
||||
CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F);
|
||||
CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
|
||||
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
|
||||
|
||||
dst.create(dsize, src.type());
|
||||
|
||||
NppiSize srcsz;
|
||||
srcsz.height = src.rows;
|
||||
srcsz.width = src.cols;
|
||||
NppiRect srcroi;
|
||||
srcroi.x = srcroi.y = 0;
|
||||
srcroi.height = src.rows;
|
||||
srcroi.width = src.cols;
|
||||
NppiRect dstroi;
|
||||
dstroi.x = dstroi.y = 0;
|
||||
dstroi.height = dst.rows;
|
||||
dstroi.width = dst.cols;
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
if (src.type() == CV_8UC1)
|
||||
{
|
||||
nppSafeCall( nppiRotate_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,
|
||||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
|
||||
}
|
||||
else
|
||||
{
|
||||
nppSafeCall( nppiRotate_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,
|
||||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
|
||||
}
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -857,30 +890,6 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void cv::gpu::integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& s)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1);
|
||||
|
||||
int width = src.cols + 1, height = src.rows + 1;
|
||||
|
||||
sum.create(height, width, CV_32S);
|
||||
sqsum.create(height, width, CV_32F);
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
nppSafeCall( nppiSqrIntegral_8u32s32f_C1R(const_cast<Npp8u*>(src.ptr<Npp8u>()), static_cast<int>(src.step),
|
||||
sum.ptr<Npp32s>(), static_cast<int>(sum.step), sqsum.ptr<Npp32f>(), static_cast<int>(sqsum.step), sz, 0, 0.0f, height) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// sqrIntegral
|
||||
|
||||
@ -935,7 +944,6 @@ void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)
|
||||
|
||||
void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)
|
||||
{
|
||||
#if CUDART_VERSION > 4000
|
||||
CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_64FC1);
|
||||
|
||||
dst.create(src.size(), CV_32FC1);
|
||||
@ -959,31 +967,6 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
#else
|
||||
CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_32FC1);
|
||||
|
||||
dst.create(src.size(), CV_32FC1);
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
NppiRect nppRect;
|
||||
nppRect.height = rect.height;
|
||||
nppRect.width = rect.width;
|
||||
nppRect.x = rect.x;
|
||||
nppRect.y = rect.y;
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp32f>(), static_cast<int>(sqr.step),
|
||||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -992,25 +975,19 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
|
||||
|
||||
namespace
|
||||
{
|
||||
template<int n> struct NPPTypeTraits;
|
||||
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
|
||||
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
|
||||
|
||||
typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize);
|
||||
typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize);
|
||||
|
||||
template<int SDEPTH> struct NppHistogramEvenFuncC1
|
||||
{
|
||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||
typedef typename NppTypeTraits<SDEPTH>::npp_t src_t;
|
||||
|
||||
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist,
|
||||
int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer);
|
||||
};
|
||||
template<int SDEPTH> struct NppHistogramEvenFuncC4
|
||||
{
|
||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||
typedef typename NppTypeTraits<SDEPTH>::npp_t src_t;
|
||||
|
||||
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI,
|
||||
Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer);
|
||||
@ -1079,7 +1056,7 @@ namespace
|
||||
|
||||
template<int SDEPTH> struct NppHistogramRangeFuncC1
|
||||
{
|
||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||
typedef typename NppTypeTraits<SDEPTH>::npp_t src_t;
|
||||
typedef Npp32s level_t;
|
||||
enum {LEVEL_TYPE_CODE=CV_32SC1};
|
||||
|
||||
@ -1097,7 +1074,7 @@ namespace
|
||||
};
|
||||
template<int SDEPTH> struct NppHistogramRangeFuncC4
|
||||
{
|
||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||
typedef typename NppTypeTraits<SDEPTH>::npp_t src_t;
|
||||
typedef Npp32s level_t;
|
||||
enum {LEVEL_TYPE_CODE=CV_32SC1};
|
||||
|
||||
|
@ -171,8 +171,8 @@ bool cv::gpu::DeviceInfo::supports(cv::gpu::FeatureSet) const { throw_nogpu(); r
|
||||
bool cv::gpu::DeviceInfo::isCompatible() const { throw_nogpu(); return false; }
|
||||
void cv::gpu::DeviceInfo::query() { throw_nogpu(); }
|
||||
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_nogpu(); }
|
||||
void cv::gpu::printCudaDeviceInfo(int device) { throw_nogpu(); }
|
||||
void cv::gpu::printShortCudaDeviceInfo(int device) { throw_nogpu(); }
|
||||
void cv::gpu::printCudaDeviceInfo(int) { throw_nogpu(); }
|
||||
void cv::gpu::printShortCudaDeviceInfo(int) { throw_nogpu(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
@ -346,7 +346,6 @@ void cv::gpu::printCudaDeviceInfo(int device)
|
||||
convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount);
|
||||
printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
|
||||
|
||||
#if (CUDART_VERSION >= 4000)
|
||||
// This is not available in the CUDA Runtime API, so we make the necessary calls the driver API to support this for output
|
||||
int memoryClock, memBusWidth, L2CacheSize;
|
||||
getCudaAttribute<int>( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev );
|
||||
@ -364,7 +363,7 @@ void cv::gpu::printCudaDeviceInfo(int device)
|
||||
printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n",
|
||||
prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
|
||||
prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
|
||||
#endif
|
||||
|
||||
printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem);
|
||||
printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock);
|
||||
printf(" Total number of registers available per block: %d\n", prop.regsPerBlock);
|
||||
@ -375,11 +374,7 @@ void cv::gpu::printCudaDeviceInfo(int device)
|
||||
printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch);
|
||||
printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment);
|
||||
|
||||
#if CUDART_VERSION >= 4000
|
||||
printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
|
||||
#else
|
||||
printf(" Concurrent copy and execution: %s\n", prop.deviceOverlap ? "Yes" : "No");
|
||||
#endif
|
||||
printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
|
||||
printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No");
|
||||
printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No");
|
||||
@ -388,10 +383,8 @@ void cv::gpu::printCudaDeviceInfo(int device)
|
||||
printf(" Alignment requirement for Surfaces: %s\n", prop.surfaceAlignment ? "Yes" : "No");
|
||||
printf(" Device has ECC support enabled: %s\n", prop.ECCEnabled ? "Yes" : "No");
|
||||
printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No");
|
||||
#if CUDART_VERSION >= 4000
|
||||
printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No");
|
||||
printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID );
|
||||
#endif
|
||||
printf(" Compute Mode:\n");
|
||||
printf(" %s \n", computeMode[prop.computeMode]);
|
||||
}
|
||||
|
@ -48,6 +48,7 @@ using namespace cv::gpu;
|
||||
#if !defined (HAVE_CUDA)
|
||||
|
||||
void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&) { throw_nogpu(); }
|
||||
void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&, GpuMat&) { throw_nogpu(); }
|
||||
double cv::gpu::norm(const GpuMat&, int) { throw_nogpu(); return 0.0; }
|
||||
double cv::gpu::norm(const GpuMat&, int, GpuMat&) { throw_nogpu(); return 0.0; }
|
||||
double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_nogpu(); return 0.0; }
|
||||
@ -108,6 +109,12 @@ namespace
|
||||
// meanStdDev
|
||||
|
||||
void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
|
||||
{
|
||||
GpuMat buf;
|
||||
meanStdDev(src, mean, stddev, buf);
|
||||
}
|
||||
|
||||
void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat& buf)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1);
|
||||
|
||||
@ -117,15 +124,12 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
|
||||
|
||||
DeviceBuffer dbuf(2);
|
||||
|
||||
#if CUDART_VERSION > 4000
|
||||
int bufSize;
|
||||
nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
|
||||
|
||||
GpuMat buf(1, bufSize, CV_8UC1);
|
||||
ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
|
||||
|
||||
nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );
|
||||
#else
|
||||
nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, dbuf, (double*)dbuf + 1) );
|
||||
#endif
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
@ -133,7 +137,6 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
|
||||
dbuf.download(ptrs);
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// norm
|
||||
|
||||
@ -151,7 +154,7 @@ double cv::gpu::norm(const GpuMat& src, int normType, GpuMat& buf)
|
||||
return absSum(src_single_channel, buf)[0];
|
||||
|
||||
if (normType == NORM_L2)
|
||||
return sqrt(sqrSum(src_single_channel, buf)[0]);
|
||||
return std::sqrt(sqrSum(src_single_channel, buf)[0]);
|
||||
|
||||
if (normType == NORM_INF)
|
||||
{
|
||||
|
@ -228,7 +228,7 @@ void cv::gpu::createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMa
|
||||
minMax(u_avg, 0, &uMax);
|
||||
minMax(v_avg, 0, &vMax);
|
||||
|
||||
float max_flow = static_cast<float>(sqrt(uMax * uMax + vMax * vMax));
|
||||
float max_flow = static_cast<float>(std::sqrt(uMax * uMax + vMax * vMax));
|
||||
|
||||
CreateOpticalFlowNeedleMap_gpu(u_avg, v_avg, vertex.ptr<float>(), colors.ptr<float>(), max_flow, 1.0f / u.cols, 1.0f / u.rows);
|
||||
|
||||
|
@ -160,7 +160,7 @@ void cv::gpu::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double s
|
||||
double ig11, ig03, ig33, ig55;
|
||||
prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
|
||||
|
||||
device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, ig11, ig03, ig33, ig55);
|
||||
device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, static_cast<float>(ig11), static_cast<float>(ig03), static_cast<float>(ig33), static_cast<float>(ig55));
|
||||
}
|
||||
|
||||
|
||||
|
@ -429,11 +429,11 @@ void cv::gpu::ORB_GPU::setParams(size_t n_features, const ORB::CommonParams& det
|
||||
// pre-compute the end of a row in a circular patch
|
||||
int half_patch_size = params_.patch_size_ / 2;
|
||||
vector<int> u_max(half_patch_size + 1);
|
||||
for (int v = 0; v <= half_patch_size * sqrt(2.f) / 2 + 1; ++v)
|
||||
u_max[v] = cvRound(sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
|
||||
for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
|
||||
u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
|
||||
|
||||
// Make sure we are symmetric
|
||||
for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * sqrt(2.f) / 2; --v)
|
||||
for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
|
||||
{
|
||||
while (u_max[v_0] == u_max[v_0 + 1])
|
||||
++v_0;
|
||||
|
@ -91,8 +91,8 @@
|
||||
#include "nvidia/NCVHaarObjectDetection.hpp"
|
||||
#include "nvidia/NCVBroxOpticalFlow.hpp"
|
||||
|
||||
#define CUDART_MINIMUM_REQUIRED_VERSION 4000
|
||||
#define NPP_MINIMUM_REQUIRED_VERSION 4000
|
||||
#define CUDART_MINIMUM_REQUIRED_VERSION 4010
|
||||
#define NPP_MINIMUM_REQUIRED_VERSION 4100
|
||||
|
||||
#if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
|
||||
#error "Insufficient Cuda Runtime library version, please update it."
|
||||
|
@ -69,10 +69,10 @@ PARAM_TEST_CASE(ArithmTestBase, cv::gpu::DeviceInfo, MatType, UseRoi)
|
||||
|
||||
size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
|
||||
|
||||
mat1 = randomMat(rng, size, type, 1, 16, false);
|
||||
mat2 = randomMat(rng, size, type, 1, 16, false);
|
||||
mat1 = randomMat(rng, size, type, 5, 16, false);
|
||||
mat2 = randomMat(rng, size, type, 5, 16, false);
|
||||
|
||||
val = cv::Scalar(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0));
|
||||
val = cv::Scalar(rng.uniform(1, 3), rng.uniform(1, 3), rng.uniform(1, 3), rng.uniform(1, 3));
|
||||
}
|
||||
};
|
||||
|
||||
@ -115,7 +115,8 @@ TEST_P(Add, Scalar)
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(
|
||||
ALL_DEVICES,
|
||||
Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
|
||||
CV_32SC1, CV_32SC2, CV_32SC3, CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4),
|
||||
USE_ROI));
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -157,7 +158,8 @@ TEST_P(Subtract, Scalar)
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Subtract, Combine(
|
||||
ALL_DEVICES,
|
||||
Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
|
||||
CV_32SC1, CV_32SC2, CV_32SC3, CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4),
|
||||
USE_ROI));
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -199,7 +201,8 @@ TEST_P(Multiply, Scalar)
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Multiply, Combine(
|
||||
ALL_DEVICES,
|
||||
Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_16SC1, CV_16SC3, CV_16SC4,
|
||||
CV_32SC1, CV_32SC3, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||
USE_ROI));
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -220,7 +223,7 @@ TEST_P(Divide, Array)
|
||||
|
||||
gpuRes.download(dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, mat1.depth() == CV_32F ? 1e-5 : 1);
|
||||
}
|
||||
|
||||
TEST_P(Divide, Scalar)
|
||||
@ -236,12 +239,13 @@ TEST_P(Divide, Scalar)
|
||||
|
||||
gpuRes.download(dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, mat1.depth() == CV_32F ? 1e-5 : 1);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Divide, Combine(
|
||||
ALL_DEVICES,
|
||||
Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_16SC1, CV_16SC3, CV_16SC4,
|
||||
CV_32SC1, CV_32SC3, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||
USE_ROI));
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -312,6 +316,83 @@ INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(
|
||||
Values(CV_8UC1, CV_16UC1, CV_32SC1, CV_32FC1),
|
||||
USE_ROI));
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// abs
|
||||
|
||||
struct Abs : ArithmTestBase {};
|
||||
|
||||
TEST_P(Abs, Array)
|
||||
{
|
||||
cv::Mat dst_gold = cv::abs(mat1);
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
cv::gpu::GpuMat gpuRes;
|
||||
|
||||
cv::gpu::abs(loadMat(mat1, useRoi), gpuRes);
|
||||
|
||||
gpuRes.download(dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Abs, Combine(
|
||||
ALL_DEVICES,
|
||||
Values(CV_16SC1, CV_32FC1),
|
||||
USE_ROI));
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Sqr
|
||||
|
||||
struct Sqr : ArithmTestBase {};
|
||||
|
||||
TEST_P(Sqr, Array)
|
||||
{
|
||||
cv::Mat dst_gold;
|
||||
cv::multiply(mat1, mat1, dst_gold);
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
cv::gpu::GpuMat gpuRes;
|
||||
|
||||
cv::gpu::sqr(loadMat(mat1, useRoi), gpuRes);
|
||||
|
||||
gpuRes.download(dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Sqr, Combine(
|
||||
ALL_DEVICES,
|
||||
Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1),
|
||||
USE_ROI));
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Sqrt
|
||||
|
||||
struct Sqrt : ArithmTestBase {};
|
||||
|
||||
TEST_P(Sqrt, Array)
|
||||
{
|
||||
cv::Mat dst_gold;
|
||||
cv::sqrt(mat1, dst_gold);
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
cv::gpu::GpuMat gpuRes;
|
||||
|
||||
cv::gpu::sqrt(loadMat(mat1, useRoi), gpuRes);
|
||||
|
||||
gpuRes.download(dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(
|
||||
ALL_DEVICES,
|
||||
Values(MatType(CV_32FC1)),
|
||||
USE_ROI));
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// compare
|
||||
|
||||
@ -513,7 +594,7 @@ TEST_P(Flip, Accuracy)
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(
|
||||
ALL_DEVICES,
|
||||
Values(CV_8UC1, CV_8UC4),
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||
Values((int)FLIP_BOTH, (int)FLIP_X, (int)FLIP_Y),
|
||||
USE_ROI));
|
||||
|
||||
@ -1329,6 +1410,90 @@ INSTANTIATE_TEST_CASE_P(Arithm, Bitwise, Combine(
|
||||
ALL_DEVICES,
|
||||
ALL_TYPES));
|
||||
|
||||
PARAM_TEST_CASE(BitwiseScalar, cv::gpu::DeviceInfo, MatType)
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
int type;
|
||||
|
||||
cv::Size size;
|
||||
cv::Mat mat;
|
||||
cv::Scalar sc;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = GET_PARAM(0);
|
||||
type = GET_PARAM(1);
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
|
||||
cv::RNG& rng = cvtest::TS::ptr()->get_rng();
|
||||
|
||||
size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
|
||||
|
||||
mat.create(size, type);
|
||||
|
||||
for (int i = 0; i < mat.rows; ++i)
|
||||
{
|
||||
cv::Mat row(1, static_cast<int>(mat.cols * mat.elemSize()), CV_8U, (void*)mat.ptr(i));
|
||||
rng.fill(row, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
|
||||
}
|
||||
|
||||
sc = cv::Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(BitwiseScalar, Or)
|
||||
{
|
||||
cv::Mat dst_gold;
|
||||
cv::bitwise_or(mat, sc, dst_gold);
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
cv::gpu::GpuMat dev_dst;
|
||||
|
||||
cv::gpu::bitwise_or(loadMat(mat), sc, dev_dst);
|
||||
|
||||
dev_dst.download(dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
||||
}
|
||||
|
||||
TEST_P(BitwiseScalar, And)
|
||||
{
|
||||
cv::Mat dst_gold;
|
||||
cv::bitwise_and(mat, sc, dst_gold);
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
cv::gpu::GpuMat dev_dst;
|
||||
|
||||
cv::gpu::bitwise_and(loadMat(mat), sc, dev_dst);
|
||||
|
||||
dev_dst.download(dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
||||
}
|
||||
|
||||
TEST_P(BitwiseScalar, Xor)
|
||||
{
|
||||
cv::Mat dst_gold;
|
||||
cv::bitwise_xor(mat, sc, dst_gold);
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
cv::gpu::GpuMat dev_dst;
|
||||
|
||||
cv::gpu::bitwise_xor(loadMat(mat), sc, dev_dst);
|
||||
|
||||
dev_dst.download(dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Arithm, BitwiseScalar, Combine(
|
||||
ALL_DEVICES,
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4)));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// addWeighted
|
||||
|
||||
|
@ -2377,6 +2377,49 @@ INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, Combine(
|
||||
Values(CV_8U, CV_16U, CV_32F),
|
||||
USE_ROI));
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// swapChannels
|
||||
|
||||
PARAM_TEST_CASE(SwapChannels, cv::gpu::DeviceInfo, UseRoi)
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
bool useRoi;
|
||||
|
||||
cv::Mat img;
|
||||
|
||||
cv::Mat dst_gold;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = GET_PARAM(0);
|
||||
useRoi = GET_PARAM(1);
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
|
||||
cv::Mat imgBase = readImage("stereobm/aloe-L.png");
|
||||
ASSERT_FALSE(imgBase.empty());
|
||||
|
||||
cv::cvtColor(imgBase, img, cv::COLOR_BGR2BGRA);
|
||||
|
||||
cv::cvtColor(img, dst_gold, cv::COLOR_BGRA2RGBA);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(SwapChannels, Accuracy)
|
||||
{
|
||||
cv::gpu::GpuMat gpuImage = loadMat(img, useRoi);
|
||||
|
||||
const int dstOrder[] = {2, 1, 0, 3};
|
||||
cv::gpu::swapChannels(gpuImage, dstOrder);
|
||||
|
||||
cv::Mat dst;
|
||||
gpuImage.download(dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ImgProc, SwapChannels, Combine(ALL_DEVICES, USE_ROI));
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// histograms
|
||||
|
||||
|
@ -136,7 +136,7 @@ int main(int argc, char** argv)
|
||||
|
||||
#else // HAVE_CUDA
|
||||
|
||||
int main(int argc, char** argv)
|
||||
int main()
|
||||
{
|
||||
printf("OpenCV was built without CUDA support\n");
|
||||
return 0;
|
||||
|
@ -146,7 +146,6 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id)
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void cv::namedWindow( const string& winname, int flags )
|
||||
|
@ -105,6 +105,8 @@ public:
|
||||
GraphCutSeamFinder(int cost_type = COST_COLOR_GRAD, float terminal_cost = 10000.f,
|
||||
float bad_region_penalty = 1000.f);
|
||||
|
||||
~GraphCutSeamFinder();
|
||||
|
||||
void find(const std::vector<Mat> &src, const std::vector<Point> &corners,
|
||||
std::vector<Mat> &masks);
|
||||
|
||||
|
@ -411,6 +411,8 @@ void GraphCutSeamFinder::Impl::findInPair(size_t first, size_t second, Rect roi)
|
||||
GraphCutSeamFinder::GraphCutSeamFinder(int cost_type, float terminal_cost, float bad_region_penalty)
|
||||
: impl_(new Impl(cost_type, terminal_cost, bad_region_penalty)) {}
|
||||
|
||||
GraphCutSeamFinder::~GraphCutSeamFinder() {}
|
||||
|
||||
|
||||
void GraphCutSeamFinder::find(const vector<Mat> &src, const vector<Point> &corners,
|
||||
vector<Mat> &masks)
|
||||
|
@ -247,7 +247,7 @@ inline int clamp(int val, int minVal, int maxVal)
|
||||
return max(min(val, maxVal), minVal);
|
||||
}
|
||||
|
||||
void PointCloudRenderer::onMouseEvent(int event, int x, int y, int flags)
|
||||
void PointCloudRenderer::onMouseEvent(int event, int x, int y, int /*flags*/)
|
||||
{
|
||||
static int oldx = x;
|
||||
static int oldy = y;
|
||||
|
68
samples/gpu/alpha_comp.cpp
Normal file
68
samples/gpu/alpha_comp.cpp
Normal file
@ -0,0 +1,68 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "opencv2/core/opengl_interop.hpp"
|
||||
#include "opencv2/highgui/highgui.hpp"
|
||||
#include "opencv2/gpu/gpu.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
int main()
|
||||
{
|
||||
cout << "This program demonstrates using alphaComp" << endl;
|
||||
cout << "Press SPACE to change compositing operation" << endl;
|
||||
cout << "Press ESC to exit" << endl;
|
||||
|
||||
namedWindow("First Image", WINDOW_NORMAL);
|
||||
namedWindow("Second Image", WINDOW_NORMAL);
|
||||
namedWindow("Result", WINDOW_OPENGL);
|
||||
|
||||
setGlDevice();
|
||||
|
||||
Mat src1(640, 480, CV_8UC4, Scalar::all(0));
|
||||
Mat src2(640, 480, CV_8UC4, Scalar::all(0));
|
||||
|
||||
rectangle(src1, Rect(50, 50, 200, 200), Scalar(0, 0, 255, 128), 30);
|
||||
rectangle(src2, Rect(100, 100, 200, 200), Scalar(255, 0, 0, 128), 30);
|
||||
|
||||
GpuMat d_src1(src1);
|
||||
GpuMat d_src2(src2);
|
||||
|
||||
GpuMat d_res;
|
||||
|
||||
imshow("First Image", src1);
|
||||
imshow("Second Image", src2);
|
||||
|
||||
int alpha_op = ALPHA_OVER;
|
||||
|
||||
const char* op_names[] =
|
||||
{
|
||||
"ALPHA_OVER", "ALPHA_IN", "ALPHA_OUT", "ALPHA_ATOP", "ALPHA_XOR", "ALPHA_PLUS", "ALPHA_OVER_PREMUL", "ALPHA_IN_PREMUL", "ALPHA_OUT_PREMUL",
|
||||
"ALPHA_ATOP_PREMUL", "ALPHA_XOR_PREMUL", "ALPHA_PLUS_PREMUL", "ALPHA_PREMUL"
|
||||
};
|
||||
|
||||
while (true)
|
||||
{
|
||||
cout << op_names[alpha_op] << endl;
|
||||
|
||||
alphaComp(d_src1, d_src2, d_res, alpha_op);
|
||||
|
||||
imshow("Result", d_res);
|
||||
|
||||
char key = static_cast<char>(waitKey());
|
||||
|
||||
if (key == 27)
|
||||
break;
|
||||
|
||||
if (key == 32)
|
||||
{
|
||||
++alpha_op;
|
||||
|
||||
if (alpha_op > ALPHA_PREMUL)
|
||||
alpha_op = ALPHA_OVER;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -226,7 +226,7 @@ int main(int argc, const char* argv[])
|
||||
break;
|
||||
|
||||
case 'S':
|
||||
if (currentFrame < frames.size() - 1)
|
||||
if (currentFrame < static_cast<int>(frames.size()) - 1)
|
||||
++currentFrame;
|
||||
|
||||
imshow("Interpolated frame", frames[currentFrame]);
|
||||
|
@ -26,7 +26,7 @@ void colorizeFlow(const Mat &u, const Mat &v, Mat &dst)
|
||||
minMaxLoc(v, &vMin, &vMax, 0, 0);
|
||||
uMin = ::abs(uMin); uMax = ::abs(uMax);
|
||||
vMin = ::abs(vMin); vMax = ::abs(vMax);
|
||||
float dMax = ::max(::max(uMin, uMax), ::max(vMin, vMax));
|
||||
float dMax = static_cast<float>(::max(::max(uMin, uMax), ::max(vMin, vMax)));
|
||||
|
||||
dst.create(u.size(), CV_8UC3);
|
||||
for (int y = 0; y < u.rows; ++y)
|
||||
@ -111,11 +111,11 @@ int main(int argc, char **argv)
|
||||
|
||||
s.str("");
|
||||
s << "opt. flow FPS: " << cvRound((getTickFrequency()/(tc1-tc0)));
|
||||
putText(image, s.str(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255,0,255), 2.);
|
||||
putText(image, s.str(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255,0,255), 2);
|
||||
|
||||
s.str("");
|
||||
s << "total FPS: " << cvRound((getTickFrequency()/(t1-t0)));
|
||||
putText(image, s.str(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255,0,255), 2.);
|
||||
putText(image, s.str(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255,0,255), 2);
|
||||
|
||||
imshow("flow", image);
|
||||
|
||||
|
@ -63,7 +63,7 @@ void TestSystem::finishCurrentSubtest()
|
||||
double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
|
||||
double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
|
||||
|
||||
double speedup = static_cast<double>(cpu_elapsed_) / std::max((int64)1, gpu_elapsed_);
|
||||
double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
|
||||
speedup_total_ += speedup;
|
||||
|
||||
printMetrics(cpu_time, gpu_time, speedup);
|
||||
|
@ -127,8 +127,10 @@ private:
|
||||
std::stringstream cur_subtest_description_;
|
||||
bool cur_subtest_is_empty_;
|
||||
|
||||
int64 cpu_started_, cpu_elapsed_;
|
||||
int64 gpu_started_, gpu_elapsed_;
|
||||
int64 cpu_started_;
|
||||
int64 gpu_started_;
|
||||
double cpu_elapsed_;
|
||||
double gpu_elapsed_;
|
||||
|
||||
double speedup_total_;
|
||||
int num_subtests_called_;
|
||||
|
@ -1199,7 +1199,7 @@ TEST(FarnebackOpticalFlow)
|
||||
if (frame1.empty()) throw runtime_error("can't open " + datasets[i] + "2.png");
|
||||
|
||||
gpu::FarnebackOpticalFlow calc;
|
||||
calc.fastPyramids = fastPyramids;
|
||||
calc.fastPyramids = fastPyramids != 0;
|
||||
calc.flags |= useGaussianBlur ? OPTFLOW_FARNEBACK_GAUSSIAN : 0;
|
||||
|
||||
gpu::GpuMat d_frame0(frame0), d_frame1(frame1), d_flowx, d_flowy;
|
||||
|
Loading…
Reference in New Issue
Block a user