2010-12-20 17:07:19 +08:00
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
2010-12-20 01:20:54 +08:00
2010-12-20 17:07:19 +08:00
# include "precomp.hpp"
2010-12-20 01:20:54 +08:00
2010-12-20 17:07:19 +08:00
using namespace cv ;
using namespace cv : : gpu ;
2010-12-20 01:20:54 +08:00
2010-12-20 17:07:19 +08:00
# if !defined (HAVE_CUDA)
2010-12-20 01:20:54 +08:00
2011-10-10 16:19:11 +08:00
void cv : : gpu : : add ( const GpuMat & , const GpuMat & , GpuMat & , const GpuMat & , int , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : add ( const GpuMat & , const Scalar & , GpuMat & , const GpuMat & , int , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : subtract ( const GpuMat & , const GpuMat & , GpuMat & , const GpuMat & , int , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : subtract ( const GpuMat & , const Scalar & , GpuMat & , const GpuMat & , int , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : multiply ( const GpuMat & , const GpuMat & , GpuMat & , double , int , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : multiply ( const GpuMat & , const Scalar & , GpuMat & , double , int , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : divide ( const GpuMat & , const GpuMat & , GpuMat & , double , int , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : divide ( const GpuMat & , const Scalar & , GpuMat & , double , int , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : divide ( double , const GpuMat & , GpuMat & , int , Stream & ) { throw_nogpu ( ) ; }
2011-05-31 16:31:10 +08:00
void cv : : gpu : : absdiff ( const GpuMat & , const GpuMat & , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : absdiff ( const GpuMat & , const Scalar & , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
2012-02-22 18:00:53 +08:00
void cv : : gpu : : abs ( const GpuMat & , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : sqr ( const GpuMat & , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : sqrt ( const GpuMat & , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : exp ( const GpuMat & , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : log ( const GpuMat & , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
2011-05-31 16:31:10 +08:00
void cv : : gpu : : compare ( const GpuMat & , const GpuMat & , GpuMat & , int , Stream & ) { throw_nogpu ( ) ; }
2012-05-12 17:45:21 +08:00
void cv : : gpu : : compare ( const GpuMat & , Scalar , GpuMat & , int , Stream & ) { throw_nogpu ( ) ; }
2011-05-31 16:31:10 +08:00
void cv : : gpu : : bitwise_not ( const GpuMat & , GpuMat & , const GpuMat & , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : bitwise_or ( const GpuMat & , const GpuMat & , GpuMat & , const GpuMat & , Stream & ) { throw_nogpu ( ) ; }
2012-02-22 18:00:53 +08:00
void cv : : gpu : : bitwise_or ( const GpuMat & , const Scalar & , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
2011-05-31 16:31:10 +08:00
void cv : : gpu : : bitwise_and ( const GpuMat & , const GpuMat & , GpuMat & , const GpuMat & , Stream & ) { throw_nogpu ( ) ; }
2012-02-22 18:00:53 +08:00
void cv : : gpu : : bitwise_and ( const GpuMat & , const Scalar & , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
2011-05-31 16:31:10 +08:00
void cv : : gpu : : bitwise_xor ( const GpuMat & , const GpuMat & , GpuMat & , const GpuMat & , Stream & ) { throw_nogpu ( ) ; }
2012-02-22 18:00:53 +08:00
void cv : : gpu : : bitwise_xor ( const GpuMat & , const Scalar & , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
2012-03-19 22:18:12 +08:00
void cv : : gpu : : rshift ( const GpuMat & , Scalar_ < int > , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : lshift ( const GpuMat & , Scalar_ < int > , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
2011-05-31 16:31:10 +08:00
void cv : : gpu : : min ( const GpuMat & , const GpuMat & , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : min ( const GpuMat & , double , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : max ( const GpuMat & , const GpuMat & , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
void cv : : gpu : : max ( const GpuMat & , double , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
double cv : : gpu : : threshold ( const GpuMat & , GpuMat & , double , double , int , Stream & ) { throw_nogpu ( ) ; return 0.0 ; }
2011-09-21 16:58:54 +08:00
void cv : : gpu : : pow ( const GpuMat & , double , GpuMat & , Stream & ) { throw_nogpu ( ) ; }
2012-02-22 18:00:53 +08:00
void cv : : gpu : : alphaComp ( const GpuMat & , const GpuMat & , GpuMat & , int , Stream & ) { throw_nogpu ( ) ; }
2011-09-21 16:58:54 +08:00
void cv : : gpu : : addWeighted ( const GpuMat & , double , const GpuMat & , double , double , GpuMat & , int , Stream & ) { throw_nogpu ( ) ; }
2011-07-21 16:47:44 +08:00
2010-12-20 17:07:19 +08:00
# else
2010-12-20 01:20:54 +08:00
2010-12-20 17:07:19 +08:00
////////////////////////////////////////////////////////////////////////
// Basic arithmetical operations (add subtract multiply divide)
2010-12-20 01:20:54 +08:00
2010-12-20 17:07:19 +08:00
namespace
{
2012-02-29 21:02:25 +08:00
// Compile-time map from an OpenCV depth constant to the NPP element type
// (npp_t) and, for the depths that list one, the NPP complex type
// (npp_complex_type). The primary template is declared but never defined, so
// using a depth without a specialization is a compile error.
template <int DEPTH> struct NppTypeTraits;

// 8- and 16-bit unsigned / 8-bit signed: element type only.
template <> struct NppTypeTraits<CV_8U>
{
    typedef Npp8u npp_t;
};
template <> struct NppTypeTraits<CV_8S>
{
    typedef Npp8s npp_t;
};
template <> struct NppTypeTraits<CV_16U>
{
    typedef Npp16u npp_t;
};

// Depths that also expose a complex counterpart.
template <> struct NppTypeTraits<CV_16S>
{
    typedef Npp16s  npp_t;
    typedef Npp16sc npp_complex_type;
};
template <> struct NppTypeTraits<CV_32S>
{
    typedef Npp32s  npp_t;
    typedef Npp32sc npp_complex_type;
};
template <> struct NppTypeTraits<CV_32F>
{
    typedef Npp32f  npp_t;
    typedef Npp32fc npp_complex_type;
};
template <> struct NppTypeTraits<CV_64F>
{
    typedef Npp64f  npp_t;
    typedef Npp64fc npp_complex_type;
};
2010-12-20 01:20:54 +08:00
2012-02-29 21:02:25 +08:00
template < int DEPTH > struct NppArithmFunc
2010-12-20 17:07:19 +08:00
{
2012-02-29 21:02:25 +08:00
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
2012-03-19 22:18:12 +08:00
2012-02-29 21:02:25 +08:00
typedef NppStatus ( * func_t ) ( const npp_t * pSrc1 , int nSrc1Step , const npp_t * pSrc2 , int nSrc2Step , npp_t * pDst , int nDstStep , NppiSize oSizeROI , int nScaleFactor ) ;
} ;
template < > struct NppArithmFunc < CV_32F >
2012-03-19 22:18:12 +08:00
{
2012-02-29 21:02:25 +08:00
typedef NppTypeTraits < CV_32F > : : npp_t npp_t ;
2012-02-22 18:00:53 +08:00
2012-02-29 21:02:25 +08:00
typedef NppStatus ( * func_t ) ( const Npp32f * pSrc1 , int nSrc1Step , const Npp32f * pSrc2 , int nSrc2Step , Npp32f * pDst , int nDstStep , NppiSize oSizeROI ) ;
} ;
2012-02-22 18:00:53 +08:00
2012-02-29 21:02:25 +08:00
template < int DEPTH , typename NppArithmFunc < DEPTH > : : func_t func > struct NppArithm
{
typedef typename NppArithmFunc < DEPTH > : : npp_t npp_t ;
2012-02-22 18:00:53 +08:00
2012-08-23 21:45:50 +08:00
static void call ( const PtrStepSzb src1 , const PtrStepb src2 , PtrStepb dst , cudaStream_t stream )
2012-02-29 21:02:25 +08:00
{
NppStreamHandler h ( stream ) ;
2010-12-20 17:07:19 +08:00
2012-02-29 21:02:25 +08:00
NppiSize sz ;
sz . width = src1 . cols ;
sz . height = src1 . rows ;
2011-05-31 16:31:10 +08:00
2012-03-19 22:18:12 +08:00
nppSafeCall ( func ( ( const npp_t * ) src1 . data , static_cast < int > ( src1 . step ) , ( const npp_t * ) src2 . data , static_cast < int > ( src2 . step ) ,
2012-03-26 19:02:03 +08:00
( npp_t * ) dst . data , static_cast < int > ( dst . step ) , sz , 0 ) ) ;
2011-10-10 16:19:11 +08:00
2012-02-29 21:02:25 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
2011-10-10 16:19:11 +08:00
}
2012-02-29 21:02:25 +08:00
} ;
template < typename NppArithmFunc < CV_32F > : : func_t func > struct NppArithm < CV_32F , func >
{
typedef typename NppArithmFunc < CV_32F > : : npp_t npp_t ;
2012-02-22 18:00:53 +08:00
2012-08-23 21:45:50 +08:00
static void call ( const PtrStepSzb src1 , const PtrStepb src2 , PtrStepb dst , cudaStream_t stream )
2012-02-22 18:00:53 +08:00
{
2012-02-29 21:02:25 +08:00
NppStreamHandler h ( stream ) ;
2012-02-22 18:00:53 +08:00
2012-02-29 21:02:25 +08:00
NppiSize sz ;
sz . width = src1 . cols ;
sz . height = src1 . rows ;
2012-03-19 22:18:12 +08:00
nppSafeCall ( func ( ( const npp_t * ) src1 . data , static_cast < int > ( src1 . step ) , ( const npp_t * ) src2 . data , static_cast < int > ( src2 . step ) ,
2012-03-26 19:02:03 +08:00
( npp_t * ) dst . data , static_cast < int > ( dst . step ) , sz ) ) ;
2012-02-22 18:00:53 +08:00
2012-02-29 21:02:25 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
2012-02-22 18:00:53 +08:00
}
2010-12-20 17:07:19 +08:00
} ;
2012-02-22 18:00:53 +08:00
template < int DEPTH , int cn > struct NppArithmScalarFunc
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
2012-03-19 22:18:12 +08:00
typedef NppStatus ( * func_ptr ) ( const npp_t * pSrc1 , int nSrc1Step , const npp_t * pConstants ,
2012-02-22 18:00:53 +08:00
npp_t * pDst , int nDstStep , NppiSize oSizeROI , int nScaleFactor ) ;
} ;
template < int DEPTH > struct NppArithmScalarFunc < DEPTH , 1 >
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
2012-03-19 22:18:12 +08:00
typedef NppStatus ( * func_ptr ) ( const npp_t * pSrc1 , int nSrc1Step , const npp_t pConstants ,
2012-02-22 18:00:53 +08:00
npp_t * pDst , int nDstStep , NppiSize oSizeROI , int nScaleFactor ) ;
} ;
template < int DEPTH > struct NppArithmScalarFunc < DEPTH , 2 >
{
typedef typename NppTypeTraits < DEPTH > : : npp_complex_type npp_complex_type ;
2012-03-19 22:18:12 +08:00
typedef NppStatus ( * func_ptr ) ( const npp_complex_type * pSrc1 , int nSrc1Step , const npp_complex_type pConstants ,
2012-02-22 18:00:53 +08:00
npp_complex_type * pDst , int nDstStep , NppiSize oSizeROI , int nScaleFactor ) ;
} ;
template < int cn > struct NppArithmScalarFunc < CV_32F , cn >
{
typedef NppStatus ( * func_ptr ) ( const Npp32f * pSrc1 , int nSrc1Step , const Npp32f * pConstants , Npp32f * pDst , int nDstStep , NppiSize oSizeROI ) ;
} ;
template < > struct NppArithmScalarFunc < CV_32F , 1 >
{
typedef NppStatus ( * func_ptr ) ( const Npp32f * pSrc1 , int nSrc1Step , const Npp32f pConstants , Npp32f * pDst , int nDstStep , NppiSize oSizeROI ) ;
} ;
template < > struct NppArithmScalarFunc < CV_32F , 2 >
{
typedef NppStatus ( * func_ptr ) ( const Npp32fc * pSrc1 , int nSrc1Step , const Npp32fc pConstants , Npp32fc * pDst , int nDstStep , NppiSize oSizeROI ) ;
} ;
template < int DEPTH , int cn , typename NppArithmScalarFunc < DEPTH , cn > : : func_ptr func > struct NppArithmScalar
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
2012-08-23 21:45:50 +08:00
static void call ( const PtrStepSzb src , Scalar sc , PtrStepb dst , cudaStream_t stream )
2012-02-22 18:00:53 +08:00
{
NppStreamHandler h ( stream ) ;
NppiSize sz ;
sz . width = src . cols ;
sz . height = src . rows ;
const npp_t pConstants [ ] = { saturate_cast < npp_t > ( sc . val [ 0 ] ) , saturate_cast < npp_t > ( sc . val [ 1 ] ) , saturate_cast < npp_t > ( sc . val [ 2 ] ) , saturate_cast < npp_t > ( sc . val [ 3 ] ) } ;
2012-03-26 19:02:03 +08:00
nppSafeCall ( func ( ( const npp_t * ) src . data , static_cast < int > ( src . step ) , pConstants , ( npp_t * ) dst . data , static_cast < int > ( dst . step ) , sz , 0 ) ) ;
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
template < int DEPTH , typename NppArithmScalarFunc < DEPTH , 1 > : : func_ptr func > struct NppArithmScalar < DEPTH , 1 , func >
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
2012-08-23 21:45:50 +08:00
static void call ( const PtrStepSzb src , Scalar sc , PtrStepb dst , cudaStream_t stream )
2012-02-22 18:00:53 +08:00
{
NppStreamHandler h ( stream ) ;
NppiSize sz ;
sz . width = src . cols ;
sz . height = src . rows ;
2012-03-26 19:02:03 +08:00
nppSafeCall ( func ( ( const npp_t * ) src . data , static_cast < int > ( src . step ) , saturate_cast < npp_t > ( sc . val [ 0 ] ) , ( npp_t * ) dst . data , static_cast < int > ( dst . step ) , sz , 0 ) ) ;
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
template < int DEPTH , typename NppArithmScalarFunc < DEPTH , 2 > : : func_ptr func > struct NppArithmScalar < DEPTH , 2 , func >
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
typedef typename NppTypeTraits < DEPTH > : : npp_complex_type npp_complex_type ;
2012-08-23 21:45:50 +08:00
static void call ( const PtrStepSzb src , Scalar sc , PtrStepb dst , cudaStream_t stream )
2012-02-22 18:00:53 +08:00
{
NppStreamHandler h ( stream ) ;
NppiSize sz ;
sz . width = src . cols ;
sz . height = src . rows ;
npp_complex_type nConstant ;
nConstant . re = saturate_cast < npp_t > ( sc . val [ 0 ] ) ;
nConstant . im = saturate_cast < npp_t > ( sc . val [ 1 ] ) ;
2012-03-26 19:02:03 +08:00
nppSafeCall ( func ( ( const npp_complex_type * ) src . data , static_cast < int > ( src . step ) , nConstant ,
( npp_complex_type * ) dst . data , static_cast < int > ( dst . step ) , sz , 0 ) ) ;
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
template < int cn , typename NppArithmScalarFunc < CV_32F , cn > : : func_ptr func > struct NppArithmScalar < CV_32F , cn , func >
{
2012-03-26 19:02:03 +08:00
typedef typename NppTypeTraits < CV_32F > : : npp_t npp_t ;
2012-08-23 21:45:50 +08:00
static void call ( const PtrStepSzb src , Scalar sc , PtrStepb dst , cudaStream_t stream )
2012-02-22 18:00:53 +08:00
{
NppStreamHandler h ( stream ) ;
NppiSize sz ;
sz . width = src . cols ;
sz . height = src . rows ;
const Npp32f pConstants [ ] = { saturate_cast < Npp32f > ( sc . val [ 0 ] ) , saturate_cast < Npp32f > ( sc . val [ 1 ] ) , saturate_cast < Npp32f > ( sc . val [ 2 ] ) , saturate_cast < Npp32f > ( sc . val [ 3 ] ) } ;
2012-03-26 19:02:03 +08:00
nppSafeCall ( func ( ( const npp_t * ) src . data , static_cast < int > ( src . step ) , pConstants , ( npp_t * ) dst . data , static_cast < int > ( dst . step ) , sz ) ) ;
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
template < typename NppArithmScalarFunc < CV_32F , 1 > : : func_ptr func > struct NppArithmScalar < CV_32F , 1 , func >
{
2012-03-26 19:02:03 +08:00
typedef typename NppTypeTraits < CV_32F > : : npp_t npp_t ;
2012-08-23 21:45:50 +08:00
static void call ( const PtrStepSzb src , Scalar sc , PtrStepb dst , cudaStream_t stream )
2012-02-22 18:00:53 +08:00
{
NppStreamHandler h ( stream ) ;
NppiSize sz ;
sz . width = src . cols ;
sz . height = src . rows ;
2012-03-26 19:02:03 +08:00
nppSafeCall ( func ( ( const npp_t * ) src . data , static_cast < int > ( src . step ) , saturate_cast < Npp32f > ( sc . val [ 0 ] ) , ( npp_t * ) dst . data , static_cast < int > ( dst . step ) , sz ) ) ;
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
template < typename NppArithmScalarFunc < CV_32F , 2 > : : func_ptr func > struct NppArithmScalar < CV_32F , 2 , func >
{
2012-03-26 19:02:03 +08:00
typedef typename NppTypeTraits < CV_32F > : : npp_t npp_t ;
typedef typename NppTypeTraits < CV_32F > : : npp_complex_type npp_complex_type ;
2012-08-23 21:45:50 +08:00
static void call ( const PtrStepSzb src , Scalar sc , PtrStepb dst , cudaStream_t stream )
2012-02-22 18:00:53 +08:00
{
NppStreamHandler h ( stream ) ;
NppiSize sz ;
sz . width = src . cols ;
sz . height = src . rows ;
Npp32fc nConstant ;
nConstant . re = saturate_cast < Npp32f > ( sc . val [ 0 ] ) ;
nConstant . im = saturate_cast < Npp32f > ( sc . val [ 1 ] ) ;
2012-03-26 19:02:03 +08:00
nppSafeCall ( func ( ( const npp_complex_type * ) src . data , static_cast < int > ( src . step ) , nConstant , ( npp_complex_type * ) dst . data , static_cast < int > ( dst . step ) , sz ) ) ;
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
}
2012-03-26 19:02:03 +08:00
////////////////////////////////////////////////////////////////////////
// add
namespace cv { namespace gpu { namespace device
{
template < typename T , typename D >
2012-08-23 21:45:50 +08:00
void add_gpu ( const PtrStepSzb & src1 , const PtrStepSzb & src2 , const PtrStepSzb & dst , const PtrStepb & mask , cudaStream_t stream ) ;
2012-03-26 19:02:03 +08:00
template < typename T , typename D >
2012-08-23 21:45:50 +08:00
void add_gpu ( const PtrStepSzb & src1 , double val , const PtrStepSzb & dst , const PtrStepb & mask , cudaStream_t stream ) ;
2012-03-26 19:02:03 +08:00
} } }
void cv : : gpu : : add ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , const GpuMat & mask , int dtype , Stream & s )
{
using namespace cv : : gpu : : device ;
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb & src1 , const PtrStepSzb & src2 , const PtrStepSzb & dst , const PtrStepb & mask , cudaStream_t stream ) ;
2012-03-26 19:02:03 +08:00
static const func_t funcs [ 7 ] [ 7 ] =
{
{ add_gpu < unsigned char , unsigned char > , 0 /*add_gpu<unsigned char, signed char>*/ , add_gpu < unsigned char , unsigned short > , add_gpu < unsigned char , short > , add_gpu < unsigned char , int > , add_gpu < unsigned char , float > , add_gpu < unsigned char , double > } ,
{ 0 /*add_gpu<signed char, unsigned char>*/ , 0 /*add_gpu<signed char, signed char>*/ , 0 /*add_gpu<signed char, unsigned short>*/ , 0 /*add_gpu<signed char, short>*/ , 0 /*add_gpu<signed char, int>*/ , 0 /*add_gpu<signed char, float>*/ , 0 /*add_gpu<signed char, double>*/ } ,
{ 0 /*add_gpu<unsigned short, unsigned char>*/ , 0 /*add_gpu<unsigned short, signed char>*/ , add_gpu < unsigned short , unsigned short > , 0 /*add_gpu<unsigned short, short>*/ , add_gpu < unsigned short , int > , add_gpu < unsigned short , float > , add_gpu < unsigned short , double > } ,
{ 0 /*add_gpu<short, unsigned char>*/ , 0 /*add_gpu<short, signed char>*/ , 0 /*add_gpu<short, unsigned short>*/ , add_gpu < short , short > , add_gpu < short , int > , add_gpu < short , float > , add_gpu < short , double > } ,
{ 0 /*add_gpu<int, unsigned char>*/ , 0 /*add_gpu<int, signed char>*/ , 0 /*add_gpu<int, unsigned short>*/ , 0 /*add_gpu<int, short>*/ , add_gpu < int , int > , add_gpu < int , float > , add_gpu < int , double > } ,
{ 0 /*add_gpu<float, unsigned char>*/ , 0 /*add_gpu<float, signed char>*/ , 0 /*add_gpu<float, unsigned short>*/ , 0 /*add_gpu<float, short>*/ , 0 /*add_gpu<float, int>*/ , add_gpu < float , float > , add_gpu < float , double > } ,
{ 0 /*add_gpu<double, unsigned char>*/ , 0 /*add_gpu<double, signed char>*/ , 0 /*add_gpu<double, unsigned short>*/ , 0 /*add_gpu<double, short>*/ , 0 /*add_gpu<double, int>*/ , 0 /*add_gpu<double, float>*/ , add_gpu < double , double > }
} ;
2012-08-23 21:45:50 +08:00
typedef void ( * npp_func_t ) ( const PtrStepSzb src1 , const PtrStepb src2 , PtrStepb dst , cudaStream_t stream ) ;
2012-03-26 19:02:03 +08:00
static const npp_func_t npp_funcs [ ] =
{
NppArithm < CV_8U , nppiAdd_8u_C1RSfs > : : call ,
0 ,
NppArithm < CV_16U , nppiAdd_16u_C1RSfs > : : call ,
NppArithm < CV_16S , nppiAdd_16s_C1RSfs > : : call ,
NppArithm < CV_32S , nppiAdd_32s_C1RSfs > : : call ,
NppArithm < CV_32F , nppiAdd_32f_C1R > : : call
} ;
if ( dtype < 0 )
dtype = src1 . depth ( ) ;
CV_Assert ( src1 . depth ( ) < = CV_64F & & CV_MAT_DEPTH ( dtype ) < = CV_64F ) ;
CV_Assert ( src1 . type ( ) = = src2 . type ( ) & & src1 . size ( ) = = src2 . size ( ) ) ;
CV_Assert ( mask . empty ( ) | | ( src1 . channels ( ) = = 1 & & mask . size ( ) = = src1 . size ( ) & & mask . type ( ) = = CV_8U ) ) ;
if ( src1 . depth ( ) = = CV_64F | | CV_MAT_DEPTH ( dtype ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
dst . create ( src1 . size ( ) , CV_MAKE_TYPE ( CV_MAT_DEPTH ( dtype ) , src1 . channels ( ) ) ) ;
cudaStream_t stream = StreamAccessor : : getStream ( s ) ;
if ( mask . empty ( ) & & dst . type ( ) = = src1 . type ( ) & & src1 . depth ( ) < = CV_32F )
{
npp_funcs [ src1 . depth ( ) ] ( src1 . reshape ( 1 ) , src2 . reshape ( 1 ) , dst . reshape ( 1 ) , stream ) ;
return ;
}
const func_t func = funcs [ src1 . depth ( ) ] [ dst . depth ( ) ] ;
if ( ! func )
CV_Error ( CV_StsUnsupportedFormat , " Unsupported combination of source and destination types " ) ;
func ( src1 . reshape ( 1 ) , src2 . reshape ( 1 ) , dst . reshape ( 1 ) , mask , stream ) ;
}
2011-10-10 16:19:11 +08:00
void cv : : gpu : : add ( const GpuMat & src , const Scalar & sc , GpuMat & dst , const GpuMat & mask , int dtype , Stream & s )
{
2012-03-26 19:02:03 +08:00
using namespace cv : : gpu : : device ;
2011-01-24 18:32:57 +08:00
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb & src1 , double val , const PtrStepSzb & dst , const PtrStepb & mask , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ 7 ] [ 7 ] =
2011-10-10 16:19:11 +08:00
{
2012-03-26 19:02:03 +08:00
{ add_gpu < unsigned char , unsigned char > , 0 /*add_gpu<unsigned char, signed char>*/ , add_gpu < unsigned char , unsigned short > , add_gpu < unsigned char , short > , add_gpu < unsigned char , int > , add_gpu < unsigned char , float > , add_gpu < unsigned char , double > } ,
{ 0 /*add_gpu<signed char, unsigned char>*/ , 0 /*add_gpu<signed char, signed char>*/ , 0 /*add_gpu<signed char, unsigned short>*/ , 0 /*add_gpu<signed char, short>*/ , 0 /*add_gpu<signed char, int>*/ , 0 /*add_gpu<signed char, float>*/ , 0 /*add_gpu<signed char, double>*/ } ,
{ 0 /*add_gpu<unsigned short, unsigned char>*/ , 0 /*add_gpu<unsigned short, signed char>*/ , add_gpu < unsigned short , unsigned short > , 0 /*add_gpu<unsigned short, short>*/ , add_gpu < unsigned short , int > , add_gpu < unsigned short , float > , add_gpu < unsigned short , double > } ,
{ 0 /*add_gpu<short, unsigned char>*/ , 0 /*add_gpu<short, signed char>*/ , 0 /*add_gpu<short, unsigned short>*/ , add_gpu < short , short > , add_gpu < short , int > , add_gpu < short , float > , add_gpu < short , double > } ,
{ 0 /*add_gpu<int, unsigned char>*/ , 0 /*add_gpu<int, signed char>*/ , 0 /*add_gpu<int, unsigned short>*/ , 0 /*add_gpu<int, short>*/ , add_gpu < int , int > , add_gpu < int , float > , add_gpu < int , double > } ,
{ 0 /*add_gpu<float, unsigned char>*/ , 0 /*add_gpu<float, signed char>*/ , 0 /*add_gpu<float, unsigned short>*/ , 0 /*add_gpu<float, short>*/ , 0 /*add_gpu<float, int>*/ , add_gpu < float , float > , add_gpu < float , double > } ,
{ 0 /*add_gpu<double, unsigned char>*/ , 0 /*add_gpu<double, signed char>*/ , 0 /*add_gpu<double, unsigned short>*/ , 0 /*add_gpu<double, short>*/ , 0 /*add_gpu<double, int>*/ , 0 /*add_gpu<double, float>*/ , add_gpu < double , double > }
2010-12-20 17:07:19 +08:00
} ;
2011-10-10 16:19:11 +08:00
2012-08-23 21:45:50 +08:00
typedef void ( * npp_func_t ) ( const PtrStepSzb src , Scalar sc , PtrStepb dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const npp_func_t npp_funcs [ 7 ] [ 4 ] =
2012-02-22 18:00:53 +08:00
{
2012-03-26 19:02:03 +08:00
{ NppArithmScalar < CV_8U , 1 , nppiAddC_8u_C1RSfs > : : call , 0 , NppArithmScalar < CV_8U , 3 , nppiAddC_8u_C3RSfs > : : call , NppArithmScalar < CV_8U , 4 , nppiAddC_8u_C4RSfs > : : call } ,
{ 0 , 0 , 0 , 0 } ,
{ NppArithmScalar < CV_16U , 1 , nppiAddC_16u_C1RSfs > : : call , 0 , NppArithmScalar < CV_16U , 3 , nppiAddC_16u_C3RSfs > : : call , NppArithmScalar < CV_16U , 4 , nppiAddC_16u_C4RSfs > : : call } ,
2012-02-22 18:00:53 +08:00
{ NppArithmScalar < CV_16S , 1 , nppiAddC_16s_C1RSfs > : : call , NppArithmScalar < CV_16S , 2 , nppiAddC_16sc_C1RSfs > : : call , NppArithmScalar < CV_16S , 3 , nppiAddC_16s_C3RSfs > : : call , NppArithmScalar < CV_16S , 4 , nppiAddC_16s_C4RSfs > : : call } ,
2012-03-26 19:02:03 +08:00
{ NppArithmScalar < CV_32S , 1 , nppiAddC_32s_C1RSfs > : : call , NppArithmScalar < CV_32S , 2 , nppiAddC_32sc_C1RSfs > : : call , NppArithmScalar < CV_32S , 3 , nppiAddC_32s_C3RSfs > : : call , 0 } ,
{ NppArithmScalar < CV_32F , 1 , nppiAddC_32f_C1R > : : call , NppArithmScalar < CV_32F , 2 , nppiAddC_32fc_C1R > : : call , NppArithmScalar < CV_32F , 3 , nppiAddC_32f_C3R > : : call , NppArithmScalar < CV_32F , 4 , nppiAddC_32f_C4R > : : call } ,
{ 0 , 0 , 0 , 0 }
2012-02-22 18:00:53 +08:00
} ;
2011-10-10 16:19:11 +08:00
if ( dtype < 0 )
dtype = src . depth ( ) ;
2012-03-26 19:02:03 +08:00
CV_Assert ( src . depth ( ) < = CV_64F & & CV_MAT_DEPTH ( dtype ) < = CV_64F ) ;
CV_Assert ( src . channels ( ) < = 4 ) ;
CV_Assert ( mask . empty ( ) | | ( src . channels ( ) = = 1 & & mask . size ( ) = = src . size ( ) & & mask . type ( ) = = CV_8U ) ) ;
if ( src . depth ( ) = = CV_64F | | CV_MAT_DEPTH ( dtype ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2011-10-10 16:19:11 +08:00
dst . create ( src . size ( ) , CV_MAKE_TYPE ( CV_MAT_DEPTH ( dtype ) , src . channels ( ) ) ) ;
cudaStream_t stream = StreamAccessor : : getStream ( s ) ;
2012-02-22 18:00:53 +08:00
if ( mask . empty ( ) & & dst . type ( ) = = src . type ( ) )
2010-12-20 17:07:19 +08:00
{
2012-02-22 18:00:53 +08:00
const npp_func_t npp_func = npp_funcs [ src . depth ( ) ] [ src . channels ( ) - 1 ] ;
2010-12-20 17:07:19 +08:00
2012-02-22 18:00:53 +08:00
if ( npp_func )
2011-10-10 16:19:11 +08:00
{
2012-02-22 18:00:53 +08:00
npp_func ( src , sc , dst , stream ) ;
return ;
2011-10-10 16:19:11 +08:00
}
}
2011-01-24 18:32:57 +08:00
2012-02-22 18:00:53 +08:00
CV_Assert ( src . channels ( ) = = 1 ) ;
2011-10-10 16:19:11 +08:00
const func_t func = funcs [ src . depth ( ) ] [ dst . depth ( ) ] ;
2012-03-26 19:02:03 +08:00
if ( ! func )
CV_Error ( CV_StsUnsupportedFormat , " Unsupported combination of source and destination types " ) ;
2011-10-10 16:19:11 +08:00
func ( src , sc . val [ 0 ] , dst , mask , stream ) ;
2010-12-20 17:07:19 +08:00
}
2011-10-10 16:19:11 +08:00
////////////////////////////////////////////////////////////////////////
// subtract
2012-03-19 22:18:12 +08:00
namespace cv { namespace gpu { namespace device
2011-11-14 17:02:06 +08:00
{
2012-03-19 22:18:12 +08:00
template < typename T , typename D >
2012-08-23 21:45:50 +08:00
void subtract_gpu ( const PtrStepSzb & src1 , const PtrStepSzb & src2 , const PtrStepSzb & dst , const PtrStepb & mask , cudaStream_t stream ) ;
2011-11-09 21:13:52 +08:00
2012-03-19 22:18:12 +08:00
template < typename T , typename D >
2012-08-23 21:45:50 +08:00
void subtract_gpu ( const PtrStepSzb & src1 , double val , const PtrStepSzb & dst , const PtrStepb & mask , cudaStream_t stream ) ;
2011-11-14 17:02:06 +08:00
} } }
2011-09-13 22:15:18 +08:00
2011-10-10 16:19:11 +08:00
void cv : : gpu : : subtract ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , const GpuMat & mask , int dtype , Stream & s )
2010-12-20 17:07:19 +08:00
{
2012-03-26 19:02:03 +08:00
using namespace cv : : gpu : : device ;
2011-10-10 16:19:11 +08:00
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb & src1 , const PtrStepSzb & src2 , const PtrStepSzb & dst , const PtrStepb & mask , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ 7 ] [ 7 ] =
2011-09-13 22:15:18 +08:00
{
2012-03-26 19:02:03 +08:00
{ subtract_gpu < unsigned char , unsigned char > , 0 /*subtract_gpu<unsigned char, signed char>*/ , subtract_gpu < unsigned char , unsigned short > , subtract_gpu < unsigned char , short > , subtract_gpu < unsigned char , int > , subtract_gpu < unsigned char , float > , subtract_gpu < unsigned char , double > } ,
{ 0 /*subtract_gpu<signed char, unsigned char>*/ , 0 /*subtract_gpu<signed char, signed char>*/ , 0 /*subtract_gpu<signed char, unsigned short>*/ , 0 /*subtract_gpu<signed char, short>*/ , 0 /*subtract_gpu<signed char, int>*/ , 0 /*subtract_gpu<signed char, float>*/ , 0 /*subtract_gpu<signed char, double>*/ } ,
{ 0 /*subtract_gpu<unsigned short, unsigned char>*/ , 0 /*subtract_gpu<unsigned short, signed char>*/ , subtract_gpu < unsigned short , unsigned short > , 0 /*subtract_gpu<unsigned short, short>*/ , subtract_gpu < unsigned short , int > , subtract_gpu < unsigned short , float > , subtract_gpu < unsigned short , double > } ,
{ 0 /*subtract_gpu<short, unsigned char>*/ , 0 /*subtract_gpu<short, signed char>*/ , 0 /*subtract_gpu<short, unsigned short>*/ , subtract_gpu < short , short > , subtract_gpu < short , int > , subtract_gpu < short , float > , subtract_gpu < short , double > } ,
{ 0 /*subtract_gpu<int, unsigned char>*/ , 0 /*subtract_gpu<int, signed char>*/ , 0 /*subtract_gpu<int, unsigned short>*/ , 0 /*subtract_gpu<int, short>*/ , subtract_gpu < int , int > , subtract_gpu < int , float > , subtract_gpu < int , double > } ,
{ 0 /*subtract_gpu<float, unsigned char>*/ , 0 /*subtract_gpu<float, signed char>*/ , 0 /*subtract_gpu<float, unsigned short>*/ , 0 /*subtract_gpu<float, short>*/ , 0 /*subtract_gpu<float, int>*/ , subtract_gpu < float , float > , subtract_gpu < float , double > } ,
{ 0 /*subtract_gpu<double, unsigned char>*/ , 0 /*subtract_gpu<double, signed char>*/ , 0 /*subtract_gpu<double, unsigned short>*/ , 0 /*subtract_gpu<double, short>*/ , 0 /*subtract_gpu<double, int>*/ , 0 /*subtract_gpu<double, float>*/ , subtract_gpu < double , double > }
2011-10-10 16:19:11 +08:00
} ;
2012-08-23 21:45:50 +08:00
typedef void ( * npp_func_t ) ( const PtrStepSzb src1 , const PtrStepb src2 , PtrStepb dst , cudaStream_t stream ) ;
2012-03-26 19:02:03 +08:00
static const npp_func_t npp_funcs [ 6 ] =
2012-02-29 21:02:25 +08:00
{
2012-03-26 19:02:03 +08:00
NppArithm < CV_8U , nppiSub_8u_C1RSfs > : : call ,
2012-02-29 21:02:25 +08:00
0 ,
NppArithm < CV_16U , nppiSub_16u_C1RSfs > : : call ,
NppArithm < CV_16S , nppiSub_16s_C1RSfs > : : call ,
NppArithm < CV_32S , nppiSub_32s_C1RSfs > : : call ,
2012-03-26 19:02:03 +08:00
NppArithm < CV_32F , nppiSub_32f_C1R > : : call
2012-02-29 21:02:25 +08:00
} ;
2012-03-26 19:02:03 +08:00
if ( dtype < 0 )
dtype = src1 . depth ( ) ;
CV_Assert ( src1 . depth ( ) < = CV_64F & & CV_MAT_DEPTH ( dtype ) < = CV_64F ) ;
2011-10-10 16:19:11 +08:00
CV_Assert ( src1 . type ( ) = = src2 . type ( ) & & src1 . size ( ) = = src2 . size ( ) ) ;
CV_Assert ( mask . empty ( ) | | ( src1 . channels ( ) = = 1 & & mask . size ( ) = = src1 . size ( ) & & mask . type ( ) = = CV_8U ) ) ;
2012-03-26 19:02:03 +08:00
if ( src1 . depth ( ) = = CV_64F | | CV_MAT_DEPTH ( dtype ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2011-10-10 16:19:11 +08:00
dst . create ( src1 . size ( ) , CV_MAKE_TYPE ( CV_MAT_DEPTH ( dtype ) , src1 . channels ( ) ) ) ;
cudaStream_t stream = StreamAccessor : : getStream ( s ) ;
2012-03-19 22:18:12 +08:00
if ( mask . empty ( ) & & dst . type ( ) = = src1 . type ( ) & & src1 . depth ( ) < = CV_32F )
2011-10-10 16:19:11 +08:00
{
2012-03-26 19:02:03 +08:00
npp_funcs [ src1 . depth ( ) ] ( src2 . reshape ( 1 ) , src1 . reshape ( 1 ) , dst . reshape ( 1 ) , stream ) ;
2012-02-29 21:02:25 +08:00
return ;
2011-09-13 22:15:18 +08:00
}
2011-10-10 16:19:11 +08:00
const func_t func = funcs [ src1 . depth ( ) ] [ dst . depth ( ) ] ;
2012-03-26 19:02:03 +08:00
if ( ! func )
CV_Error ( CV_StsUnsupportedFormat , " Unsupported combination of source and destination types " ) ;
2011-10-10 16:19:11 +08:00
func ( src1 . reshape ( 1 ) , src2 . reshape ( 1 ) , dst . reshape ( 1 ) , mask , stream ) ;
2010-12-20 17:07:19 +08:00
}
2011-10-10 16:19:11 +08:00
void cv : : gpu : : subtract ( const GpuMat & src , const Scalar & sc , GpuMat & dst , const GpuMat & mask , int dtype , Stream & s )
2011-06-30 22:39:48 +08:00
{
2012-03-26 19:02:03 +08:00
using namespace cv : : gpu : : device ;
2011-06-30 22:39:48 +08:00
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb & src1 , double val , const PtrStepSzb & dst , const PtrStepb & mask , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ 7 ] [ 7 ] =
2011-06-30 22:39:48 +08:00
{
2012-03-26 19:02:03 +08:00
{ subtract_gpu < unsigned char , unsigned char > , 0 /*subtract_gpu<unsigned char, signed char>*/ , subtract_gpu < unsigned char , unsigned short > , subtract_gpu < unsigned char , short > , subtract_gpu < unsigned char , int > , subtract_gpu < unsigned char , float > , subtract_gpu < unsigned char , double > } ,
{ 0 /*subtract_gpu<signed char, unsigned char>*/ , 0 /*subtract_gpu<signed char, signed char>*/ , 0 /*subtract_gpu<signed char, unsigned short>*/ , 0 /*subtract_gpu<signed char, short>*/ , 0 /*subtract_gpu<signed char, int>*/ , 0 /*subtract_gpu<signed char, float>*/ , 0 /*subtract_gpu<signed char, double>*/ } ,
{ 0 /*subtract_gpu<unsigned short, unsigned char>*/ , 0 /*subtract_gpu<unsigned short, signed char>*/ , subtract_gpu < unsigned short , unsigned short > , 0 /*subtract_gpu<unsigned short, short>*/ , subtract_gpu < unsigned short , int > , subtract_gpu < unsigned short , float > , subtract_gpu < unsigned short , double > } ,
{ 0 /*subtract_gpu<short, unsigned char>*/ , 0 /*subtract_gpu<short, signed char>*/ , 0 /*subtract_gpu<short, unsigned short>*/ , subtract_gpu < short , short > , subtract_gpu < short , int > , subtract_gpu < short , float > , subtract_gpu < short , double > } ,
{ 0 /*subtract_gpu<int, unsigned char>*/ , 0 /*subtract_gpu<int, signed char>*/ , 0 /*subtract_gpu<int, unsigned short>*/ , 0 /*subtract_gpu<int, short>*/ , subtract_gpu < int , int > , subtract_gpu < int , float > , subtract_gpu < int , double > } ,
{ 0 /*subtract_gpu<float, unsigned char>*/ , 0 /*subtract_gpu<float, signed char>*/ , 0 /*subtract_gpu<float, unsigned short>*/ , 0 /*subtract_gpu<float, short>*/ , 0 /*subtract_gpu<float, int>*/ , subtract_gpu < float , float > , subtract_gpu < float , double > } ,
{ 0 /*subtract_gpu<double, unsigned char>*/ , 0 /*subtract_gpu<double, signed char>*/ , 0 /*subtract_gpu<double, unsigned short>*/ , 0 /*subtract_gpu<double, short>*/ , 0 /*subtract_gpu<double, int>*/ , 0 /*subtract_gpu<double, float>*/ , subtract_gpu < double , double > }
2011-10-10 16:19:11 +08:00
} ;
2012-08-23 21:45:50 +08:00
typedef void ( * npp_func_t ) ( const PtrStepSzb src , Scalar sc , PtrStepb dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const npp_func_t npp_funcs [ 7 ] [ 4 ] =
2012-02-22 18:00:53 +08:00
{
2012-03-26 19:02:03 +08:00
{ NppArithmScalar < CV_8U , 1 , nppiSubC_8u_C1RSfs > : : call , 0 , NppArithmScalar < CV_8U , 3 , nppiSubC_8u_C3RSfs > : : call , NppArithmScalar < CV_8U , 4 , nppiSubC_8u_C4RSfs > : : call } ,
{ 0 , 0 , 0 , 0 } ,
{ NppArithmScalar < CV_16U , 1 , nppiSubC_16u_C1RSfs > : : call , 0 , NppArithmScalar < CV_16U , 3 , nppiSubC_16u_C3RSfs > : : call , NppArithmScalar < CV_16U , 4 , nppiSubC_16u_C4RSfs > : : call } ,
2012-02-22 18:00:53 +08:00
{ NppArithmScalar < CV_16S , 1 , nppiSubC_16s_C1RSfs > : : call , NppArithmScalar < CV_16S , 2 , nppiSubC_16sc_C1RSfs > : : call , NppArithmScalar < CV_16S , 3 , nppiSubC_16s_C3RSfs > : : call , NppArithmScalar < CV_16S , 4 , nppiSubC_16s_C4RSfs > : : call } ,
2012-03-26 19:02:03 +08:00
{ NppArithmScalar < CV_32S , 1 , nppiSubC_32s_C1RSfs > : : call , NppArithmScalar < CV_32S , 2 , nppiSubC_32sc_C1RSfs > : : call , NppArithmScalar < CV_32S , 3 , nppiSubC_32s_C3RSfs > : : call , 0 } ,
{ NppArithmScalar < CV_32F , 1 , nppiSubC_32f_C1R > : : call , NppArithmScalar < CV_32F , 2 , nppiSubC_32fc_C1R > : : call , NppArithmScalar < CV_32F , 3 , nppiSubC_32f_C3R > : : call , NppArithmScalar < CV_32F , 4 , nppiSubC_32f_C4R > : : call } ,
{ 0 , 0 , 0 , 0 }
2012-02-22 18:00:53 +08:00
} ;
2011-10-10 16:19:11 +08:00
if ( dtype < 0 )
dtype = src . depth ( ) ;
2012-03-26 19:02:03 +08:00
CV_Assert ( src . depth ( ) < = CV_64F & & CV_MAT_DEPTH ( dtype ) < = CV_64F ) ;
CV_Assert ( src . channels ( ) < = 4 ) ;
CV_Assert ( mask . empty ( ) | | ( src . channels ( ) = = 1 & & mask . size ( ) = = src . size ( ) & & mask . type ( ) = = CV_8U ) ) ;
if ( src . depth ( ) = = CV_64F | | CV_MAT_DEPTH ( dtype ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2011-10-10 16:19:11 +08:00
dst . create ( src . size ( ) , CV_MAKE_TYPE ( CV_MAT_DEPTH ( dtype ) , src . channels ( ) ) ) ;
cudaStream_t stream = StreamAccessor : : getStream ( s ) ;
2012-02-22 18:00:53 +08:00
if ( mask . empty ( ) & & dst . type ( ) = = src . type ( ) )
2011-10-10 16:19:11 +08:00
{
2012-02-22 18:00:53 +08:00
const npp_func_t npp_func = npp_funcs [ src . depth ( ) ] [ src . channels ( ) - 1 ] ;
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
if ( npp_func )
2011-10-10 16:19:11 +08:00
{
2012-02-22 18:00:53 +08:00
npp_func ( src , sc , dst , stream ) ;
return ;
2011-10-10 16:19:11 +08:00
}
2011-06-30 22:39:48 +08:00
}
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
CV_Assert ( src . channels ( ) = = 1 ) ;
2011-10-10 16:19:11 +08:00
const func_t func = funcs [ src . depth ( ) ] [ dst . depth ( ) ] ;
2012-03-26 19:02:03 +08:00
if ( ! func )
CV_Error ( CV_StsUnsupportedFormat , " Unsupported combination of source and destination types " ) ;
2011-10-10 16:19:11 +08:00
func ( src , sc . val [ 0 ] , dst , mask , stream ) ;
2010-12-20 17:07:19 +08:00
}
2011-10-10 16:19:11 +08:00
////////////////////////////////////////////////////////////////////////
// multiply
2012-03-19 22:18:12 +08:00
namespace cv { namespace gpu { namespace device
2011-11-14 17:02:06 +08:00
{
2012-08-23 21:45:50 +08:00
void multiply_gpu ( const PtrStepSz < uchar4 > & src1 , const PtrStepSzf & src2 , const PtrStepSz < uchar4 > & dst , cudaStream_t stream ) ;
void multiply_gpu ( const PtrStepSz < short4 > & src1 , const PtrStepSzf & src2 , const PtrStepSz < short4 > & dst , cudaStream_t stream ) ;
2011-09-10 19:56:55 +08:00
2012-03-19 22:18:12 +08:00
template < typename T , typename D >
2012-08-23 21:45:50 +08:00
void multiply_gpu ( const PtrStepSzb & src1 , const PtrStepSzb & src2 , const PtrStepSzb & dst , double scale , cudaStream_t stream ) ;
2011-10-10 16:19:11 +08:00
2012-03-19 22:18:12 +08:00
template < typename T , typename D >
2012-08-23 21:45:50 +08:00
void multiply_gpu ( const PtrStepSzb & src1 , double val , const PtrStepSzb & dst , double scale , cudaStream_t stream ) ;
2011-11-14 17:02:06 +08:00
} } }
2011-09-10 19:56:55 +08:00
2011-10-10 16:19:11 +08:00
void cv : : gpu : : multiply ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , double scale , int dtype , Stream & s )
2010-12-20 17:07:19 +08:00
{
2012-03-26 19:02:03 +08:00
using namespace cv : : gpu : : device ;
2012-02-29 21:02:25 +08:00
2011-10-10 16:19:11 +08:00
cudaStream_t stream = StreamAccessor : : getStream ( s ) ;
2011-09-12 16:45:56 +08:00
if ( src1 . type ( ) = = CV_8UC4 & & src2 . type ( ) = = CV_32FC1 )
2011-09-10 19:56:55 +08:00
{
CV_Assert ( src1 . size ( ) = = src2 . size ( ) ) ;
2011-09-12 16:45:56 +08:00
2011-09-10 19:56:55 +08:00
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
2011-09-12 16:45:56 +08:00
2012-08-23 21:45:50 +08:00
multiply_gpu ( static_cast < PtrStepSz < uchar4 > > ( src1 ) , static_cast < PtrStepSzf > ( src2 ) , static_cast < PtrStepSz < uchar4 > > ( dst ) , stream ) ;
2011-09-13 22:15:18 +08:00
}
else if ( src1 . type ( ) = = CV_16SC4 & & src2 . type ( ) = = CV_32FC1 )
{
CV_Assert ( src1 . size ( ) = = src2 . size ( ) ) ;
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
2012-08-23 21:45:50 +08:00
multiply_gpu ( static_cast < PtrStepSz < short4 > > ( src1 ) , static_cast < PtrStepSzf > ( src2 ) , static_cast < PtrStepSz < short4 > > ( dst ) , stream ) ;
2011-09-10 19:56:55 +08:00
}
else
2011-10-10 16:19:11 +08:00
{
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb & src1 , const PtrStepSzb & src2 , const PtrStepSzb & dst , double scale , cudaStream_t stream ) ;
2012-03-26 19:02:03 +08:00
static const func_t funcs [ 7 ] [ 7 ] =
{
{ multiply_gpu < unsigned char , unsigned char > , 0 /*multiply_gpu<unsigned char, signed char>*/ , multiply_gpu < unsigned char , unsigned short > , multiply_gpu < unsigned char , short > , multiply_gpu < unsigned char , int > , multiply_gpu < unsigned char , float > , multiply_gpu < unsigned char , double > } ,
{ 0 /*multiply_gpu<signed char, unsigned char>*/ , 0 /*multiply_gpu<signed char, signed char>*/ , 0 /*multiply_gpu<signed char, unsigned short>*/ , 0 /*multiply_gpu<signed char, short>*/ , 0 /*multiply_gpu<signed char, int>*/ , 0 /*multiply_gpu<signed char, float>*/ , 0 /*multiply_gpu<signed char, double>*/ } ,
{ 0 /*multiply_gpu<unsigned short, unsigned char>*/ , 0 /*multiply_gpu<unsigned short, signed char>*/ , multiply_gpu < unsigned short , unsigned short > , 0 /*multiply_gpu<unsigned short, short>*/ , multiply_gpu < unsigned short , int > , multiply_gpu < unsigned short , float > , multiply_gpu < unsigned short , double > } ,
{ 0 /*multiply_gpu<short, unsigned char>*/ , 0 /*multiply_gpu<short, signed char>*/ , 0 /*multiply_gpu<short, unsigned short>*/ , multiply_gpu < short , short > , multiply_gpu < short , int > , multiply_gpu < short , float > , multiply_gpu < short , double > } ,
{ 0 /*multiply_gpu<int, unsigned char>*/ , 0 /*multiply_gpu<int, signed char>*/ , 0 /*multiply_gpu<int, unsigned short>*/ , 0 /*multiply_gpu<int, short>*/ , multiply_gpu < int , int > , multiply_gpu < int , float > , multiply_gpu < int , double > } ,
{ 0 /*multiply_gpu<float, unsigned char>*/ , 0 /*multiply_gpu<float, signed char>*/ , 0 /*multiply_gpu<float, unsigned short>*/ , 0 /*multiply_gpu<float, short>*/ , 0 /*multiply_gpu<float, int>*/ , multiply_gpu < float , float > , multiply_gpu < float , double > } ,
{ 0 /*multiply_gpu<double, unsigned char>*/ , 0 /*multiply_gpu<double, signed char>*/ , 0 /*multiply_gpu<double, unsigned short>*/ , 0 /*multiply_gpu<double, short>*/ , 0 /*multiply_gpu<double, int>*/ , 0 /*multiply_gpu<double, float>*/ , multiply_gpu < double , double > }
} ;
2012-08-23 21:45:50 +08:00
typedef void ( * npp_func_t ) ( const PtrStepSzb src1 , const PtrStepb src2 , PtrStepb dst , cudaStream_t stream ) ;
2012-03-26 19:02:03 +08:00
static const npp_func_t npp_funcs [ ] =
{
NppArithm < CV_8U , nppiMul_8u_C1RSfs > : : call ,
0 ,
NppArithm < CV_16U , nppiMul_16u_C1RSfs > : : call ,
NppArithm < CV_16S , nppiMul_16s_C1RSfs > : : call ,
NppArithm < CV_32S , nppiMul_32s_C1RSfs > : : call ,
NppArithm < CV_32F , nppiMul_32f_C1R > : : call
} ;
2011-10-10 16:19:11 +08:00
if ( dtype < 0 )
dtype = src1 . depth ( ) ;
2012-03-26 19:02:03 +08:00
CV_Assert ( src1 . depth ( ) < = CV_64F & & CV_MAT_DEPTH ( dtype ) < = CV_64F ) ;
CV_Assert ( src1 . type ( ) = = src2 . type ( ) & & src1 . size ( ) = = src2 . size ( ) ) ;
if ( src1 . depth ( ) = = CV_64F | | CV_MAT_DEPTH ( dtype ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2011-10-10 16:19:11 +08:00
dst . create ( src1 . size ( ) , CV_MAKE_TYPE ( CV_MAT_DEPTH ( dtype ) , src1 . channels ( ) ) ) ;
2012-05-28 20:09:40 +08:00
# if (CUDA_VERSION <= 4020)
2012-03-26 19:02:03 +08:00
if ( scale = = 1 & & dst . type ( ) = = src1 . type ( ) & & src1 . depth ( ) < = CV_32F )
2012-05-28 20:09:40 +08:00
# else
if ( scale = = 1 & & dst . type ( ) = = src1 . type ( ) & & src1 . depth ( ) < = CV_32F & & src1 . depth ( ) > CV_8U )
# endif
2011-10-10 16:19:11 +08:00
{
2012-03-26 19:02:03 +08:00
npp_funcs [ src1 . depth ( ) ] ( src1 . reshape ( 1 ) , src2 . reshape ( 1 ) , dst . reshape ( 1 ) , stream ) ;
2012-02-29 21:02:25 +08:00
return ;
2011-10-10 16:19:11 +08:00
}
const func_t func = funcs [ src1 . depth ( ) ] [ dst . depth ( ) ] ;
2012-03-26 19:02:03 +08:00
if ( ! func )
CV_Error ( CV_StsUnsupportedFormat , " Unsupported combination of source and destination types " ) ;
2011-10-10 16:19:11 +08:00
func ( src1 . reshape ( 1 ) , src2 . reshape ( 1 ) , dst . reshape ( 1 ) , scale , stream ) ;
}
2010-12-20 17:07:19 +08:00
}
2012-02-22 18:00:53 +08:00
namespace
{
2012-02-22 19:22:31 +08:00
inline bool isIntScalar ( Scalar sc )
2012-02-22 18:00:53 +08:00
{
2012-02-22 19:22:31 +08:00
return sc . val [ 0 ] = = static_cast < int > ( sc . val [ 0 ] ) & & sc . val [ 1 ] = = static_cast < int > ( sc . val [ 1 ] ) & & sc . val [ 2 ] = = static_cast < int > ( sc . val [ 2 ] ) & & sc . val [ 3 ] = = static_cast < int > ( sc . val [ 3 ] ) ;
2012-02-22 18:00:53 +08:00
}
}
2011-10-10 16:19:11 +08:00
void cv : : gpu : : multiply ( const GpuMat & src , const Scalar & sc , GpuMat & dst , double scale , int dtype , Stream & s )
2011-09-10 19:56:55 +08:00
{
2012-03-26 19:02:03 +08:00
using namespace cv : : gpu : : device ;
2011-09-12 16:45:56 +08:00
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb & src1 , double val , const PtrStepSzb & dst , double scale , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ 7 ] [ 7 ] =
2011-09-10 19:56:55 +08:00
{
2012-03-26 19:02:03 +08:00
{ multiply_gpu < unsigned char , unsigned char > , 0 /*multiply_gpu<unsigned char, signed char>*/ , multiply_gpu < unsigned char , unsigned short > , multiply_gpu < unsigned char , short > , multiply_gpu < unsigned char , int > , multiply_gpu < unsigned char , float > , multiply_gpu < unsigned char , double > } ,
{ 0 /*multiply_gpu<signed char, unsigned char>*/ , 0 /*multiply_gpu<signed char, signed char>*/ , 0 /*multiply_gpu<signed char, unsigned short>*/ , 0 /*multiply_gpu<signed char, short>*/ , 0 /*multiply_gpu<signed char, int>*/ , 0 /*multiply_gpu<signed char, float>*/ , 0 /*multiply_gpu<signed char, double>*/ } ,
{ 0 /*multiply_gpu<unsigned short, unsigned char>*/ , 0 /*multiply_gpu<unsigned short, signed char>*/ , multiply_gpu < unsigned short , unsigned short > , 0 /*multiply_gpu<unsigned short, short>*/ , multiply_gpu < unsigned short , int > , multiply_gpu < unsigned short , float > , multiply_gpu < unsigned short , double > } ,
{ 0 /*multiply_gpu<short, unsigned char>*/ , 0 /*multiply_gpu<short, signed char>*/ , 0 /*multiply_gpu<short, unsigned short>*/ , multiply_gpu < short , short > , multiply_gpu < short , int > , multiply_gpu < short , float > , multiply_gpu < short , double > } ,
{ 0 /*multiply_gpu<int, unsigned char>*/ , 0 /*multiply_gpu<int, signed char>*/ , 0 /*multiply_gpu<int, unsigned short>*/ , 0 /*multiply_gpu<int, short>*/ , multiply_gpu < int , int > , multiply_gpu < int , float > , multiply_gpu < int , double > } ,
{ 0 /*multiply_gpu<float, unsigned char>*/ , 0 /*multiply_gpu<float, signed char>*/ , 0 /*multiply_gpu<float, unsigned short>*/ , 0 /*multiply_gpu<float, short>*/ , 0 /*multiply_gpu<float, int>*/ , multiply_gpu < float , float > , multiply_gpu < float , double > } ,
{ 0 /*multiply_gpu<double, unsigned char>*/ , 0 /*multiply_gpu<double, signed char>*/ , 0 /*multiply_gpu<double, unsigned short>*/ , 0 /*multiply_gpu<double, short>*/ , 0 /*multiply_gpu<double, int>*/ , 0 /*multiply_gpu<double, float>*/ , multiply_gpu < double , double > }
2011-10-10 16:19:11 +08:00
} ;
2012-08-23 21:45:50 +08:00
typedef void ( * npp_func_t ) ( const PtrStepSzb src , Scalar sc , PtrStepb dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const npp_func_t npp_funcs [ 7 ] [ 4 ] =
2012-02-22 18:00:53 +08:00
{
2012-03-26 19:02:03 +08:00
{ NppArithmScalar < CV_8U , 1 , nppiMulC_8u_C1RSfs > : : call , 0 , NppArithmScalar < CV_8U , 3 , nppiMulC_8u_C3RSfs > : : call , NppArithmScalar < CV_8U , 4 , nppiMulC_8u_C4RSfs > : : call } ,
{ 0 , 0 , 0 , 0 } ,
2012-02-22 18:00:53 +08:00
{ NppArithmScalar < CV_16U , 1 , nppiMulC_16u_C1RSfs > : : call , 0 , NppArithmScalar < CV_16U , 3 , nppiMulC_16u_C3RSfs > : : call , NppArithmScalar < CV_16U , 4 , nppiMulC_16u_C4RSfs > : : call } ,
{ NppArithmScalar < CV_16S , 1 , nppiMulC_16s_C1RSfs > : : call , 0 , NppArithmScalar < CV_16S , 3 , nppiMulC_16s_C3RSfs > : : call , NppArithmScalar < CV_16S , 4 , nppiMulC_16s_C4RSfs > : : call } ,
2012-03-26 19:02:03 +08:00
{ NppArithmScalar < CV_32S , 1 , nppiMulC_32s_C1RSfs > : : call , 0 , NppArithmScalar < CV_32S , 3 , nppiMulC_32s_C3RSfs > : : call , 0 } ,
{ NppArithmScalar < CV_32F , 1 , nppiMulC_32f_C1R > : : call , 0 , NppArithmScalar < CV_32F , 3 , nppiMulC_32f_C3R > : : call , NppArithmScalar < CV_32F , 4 , nppiMulC_32f_C4R > : : call } ,
{ 0 , 0 , 0 , 0 }
2012-02-22 18:00:53 +08:00
} ;
2011-10-10 16:19:11 +08:00
if ( dtype < 0 )
dtype = src . depth ( ) ;
2011-09-10 19:56:55 +08:00
2012-03-26 19:02:03 +08:00
CV_Assert ( src . depth ( ) < = CV_64F & & CV_MAT_DEPTH ( dtype ) < = CV_64F ) ;
CV_Assert ( src . channels ( ) < = 4 ) ;
if ( src . depth ( ) = = CV_64F | | CV_MAT_DEPTH ( dtype ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2011-10-10 16:19:11 +08:00
dst . create ( src . size ( ) , CV_MAKE_TYPE ( CV_MAT_DEPTH ( dtype ) , src . channels ( ) ) ) ;
2011-09-10 19:56:55 +08:00
2011-10-10 16:19:11 +08:00
cudaStream_t stream = StreamAccessor : : getStream ( s ) ;
2012-03-26 19:02:03 +08:00
if ( dst . type ( ) = = src . type ( ) & & scale = = 1 & & ( src . depth ( ) = = CV_32F | | isIntScalar ( sc ) ) )
2011-10-10 16:19:11 +08:00
{
2012-02-22 18:00:53 +08:00
const npp_func_t npp_func = npp_funcs [ src . depth ( ) ] [ src . channels ( ) - 1 ] ;
2011-09-10 19:56:55 +08:00
2012-03-26 19:02:03 +08:00
if ( npp_func )
2012-02-22 18:00:53 +08:00
{
npp_func ( src , sc , dst , stream ) ;
return ;
}
2011-09-10 19:56:55 +08:00
}
2011-10-10 16:19:11 +08:00
2012-03-26 19:02:03 +08:00
CV_Assert ( src . channels ( ) = = 1 ) ;
2011-10-10 16:19:11 +08:00
const func_t func = funcs [ src . depth ( ) ] [ dst . depth ( ) ] ;
2012-02-22 18:00:53 +08:00
2012-03-26 19:02:03 +08:00
if ( ! func )
CV_Error ( CV_StsUnsupportedFormat , " Unsupported combination of source and destination types " ) ;
2011-10-10 16:19:11 +08:00
2012-03-26 19:02:03 +08:00
func ( src , sc . val [ 0 ] , dst , scale , stream ) ;
2011-09-10 19:56:55 +08:00
}
2011-10-10 16:19:11 +08:00
////////////////////////////////////////////////////////////////////////
// divide
2011-09-10 19:56:55 +08:00
2012-03-19 22:18:12 +08:00
namespace cv { namespace gpu { namespace device
2011-11-14 17:02:06 +08:00
{
2012-08-23 21:45:50 +08:00
void divide_gpu ( const PtrStepSz < uchar4 > & src1 , const PtrStepSzf & src2 , const PtrStepSz < uchar4 > & dst , cudaStream_t stream ) ;
void divide_gpu ( const PtrStepSz < short4 > & src1 , const PtrStepSzf & src2 , const PtrStepSz < short4 > & dst , cudaStream_t stream ) ;
2011-11-09 21:13:52 +08:00
2012-03-19 22:18:12 +08:00
template < typename T , typename D >
2012-08-23 21:45:50 +08:00
void divide_gpu ( const PtrStepSzb & src1 , const PtrStepSzb & src2 , const PtrStepSzb & dst , double scale , cudaStream_t stream ) ;
2011-11-09 21:13:52 +08:00
2012-03-19 22:18:12 +08:00
template < typename T , typename D >
2012-08-23 21:45:50 +08:00
void divide_gpu ( const PtrStepSzb & src1 , double val , const PtrStepSzb & dst , double scale , cudaStream_t stream ) ;
2011-11-09 21:13:52 +08:00
2012-03-19 22:18:12 +08:00
template < typename T , typename D >
2012-08-23 21:45:50 +08:00
void divide_gpu ( double scalar , const PtrStepSzb & src2 , const PtrStepSzb & dst , cudaStream_t stream ) ;
2011-11-14 17:02:06 +08:00
} } }
2011-09-13 22:15:18 +08:00
2011-10-10 16:19:11 +08:00
void cv : : gpu : : divide ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , double scale , int dtype , Stream & s )
2010-12-20 17:07:19 +08:00
{
2012-03-26 19:02:03 +08:00
using namespace cv : : gpu : : device ;
2012-02-29 21:02:25 +08:00
2011-10-10 16:19:11 +08:00
cudaStream_t stream = StreamAccessor : : getStream ( s ) ;
2011-09-13 22:15:18 +08:00
if ( src1 . type ( ) = = CV_8UC4 & & src2 . type ( ) = = CV_32FC1 )
{
CV_Assert ( src1 . size ( ) = = src2 . size ( ) ) ;
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
2012-08-23 21:45:50 +08:00
divide_gpu ( static_cast < PtrStepSz < uchar4 > > ( src1 ) , static_cast < PtrStepSzf > ( src2 ) , static_cast < PtrStepSz < uchar4 > > ( dst ) , stream ) ;
2011-09-13 22:15:18 +08:00
}
else if ( src1 . type ( ) = = CV_16SC4 & & src2 . type ( ) = = CV_32FC1 )
{
CV_Assert ( src1 . size ( ) = = src2 . size ( ) ) ;
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
2012-08-23 21:45:50 +08:00
divide_gpu ( static_cast < PtrStepSz < short4 > > ( src1 ) , static_cast < PtrStepSzf > ( src2 ) , static_cast < PtrStepSz < short4 > > ( dst ) , stream ) ;
2011-09-13 22:15:18 +08:00
}
else
2012-03-19 22:18:12 +08:00
{
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb & src1 , const PtrStepSzb & src2 , const PtrStepSzb & dst , double scale , cudaStream_t stream ) ;
2012-03-26 19:02:03 +08:00
static const func_t funcs [ 7 ] [ 7 ] =
{
{ divide_gpu < unsigned char , unsigned char > , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu < unsigned char , unsigned short > , divide_gpu < unsigned char , short > , divide_gpu < unsigned char , int > , divide_gpu < unsigned char , float > , divide_gpu < unsigned char , double > } ,
{ 0 /*divide_gpu<signed char, unsigned char>*/ , 0 /*divide_gpu<signed char, signed char>*/ , 0 /*divide_gpu<signed char, unsigned short>*/ , 0 /*divide_gpu<signed char, short>*/ , 0 /*divide_gpu<signed char, int>*/ , 0 /*divide_gpu<signed char, float>*/ , 0 /*divide_gpu<signed char, double>*/ } ,
{ 0 /*divide_gpu<unsigned short, unsigned char>*/ , 0 /*divide_gpu<unsigned short, signed char>*/ , divide_gpu < unsigned short , unsigned short > , 0 /*divide_gpu<unsigned short, short>*/ , divide_gpu < unsigned short , int > , divide_gpu < unsigned short , float > , divide_gpu < unsigned short , double > } ,
{ 0 /*divide_gpu<short, unsigned char>*/ , 0 /*divide_gpu<short, signed char>*/ , 0 /*divide_gpu<short, unsigned short>*/ , divide_gpu < short , short > , divide_gpu < short , int > , divide_gpu < short , float > , divide_gpu < short , double > } ,
{ 0 /*divide_gpu<int, unsigned char>*/ , 0 /*divide_gpu<int, signed char>*/ , 0 /*divide_gpu<int, unsigned short>*/ , 0 /*divide_gpu<int, short>*/ , divide_gpu < int , int > , divide_gpu < int , float > , divide_gpu < int , double > } ,
{ 0 /*divide_gpu<float, unsigned char>*/ , 0 /*divide_gpu<float, signed char>*/ , 0 /*divide_gpu<float, unsigned short>*/ , 0 /*divide_gpu<float, short>*/ , 0 /*divide_gpu<float, int>*/ , divide_gpu < float , float > , divide_gpu < float , double > } ,
{ 0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu < double , double > }
} ;
2012-08-23 21:45:50 +08:00
typedef void ( * npp_func_t ) ( const PtrStepSzb src1 , const PtrStepb src2 , PtrStepb dst , cudaStream_t stream ) ;
2012-03-26 19:02:03 +08:00
static const npp_func_t npp_funcs [ 6 ] =
{
NppArithm < CV_8U , nppiDiv_8u_C1RSfs > : : call ,
0 ,
NppArithm < CV_16U , nppiDiv_16u_C1RSfs > : : call ,
NppArithm < CV_16S , nppiDiv_16s_C1RSfs > : : call ,
NppArithm < CV_32S , nppiDiv_32s_C1RSfs > : : call ,
NppArithm < CV_32F , nppiDiv_32f_C1R > : : call
} ;
2010-12-20 17:07:19 +08:00
2011-10-10 16:19:11 +08:00
if ( dtype < 0 )
dtype = src1 . depth ( ) ;
2010-12-20 17:07:19 +08:00
2012-03-26 19:02:03 +08:00
CV_Assert ( src1 . depth ( ) < = CV_64F & & CV_MAT_DEPTH ( dtype ) < = CV_64F ) ;
CV_Assert ( src1 . type ( ) = = src2 . type ( ) & & src1 . size ( ) = = src2 . size ( ) ) ;
if ( src1 . depth ( ) = = CV_64F | | CV_MAT_DEPTH ( dtype ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2011-10-10 16:19:11 +08:00
dst . create ( src1 . size ( ) , CV_MAKE_TYPE ( CV_MAT_DEPTH ( dtype ) , src1 . channels ( ) ) ) ;
2010-12-20 17:07:19 +08:00
2012-03-19 22:18:12 +08:00
if ( scale = = 1 & & dst . type ( ) = = src1 . type ( ) & & src1 . depth ( ) < = CV_32F )
2011-10-10 16:19:11 +08:00
{
2012-03-26 19:02:03 +08:00
npp_funcs [ src1 . depth ( ) ] ( src2 . reshape ( 1 ) , src1 . reshape ( 1 ) , dst . reshape ( 1 ) , stream ) ;
2012-02-29 21:02:25 +08:00
return ;
2011-10-10 16:19:11 +08:00
}
const func_t func = funcs [ src1 . depth ( ) ] [ dst . depth ( ) ] ;
2012-03-26 19:02:03 +08:00
if ( ! func )
CV_Error ( CV_StsUnsupportedFormat , " Unsupported combination of source and destination types " ) ;
2011-10-10 16:19:11 +08:00
func ( src1 . reshape ( 1 ) , src2 . reshape ( 1 ) , dst . reshape ( 1 ) , scale , stream ) ;
}
2010-12-20 17:07:19 +08:00
}
2011-10-10 16:19:11 +08:00
void cv : : gpu : : divide ( const GpuMat & src , const Scalar & sc , GpuMat & dst , double scale , int dtype , Stream & s )
2010-12-20 17:07:19 +08:00
{
2012-03-26 19:02:03 +08:00
using namespace cv : : gpu : : device ;
2011-10-10 16:19:11 +08:00
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb & src1 , double val , const PtrStepSzb & dst , double scale , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ 7 ] [ 7 ] =
2011-10-10 16:19:11 +08:00
{
2012-03-26 19:02:03 +08:00
{ divide_gpu < unsigned char , unsigned char > , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu < unsigned char , unsigned short > , divide_gpu < unsigned char , short > , divide_gpu < unsigned char , int > , divide_gpu < unsigned char , float > , divide_gpu < unsigned char , double > } ,
{ 0 /*divide_gpu<signed char, unsigned char>*/ , 0 /*divide_gpu<signed char, signed char>*/ , 0 /*divide_gpu<signed char, unsigned short>*/ , 0 /*divide_gpu<signed char, short>*/ , 0 /*divide_gpu<signed char, int>*/ , 0 /*divide_gpu<signed char, float>*/ , 0 /*divide_gpu<signed char, double>*/ } ,
{ 0 /*divide_gpu<unsigned short, unsigned char>*/ , 0 /*divide_gpu<unsigned short, signed char>*/ , divide_gpu < unsigned short , unsigned short > , 0 /*divide_gpu<unsigned short, short>*/ , divide_gpu < unsigned short , int > , divide_gpu < unsigned short , float > , divide_gpu < unsigned short , double > } ,
{ 0 /*divide_gpu<short, unsigned char>*/ , 0 /*divide_gpu<short, signed char>*/ , 0 /*divide_gpu<short, unsigned short>*/ , divide_gpu < short , short > , divide_gpu < short , int > , divide_gpu < short , float > , divide_gpu < short , double > } ,
{ 0 /*divide_gpu<int, unsigned char>*/ , 0 /*divide_gpu<int, signed char>*/ , 0 /*divide_gpu<int, unsigned short>*/ , 0 /*divide_gpu<int, short>*/ , divide_gpu < int , int > , divide_gpu < int , float > , divide_gpu < int , double > } ,
{ 0 /*divide_gpu<float, unsigned char>*/ , 0 /*divide_gpu<float, signed char>*/ , 0 /*divide_gpu<float, unsigned short>*/ , 0 /*divide_gpu<float, short>*/ , 0 /*divide_gpu<float, int>*/ , divide_gpu < float , float > , divide_gpu < float , double > } ,
{ 0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu < double , double > }
2011-10-10 16:19:11 +08:00
} ;
2012-08-23 21:45:50 +08:00
typedef void ( * npp_func_t ) ( const PtrStepSzb src , Scalar sc , PtrStepb dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const npp_func_t npp_funcs [ 7 ] [ 4 ] =
2012-02-22 18:00:53 +08:00
{
2012-03-26 19:02:03 +08:00
{ NppArithmScalar < CV_8U , 1 , nppiDivC_8u_C1RSfs > : : call , 0 , NppArithmScalar < CV_8U , 3 , nppiDivC_8u_C3RSfs > : : call , NppArithmScalar < CV_8U , 4 , nppiDivC_8u_C4RSfs > : : call } ,
{ 0 , 0 , 0 , 0 } ,
2012-02-22 18:00:53 +08:00
{ NppArithmScalar < CV_16U , 1 , nppiDivC_16u_C1RSfs > : : call , 0 , NppArithmScalar < CV_16U , 3 , nppiDivC_16u_C3RSfs > : : call , NppArithmScalar < CV_16U , 4 , nppiDivC_16u_C4RSfs > : : call } ,
{ NppArithmScalar < CV_16S , 1 , nppiDivC_16s_C1RSfs > : : call , 0 , NppArithmScalar < CV_16S , 3 , nppiDivC_16s_C3RSfs > : : call , NppArithmScalar < CV_16S , 4 , nppiDivC_16s_C4RSfs > : : call } ,
2012-03-26 19:02:03 +08:00
{ NppArithmScalar < CV_32S , 1 , nppiDivC_32s_C1RSfs > : : call , 0 , NppArithmScalar < CV_32S , 3 , nppiDivC_32s_C3RSfs > : : call , 0 } ,
{ NppArithmScalar < CV_32F , 1 , nppiDivC_32f_C1R > : : call , 0 , NppArithmScalar < CV_32F , 3 , nppiDivC_32f_C3R > : : call , NppArithmScalar < CV_32F , 4 , nppiDivC_32f_C4R > : : call } ,
{ 0 , 0 , 0 , 0 }
2012-02-22 18:00:53 +08:00
} ;
2011-10-10 16:19:11 +08:00
if ( dtype < 0 )
dtype = src . depth ( ) ;
2012-03-26 19:02:03 +08:00
CV_Assert ( src . depth ( ) < = CV_64F & & CV_MAT_DEPTH ( dtype ) < = CV_64F ) ;
CV_Assert ( src . channels ( ) < = 4 ) ;
if ( src . depth ( ) = = CV_64F | | CV_MAT_DEPTH ( dtype ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2011-10-10 16:19:11 +08:00
dst . create ( src . size ( ) , CV_MAKE_TYPE ( CV_MAT_DEPTH ( dtype ) , src . channels ( ) ) ) ;
cudaStream_t stream = StreamAccessor : : getStream ( s ) ;
2012-03-26 19:02:03 +08:00
if ( dst . type ( ) = = src . type ( ) & & scale = = 1 & & ( src . depth ( ) = = CV_32F | | isIntScalar ( sc ) ) )
2011-10-10 16:19:11 +08:00
{
2012-02-22 18:00:53 +08:00
const npp_func_t npp_func = npp_funcs [ src . depth ( ) ] [ src . channels ( ) - 1 ] ;
2010-12-20 17:07:19 +08:00
2012-03-26 19:02:03 +08:00
if ( npp_func )
2012-02-22 18:00:53 +08:00
{
npp_func ( src , sc , dst , stream ) ;
return ;
}
2011-10-10 16:19:11 +08:00
}
2012-03-26 19:02:03 +08:00
CV_Assert ( src . channels ( ) = = 1 ) ;
2011-10-10 16:19:11 +08:00
const func_t func = funcs [ src . depth ( ) ] [ dst . depth ( ) ] ;
2012-02-22 18:00:53 +08:00
2012-03-26 19:02:03 +08:00
if ( ! func )
CV_Error ( CV_StsUnsupportedFormat , " Unsupported combination of source and destination types " ) ;
2010-12-20 17:07:19 +08:00
2012-03-26 19:02:03 +08:00
func ( src , sc . val [ 0 ] , dst , scale , stream ) ;
2010-12-20 17:07:19 +08:00
}
2011-10-10 16:19:11 +08:00
void cv : : gpu : : divide ( double scale , const GpuMat & src , GpuMat & dst , int dtype , Stream & s )
2010-12-20 17:07:19 +08:00
{
2012-03-26 19:02:03 +08:00
using namespace cv : : gpu : : device ;
2010-12-20 17:07:19 +08:00
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( double scalar , const PtrStepSzb & src2 , const PtrStepSzb & dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ 7 ] [ 7 ] =
2011-10-10 16:19:11 +08:00
{
2012-03-26 19:02:03 +08:00
{ divide_gpu < unsigned char , unsigned char > , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu < unsigned char , unsigned short > , divide_gpu < unsigned char , short > , divide_gpu < unsigned char , int > , divide_gpu < unsigned char , float > , divide_gpu < unsigned char , double > } ,
{ 0 /*divide_gpu<signed char, unsigned char>*/ , 0 /*divide_gpu<signed char, signed char>*/ , 0 /*divide_gpu<signed char, unsigned short>*/ , 0 /*divide_gpu<signed char, short>*/ , 0 /*divide_gpu<signed char, int>*/ , 0 /*divide_gpu<signed char, float>*/ , 0 /*divide_gpu<signed char, double>*/ } ,
{ 0 /*divide_gpu<unsigned short, unsigned char>*/ , 0 /*divide_gpu<unsigned short, signed char>*/ , divide_gpu < unsigned short , unsigned short > , 0 /*divide_gpu<unsigned short, short>*/ , divide_gpu < unsigned short , int > , divide_gpu < unsigned short , float > , divide_gpu < unsigned short , double > } ,
{ 0 /*divide_gpu<short, unsigned char>*/ , 0 /*divide_gpu<short, signed char>*/ , 0 /*divide_gpu<short, unsigned short>*/ , divide_gpu < short , short > , divide_gpu < short , int > , divide_gpu < short , float > , divide_gpu < short , double > } ,
{ 0 /*divide_gpu<int, unsigned char>*/ , 0 /*divide_gpu<int, signed char>*/ , 0 /*divide_gpu<int, unsigned short>*/ , 0 /*divide_gpu<int, short>*/ , divide_gpu < int , int > , divide_gpu < int , float > , divide_gpu < int , double > } ,
{ 0 /*divide_gpu<float, unsigned char>*/ , 0 /*divide_gpu<float, signed char>*/ , 0 /*divide_gpu<float, unsigned short>*/ , 0 /*divide_gpu<float, short>*/ , 0 /*divide_gpu<float, int>*/ , divide_gpu < float , float > , divide_gpu < float , double > } ,
{ 0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu < double , double > }
2011-10-10 16:19:11 +08:00
} ;
2011-06-29 18:14:16 +08:00
2011-10-10 16:19:11 +08:00
if ( dtype < 0 )
dtype = src . depth ( ) ;
2011-06-29 18:14:16 +08:00
2012-03-26 19:02:03 +08:00
CV_Assert ( src . depth ( ) < = CV_64F & & CV_MAT_DEPTH ( dtype ) < = CV_64F ) ;
CV_Assert ( src . channels ( ) = = 1 ) ;
if ( src . depth ( ) = = CV_64F | | CV_MAT_DEPTH ( dtype ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2012-02-22 18:00:53 +08:00
dst . create ( src . size ( ) , CV_MAKE_TYPE ( CV_MAT_DEPTH ( dtype ) , src . channels ( ) ) ) ;
cudaStream_t stream = StreamAccessor : : getStream ( s ) ;
const func_t func = funcs [ src . depth ( ) ] [ dst . depth ( ) ] ;
2012-03-26 19:02:03 +08:00
if ( ! func )
CV_Error ( CV_StsUnsupportedFormat , " Unsupported combination of source and destination types " ) ;
2012-02-22 18:00:53 +08:00
func ( scale , src , dst , stream ) ;
}
//////////////////////////////////////////////////////////////////////////////
// absdiff
2012-03-19 22:18:12 +08:00
namespace cv { namespace gpu { namespace device
2012-02-22 18:00:53 +08:00
{
template < typename T >
2012-08-23 21:45:50 +08:00
void absdiff_gpu ( const PtrStepSzb src1 , const PtrStepSzb src2 , PtrStepSzb dst , cudaStream_t stream ) ;
2012-02-22 18:00:53 +08:00
2012-03-19 22:18:12 +08:00
template < typename T >
2012-08-23 21:45:50 +08:00
void absdiff_gpu ( const PtrStepSzb src1 , double val , PtrStepSzb dst , cudaStream_t stream ) ;
2012-02-22 18:00:53 +08:00
} } }
2012-03-26 22:33:43 +08:00
namespace
2012-02-22 18:00:53 +08:00
{
2012-03-26 22:33:43 +08:00
template < int DEPTH > struct NppAbsDiffFunc
2012-02-22 18:00:53 +08:00
{
2012-03-26 22:33:43 +08:00
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
2012-02-22 18:00:53 +08:00
2012-03-26 22:33:43 +08:00
typedef NppStatus ( * func_t ) ( const npp_t * src1 , int src1_step , const npp_t * src2 , int src2_step , npp_t * dst , int dst_step , NppiSize sz ) ;
} ;
2012-02-22 18:00:53 +08:00
2012-03-26 22:33:43 +08:00
template < int DEPTH , typename NppAbsDiffFunc < DEPTH > : : func_t func > struct NppAbsDiff
2012-02-22 18:00:53 +08:00
{
2012-03-26 22:33:43 +08:00
typedef typename NppAbsDiffFunc < DEPTH > : : npp_t npp_t ;
2012-02-22 18:00:53 +08:00
2012-08-23 21:45:50 +08:00
static void call ( const PtrStepSzb src1 , const PtrStepSzb src2 , PtrStepSzb dst , cudaStream_t stream )
2012-03-26 22:33:43 +08:00
{
NppStreamHandler h ( stream ) ;
2012-02-22 18:00:53 +08:00
2012-03-26 22:33:43 +08:00
NppiSize sz ;
sz . width = src1 . cols ;
sz . height = src1 . rows ;
2012-02-22 18:00:53 +08:00
2012-03-26 22:33:43 +08:00
nppSafeCall ( func ( ( const npp_t * ) src1 . data , static_cast < int > ( src1 . step ) , ( const npp_t * ) src2 . data , static_cast < int > ( src2 . step ) ,
( npp_t * ) dst . data , static_cast < int > ( dst . step ) , sz ) ) ;
2012-02-22 18:00:53 +08:00
2012-03-26 22:33:43 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
2012-02-22 18:00:53 +08:00
template < int DEPTH > struct NppAbsDiffCFunc
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
2012-03-26 22:33:43 +08:00
typedef npp_t scalar_t ;
2012-02-22 18:00:53 +08:00
typedef NppStatus ( * func_t ) ( const npp_t * pSrc1 , int nSrc1Step , npp_t * pDst , int nDstStep , NppiSize oSizeROI , npp_t nConstant ) ;
} ;
template < > struct NppAbsDiffCFunc < CV_16U >
{
2012-03-26 22:33:43 +08:00
typedef NppTypeTraits < CV_16U > : : npp_t npp_t ;
typedef Npp32u scalar_t ;
2012-05-25 17:44:00 +08:00
# if (CUDA_VERSION <= 4020)
2012-02-22 18:00:53 +08:00
typedef NppStatus ( * func_t ) ( const Npp16u * pSrc1 , int nSrc1Step , Npp16u * pDst , int nDstStep , NppiSize oSizeROI , Npp32u nConstant ) ;
2012-05-25 17:44:00 +08:00
# else
typedef NppStatus ( * func_t ) ( const Npp16u * pSrc1 , int nSrc1Step , Npp16u * pDst , int nDstStep , NppiSize oSizeROI , Npp16u nConstant ) ;
# endif
2012-02-22 18:00:53 +08:00
} ;
template < int DEPTH , typename NppAbsDiffCFunc < DEPTH > : : func_t func > struct NppAbsDiffC
{
2012-03-26 22:33:43 +08:00
typedef typename NppAbsDiffCFunc < DEPTH > : : npp_t npp_t ;
typedef typename NppAbsDiffCFunc < DEPTH > : : scalar_t scalar_t ;
2012-02-22 18:00:53 +08:00
2012-08-23 21:45:50 +08:00
static void call ( const PtrStepSzb src1 , double val , PtrStepSzb dst , cudaStream_t stream )
2012-02-22 18:00:53 +08:00
{
NppStreamHandler h ( stream ) ;
NppiSize sz ;
sz . width = src1 . cols ;
sz . height = src1 . rows ;
2012-03-26 22:33:43 +08:00
nppSafeCall ( func ( ( const npp_t * ) src1 . data , static_cast < int > ( src1 . step ) ,
( npp_t * ) dst . data , static_cast < int > ( dst . step ) , sz , static_cast < scalar_t > ( val ) ) ) ;
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
}
2012-03-26 22:33:43 +08:00
void cv : : gpu : : absdiff ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , Stream & stream )
2012-02-22 18:00:53 +08:00
{
using namespace cv : : gpu : : device ;
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb src1 , const PtrStepSzb src2 , PtrStepSzb dst , cudaStream_t stream ) ;
2012-03-26 22:33:43 +08:00
static const func_t funcs [ ] =
{
NppAbsDiff < CV_8U , nppiAbsDiff_8u_C1R > : : call ,
absdiff_gpu < signed char > ,
NppAbsDiff < CV_16U , nppiAbsDiff_16u_C1R > : : call ,
absdiff_gpu < short > ,
absdiff_gpu < int > ,
NppAbsDiff < CV_32F , nppiAbsDiff_32f_C1R > : : call ,
absdiff_gpu < double >
} ;
CV_Assert ( src1 . depth ( ) < = CV_64F ) ;
CV_Assert ( src1 . size ( ) = = src2 . size ( ) & & src1 . type ( ) = = src2 . type ( ) ) ;
if ( src1 . depth ( ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
funcs [ src1 . depth ( ) ] ( src1 . reshape ( 1 ) , src2 . reshape ( 1 ) , dst . reshape ( 1 ) , StreamAccessor : : getStream ( stream ) ) ;
}
2012-02-22 18:00:53 +08:00
2012-03-26 22:33:43 +08:00
void cv : : gpu : : absdiff ( const GpuMat & src1 , const Scalar & src2 , GpuMat & dst , Stream & stream )
{
using namespace cv : : gpu : : device ;
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb src1 , double val , PtrStepSzb dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ ] =
2012-02-22 18:00:53 +08:00
{
2012-03-19 22:18:12 +08:00
NppAbsDiffC < CV_8U , nppiAbsDiffC_8u_C1R > : : call ,
absdiff_gpu < signed char > ,
NppAbsDiffC < CV_16U , nppiAbsDiffC_16u_C1R > : : call ,
2012-02-22 18:00:53 +08:00
absdiff_gpu < short > ,
2012-03-19 22:18:12 +08:00
absdiff_gpu < int > ,
NppAbsDiffC < CV_32F , nppiAbsDiffC_32f_C1R > : : call ,
2012-02-22 18:00:53 +08:00
absdiff_gpu < double >
} ;
2012-03-26 22:33:43 +08:00
CV_Assert ( src1 . depth ( ) < = CV_64F ) ;
2012-02-22 18:00:53 +08:00
CV_Assert ( src1 . channels ( ) = = 1 ) ;
2012-03-26 22:33:43 +08:00
if ( src1 . depth ( ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2012-02-22 18:00:53 +08:00
2012-03-26 22:33:43 +08:00
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
2012-02-22 18:00:53 +08:00
2012-03-26 22:33:43 +08:00
funcs [ src1 . depth ( ) ] ( src1 , src2 . val [ 0 ] , dst , StreamAccessor : : getStream ( stream ) ) ;
2012-02-22 18:00:53 +08:00
}
//////////////////////////////////////////////////////////////////////////////
// abs
void cv : : gpu : : abs ( const GpuMat & src , GpuMat & dst , Stream & s )
{
CV_Assert ( src . depth ( ) = = CV_16S | | src . depth ( ) = = CV_32F ) ;
dst . create ( src . size ( ) , src . type ( ) ) ;
cudaStream_t stream = StreamAccessor : : getStream ( s ) ;
NppStreamHandler h ( stream ) ;
NppiSize oSizeROI ;
oSizeROI . width = src . cols * src . channels ( ) ;
oSizeROI . height = src . rows ;
bool aligned = isAligned ( src . data , 16 ) & & isAligned ( dst . data , 16 ) ;
if ( src . depth ( ) = = CV_16S )
{
if ( aligned & & oSizeROI . width % 4 = = 0 )
{
oSizeROI . width / = 4 ;
nppSafeCall ( nppiAbs_16s_C4R ( src . ptr < Npp16s > ( ) , static_cast < int > ( src . step ) , dst . ptr < Npp16s > ( ) , static_cast < int > ( dst . step ) , oSizeROI ) ) ;
}
else
{
nppSafeCall ( nppiAbs_16s_C1R ( src . ptr < Npp16s > ( ) , static_cast < int > ( src . step ) , dst . ptr < Npp16s > ( ) , static_cast < int > ( dst . step ) , oSizeROI ) ) ;
}
}
else
{
if ( aligned & & oSizeROI . width % 4 = = 0 )
{
oSizeROI . width / = 4 ;
nppSafeCall ( nppiAbs_32f_C4R ( src . ptr < Npp32f > ( ) , static_cast < int > ( src . step ) , dst . ptr < Npp32f > ( ) , static_cast < int > ( dst . step ) , oSizeROI ) ) ;
}
else
{
nppSafeCall ( nppiAbs_32f_C1R ( src . ptr < Npp32f > ( ) , static_cast < int > ( src . step ) , dst . ptr < Npp32f > ( ) , static_cast < int > ( dst . step ) , oSizeROI ) ) ;
}
}
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
//////////////////////////////////////////////////////////////////////////////
// sqr
namespace
{
template < int DEPTH > struct NppSqrFunc
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
typedef NppStatus ( * func_t ) ( const npp_t * pSrc , int nSrcStep , npp_t * pDst , int nDstStep , NppiSize oSizeROI , int nScaleFactor ) ;
} ;
template < > struct NppSqrFunc < CV_32F >
{
typedef NppTypeTraits < CV_32F > : : npp_t npp_t ;
typedef NppStatus ( * func_t ) ( const npp_t * pSrc , int nSrcStep , npp_t * pDst , int nDstStep , NppiSize oSizeROI ) ;
} ;
template < int DEPTH , typename NppSqrFunc < DEPTH > : : func_t func , typename NppSqrFunc < DEPTH > : : func_t func_c4 > struct NppSqr
{
typedef typename NppSqrFunc < DEPTH > : : npp_t npp_t ;
static void call ( const GpuMat & src , GpuMat & dst , cudaStream_t stream )
{
NppStreamHandler h ( stream ) ;
NppiSize oSizeROI ;
oSizeROI . width = src . cols * src . channels ( ) ;
oSizeROI . height = src . rows ;
bool aligned = isAligned ( src . data , 16 ) & & isAligned ( dst . data , 16 ) ;
if ( aligned & & oSizeROI . width % 4 = = 0 )
{
oSizeROI . width / = 4 ;
nppSafeCall ( func_c4 ( src . ptr < npp_t > ( ) , static_cast < int > ( src . step ) , dst . ptr < npp_t > ( ) , static_cast < int > ( dst . step ) , oSizeROI , 0 ) ) ;
}
else
{
nppSafeCall ( func ( src . ptr < npp_t > ( ) , static_cast < int > ( src . step ) , dst . ptr < npp_t > ( ) , static_cast < int > ( dst . step ) , oSizeROI , 0 ) ) ;
}
2011-06-29 18:14:16 +08:00
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
template < typename NppSqrFunc < CV_32F > : : func_t func , typename NppSqrFunc < CV_32F > : : func_t func_c4 > struct NppSqr < CV_32F , func , func_c4 >
{
typedef NppSqrFunc < CV_32F > : : npp_t npp_t ;
2010-12-20 17:07:19 +08:00
2012-02-22 18:00:53 +08:00
static void call ( const GpuMat & src , GpuMat & dst , cudaStream_t stream )
{
NppStreamHandler h ( stream ) ;
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
NppiSize oSizeROI ;
oSizeROI . width = src . cols * src . channels ( ) ;
oSizeROI . height = src . rows ;
2010-12-20 17:07:19 +08:00
2012-02-22 18:00:53 +08:00
bool aligned = isAligned ( src . data , 16 ) & & isAligned ( dst . data , 16 ) ;
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
if ( aligned & & oSizeROI . width % 4 = = 0 )
{
oSizeROI . width / = 4 ;
nppSafeCall ( func_c4 ( src . ptr < npp_t > ( ) , static_cast < int > ( src . step ) , dst . ptr < npp_t > ( ) , static_cast < int > ( dst . step ) , oSizeROI ) ) ;
}
else
{
nppSafeCall ( func ( src . ptr < npp_t > ( ) , static_cast < int > ( src . step ) , dst . ptr < npp_t > ( ) , static_cast < int > ( dst . step ) , oSizeROI ) ) ;
}
2011-11-09 21:13:52 +08:00
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
}
2010-12-20 17:07:19 +08:00
2012-02-22 18:00:53 +08:00
void cv : : gpu : : sqr ( const GpuMat & src , GpuMat & dst , Stream & stream )
2010-12-20 17:07:19 +08:00
{
2012-02-22 18:00:53 +08:00
typedef void ( * func_t ) ( const GpuMat & src , GpuMat & dst , cudaStream_t stream ) ;
2010-12-20 17:07:19 +08:00
2012-03-19 22:18:12 +08:00
static const func_t funcs [ ] =
2011-10-10 16:19:11 +08:00
{
2012-02-22 18:00:53 +08:00
NppSqr < CV_8U , nppiSqr_8u_C1RSfs , nppiSqr_8u_C4RSfs > : : call ,
0 ,
NppSqr < CV_16U , nppiSqr_16u_C1RSfs , nppiSqr_16u_C4RSfs > : : call ,
NppSqr < CV_16S , nppiSqr_16s_C1RSfs , nppiSqr_16s_C4RSfs > : : call ,
0 ,
NppSqr < CV_32F , nppiSqr_32f_C1R , nppiSqr_32f_C4R > : : call
2011-10-10 16:19:11 +08:00
} ;
2012-02-22 18:00:53 +08:00
CV_Assert ( src . depth ( ) = = CV_8U | | src . depth ( ) = = CV_16U | | src . depth ( ) = = CV_16S | | src . depth ( ) = = CV_32F ) ;
2010-12-20 17:07:19 +08:00
2012-02-22 18:00:53 +08:00
dst . create ( src . size ( ) , src . type ( ) ) ;
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
funcs [ src . depth ( ) ] ( src , dst , StreamAccessor : : getStream ( stream ) ) ;
}
2010-12-20 17:07:19 +08:00
2012-02-22 18:00:53 +08:00
//////////////////////////////////////////////////////////////////////////////
// sqrt
2011-12-21 13:59:14 +08:00
2012-02-22 18:00:53 +08:00
namespace
{
template < int DEPTH > struct NppOneSourceFunc
2011-10-10 16:19:11 +08:00
{
2012-02-22 18:00:53 +08:00
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
typedef NppStatus ( * func_t ) ( const npp_t * pSrc , int nSrcStep , npp_t * pDst , int nDstStep , NppiSize oSizeROI , int nScaleFactor ) ;
} ;
template < > struct NppOneSourceFunc < CV_32F >
{
typedef NppTypeTraits < CV_32F > : : npp_t npp_t ;
2011-05-31 16:31:10 +08:00
2012-02-22 18:00:53 +08:00
typedef NppStatus ( * func_t ) ( const npp_t * pSrc , int nSrcStep , npp_t * pDst , int nDstStep , NppiSize oSizeROI ) ;
} ;
2011-05-31 16:31:10 +08:00
2012-02-22 18:00:53 +08:00
template < int DEPTH , typename NppOneSourceFunc < DEPTH > : : func_t func > struct NppOneSource
2010-12-20 17:07:19 +08:00
{
2012-02-22 18:00:53 +08:00
typedef typename NppOneSourceFunc < DEPTH > : : npp_t npp_t ;
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
static void call ( const GpuMat & src , GpuMat & dst , cudaStream_t stream )
2012-01-30 21:15:20 +08:00
{
NppStreamHandler h ( stream ) ;
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
NppiSize oSizeROI ;
oSizeROI . width = src . cols * src . channels ( ) ;
oSizeROI . height = src . rows ;
nppSafeCall ( func ( src . ptr < npp_t > ( ) , static_cast < int > ( src . step ) , dst . ptr < npp_t > ( ) , static_cast < int > ( dst . step ) , oSizeROI , 0 ) ) ;
2011-10-10 16:19:11 +08:00
2012-01-30 21:15:20 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
2012-02-22 18:00:53 +08:00
} ;
template < typename NppOneSourceFunc < CV_32F > : : func_t func > struct NppOneSource < CV_32F , func >
{
typedef NppOneSourceFunc < CV_32F > : : npp_t npp_t ;
static void call ( const GpuMat & src , GpuMat & dst , cudaStream_t stream )
2012-01-30 21:15:20 +08:00
{
NppStreamHandler h ( stream ) ;
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
NppiSize oSizeROI ;
oSizeROI . width = src . cols * src . channels ( ) ;
oSizeROI . height = src . rows ;
nppSafeCall ( func ( src . ptr < npp_t > ( ) , static_cast < int > ( src . step ) , dst . ptr < npp_t > ( ) , static_cast < int > ( dst . step ) , oSizeROI ) ) ;
2011-10-10 16:19:11 +08:00
2012-01-30 21:15:20 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
2012-02-22 18:00:53 +08:00
} ;
2010-12-20 17:07:19 +08:00
}
2012-02-22 18:00:53 +08:00
void cv : : gpu : : sqrt ( const GpuMat & src , GpuMat & dst , Stream & stream )
2010-12-20 17:07:19 +08:00
{
2012-02-22 18:00:53 +08:00
typedef void ( * func_t ) ( const GpuMat & src , GpuMat & dst , cudaStream_t stream ) ;
2010-12-20 17:07:19 +08:00
2012-03-19 22:18:12 +08:00
static const func_t funcs [ ] =
2011-10-10 16:19:11 +08:00
{
2012-02-22 18:00:53 +08:00
NppOneSource < CV_8U , nppiSqrt_8u_C1RSfs > : : call ,
0 ,
NppOneSource < CV_16U , nppiSqrt_16u_C1RSfs > : : call ,
NppOneSource < CV_16S , nppiSqrt_16s_C1RSfs > : : call ,
0 ,
NppOneSource < CV_32F , nppiSqrt_32f_C1R > : : call
2011-10-10 16:19:11 +08:00
} ;
2012-02-22 18:00:53 +08:00
CV_Assert ( src . depth ( ) = = CV_8U | | src . depth ( ) = = CV_16U | | src . depth ( ) = = CV_16S | | src . depth ( ) = = CV_32F ) ;
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
dst . create ( src . size ( ) , src . type ( ) ) ;
2011-05-31 16:31:10 +08:00
2012-02-22 18:00:53 +08:00
funcs [ src . depth ( ) ] ( src , dst , StreamAccessor : : getStream ( stream ) ) ;
}
////////////////////////////////////////////////////////////////////////
// log
void cv : : gpu : : log ( const GpuMat & src , GpuMat & dst , Stream & stream )
{
typedef void ( * func_t ) ( const GpuMat & src , GpuMat & dst , cudaStream_t stream ) ;
2011-05-31 16:31:10 +08:00
2012-03-19 22:18:12 +08:00
static const func_t funcs [ ] =
2011-10-10 16:19:11 +08:00
{
2012-02-22 18:00:53 +08:00
NppOneSource < CV_8U , nppiLn_8u_C1RSfs > : : call ,
0 ,
NppOneSource < CV_16U , nppiLn_16u_C1RSfs > : : call ,
NppOneSource < CV_16S , nppiLn_16s_C1RSfs > : : call ,
0 ,
NppOneSource < CV_32F , nppiLn_32f_C1R > : : call
} ;
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
CV_Assert ( src . depth ( ) = = CV_8U | | src . depth ( ) = = CV_16U | | src . depth ( ) = = CV_16S | | src . depth ( ) = = CV_32F ) ;
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
dst . create ( src . size ( ) , src . type ( ) ) ;
2010-12-20 17:07:19 +08:00
2012-02-22 18:00:53 +08:00
funcs [ src . depth ( ) ] ( src , dst , StreamAccessor : : getStream ( stream ) ) ;
}
2011-01-24 18:32:57 +08:00
2012-02-22 18:00:53 +08:00
////////////////////////////////////////////////////////////////////////
// exp
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
void cv : : gpu : : exp ( const GpuMat & src , GpuMat & dst , Stream & stream )
{
typedef void ( * func_t ) ( const GpuMat & src , GpuMat & dst , cudaStream_t stream ) ;
2011-10-10 16:19:11 +08:00
2012-03-19 22:18:12 +08:00
static const func_t funcs [ ] =
2012-02-22 18:00:53 +08:00
{
NppOneSource < CV_8U , nppiExp_8u_C1RSfs > : : call ,
0 ,
NppOneSource < CV_16U , nppiExp_16u_C1RSfs > : : call ,
NppOneSource < CV_16S , nppiExp_16s_C1RSfs > : : call ,
0 ,
NppOneSource < CV_32F , nppiExp_32f_C1R > : : call
} ;
2011-10-10 16:19:11 +08:00
2012-02-22 18:00:53 +08:00
CV_Assert ( src . depth ( ) = = CV_8U | | src . depth ( ) = = CV_16U | | src . depth ( ) = = CV_16S | | src . depth ( ) = = CV_32F ) ;
2010-12-20 17:07:19 +08:00
2012-02-22 18:00:53 +08:00
dst . create ( src . size ( ) , src . type ( ) ) ;
funcs [ src . depth ( ) ] ( src , dst , StreamAccessor : : getStream ( stream ) ) ;
}
2010-12-20 17:07:19 +08:00
//////////////////////////////////////////////////////////////////////////////
// Comparison of two matrices
2012-03-19 22:18:12 +08:00
namespace cv { namespace gpu { namespace device
2011-11-14 17:02:06 +08:00
{
2012-08-23 21:45:50 +08:00
template < typename T > void compare_eq ( PtrStepSzb src1 , PtrStepSzb src2 , PtrStepSzb dst , cudaStream_t stream ) ;
template < typename T > void compare_ne ( PtrStepSzb src1 , PtrStepSzb src2 , PtrStepSzb dst , cudaStream_t stream ) ;
template < typename T > void compare_lt ( PtrStepSzb src1 , PtrStepSzb src2 , PtrStepSzb dst , cudaStream_t stream ) ;
template < typename T > void compare_le ( PtrStepSzb src1 , PtrStepSzb src2 , PtrStepSzb dst , cudaStream_t stream ) ;
template < typename T > void compare_eq ( PtrStepSzb src , int cn , double val [ 4 ] , PtrStepSzb dst , cudaStream_t stream ) ;
template < typename T > void compare_ne ( PtrStepSzb src , int cn , double val [ 4 ] , PtrStepSzb dst , cudaStream_t stream ) ;
template < typename T > void compare_lt ( PtrStepSzb src , int cn , double val [ 4 ] , PtrStepSzb dst , cudaStream_t stream ) ;
template < typename T > void compare_le ( PtrStepSzb src , int cn , double val [ 4 ] , PtrStepSzb dst , cudaStream_t stream ) ;
template < typename T > void compare_gt ( PtrStepSzb src , int cn , double val [ 4 ] , PtrStepSzb dst , cudaStream_t stream ) ;
template < typename T > void compare_ge ( PtrStepSzb src , int cn , double val [ 4 ] , PtrStepSzb dst , cudaStream_t stream ) ;
2011-11-14 17:02:06 +08:00
} } }
2010-12-20 17:07:19 +08:00
2011-10-10 16:19:11 +08:00
void cv : : gpu : : compare ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , int cmpop , Stream & stream )
2010-12-20 17:07:19 +08:00
{
2012-03-26 22:33:43 +08:00
using namespace cv : : gpu : : device ;
2010-12-20 17:07:19 +08:00
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( PtrStepSzb src1 , PtrStepSzb src2 , PtrStepSzb dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ 7 ] [ 4 ] =
2010-12-20 17:07:19 +08:00
{
2012-03-26 22:33:43 +08:00
{ compare_eq < unsigned char > , compare_ne < unsigned char > , compare_lt < unsigned char > , compare_le < unsigned char > } ,
{ compare_eq < signed char > , compare_ne < signed char > , compare_lt < signed char > , compare_le < signed char > } ,
2011-10-10 16:19:11 +08:00
{ compare_eq < unsigned short > , compare_ne < unsigned short > , compare_lt < unsigned short > , compare_le < unsigned short > } ,
2012-03-26 22:33:43 +08:00
{ compare_eq < short > , compare_ne < short > , compare_lt < short > , compare_le < short > } ,
{ compare_eq < int > , compare_ne < int > , compare_lt < int > , compare_le < int > } ,
{ compare_eq < float > , compare_ne < float > , compare_lt < float > , compare_le < float > } ,
{ compare_eq < double > , compare_ne < double > , compare_lt < double > , compare_le < double > }
2011-10-10 16:19:11 +08:00
} ;
2011-05-31 16:31:10 +08:00
2012-03-26 22:33:43 +08:00
CV_Assert ( src1 . depth ( ) < = CV_64F ) ;
2011-10-10 16:19:11 +08:00
CV_Assert ( src1 . size ( ) = = src2 . size ( ) & & src1 . type ( ) = = src2 . type ( ) ) ;
2012-03-20 20:03:34 +08:00
CV_Assert ( cmpop > = CMP_EQ & & cmpop < = CMP_NE ) ;
2011-01-24 18:32:57 +08:00
2012-03-26 22:33:43 +08:00
if ( src1 . depth ( ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2012-03-20 20:03:34 +08:00
static const int codes [ ] =
{
0 , 2 , 3 , 2 , 3 , 1
} ;
const GpuMat * psrc1 [ ] =
{
& src1 , & src2 , & src2 , & src1 , & src1 , & src1
} ;
const GpuMat * psrc2 [ ] =
{
& src2 , & src1 , & src1 , & src2 , & src2 , & src2
2011-10-10 16:19:11 +08:00
} ;
2011-05-31 16:31:10 +08:00
2011-10-10 16:19:11 +08:00
dst . create ( src1 . size ( ) , CV_MAKE_TYPE ( CV_8U , src1 . channels ( ) ) ) ;
2011-01-24 18:32:57 +08:00
2012-03-20 20:03:34 +08:00
funcs [ src1 . depth ( ) ] [ codes [ cmpop ] ] ( psrc1 [ cmpop ] - > reshape ( 1 ) , psrc2 [ cmpop ] - > reshape ( 1 ) , dst . reshape ( 1 ) , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:07:19 +08:00
}
2012-05-12 17:45:21 +08:00
namespace
{
    // Replaces each of the four components of `sc` with its value after
    // saturate_cast to T (stored back as double).
    template <typename T>
    void castScalar(Scalar& sc)
    {
        for (int i = 0; i < 4; ++i)
            sc.val[i] = saturate_cast<T>(sc.val[i]);
    }
}
void cv : : gpu : : compare ( const GpuMat & src , Scalar sc , GpuMat & dst , int cmpop , Stream & stream )
{
using namespace cv : : gpu : : device ;
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( PtrStepSzb src , int cn , double val [ 4 ] , PtrStepSzb dst , cudaStream_t stream ) ;
2012-05-12 17:45:21 +08:00
static const func_t funcs [ 7 ] [ 6 ] =
{
{ compare_eq < unsigned char > , compare_gt < unsigned char > , compare_ge < unsigned char > , compare_lt < unsigned char > , compare_le < unsigned char > , compare_ne < unsigned char > } ,
{ compare_eq < signed char > , compare_gt < signed char > , compare_ge < signed char > , compare_lt < signed char > , compare_le < signed char > , compare_ne < signed char > } ,
{ compare_eq < unsigned short > , compare_gt < unsigned short > , compare_ge < unsigned short > , compare_lt < unsigned short > , compare_le < unsigned short > , compare_ne < unsigned short > } ,
{ compare_eq < short > , compare_gt < short > , compare_ge < short > , compare_lt < short > , compare_le < short > , compare_ne < short > } ,
{ compare_eq < int > , compare_gt < int > , compare_ge < int > , compare_lt < int > , compare_le < int > , compare_ne < int > } ,
{ compare_eq < float > , compare_gt < float > , compare_ge < float > , compare_lt < float > , compare_le < float > , compare_ne < float > } ,
{ compare_eq < double > , compare_gt < double > , compare_ge < double > , compare_lt < double > , compare_le < double > , compare_ne < double > }
} ;
typedef void ( * cast_func_t ) ( Scalar & sc ) ;
static const cast_func_t cast_func [ ] =
{
castScalar < unsigned char > , castScalar < signed char > , castScalar < unsigned short > , castScalar < short > , castScalar < int > , castScalar < float > , castScalar < double >
} ;
CV_Assert ( src . depth ( ) < = CV_64F ) ;
CV_Assert ( src . channels ( ) < = 4 ) ;
CV_Assert ( cmpop > = CMP_EQ & & cmpop < = CMP_NE ) ;
if ( src . depth ( ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
dst . create ( src . size ( ) , CV_MAKE_TYPE ( CV_8U , src . channels ( ) ) ) ;
cast_func [ src . depth ( ) ] ( sc ) ;
funcs [ src . depth ( ) ] [ cmpop ] ( src , src . channels ( ) , sc . val , dst , StreamAccessor : : getStream ( stream ) ) ;
}
2010-12-20 17:07:19 +08:00
//////////////////////////////////////////////////////////////////////////////
// Unary bitwise logical operations
2012-03-19 22:18:12 +08:00
namespace cv { namespace gpu { namespace device
2011-11-14 17:02:06 +08:00
{
void bitwiseNotCaller ( int rows , int cols , size_t elem_size1 , int cn , const PtrStepb src , PtrStepb dst , cudaStream_t stream ) ;
2011-11-09 21:13:52 +08:00
2011-11-14 17:02:06 +08:00
template < typename T >
void bitwiseMaskNotCaller ( int rows , int cols , int cn , const PtrStepb src , const PtrStepb mask , PtrStepb dst , cudaStream_t stream ) ;
} } }
2010-12-20 17:07:19 +08:00
namespace
{
void bitwiseNotCaller ( const GpuMat & src , GpuMat & dst , cudaStream_t stream )
{
dst . create ( src . size ( ) , src . type ( ) ) ;
2012-03-26 22:33:43 +08:00
cv : : gpu : : device : : bitwiseNotCaller ( src . rows , src . cols , src . elemSize1 ( ) , dst . channels ( ) , src , dst , stream ) ;
2010-12-20 17:07:19 +08:00
}
void bitwiseNotCaller ( const GpuMat & src , GpuMat & dst , const GpuMat & mask , cudaStream_t stream )
{
2012-03-26 22:33:43 +08:00
using namespace cv : : gpu : : device ;
2010-12-20 17:07:19 +08:00
2012-03-26 22:33:43 +08:00
typedef void ( * func_t ) ( int , int , int , const PtrStepb , const PtrStepb , PtrStepb , cudaStream_t ) ;
static func_t funcs [ ] =
2011-11-09 21:13:52 +08:00
{
2012-03-19 22:18:12 +08:00
bitwiseMaskNotCaller < unsigned char > , bitwiseMaskNotCaller < unsigned char > ,
2011-11-09 21:13:52 +08:00
bitwiseMaskNotCaller < unsigned short > , bitwiseMaskNotCaller < unsigned short > ,
bitwiseMaskNotCaller < unsigned int > , bitwiseMaskNotCaller < unsigned int > ,
bitwiseMaskNotCaller < unsigned int >
} ;
2010-12-20 17:07:19 +08:00
2012-03-26 22:33:43 +08:00
CV_Assert ( src . depth ( ) < = CV_64F ) ;
2010-12-20 17:07:19 +08:00
CV_Assert ( mask . type ( ) = = CV_8U & & mask . size ( ) = = src . size ( ) ) ;
2012-03-26 22:33:43 +08:00
2010-12-20 17:07:19 +08:00
dst . create ( src . size ( ) , src . type ( ) ) ;
2012-03-26 22:33:43 +08:00
const func_t func = funcs [ src . depth ( ) ] ;
2010-12-20 17:07:19 +08:00
int cn = src . depth ( ) ! = CV_64F ? src . channels ( ) : src . channels ( ) * ( sizeof ( double ) / sizeof ( unsigned int ) ) ;
2012-03-26 22:33:43 +08:00
func ( src . rows , src . cols , cn , src , mask , dst , stream ) ;
}
2010-12-20 17:07:19 +08:00
}
2011-05-31 16:31:10 +08:00
void cv : : gpu : : bitwise_not ( const GpuMat & src , GpuMat & dst , const GpuMat & mask , Stream & stream )
2010-12-20 17:07:19 +08:00
{
if ( mask . empty ( ) )
2011-11-09 21:13:52 +08:00
bitwiseNotCaller ( src , dst , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:07:19 +08:00
else
2011-11-09 21:13:52 +08:00
bitwiseNotCaller ( src , dst , mask , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:07:19 +08:00
}
//////////////////////////////////////////////////////////////////////////////
// Binary bitwise logical operations
2012-03-19 22:18:12 +08:00
namespace cv { namespace gpu { namespace device
2011-11-14 17:02:06 +08:00
{
void bitwiseOrCaller ( int rows , int cols , size_t elem_size1 , int cn , const PtrStepb src1 , const PtrStepb src2 , PtrStepb dst , cudaStream_t stream ) ;
2010-12-20 17:07:19 +08:00
2011-11-14 17:02:06 +08:00
template < typename T >
void bitwiseMaskOrCaller ( int rows , int cols , int cn , const PtrStepb src1 , const PtrStepb src2 , const PtrStepb mask , PtrStepb dst , cudaStream_t stream ) ;
2010-12-20 17:07:19 +08:00
2011-11-14 17:02:06 +08:00
void bitwiseAndCaller ( int rows , int cols , size_t elem_size1 , int cn , const PtrStepb src1 , const PtrStepb src2 , PtrStepb dst , cudaStream_t stream ) ;
2010-12-20 17:07:19 +08:00
2011-11-14 17:02:06 +08:00
template < typename T >
void bitwiseMaskAndCaller ( int rows , int cols , int cn , const PtrStepb src1 , const PtrStepb src2 , const PtrStepb mask , PtrStepb dst , cudaStream_t stream ) ;
2011-11-09 21:13:52 +08:00
2011-11-14 17:02:06 +08:00
void bitwiseXorCaller ( int rows , int cols , size_t elem_size1 , int cn , const PtrStepb src1 , const PtrStepb src2 , PtrStepb dst , cudaStream_t stream ) ;
2010-12-20 17:07:19 +08:00
2011-11-14 17:02:06 +08:00
template < typename T >
void bitwiseMaskXorCaller ( int rows , int cols , int cn , const PtrStepb src1 , const PtrStepb src2 , const PtrStepb mask , PtrStepb dst , cudaStream_t stream ) ;
} } }
2010-12-20 17:07:19 +08:00
namespace
{
void bitwiseOrCaller ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , cudaStream_t stream )
{
CV_Assert ( src1 . size ( ) = = src2 . size ( ) & & src1 . type ( ) = = src2 . type ( ) ) ;
2012-03-26 22:33:43 +08:00
2010-12-20 17:07:19 +08:00
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
2012-03-26 22:33:43 +08:00
cv : : gpu : : device : : bitwiseOrCaller ( dst . rows , dst . cols , dst . elemSize1 ( ) , dst . channels ( ) , src1 , src2 , dst , stream ) ;
2010-12-20 17:07:19 +08:00
}
void bitwiseOrCaller ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , const GpuMat & mask , cudaStream_t stream )
{
2012-03-26 22:33:43 +08:00
using namespace cv : : gpu : : device ;
2011-11-09 21:13:52 +08:00
2012-03-26 22:33:43 +08:00
typedef void ( * func_t ) ( int , int , int , const PtrStepb , const PtrStepb , const PtrStepb , PtrStepb , cudaStream_t ) ;
static func_t funcs [ ] =
2011-11-09 21:13:52 +08:00
{
2012-03-19 22:18:12 +08:00
bitwiseMaskOrCaller < unsigned char > , bitwiseMaskOrCaller < unsigned char > ,
2011-11-09 21:13:52 +08:00
bitwiseMaskOrCaller < unsigned short > , bitwiseMaskOrCaller < unsigned short > ,
bitwiseMaskOrCaller < unsigned int > , bitwiseMaskOrCaller < unsigned int > ,
bitwiseMaskOrCaller < unsigned int >
} ;
2010-12-20 17:07:19 +08:00
2012-03-26 22:33:43 +08:00
CV_Assert ( src1 . depth ( ) < = CV_64F ) ;
2010-12-20 17:07:19 +08:00
CV_Assert ( src1 . size ( ) = = src2 . size ( ) & & src1 . type ( ) = = src2 . type ( ) ) ;
2012-03-26 22:33:43 +08:00
CV_Assert ( mask . type ( ) = = CV_8U & & mask . size ( ) = = src1 . size ( ) ) ;
2010-12-20 17:07:19 +08:00
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
2012-03-26 22:33:43 +08:00
const func_t func = funcs [ src1 . depth ( ) ] ;
2010-12-20 17:07:19 +08:00
int cn = dst . depth ( ) ! = CV_64F ? dst . channels ( ) : dst . channels ( ) * ( sizeof ( double ) / sizeof ( unsigned int ) ) ;
2012-03-26 22:33:43 +08:00
func ( dst . rows , dst . cols , cn , src1 , src2 , mask , dst , stream ) ;
2010-12-20 17:07:19 +08:00
}
void bitwiseAndCaller ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , cudaStream_t stream )
{
CV_Assert ( src1 . size ( ) = = src2 . size ( ) & & src1 . type ( ) = = src2 . type ( ) ) ;
2012-03-26 22:33:43 +08:00
2010-12-20 17:07:19 +08:00
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
2012-03-26 22:33:43 +08:00
cv : : gpu : : device : : bitwiseAndCaller ( dst . rows , dst . cols , dst . elemSize1 ( ) , dst . channels ( ) , src1 , src2 , dst , stream ) ;
2010-12-20 17:07:19 +08:00
}
void bitwiseAndCaller ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , const GpuMat & mask , cudaStream_t stream )
{
2012-03-26 22:33:43 +08:00
using namespace cv : : gpu : : device ;
2010-12-20 17:07:19 +08:00
2012-03-26 22:33:43 +08:00
typedef void ( * func_t ) ( int , int , int , const PtrStepb , const PtrStepb , const PtrStepb , PtrStepb , cudaStream_t ) ;
static func_t funcs [ ] =
2011-11-09 21:13:52 +08:00
{
2012-03-19 22:18:12 +08:00
bitwiseMaskAndCaller < unsigned char > , bitwiseMaskAndCaller < unsigned char > ,
2011-11-09 21:13:52 +08:00
bitwiseMaskAndCaller < unsigned short > , bitwiseMaskAndCaller < unsigned short > ,
bitwiseMaskAndCaller < unsigned int > , bitwiseMaskAndCaller < unsigned int > ,
bitwiseMaskAndCaller < unsigned int >
} ;
2010-12-20 17:07:19 +08:00
2012-03-26 22:33:43 +08:00
CV_Assert ( src1 . depth ( ) < = CV_64F ) ;
2010-12-20 17:07:19 +08:00
CV_Assert ( src1 . size ( ) = = src2 . size ( ) & & src1 . type ( ) = = src2 . type ( ) ) ;
2012-03-26 22:33:43 +08:00
CV_Assert ( mask . type ( ) = = CV_8U & & mask . size ( ) = = src1 . size ( ) ) ;
2010-12-20 17:07:19 +08:00
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
2012-03-26 22:33:43 +08:00
const func_t func = funcs [ src1 . depth ( ) ] ;
2010-12-20 17:07:19 +08:00
int cn = dst . depth ( ) ! = CV_64F ? dst . channels ( ) : dst . channels ( ) * ( sizeof ( double ) / sizeof ( unsigned int ) ) ;
2012-03-26 22:33:43 +08:00
func ( dst . rows , dst . cols , cn , src1 , src2 , mask , dst , stream ) ;
2010-12-20 17:07:19 +08:00
}
void bitwiseXorCaller ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , cudaStream_t stream )
{
CV_Assert ( src1 . size ( ) = = src2 . size ( ) & & src1 . type ( ) = = src2 . type ( ) ) ;
2012-03-26 22:33:43 +08:00
2010-12-20 17:07:19 +08:00
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
2012-03-26 22:33:43 +08:00
cv : : gpu : : device : : bitwiseXorCaller ( dst . rows , dst . cols , dst . elemSize1 ( ) , dst . channels ( ) , src1 , src2 , dst , stream ) ;
2010-12-20 17:07:19 +08:00
}
void bitwiseXorCaller ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , const GpuMat & mask , cudaStream_t stream )
{
2012-03-26 22:33:43 +08:00
using namespace cv : : gpu : : device ;
2010-12-20 17:07:19 +08:00
2012-03-26 22:33:43 +08:00
typedef void ( * func_t ) ( int , int , int , const PtrStepb , const PtrStepb , const PtrStepb , PtrStepb , cudaStream_t ) ;
static func_t funcs [ ] =
2011-11-09 21:13:52 +08:00
{
2012-03-19 22:18:12 +08:00
bitwiseMaskXorCaller < unsigned char > , bitwiseMaskXorCaller < unsigned char > ,
2011-11-09 21:13:52 +08:00
bitwiseMaskXorCaller < unsigned short > , bitwiseMaskXorCaller < unsigned short > ,
bitwiseMaskXorCaller < unsigned int > , bitwiseMaskXorCaller < unsigned int > ,
bitwiseMaskXorCaller < unsigned int >
} ;
2010-12-20 17:07:19 +08:00
2012-03-26 22:33:43 +08:00
CV_Assert ( src1 . depth ( ) < = CV_64F ) ;
2010-12-20 17:07:19 +08:00
CV_Assert ( src1 . size ( ) = = src2 . size ( ) & & src1 . type ( ) = = src2 . type ( ) ) ;
2012-03-26 22:33:43 +08:00
CV_Assert ( mask . type ( ) = = CV_8U & & mask . size ( ) = = src1 . size ( ) ) ;
2010-12-20 17:07:19 +08:00
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
2012-03-26 22:33:43 +08:00
const func_t func = funcs [ src1 . depth ( ) ] ;
2010-12-20 17:07:19 +08:00
int cn = dst . depth ( ) ! = CV_64F ? dst . channels ( ) : dst . channels ( ) * ( sizeof ( double ) / sizeof ( unsigned int ) ) ;
2012-03-26 22:33:43 +08:00
func ( dst . rows , dst . cols , cn , src1 , src2 , mask , dst , stream ) ;
2010-12-20 17:07:19 +08:00
}
}
2011-05-31 16:31:10 +08:00
void cv : : gpu : : bitwise_or ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , const GpuMat & mask , Stream & stream )
2010-12-20 17:07:19 +08:00
{
if ( mask . empty ( ) )
2011-11-09 21:13:52 +08:00
bitwiseOrCaller ( src1 , src2 , dst , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:07:19 +08:00
else
2011-11-09 21:13:52 +08:00
bitwiseOrCaller ( src1 , src2 , dst , mask , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:07:19 +08:00
}
2011-05-31 16:31:10 +08:00
void cv : : gpu : : bitwise_and ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , const GpuMat & mask , Stream & stream )
2010-12-20 17:07:19 +08:00
{
if ( mask . empty ( ) )
2011-11-09 21:13:52 +08:00
bitwiseAndCaller ( src1 , src2 , dst , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:07:19 +08:00
else
2011-11-09 21:13:52 +08:00
bitwiseAndCaller ( src1 , src2 , dst , mask , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:07:19 +08:00
}
2011-05-31 16:31:10 +08:00
void cv : : gpu : : bitwise_xor ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , const GpuMat & mask , Stream & stream )
2010-12-20 17:07:19 +08:00
{
if ( mask . empty ( ) )
2011-11-09 21:13:52 +08:00
bitwiseXorCaller ( src1 , src2 , dst , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:07:19 +08:00
else
2011-11-09 21:13:52 +08:00
bitwiseXorCaller ( src1 , src2 , dst , mask , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:07:19 +08:00
}
2012-02-22 18:00:53 +08:00
namespace
{
template < int DEPTH , int cn > struct NppBitwiseCFunc
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
typedef NppStatus ( * func_t ) ( const npp_t * pSrc1 , int nSrc1Step , const npp_t * pConstants , npp_t * pDst , int nDstStep , NppiSize oSizeROI ) ;
} ;
template < int DEPTH > struct NppBitwiseCFunc < DEPTH , 1 >
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
typedef NppStatus ( * func_t ) ( const npp_t * pSrc1 , int nSrc1Step , const npp_t pConstant , npp_t * pDst , int nDstStep , NppiSize oSizeROI ) ;
} ;
template < int DEPTH , int cn , typename NppBitwiseCFunc < DEPTH , cn > : : func_t func > struct NppBitwiseC
{
typedef typename NppBitwiseCFunc < DEPTH , cn > : : npp_t npp_t ;
static void call ( const GpuMat & src , Scalar sc , GpuMat & dst , cudaStream_t stream )
{
NppStreamHandler h ( stream ) ;
NppiSize oSizeROI ;
oSizeROI . width = src . cols ;
oSizeROI . height = src . rows ;
const npp_t pConstants [ ] = { static_cast < npp_t > ( sc . val [ 0 ] ) , static_cast < npp_t > ( sc . val [ 1 ] ) , static_cast < npp_t > ( sc . val [ 2 ] ) , static_cast < npp_t > ( sc . val [ 3 ] ) } ;
2012-03-19 22:18:12 +08:00
nppSafeCall ( func ( src . ptr < npp_t > ( ) , static_cast < int > ( src . step ) , pConstants , dst . ptr < npp_t > ( ) , static_cast < int > ( dst . step ) , oSizeROI ) ) ;
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
template < int DEPTH , typename NppBitwiseCFunc < DEPTH , 1 > : : func_t func > struct NppBitwiseC < DEPTH , 1 , func >
{
typedef typename NppBitwiseCFunc < DEPTH , 1 > : : npp_t npp_t ;
static void call ( const GpuMat & src , Scalar sc , GpuMat & dst , cudaStream_t stream )
{
NppStreamHandler h ( stream ) ;
NppiSize oSizeROI ;
oSizeROI . width = src . cols ;
oSizeROI . height = src . rows ;
2012-03-19 22:18:12 +08:00
nppSafeCall ( func ( src . ptr < npp_t > ( ) , static_cast < int > ( src . step ) , static_cast < npp_t > ( sc . val [ 0 ] ) , dst . ptr < npp_t > ( ) , static_cast < int > ( dst . step ) , oSizeROI ) ) ;
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
}
void cv : : gpu : : bitwise_or ( const GpuMat & src , const Scalar & sc , GpuMat & dst , Stream & stream )
{
typedef void ( * func_t ) ( const GpuMat & src , Scalar sc , GpuMat & dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ 5 ] [ 4 ] =
2012-02-22 18:00:53 +08:00
{
2012-03-26 22:33:43 +08:00
{ NppBitwiseC < CV_8U , 1 , nppiOrC_8u_C1R > : : call , 0 , NppBitwiseC < CV_8U , 3 , nppiOrC_8u_C3R > : : call , NppBitwiseC < CV_8U , 4 , nppiOrC_8u_C4R > : : call } ,
2012-02-22 18:00:53 +08:00
{ 0 , 0 , 0 , 0 } ,
{ NppBitwiseC < CV_16U , 1 , nppiOrC_16u_C1R > : : call , 0 , NppBitwiseC < CV_16U , 3 , nppiOrC_16u_C3R > : : call , NppBitwiseC < CV_16U , 4 , nppiOrC_16u_C4R > : : call } ,
{ 0 , 0 , 0 , 0 } ,
{ NppBitwiseC < CV_32S , 1 , nppiOrC_32s_C1R > : : call , 0 , NppBitwiseC < CV_32S , 3 , nppiOrC_32s_C3R > : : call , NppBitwiseC < CV_32S , 4 , nppiOrC_32s_C4R > : : call }
} ;
CV_Assert ( src . depth ( ) = = CV_8U | | src . depth ( ) = = CV_16U | | src . depth ( ) = = CV_32S ) ;
CV_Assert ( src . channels ( ) = = 1 | | src . channels ( ) = = 3 | | src . channels ( ) = = 4 ) ;
dst . create ( src . size ( ) , src . type ( ) ) ;
funcs [ src . depth ( ) ] [ src . channels ( ) - 1 ] ( src , sc , dst , StreamAccessor : : getStream ( stream ) ) ;
}
void cv : : gpu : : bitwise_and ( const GpuMat & src , const Scalar & sc , GpuMat & dst , Stream & stream )
{
typedef void ( * func_t ) ( const GpuMat & src , Scalar sc , GpuMat & dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ 5 ] [ 4 ] =
2012-02-22 18:00:53 +08:00
{
2012-03-26 22:33:43 +08:00
{ NppBitwiseC < CV_8U , 1 , nppiAndC_8u_C1R > : : call , 0 , NppBitwiseC < CV_8U , 3 , nppiAndC_8u_C3R > : : call , NppBitwiseC < CV_8U , 4 , nppiAndC_8u_C4R > : : call } ,
2012-02-22 18:00:53 +08:00
{ 0 , 0 , 0 , 0 } ,
{ NppBitwiseC < CV_16U , 1 , nppiAndC_16u_C1R > : : call , 0 , NppBitwiseC < CV_16U , 3 , nppiAndC_16u_C3R > : : call , NppBitwiseC < CV_16U , 4 , nppiAndC_16u_C4R > : : call } ,
{ 0 , 0 , 0 , 0 } ,
{ NppBitwiseC < CV_32S , 1 , nppiAndC_32s_C1R > : : call , 0 , NppBitwiseC < CV_32S , 3 , nppiAndC_32s_C3R > : : call , NppBitwiseC < CV_32S , 4 , nppiAndC_32s_C4R > : : call }
} ;
CV_Assert ( src . depth ( ) = = CV_8U | | src . depth ( ) = = CV_16U | | src . depth ( ) = = CV_32S ) ;
CV_Assert ( src . channels ( ) = = 1 | | src . channels ( ) = = 3 | | src . channels ( ) = = 4 ) ;
dst . create ( src . size ( ) , src . type ( ) ) ;
funcs [ src . depth ( ) ] [ src . channels ( ) - 1 ] ( src , sc , dst , StreamAccessor : : getStream ( stream ) ) ;
}
void cv : : gpu : : bitwise_xor ( const GpuMat & src , const Scalar & sc , GpuMat & dst , Stream & stream )
{
typedef void ( * func_t ) ( const GpuMat & src , Scalar sc , GpuMat & dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ 5 ] [ 4 ] =
2012-02-22 18:00:53 +08:00
{
2012-03-26 22:33:43 +08:00
{ NppBitwiseC < CV_8U , 1 , nppiXorC_8u_C1R > : : call , 0 , NppBitwiseC < CV_8U , 3 , nppiXorC_8u_C3R > : : call , NppBitwiseC < CV_8U , 4 , nppiXorC_8u_C4R > : : call } ,
2012-02-22 18:00:53 +08:00
{ 0 , 0 , 0 , 0 } ,
{ NppBitwiseC < CV_16U , 1 , nppiXorC_16u_C1R > : : call , 0 , NppBitwiseC < CV_16U , 3 , nppiXorC_16u_C3R > : : call , NppBitwiseC < CV_16U , 4 , nppiXorC_16u_C4R > : : call } ,
{ 0 , 0 , 0 , 0 } ,
{ NppBitwiseC < CV_32S , 1 , nppiXorC_32s_C1R > : : call , 0 , NppBitwiseC < CV_32S , 3 , nppiXorC_32s_C3R > : : call , NppBitwiseC < CV_32S , 4 , nppiXorC_32s_C4R > : : call }
} ;
CV_Assert ( src . depth ( ) = = CV_8U | | src . depth ( ) = = CV_16U | | src . depth ( ) = = CV_32S ) ;
CV_Assert ( src . channels ( ) = = 1 | | src . channels ( ) = = 3 | | src . channels ( ) = = 4 ) ;
dst . create ( src . size ( ) , src . type ( ) ) ;
funcs [ src . depth ( ) ] [ src . channels ( ) - 1 ] ( src , sc , dst , StreamAccessor : : getStream ( stream ) ) ;
}
//////////////////////////////////////////////////////////////////////////////
// shift
namespace
{
template < int DEPTH , int cn > struct NppShiftFunc
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
typedef NppStatus ( * func_t ) ( const npp_t * pSrc1 , int nSrc1Step , const Npp32u * pConstants , npp_t * pDst , int nDstStep , NppiSize oSizeROI ) ;
} ;
template < int DEPTH > struct NppShiftFunc < DEPTH , 1 >
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
typedef NppStatus ( * func_t ) ( const npp_t * pSrc1 , int nSrc1Step , const Npp32u pConstants , npp_t * pDst , int nDstStep , NppiSize oSizeROI ) ;
} ;
template < int DEPTH , int cn , typename NppShiftFunc < DEPTH , cn > : : func_t func > struct NppShift
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
static void call ( const GpuMat & src , Scalar_ < Npp32u > sc , GpuMat & dst , cudaStream_t stream )
{
NppStreamHandler h ( stream ) ;
NppiSize oSizeROI ;
oSizeROI . width = src . cols ;
oSizeROI . height = src . rows ;
nppSafeCall ( func ( src . ptr < npp_t > ( ) , static_cast < int > ( src . step ) , sc . val , dst . ptr < npp_t > ( ) , static_cast < int > ( dst . step ) , oSizeROI ) ) ;
2012-03-19 22:18:12 +08:00
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
template < int DEPTH , typename NppShiftFunc < DEPTH , 1 > : : func_t func > struct NppShift < DEPTH , 1 , func >
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
static void call ( const GpuMat & src , Scalar_ < Npp32u > sc , GpuMat & dst , cudaStream_t stream )
{
NppStreamHandler h ( stream ) ;
NppiSize oSizeROI ;
oSizeROI . width = src . cols ;
oSizeROI . height = src . rows ;
nppSafeCall ( func ( src . ptr < npp_t > ( ) , static_cast < int > ( src . step ) , sc . val [ 0 ] , dst . ptr < npp_t > ( ) , static_cast < int > ( dst . step ) , oSizeROI ) ) ;
2012-03-19 22:18:12 +08:00
2012-02-22 18:00:53 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
}
2012-03-19 22:18:12 +08:00
void cv : : gpu : : rshift ( const GpuMat & src , Scalar_ < int > sc , GpuMat & dst , Stream & stream )
2012-02-22 18:00:53 +08:00
{
typedef void ( * func_t ) ( const GpuMat & src , Scalar_ < Npp32u > sc , GpuMat & dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ 5 ] [ 4 ] =
2012-02-22 18:00:53 +08:00
{
{ NppShift < CV_8U , 1 , nppiRShiftC_8u_C1R > : : call , 0 , NppShift < CV_8U , 3 , nppiRShiftC_8u_C3R > : : call , NppShift < CV_8U , 4 , nppiRShiftC_8u_C4R > : : call } ,
{ NppShift < CV_8S , 1 , nppiRShiftC_8s_C1R > : : call , 0 , NppShift < CV_8S , 3 , nppiRShiftC_8s_C3R > : : call , NppShift < CV_8S , 4 , nppiRShiftC_8s_C4R > : : call } ,
{ NppShift < CV_16U , 1 , nppiRShiftC_16u_C1R > : : call , 0 , NppShift < CV_16U , 3 , nppiRShiftC_16u_C3R > : : call , NppShift < CV_16U , 4 , nppiRShiftC_16u_C4R > : : call } ,
{ NppShift < CV_16S , 1 , nppiRShiftC_16s_C1R > : : call , 0 , NppShift < CV_16S , 3 , nppiRShiftC_16s_C3R > : : call , NppShift < CV_16S , 4 , nppiRShiftC_16s_C4R > : : call } ,
{ NppShift < CV_32S , 1 , nppiRShiftC_32s_C1R > : : call , 0 , NppShift < CV_32S , 3 , nppiRShiftC_32s_C3R > : : call , NppShift < CV_32S , 4 , nppiRShiftC_32s_C4R > : : call } ,
} ;
CV_Assert ( src . depth ( ) < CV_32F ) ;
CV_Assert ( src . channels ( ) = = 1 | | src . channels ( ) = = 3 | | src . channels ( ) = = 4 ) ;
dst . create ( src . size ( ) , src . type ( ) ) ;
funcs [ src . depth ( ) ] [ src . channels ( ) - 1 ] ( src , sc , dst , StreamAccessor : : getStream ( stream ) ) ;
}
2012-03-19 22:18:12 +08:00
void cv : : gpu : : lshift ( const GpuMat & src , Scalar_ < int > sc , GpuMat & dst , Stream & stream )
2012-02-22 18:00:53 +08:00
{
typedef void ( * func_t ) ( const GpuMat & src , Scalar_ < Npp32u > sc , GpuMat & dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ 5 ] [ 4 ] =
2012-02-22 18:00:53 +08:00
{
{ NppShift < CV_8U , 1 , nppiLShiftC_8u_C1R > : : call , 0 , NppShift < CV_8U , 3 , nppiLShiftC_8u_C3R > : : call , NppShift < CV_8U , 4 , nppiLShiftC_8u_C4R > : : call } ,
{ 0 , 0 , 0 , 0 } ,
{ NppShift < CV_16U , 1 , nppiLShiftC_16u_C1R > : : call , 0 , NppShift < CV_16U , 3 , nppiLShiftC_16u_C3R > : : call , NppShift < CV_16U , 4 , nppiLShiftC_16u_C4R > : : call } ,
{ 0 , 0 , 0 , 0 } ,
{ NppShift < CV_32S , 1 , nppiLShiftC_32s_C1R > : : call , 0 , NppShift < CV_32S , 3 , nppiLShiftC_32s_C3R > : : call , NppShift < CV_32S , 4 , nppiLShiftC_32s_C4R > : : call } ,
} ;
CV_Assert ( src . depth ( ) = = CV_8U | | src . depth ( ) = = CV_16U | | src . depth ( ) = = CV_32S ) ;
CV_Assert ( src . channels ( ) = = 1 | | src . channels ( ) = = 3 | | src . channels ( ) = = 4 ) ;
dst . create ( src . size ( ) , src . type ( ) ) ;
funcs [ src . depth ( ) ] [ src . channels ( ) - 1 ] ( src , sc , dst , StreamAccessor : : getStream ( stream ) ) ;
}
2010-12-20 17:51:25 +08:00
//////////////////////////////////////////////////////////////////////////////
// Minimum and maximum operations
2012-03-19 22:18:12 +08:00
namespace cv { namespace gpu { namespace device
2011-11-14 17:02:06 +08:00
{
2012-08-23 21:45:50 +08:00
template < typename T > void min_gpu ( const PtrStepSzb src1 , const PtrStepSzb src2 , PtrStepSzb dst , cudaStream_t stream ) ;
template < typename T > void max_gpu ( const PtrStepSzb src1 , const PtrStepSzb src2 , PtrStepSzb dst , cudaStream_t stream ) ;
2011-11-09 21:13:52 +08:00
2012-08-23 21:45:50 +08:00
template < typename T > void min_gpu ( const PtrStepSzb src , T val , PtrStepSzb dst , cudaStream_t stream ) ;
template < typename T > void max_gpu ( const PtrStepSzb src , T val , PtrStepSzb dst , cudaStream_t stream ) ;
2011-11-14 17:02:06 +08:00
} } }
2010-12-20 17:51:25 +08:00
2012-03-26 22:33:43 +08:00
void cv : : gpu : : min ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , Stream & stream )
2010-12-20 17:51:25 +08:00
{
2012-03-26 22:33:43 +08:00
using namespace cv : : gpu : : device ;
2010-12-20 17:51:25 +08:00
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb src1 , const PtrStepSzb src2 , PtrStepSzb dst , cudaStream_t stream ) ;
2012-03-26 22:33:43 +08:00
static const func_t funcs [ ] =
2010-12-20 17:51:25 +08:00
{
2012-03-26 22:33:43 +08:00
min_gpu < unsigned char > ,
min_gpu < signed char > ,
min_gpu < unsigned short > ,
min_gpu < short > ,
min_gpu < int > ,
min_gpu < float > ,
min_gpu < double >
} ;
2012-03-19 22:18:12 +08:00
2012-03-26 22:33:43 +08:00
CV_Assert ( src1 . depth ( ) < = CV_64F ) ;
CV_Assert ( src1 . size ( ) = = src2 . size ( ) & & src1 . type ( ) = = src2 . type ( ) ) ;
2010-12-20 17:51:25 +08:00
2012-03-26 22:33:43 +08:00
if ( src1 . depth ( ) = = CV_64F )
2010-12-20 17:51:25 +08:00
{
2012-03-26 22:33:43 +08:00
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
2010-12-20 17:51:25 +08:00
}
2012-03-26 22:33:43 +08:00
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
funcs [ src1 . depth ( ) ] ( src1 . reshape ( 1 ) , src2 . reshape ( 1 ) , dst . reshape ( 1 ) , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:51:25 +08:00
}
2012-03-26 22:33:43 +08:00
void cv : : gpu : : max ( const GpuMat & src1 , const GpuMat & src2 , GpuMat & dst , Stream & stream )
2012-03-19 22:18:12 +08:00
{
2012-03-26 22:33:43 +08:00
using namespace cv : : gpu : : device ;
2011-02-16 16:31:45 +08:00
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb src1 , const PtrStepSzb src2 , PtrStepSzb dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ ] =
2010-12-20 17:51:25 +08:00
{
2012-03-26 22:33:43 +08:00
max_gpu < unsigned char > ,
max_gpu < signed char > ,
max_gpu < unsigned short > ,
max_gpu < short > ,
max_gpu < int > ,
max_gpu < float > ,
max_gpu < double >
2010-12-20 17:51:25 +08:00
} ;
2012-03-26 22:33:43 +08:00
CV_Assert ( src1 . depth ( ) < = CV_64F ) ;
CV_Assert ( src1 . size ( ) = = src2 . size ( ) & & src1 . type ( ) = = src2 . type ( ) ) ;
if ( src1 . depth ( ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
dst . create ( src1 . size ( ) , src1 . type ( ) ) ;
funcs [ src1 . depth ( ) ] ( src1 . reshape ( 1 ) , src2 . reshape ( 1 ) , dst . reshape ( 1 ) , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:51:25 +08:00
}
2012-03-26 22:33:43 +08:00
namespace
2010-12-20 17:51:25 +08:00
{
2012-08-23 21:45:50 +08:00
template < typename T > void minScalar ( const PtrStepSzb src , double val , PtrStepSzb dst , cudaStream_t stream )
2012-03-26 22:33:43 +08:00
{
cv : : gpu : : device : : min_gpu ( src , saturate_cast < T > ( val ) , dst , stream ) ;
}
2011-02-16 16:31:45 +08:00
2012-08-23 21:45:50 +08:00
template < typename T > void maxScalar ( const PtrStepSzb src , double val , PtrStepSzb dst , cudaStream_t stream )
2010-12-20 17:51:25 +08:00
{
2012-03-26 22:33:43 +08:00
cv : : gpu : : device : : max_gpu ( src , saturate_cast < T > ( val ) , dst , stream ) ;
}
2010-12-20 17:51:25 +08:00
}
2012-03-26 22:33:43 +08:00
void cv : : gpu : : min ( const GpuMat & src , double val , GpuMat & dst , Stream & stream )
2012-03-19 22:18:12 +08:00
{
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb src1 , double src2 , PtrStepSzb dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ ] =
2010-12-20 17:51:25 +08:00
{
2012-03-26 22:33:43 +08:00
minScalar < unsigned char > ,
minScalar < signed char > ,
minScalar < unsigned short > ,
minScalar < short > ,
minScalar < int > ,
minScalar < float > ,
minScalar < double >
2010-12-20 17:51:25 +08:00
} ;
2012-03-26 22:33:43 +08:00
CV_Assert ( src . depth ( ) < = CV_64F ) ;
CV_Assert ( src . channels ( ) = = 1 ) ;
if ( src . depth ( ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
dst . create ( src . size ( ) , src . type ( ) ) ;
funcs [ src . depth ( ) ] ( src , val , dst , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:51:25 +08:00
}
2012-03-26 22:33:43 +08:00
void cv : : gpu : : max ( const GpuMat & src , double val , GpuMat & dst , Stream & stream )
2010-12-20 17:51:25 +08:00
{
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb src1 , double src2 , PtrStepSzb dst , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ ] =
2010-12-20 17:51:25 +08:00
{
2012-03-26 22:33:43 +08:00
maxScalar < unsigned char > ,
maxScalar < signed char > ,
maxScalar < unsigned short > ,
maxScalar < short > ,
maxScalar < int > ,
maxScalar < float > ,
maxScalar < double >
2010-12-20 17:51:25 +08:00
} ;
2012-03-26 22:33:43 +08:00
CV_Assert ( src . depth ( ) < = CV_64F ) ;
CV_Assert ( src . channels ( ) = = 1 ) ;
if ( src . depth ( ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
dst . create ( src . size ( ) , src . type ( ) ) ;
funcs [ src . depth ( ) ] ( src , val , dst , StreamAccessor : : getStream ( stream ) ) ;
2010-12-20 17:51:25 +08:00
}
2011-01-24 18:11:02 +08:00
////////////////////////////////////////////////////////////////////////
// threshold
2012-03-19 22:18:12 +08:00
namespace cv { namespace gpu { namespace device
2011-11-14 17:02:06 +08:00
{
template < typename T >
2012-08-23 21:45:50 +08:00
void threshold_gpu ( const PtrStepSzb & src , const PtrStepSzb & dst , T thresh , T maxVal , int type , cudaStream_t stream ) ;
2011-11-14 17:02:06 +08:00
} } }
2011-01-24 18:11:02 +08:00
namespace
{
2011-08-17 19:32:24 +08:00
template < typename T > void threshold_caller ( const GpuMat & src , GpuMat & dst , double thresh , double maxVal , int type , cudaStream_t stream )
2011-01-24 18:11:02 +08:00
{
2012-02-22 18:00:53 +08:00
cv : : gpu : : device : : threshold_gpu < T > ( src , dst , saturate_cast < T > ( thresh ) , saturate_cast < T > ( maxVal ) , type , stream ) ;
2011-02-14 23:50:17 +08:00
}
}
2011-01-24 18:11:02 +08:00
2011-05-31 16:31:10 +08:00
double cv : : gpu : : threshold ( const GpuMat & src , GpuMat & dst , double thresh , double maxVal , int type , Stream & s )
2011-02-14 23:50:17 +08:00
{
2011-08-17 19:32:24 +08:00
CV_Assert ( src . channels ( ) = = 1 & & src . depth ( ) < = CV_64F ) ;
CV_Assert ( type < = THRESH_TOZERO_INV ) ;
2012-03-26 22:33:43 +08:00
if ( src . depth ( ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2011-08-17 19:32:24 +08:00
dst . create ( src . size ( ) , src . type ( ) ) ;
2011-05-31 16:31:10 +08:00
cudaStream_t stream = StreamAccessor : : getStream ( s ) ;
2011-02-14 23:50:17 +08:00
if ( src . type ( ) = = CV_32FC1 & & type = = THRESH_TRUNC )
{
2011-05-31 16:31:10 +08:00
NppStreamHandler h ( stream ) ;
2011-02-14 23:50:17 +08:00
NppiSize sz ;
sz . width = src . cols ;
sz . height = src . rows ;
2011-08-08 19:28:14 +08:00
nppSafeCall ( nppiThreshold_32f_C1R ( src . ptr < Npp32f > ( ) , static_cast < int > ( src . step ) ,
dst . ptr < Npp32f > ( ) , static_cast < int > ( dst . step ) , sz , static_cast < Npp32f > ( thresh ) , NPP_CMP_GREATER ) ) ;
2011-02-14 23:50:17 +08:00
2011-05-31 16:31:10 +08:00
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
2011-02-14 23:50:17 +08:00
}
else
{
2012-03-26 22:33:43 +08:00
typedef void ( * func_t ) ( const GpuMat & src , GpuMat & dst , double thresh , double maxVal , int type , cudaStream_t stream ) ;
static const func_t funcs [ ] =
2011-01-24 18:11:02 +08:00
{
2012-03-19 22:18:12 +08:00
threshold_caller < unsigned char > , threshold_caller < signed char > ,
threshold_caller < unsigned short > , threshold_caller < short > ,
2011-02-14 23:50:17 +08:00
threshold_caller < int > , threshold_caller < float > , threshold_caller < double >
2011-01-24 18:11:02 +08:00
} ;
2011-08-17 19:32:24 +08:00
if ( src . depth ( ) ! = CV_32F & & src . depth ( ) ! = CV_64F )
2011-01-24 18:11:02 +08:00
{
thresh = cvFloor ( thresh ) ;
maxVal = cvRound ( maxVal ) ;
}
2012-03-26 22:33:43 +08:00
funcs [ src . depth ( ) ] ( src , dst , thresh , maxVal , type , stream ) ;
2011-01-24 18:11:02 +08:00
}
return thresh ;
}
2011-07-21 16:47:44 +08:00
////////////////////////////////////////////////////////////////////////
// pow
2012-03-19 22:18:12 +08:00
namespace cv { namespace gpu { namespace device
2011-11-14 17:02:06 +08:00
{
2012-08-23 21:45:50 +08:00
template < typename T > void pow_caller ( PtrStepSzb src , double power , PtrStepSzb dst , cudaStream_t stream ) ;
2011-11-14 17:02:06 +08:00
} } }
2011-07-21 16:47:44 +08:00
void cv : : gpu : : pow ( const GpuMat & src , double power , GpuMat & dst , Stream & stream )
2011-11-09 21:13:52 +08:00
{
2012-03-20 20:03:34 +08:00
using namespace cv : : gpu : : device ;
2011-07-21 16:47:44 +08:00
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( PtrStepSzb src , double power , PtrStepSzb dst , cudaStream_t stream ) ;
2012-03-20 20:03:34 +08:00
static const func_t funcs [ ] =
2011-07-21 16:47:44 +08:00
{
2012-03-19 22:18:12 +08:00
pow_caller < unsigned char > , pow_caller < signed char > ,
pow_caller < unsigned short > , pow_caller < short > ,
2012-03-20 20:03:34 +08:00
pow_caller < int > , pow_caller < float > , pow_caller < double >
2011-07-21 16:47:44 +08:00
} ;
2012-03-26 22:33:43 +08:00
CV_Assert ( src . depth ( ) < = CV_64F ) ;
if ( src . depth ( ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2012-03-20 20:03:34 +08:00
dst . create ( src . size ( ) , src . type ( ) ) ;
funcs [ src . depth ( ) ] ( src . reshape ( 1 ) , power , dst . reshape ( 1 ) , StreamAccessor : : getStream ( stream ) ) ;
2012-02-22 18:00:53 +08:00
}
////////////////////////////////////////////////////////////////////////
// alphaComp
namespace
{
template < int DEPTH > struct NppAlphaCompFunc
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
typedef NppStatus ( * func_t ) ( const npp_t * pSrc1 , int nSrc1Step , const npp_t * pSrc2 , int nSrc2Step , npp_t * pDst , int nDstStep , NppiSize oSizeROI , NppiAlphaOp eAlphaOp ) ;
} ;
template < int DEPTH , typename NppAlphaCompFunc < DEPTH > : : func_t func > struct NppAlphaComp
{
typedef typename NppTypeTraits < DEPTH > : : npp_t npp_t ;
static void call ( const GpuMat & img1 , const GpuMat & img2 , GpuMat & dst , NppiAlphaOp eAlphaOp , cudaStream_t stream )
{
NppStreamHandler h ( stream ) ;
NppiSize oSizeROI ;
oSizeROI . width = img1 . cols ;
oSizeROI . height = img2 . rows ;
2012-03-19 22:18:12 +08:00
nppSafeCall ( func ( img1 . ptr < npp_t > ( ) , static_cast < int > ( img1 . step ) , img2 . ptr < npp_t > ( ) , static_cast < int > ( img2 . step ) ,
2012-02-22 18:00:53 +08:00
dst . ptr < npp_t > ( ) , static_cast < int > ( dst . step ) , oSizeROI , eAlphaOp ) ) ;
if ( stream = = 0 )
cudaSafeCall ( cudaDeviceSynchronize ( ) ) ;
}
} ;
}
void cv : : gpu : : alphaComp ( const GpuMat & img1 , const GpuMat & img2 , GpuMat & dst , int alpha_op , Stream & stream )
{
static const NppiAlphaOp npp_alpha_ops [ ] = {
NPPI_OP_ALPHA_OVER ,
NPPI_OP_ALPHA_IN ,
NPPI_OP_ALPHA_OUT ,
NPPI_OP_ALPHA_ATOP ,
NPPI_OP_ALPHA_XOR ,
NPPI_OP_ALPHA_PLUS ,
NPPI_OP_ALPHA_OVER_PREMUL ,
NPPI_OP_ALPHA_IN_PREMUL ,
NPPI_OP_ALPHA_OUT_PREMUL ,
NPPI_OP_ALPHA_ATOP_PREMUL ,
NPPI_OP_ALPHA_XOR_PREMUL ,
NPPI_OP_ALPHA_PLUS_PREMUL ,
NPPI_OP_ALPHA_PREMUL
} ;
typedef void ( * func_t ) ( const GpuMat & img1 , const GpuMat & img2 , GpuMat & dst , NppiAlphaOp eAlphaOp , cudaStream_t stream ) ;
2012-03-19 22:18:12 +08:00
static const func_t funcs [ ] =
2012-02-22 18:00:53 +08:00
{
NppAlphaComp < CV_8U , nppiAlphaComp_8u_AC4R > : : call ,
0 ,
NppAlphaComp < CV_16U , nppiAlphaComp_16u_AC4R > : : call ,
0 ,
NppAlphaComp < CV_32S , nppiAlphaComp_32s_AC4R > : : call ,
2012-03-26 22:33:43 +08:00
NppAlphaComp < CV_32F , nppiAlphaComp_32f_AC4R > : : call
2012-02-22 18:00:53 +08:00
} ;
CV_Assert ( img1 . type ( ) = = CV_8UC4 | | img1 . type ( ) = = CV_16UC4 | | img1 . type ( ) = = CV_32SC4 | | img1 . type ( ) = = CV_32FC4 ) ;
CV_Assert ( img1 . size ( ) = = img2 . size ( ) & & img1 . type ( ) = = img2 . type ( ) ) ;
dst . create ( img1 . size ( ) , img1 . type ( ) ) ;
const func_t func = funcs [ img1 . depth ( ) ] ;
func ( img1 , img2 , dst , npp_alpha_ops [ alpha_op ] , StreamAccessor : : getStream ( stream ) ) ;
2011-07-21 16:47:44 +08:00
}
2011-09-21 16:58:54 +08:00
////////////////////////////////////////////////////////////////////////
// addWeighted
2012-03-19 22:18:12 +08:00
namespace cv { namespace gpu { namespace device
2011-11-14 17:02:06 +08:00
{
template < typename T1 , typename T2 , typename D >
2012-08-23 21:45:50 +08:00
void addWeighted_gpu ( const PtrStepSzb & src1 , double alpha , const PtrStepSzb & src2 , double beta , double gamma , const PtrStepSzb & dst , cudaStream_t stream ) ;
2011-11-14 17:02:06 +08:00
} } }
2011-09-21 16:58:54 +08:00
void cv : : gpu : : addWeighted ( const GpuMat & src1 , double alpha , const GpuMat & src2 , double beta , double gamma , GpuMat & dst , int dtype , Stream & stream )
{
2012-03-20 20:03:34 +08:00
using namespace cv : : gpu : : device ;
2011-09-21 16:58:54 +08:00
2012-08-23 21:45:50 +08:00
typedef void ( * func_t ) ( const PtrStepSzb & src1 , double alpha , const PtrStepSzb & src2 , double beta , double gamma , const PtrStepSzb & dst , cudaStream_t stream ) ;
2011-09-21 16:58:54 +08:00
2012-03-20 20:03:34 +08:00
static const func_t funcs [ 7 ] [ 7 ] [ 7 ] =
2011-09-21 16:58:54 +08:00
{
{
{
addWeighted_gpu < unsigned char , unsigned char , unsigned char > ,
addWeighted_gpu < unsigned char , unsigned char , signed char > ,
addWeighted_gpu < unsigned char , unsigned char , unsigned short > ,
addWeighted_gpu < unsigned char , unsigned char , short > ,
addWeighted_gpu < unsigned char , unsigned char , int > ,
addWeighted_gpu < unsigned char , unsigned char , float > ,
addWeighted_gpu < unsigned char , unsigned char , double >
} ,
{
addWeighted_gpu < unsigned char , signed char , unsigned char > ,
addWeighted_gpu < unsigned char , signed char , signed char > ,
addWeighted_gpu < unsigned char , signed char , unsigned short > ,
addWeighted_gpu < unsigned char , signed char , short > ,
addWeighted_gpu < unsigned char , signed char , int > ,
addWeighted_gpu < unsigned char , signed char , float > ,
addWeighted_gpu < unsigned char , signed char , double >
} ,
{
addWeighted_gpu < unsigned char , unsigned short , unsigned char > ,
addWeighted_gpu < unsigned char , unsigned short , signed char > ,
addWeighted_gpu < unsigned char , unsigned short , unsigned short > ,
addWeighted_gpu < unsigned char , unsigned short , short > ,
addWeighted_gpu < unsigned char , unsigned short , int > ,
addWeighted_gpu < unsigned char , unsigned short , float > ,
addWeighted_gpu < unsigned char , unsigned short , double >
} ,
{
addWeighted_gpu < unsigned char , short , unsigned char > ,
addWeighted_gpu < unsigned char , short , signed char > ,
addWeighted_gpu < unsigned char , short , unsigned short > ,
addWeighted_gpu < unsigned char , short , short > ,
addWeighted_gpu < unsigned char , short , int > ,
addWeighted_gpu < unsigned char , short , float > ,
addWeighted_gpu < unsigned char , short , double >
} ,
{
addWeighted_gpu < unsigned char , int , unsigned char > ,
addWeighted_gpu < unsigned char , int , signed char > ,
addWeighted_gpu < unsigned char , int , unsigned short > ,
addWeighted_gpu < unsigned char , int , short > ,
addWeighted_gpu < unsigned char , int , int > ,
addWeighted_gpu < unsigned char , int , float > ,
addWeighted_gpu < unsigned char , int , double >
} ,
{
addWeighted_gpu < unsigned char , float , unsigned char > ,
addWeighted_gpu < unsigned char , float , signed char > ,
addWeighted_gpu < unsigned char , float , unsigned short > ,
addWeighted_gpu < unsigned char , float , short > ,
addWeighted_gpu < unsigned char , float , int > ,
addWeighted_gpu < unsigned char , float , float > ,
addWeighted_gpu < unsigned char , float , double >
} ,
{
addWeighted_gpu < unsigned char , double , unsigned char > ,
addWeighted_gpu < unsigned char , double , signed char > ,
addWeighted_gpu < unsigned char , double , unsigned short > ,
addWeighted_gpu < unsigned char , double , short > ,
addWeighted_gpu < unsigned char , double , int > ,
addWeighted_gpu < unsigned char , double , float > ,
addWeighted_gpu < unsigned char , double , double >
}
} ,
{
{
0 /*addWeighted_gpu<signed char, unsigned char, unsigned char >*/ ,
0 /*addWeighted_gpu<signed char, unsigned char, signed char >*/ ,
0 /*addWeighted_gpu<signed char, unsigned char, unsigned short>*/ ,
0 /*addWeighted_gpu<signed char, unsigned char, short >*/ ,
0 /*addWeighted_gpu<signed char, unsigned char, int >*/ ,
0 /*addWeighted_gpu<signed char, unsigned char, float >*/ ,
0 /*addWeighted_gpu<signed char, unsigned char, double>*/
} ,
{
addWeighted_gpu < signed char , signed char , unsigned char > ,
addWeighted_gpu < signed char , signed char , signed char > ,
addWeighted_gpu < signed char , signed char , unsigned short > ,
addWeighted_gpu < signed char , signed char , short > ,
addWeighted_gpu < signed char , signed char , int > ,
addWeighted_gpu < signed char , signed char , float > ,
addWeighted_gpu < signed char , signed char , double >
} ,
{
addWeighted_gpu < signed char , unsigned short , unsigned char > ,
addWeighted_gpu < signed char , unsigned short , signed char > ,
addWeighted_gpu < signed char , unsigned short , unsigned short > ,
addWeighted_gpu < signed char , unsigned short , short > ,
addWeighted_gpu < signed char , unsigned short , int > ,
addWeighted_gpu < signed char , unsigned short , float > ,
addWeighted_gpu < signed char , unsigned short , double >
} ,
{
addWeighted_gpu < signed char , short , unsigned char > ,
addWeighted_gpu < signed char , short , signed char > ,
addWeighted_gpu < signed char , short , unsigned short > ,
addWeighted_gpu < signed char , short , short > ,
addWeighted_gpu < signed char , short , int > ,
addWeighted_gpu < signed char , short , float > ,
addWeighted_gpu < signed char , short , double >
} ,
{
addWeighted_gpu < signed char , int , unsigned char > ,
addWeighted_gpu < signed char , int , signed char > ,
addWeighted_gpu < signed char , int , unsigned short > ,
addWeighted_gpu < signed char , int , short > ,
addWeighted_gpu < signed char , int , int > ,
addWeighted_gpu < signed char , int , float > ,
addWeighted_gpu < signed char , int , double >
} ,
{
addWeighted_gpu < signed char , float , unsigned char > ,
addWeighted_gpu < signed char , float , signed char > ,
addWeighted_gpu < signed char , float , unsigned short > ,
addWeighted_gpu < signed char , float , short > ,
addWeighted_gpu < signed char , float , int > ,
addWeighted_gpu < signed char , float , float > ,
addWeighted_gpu < signed char , float , double >
} ,
{
addWeighted_gpu < signed char , double , unsigned char > ,
addWeighted_gpu < signed char , double , signed char > ,
addWeighted_gpu < signed char , double , unsigned short > ,
addWeighted_gpu < signed char , double , short > ,
addWeighted_gpu < signed char , double , int > ,
addWeighted_gpu < signed char , double , float > ,
addWeighted_gpu < signed char , double , double >
}
} ,
{
{
0 /*addWeighted_gpu<unsigned short, unsigned char, unsigned char >*/ ,
0 /*addWeighted_gpu<unsigned short, unsigned char, signed char >*/ ,
0 /*addWeighted_gpu<unsigned short, unsigned char, unsigned short>*/ ,
0 /*addWeighted_gpu<unsigned short, unsigned char, short >*/ ,
0 /*addWeighted_gpu<unsigned short, unsigned char, int >*/ ,
0 /*addWeighted_gpu<unsigned short, unsigned char, float >*/ ,
0 /*addWeighted_gpu<unsigned short, unsigned char, double>*/
} ,
{
0 /*addWeighted_gpu<unsigned short, signed char, unsigned char >*/ ,
0 /*addWeighted_gpu<unsigned short, signed char, signed char >*/ ,
0 /*addWeighted_gpu<unsigned short, signed char, unsigned short>*/ ,
0 /*addWeighted_gpu<unsigned short, signed char, short >*/ ,
0 /*addWeighted_gpu<unsigned short, signed char, int >*/ ,
0 /*addWeighted_gpu<unsigned short, signed char, float >*/ ,
0 /*addWeighted_gpu<unsigned short, signed char, double>*/
} ,
{
addWeighted_gpu < unsigned short , unsigned short , unsigned char > ,
addWeighted_gpu < unsigned short , unsigned short , signed char > ,
addWeighted_gpu < unsigned short , unsigned short , unsigned short > ,
addWeighted_gpu < unsigned short , unsigned short , short > ,
addWeighted_gpu < unsigned short , unsigned short , int > ,
addWeighted_gpu < unsigned short , unsigned short , float > ,
addWeighted_gpu < unsigned short , unsigned short , double >
} ,
{
addWeighted_gpu < unsigned short , short , unsigned char > ,
addWeighted_gpu < unsigned short , short , signed char > ,
addWeighted_gpu < unsigned short , short , unsigned short > ,
addWeighted_gpu < unsigned short , short , short > ,
addWeighted_gpu < unsigned short , short , int > ,
addWeighted_gpu < unsigned short , short , float > ,
addWeighted_gpu < unsigned short , short , double >
} ,
{
addWeighted_gpu < unsigned short , int , unsigned char > ,
addWeighted_gpu < unsigned short , int , signed char > ,
addWeighted_gpu < unsigned short , int , unsigned short > ,
addWeighted_gpu < unsigned short , int , short > ,
addWeighted_gpu < unsigned short , int , int > ,
addWeighted_gpu < unsigned short , int , float > ,
addWeighted_gpu < unsigned short , int , double >
} ,
{
addWeighted_gpu < unsigned short , float , unsigned char > ,
addWeighted_gpu < unsigned short , float , signed char > ,
addWeighted_gpu < unsigned short , float , unsigned short > ,
addWeighted_gpu < unsigned short , float , short > ,
addWeighted_gpu < unsigned short , float , int > ,
addWeighted_gpu < unsigned short , float , float > ,
addWeighted_gpu < unsigned short , float , double >
} ,
{
addWeighted_gpu < unsigned short , double , unsigned char > ,
addWeighted_gpu < unsigned short , double , signed char > ,
addWeighted_gpu < unsigned short , double , unsigned short > ,
addWeighted_gpu < unsigned short , double , short > ,
addWeighted_gpu < unsigned short , double , int > ,
addWeighted_gpu < unsigned short , double , float > ,
addWeighted_gpu < unsigned short , double , double >
}
} ,
{
{
0 /*addWeighted_gpu<short, unsigned char, unsigned char >*/ ,
0 /*addWeighted_gpu<short, unsigned char, signed char >*/ ,
0 /*addWeighted_gpu<short, unsigned char, unsigned short>*/ ,
0 /*addWeighted_gpu<short, unsigned char, short >*/ ,
0 /*addWeighted_gpu<short, unsigned char, int >*/ ,
0 /*addWeighted_gpu<short, unsigned char, float >*/ ,
0 /*addWeighted_gpu<short, unsigned char, double>*/
} ,
{
0 /*addWeighted_gpu<short, signed char, unsigned char >*/ ,
0 /*addWeighted_gpu<short, signed char, signed char >*/ ,
0 /*addWeighted_gpu<short, signed char, unsigned short>*/ ,
0 /*addWeighted_gpu<short, signed char, short >*/ ,
0 /*addWeighted_gpu<short, signed char, int >*/ ,
0 /*addWeighted_gpu<short, signed char, float >*/ ,
0 /*addWeighted_gpu<short, signed char, double>*/
} ,
{
0 /*addWeighted_gpu<short, unsigned short, unsigned char >*/ ,
0 /*addWeighted_gpu<short, unsigned short, signed char >*/ ,
0 /*addWeighted_gpu<short, unsigned short, unsigned short>*/ ,
0 /*addWeighted_gpu<short, unsigned short, short >*/ ,
0 /*addWeighted_gpu<short, unsigned short, int >*/ ,
0 /*addWeighted_gpu<short, unsigned short, float >*/ ,
0 /*addWeighted_gpu<short, unsigned short, double>*/
} ,
{
addWeighted_gpu < short , short , unsigned char > ,
addWeighted_gpu < short , short , signed char > ,
addWeighted_gpu < short , short , unsigned short > ,
addWeighted_gpu < short , short , short > ,
addWeighted_gpu < short , short , int > ,
addWeighted_gpu < short , short , float > ,
addWeighted_gpu < short , short , double >
} ,
{
addWeighted_gpu < short , int , unsigned char > ,
addWeighted_gpu < short , int , signed char > ,
addWeighted_gpu < short , int , unsigned short > ,
addWeighted_gpu < short , int , short > ,
addWeighted_gpu < short , int , int > ,
addWeighted_gpu < short , int , float > ,
addWeighted_gpu < short , int , double >
} ,
{
addWeighted_gpu < short , float , unsigned char > ,
addWeighted_gpu < short , float , signed char > ,
addWeighted_gpu < short , float , unsigned short > ,
addWeighted_gpu < short , float , short > ,
addWeighted_gpu < short , float , int > ,
addWeighted_gpu < short , float , float > ,
addWeighted_gpu < short , float , double >
} ,
{
addWeighted_gpu < short , double , unsigned char > ,
addWeighted_gpu < short , double , signed char > ,
addWeighted_gpu < short , double , unsigned short > ,
addWeighted_gpu < short , double , short > ,
addWeighted_gpu < short , double , int > ,
addWeighted_gpu < short , double , float > ,
addWeighted_gpu < short , double , double >
}
} ,
{
{
0 /*addWeighted_gpu<int, unsigned char, unsigned char >*/ ,
0 /*addWeighted_gpu<int, unsigned char, signed char >*/ ,
0 /*addWeighted_gpu<int, unsigned char, unsigned short>*/ ,
0 /*addWeighted_gpu<int, unsigned char, short >*/ ,
0 /*addWeighted_gpu<int, unsigned char, int >*/ ,
0 /*addWeighted_gpu<int, unsigned char, float >*/ ,
0 /*addWeighted_gpu<int, unsigned char, double>*/
} ,
{
0 /*addWeighted_gpu<int, signed char, unsigned char >*/ ,
0 /*addWeighted_gpu<int, signed char, signed char >*/ ,
0 /*addWeighted_gpu<int, signed char, unsigned short>*/ ,
0 /*addWeighted_gpu<int, signed char, short >*/ ,
0 /*addWeighted_gpu<int, signed char, int >*/ ,
0 /*addWeighted_gpu<int, signed char, float >*/ ,
0 /*addWeighted_gpu<int, signed char, double>*/
} ,
{
0 /*addWeighted_gpu<int, unsigned short, unsigned char >*/ ,
0 /*addWeighted_gpu<int, unsigned short, signed char >*/ ,
0 /*addWeighted_gpu<int, unsigned short, unsigned short>*/ ,
0 /*addWeighted_gpu<int, unsigned short, short >*/ ,
0 /*addWeighted_gpu<int, unsigned short, int >*/ ,
0 /*addWeighted_gpu<int, unsigned short, float >*/ ,
0 /*addWeighted_gpu<int, unsigned short, double>*/
} ,
{
0 /*addWeighted_gpu<int, short, unsigned char >*/ ,
0 /*addWeighted_gpu<int, short, signed char >*/ ,
0 /*addWeighted_gpu<int, short, unsigned short>*/ ,
0 /*addWeighted_gpu<int, short, short >*/ ,
0 /*addWeighted_gpu<int, short, int >*/ ,
0 /*addWeighted_gpu<int, short, float >*/ ,
0 /*addWeighted_gpu<int, short, double>*/
} ,
{
addWeighted_gpu < int , int , unsigned char > ,
addWeighted_gpu < int , int , signed char > ,
addWeighted_gpu < int , int , unsigned short > ,
addWeighted_gpu < int , int , short > ,
addWeighted_gpu < int , int , int > ,
addWeighted_gpu < int , int , float > ,
addWeighted_gpu < int , int , double >
} ,
{
addWeighted_gpu < int , float , unsigned char > ,
addWeighted_gpu < int , float , signed char > ,
addWeighted_gpu < int , float , unsigned short > ,
addWeighted_gpu < int , float , short > ,
addWeighted_gpu < int , float , int > ,
addWeighted_gpu < int , float , float > ,
addWeighted_gpu < int , float , double >
} ,
{
addWeighted_gpu < int , double , unsigned char > ,
addWeighted_gpu < int , double , signed char > ,
addWeighted_gpu < int , double , unsigned short > ,
addWeighted_gpu < int , double , short > ,
addWeighted_gpu < int , double , int > ,
addWeighted_gpu < int , double , float > ,
addWeighted_gpu < int , double , double >
}
} ,
{
{
0 /*addWeighted_gpu<float, unsigned char, unsigned char >*/ ,
0 /*addWeighted_gpu<float, unsigned char, signed char >*/ ,
0 /*addWeighted_gpu<float, unsigned char, unsigned short>*/ ,
0 /*addWeighted_gpu<float, unsigned char, short >*/ ,
0 /*addWeighted_gpu<float, unsigned char, int >*/ ,
0 /*addWeighted_gpu<float, unsigned char, float >*/ ,
0 /*addWeighted_gpu<float, unsigned char, double>*/
} ,
{
0 /*addWeighted_gpu<float, signed char, unsigned char >*/ ,
0 /*addWeighted_gpu<float, signed char, signed char >*/ ,
0 /*addWeighted_gpu<float, signed char, unsigned short>*/ ,
0 /*addWeighted_gpu<float, signed char, short >*/ ,
0 /*addWeighted_gpu<float, signed char, int >*/ ,
0 /*addWeighted_gpu<float, signed char, float >*/ ,
0 /*addWeighted_gpu<float, signed char, double>*/
} ,
{
0 /*addWeighted_gpu<float, unsigned short, unsigned char >*/ ,
0 /*addWeighted_gpu<float, unsigned short, signed char >*/ ,
0 /*addWeighted_gpu<float, unsigned short, unsigned short>*/ ,
0 /*addWeighted_gpu<float, unsigned short, short >*/ ,
0 /*addWeighted_gpu<float, unsigned short, int >*/ ,
0 /*addWeighted_gpu<float, unsigned short, float >*/ ,
0 /*addWeighted_gpu<float, unsigned short, double>*/
} ,
{
0 /*addWeighted_gpu<float, short, unsigned char >*/ ,
0 /*addWeighted_gpu<float, short, signed char >*/ ,
0 /*addWeighted_gpu<float, short, unsigned short>*/ ,
0 /*addWeighted_gpu<float, short, short >*/ ,
0 /*addWeighted_gpu<float, short, int >*/ ,
0 /*addWeighted_gpu<float, short, float >*/ ,
0 /*addWeighted_gpu<float, short, double>*/
} ,
{
0 /*addWeighted_gpu<float, int, unsigned char >*/ ,
0 /*addWeighted_gpu<float, int, signed char >*/ ,
0 /*addWeighted_gpu<float, int, unsigned short>*/ ,
0 /*addWeighted_gpu<float, int, short >*/ ,
0 /*addWeighted_gpu<float, int, int >*/ ,
0 /*addWeighted_gpu<float, int, float >*/ ,
0 /*addWeighted_gpu<float, int, double>*/
} ,
{
addWeighted_gpu < float , float , unsigned char > ,
addWeighted_gpu < float , float , signed char > ,
addWeighted_gpu < float , float , unsigned short > ,
addWeighted_gpu < float , float , short > ,
addWeighted_gpu < float , float , int > ,
addWeighted_gpu < float , float , float > ,
addWeighted_gpu < float , float , double >
} ,
{
addWeighted_gpu < float , double , unsigned char > ,
addWeighted_gpu < float , double , signed char > ,
addWeighted_gpu < float , double , unsigned short > ,
addWeighted_gpu < float , double , short > ,
addWeighted_gpu < float , double , int > ,
addWeighted_gpu < float , double , float > ,
addWeighted_gpu < float , double , double >
}
} ,
{
{
0 /*addWeighted_gpu<double, unsigned char, unsigned char >*/ ,
0 /*addWeighted_gpu<double, unsigned char, signed char >*/ ,
0 /*addWeighted_gpu<double, unsigned char, unsigned short>*/ ,
0 /*addWeighted_gpu<double, unsigned char, short >*/ ,
0 /*addWeighted_gpu<double, unsigned char, int >*/ ,
0 /*addWeighted_gpu<double, unsigned char, float >*/ ,
0 /*addWeighted_gpu<double, unsigned char, double>*/
} ,
{
0 /*addWeighted_gpu<double, signed char, unsigned char >*/ ,
0 /*addWeighted_gpu<double, signed char, signed char >*/ ,
0 /*addWeighted_gpu<double, signed char, unsigned short>*/ ,
0 /*addWeighted_gpu<double, signed char, short >*/ ,
0 /*addWeighted_gpu<double, signed char, int >*/ ,
0 /*addWeighted_gpu<double, signed char, float >*/ ,
0 /*addWeighted_gpu<double, signed char, double>*/
} ,
{
0 /*addWeighted_gpu<double, unsigned short, unsigned char >*/ ,
0 /*addWeighted_gpu<double, unsigned short, signed char >*/ ,
0 /*addWeighted_gpu<double, unsigned short, unsigned short>*/ ,
0 /*addWeighted_gpu<double, unsigned short, short >*/ ,
0 /*addWeighted_gpu<double, unsigned short, int >*/ ,
0 /*addWeighted_gpu<double, unsigned short, float >*/ ,
0 /*addWeighted_gpu<double, unsigned short, double>*/
} ,
{
0 /*addWeighted_gpu<double, short, unsigned char >*/ ,
0 /*addWeighted_gpu<double, short, signed char >*/ ,
0 /*addWeighted_gpu<double, short, unsigned short>*/ ,
0 /*addWeighted_gpu<double, short, short >*/ ,
0 /*addWeighted_gpu<double, short, int >*/ ,
0 /*addWeighted_gpu<double, short, float >*/ ,
0 /*addWeighted_gpu<double, short, double>*/
} ,
{
0 /*addWeighted_gpu<double, int, unsigned char >*/ ,
0 /*addWeighted_gpu<double, int, signed char >*/ ,
0 /*addWeighted_gpu<double, int, unsigned short>*/ ,
0 /*addWeighted_gpu<double, int, short >*/ ,
0 /*addWeighted_gpu<double, int, int >*/ ,
0 /*addWeighted_gpu<double, int, float >*/ ,
0 /*addWeighted_gpu<double, int, double>*/
} ,
{
0 /*addWeighted_gpu<double, float, unsigned char >*/ ,
0 /*addWeighted_gpu<double, float, signed char >*/ ,
0 /*addWeighted_gpu<double, float, unsigned short>*/ ,
0 /*addWeighted_gpu<double, float, short >*/ ,
0 /*addWeighted_gpu<double, float, int >*/ ,
0 /*addWeighted_gpu<double, float, float >*/ ,
0 /*addWeighted_gpu<double, float, double>*/
} ,
{
addWeighted_gpu < double , double , unsigned char > ,
addWeighted_gpu < double , double , signed char > ,
addWeighted_gpu < double , double , unsigned short > ,
addWeighted_gpu < double , double , short > ,
addWeighted_gpu < double , double , int > ,
addWeighted_gpu < double , double , float > ,
addWeighted_gpu < double , double , double >
}
}
} ;
2012-03-20 20:03:34 +08:00
CV_Assert ( src1 . size ( ) = = src2 . size ( ) ) ;
CV_Assert ( src1 . type ( ) = = src2 . type ( ) | | ( dtype > = 0 & & src1 . channels ( ) = = src2 . channels ( ) ) ) ;
dtype = dtype > = 0 ? CV_MAKETYPE ( dtype , src1 . channels ( ) ) : src1 . type ( ) ;
2012-03-26 22:33:43 +08:00
CV_Assert ( src1 . depth ( ) < = CV_64F & & src2 . depth ( ) < = CV_64F & & CV_MAT_DEPTH ( dtype ) < = CV_64F ) ;
if ( src1 . depth ( ) = = CV_64F | | src2 . depth ( ) = = CV_64F | | CV_MAT_DEPTH ( dtype ) = = CV_64F )
{
if ( ! TargetArchs : : builtWith ( NATIVE_DOUBLE ) | | ! DeviceInfo ( ) . supports ( NATIVE_DOUBLE ) )
CV_Error ( CV_StsUnsupportedFormat , " The device doesn't support double " ) ;
}
2012-03-20 20:03:34 +08:00
dst . create ( src1 . size ( ) , dtype ) ;
const GpuMat * psrc1 = & src1 ;
const GpuMat * psrc2 = & src2 ;
if ( src1 . depth ( ) > src2 . depth ( ) )
{
std : : swap ( psrc1 , psrc2 ) ;
std : : swap ( alpha , beta ) ;
}
const func_t func = funcs [ psrc1 - > depth ( ) ] [ psrc2 - > depth ( ) ] [ dst . depth ( ) ] ;
2012-03-26 22:33:43 +08:00
if ( ! func )
CV_Error ( CV_StsUnsupportedFormat , " Unsupported combination of source and destination types " ) ;
2012-03-20 20:03:34 +08:00
func ( psrc1 - > reshape ( 1 ) , alpha , psrc2 - > reshape ( 1 ) , beta , gamma , dst . reshape ( 1 ) , StreamAccessor : : getStream ( stream ) ) ;
2011-09-21 16:58:54 +08:00
}
2011-06-30 22:39:48 +08:00
# endif