diff --git a/modules/core/include/opencv2/core/cuda/vec_math.hpp b/modules/core/include/opencv2/core/cuda/vec_math.hpp index e4f981476e..0dc92c3ef6 100644 --- a/modules/core/include/opencv2/core/cuda/vec_math.hpp +++ b/modules/core/include/opencv2/core/cuda/vec_math.hpp @@ -43,288 +43,880 @@ #ifndef __OPENCV_GPU_VECMATH_HPP__ #define __OPENCV_GPU_VECMATH_HPP__ -#include "saturate_cast.hpp" #include "vec_traits.hpp" -#include "functional.hpp" +#include "saturate_cast.hpp" namespace cv { namespace gpu { namespace cudev { - namespace vec_math_detail - { - template struct SatCastHelper; - template struct SatCastHelper<1, VecD> - { - template static __device__ __forceinline__ VecD cast(const VecS& v) - { - typedef typename VecTraits::elem_type D; - return VecTraits::make(saturate_cast(v.x)); - } - }; - template struct SatCastHelper<2, VecD> - { - template static __device__ __forceinline__ VecD cast(const VecS& v) - { - typedef typename VecTraits::elem_type D; - return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y)); - } - }; - template struct SatCastHelper<3, VecD> - { - template static __device__ __forceinline__ VecD cast(const VecS& v) - { - typedef typename VecTraits::elem_type D; - return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z)); - } - }; - template struct SatCastHelper<4, VecD> - { - template static __device__ __forceinline__ VecD cast(const VecS& v) - { - typedef typename VecTraits::elem_type D; - return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z), saturate_cast(v.w)); - } - }; - template static __device__ __forceinline__ VecD saturate_cast_caller(const VecS& v) +// saturate_cast + +namespace vec_math_detail +{ + template struct SatCastHelper; + template struct SatCastHelper<1, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) { - return SatCastHelper::cn, VecD>::cast(v); + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x)); } 
- } - - template static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - - template static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return 
vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - - template static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - - template static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ 
_Tp saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - -#define OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, op, func) \ - __device__ __forceinline__ TypeVec::result_type, 1>::vec_type op(const type ## 1 & a) \ - { \ - func f; \ - return VecTraits::result_type, 1>::vec_type>::make(f(a.x)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 2>::vec_type op(const type ## 2 & a) \ - { \ - func f; \ - return VecTraits::result_type, 2>::vec_type>::make(f(a.x), f(a.y)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 3>::vec_type op(const type ## 3 & a) \ - { \ - func f; \ - return VecTraits::result_type, 3>::vec_type>::make(f(a.x), f(a.y), f(a.z)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 4>::vec_type op(const type ## 4 & a) \ - { \ - func f; \ - return VecTraits::result_type, 4>::vec_type>::make(f(a.x), f(a.y), f(a.z), f(a.w)); \ - } - - namespace vec_math_detail + }; + template struct SatCastHelper<2, VecD> { - template struct BinOpTraits + template static __device__ __forceinline__ VecD cast(const VecS& v) { - typedef int argument_type; - }; - template struct BinOpTraits + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y)); + } + }; + template struct SatCastHelper<3, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) { - typedef T argument_type; - }; - template struct BinOpTraits + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z)); + } + }; + template struct SatCastHelper<4, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) { - typedef double 
argument_type; - }; - template struct BinOpTraits - { - typedef double argument_type; - }; - template <> struct BinOpTraits - { - typedef double argument_type; - }; - template struct BinOpTraits - { - typedef float argument_type; - }; - template struct BinOpTraits - { - typedef float argument_type; - }; - template <> struct BinOpTraits - { - typedef float argument_type; - }; - template <> struct BinOpTraits - { - typedef double argument_type; - }; - template <> struct BinOpTraits - { - typedef double argument_type; - }; + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z), saturate_cast(v.w)); + } + }; + + template static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v) + { + return SatCastHelper::cn, VecD>::cast(v); + } +} + +template static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper(v);} + +template static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static 
__device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper(v);} + +template static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper(v);} + +template static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return 
vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper(v);} + +// unary operators + +#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \ + { \ + return VecTraits::make(op (a.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \ + { \ + return VecTraits::make(op (a.x), op (a.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \ + { \ + return VecTraits::make(op (a.x), op (a.y), op (a.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \ + { \ + return VecTraits::make(op (a.x), op (a.y), op (a.z), op (a.w)); \ } -#define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \ - __device__ __forceinline__ TypeVec::result_type, 1>::vec_type op(const type ## 1 & a, const type ## 1 & b) \ +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float) 
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP + +// unary functions + +#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \ { \ - func f; \ - return VecTraits::result_type, 1>::vec_type>::make(f(a.x, b.x)); \ + return VecTraits::make(func (a.x)); \ } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \ { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \ + return VecTraits::make(func (a.x), func (a.y)); \ } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \ + __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \ { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \ + return VecTraits::make(func (a.x), func (a.y), func (a.z)); \ } \ - __device__ __forceinline__ TypeVec::result_type, 2>::vec_type op(const type ## 2 & a, const 
type ## 2 & b) \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \ { \ - func f; \ - return VecTraits::result_type, 2>::vec_type>::make(f(a.x, b.x), f(a.y, b.y)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 3>::vec_type op(const type ## 3 & a, const type ## 3 & b) \ - { \ - func f; \ - return VecTraits::result_type, 3>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 4>::vec_type op(const type ## 4 & a, const type ## 4 & b) \ - { \ - func f; \ - return VecTraits::result_type, 4>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z), f(a.w, b.w)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 
4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \ + return VecTraits::make(func (a.x), func (a.y), func (a.z), func (a.w)); \ } -#define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator +, plus) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator -, minus) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator *, multiplies) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator /, divides) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator -, negate) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ==, equal_to) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator !=, not_equal_to) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator > , greater) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator < , less) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator >=, greater_equal) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator <=, less_equal) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &&, logical_and) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ||, logical_or) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ! 
, logical_not) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, max, maximum) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, min, minimum) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, abs, abs_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sqrt, sqrt_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp, exp_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp2, exp2_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp10, exp10_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log, log_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log2, log2_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log10, log10_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sin, sin_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cos, cos_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tan, tan_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asin, asin_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acos, acos_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atan, atan_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sinh, sinh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cosh, cosh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tanh, tanh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asinh, asinh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acosh, acosh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atanh, atanh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot, hypot_func) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, atan2, atan2_func) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, pow, pow_func) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot_sqr, hypot_sqr_func) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, 
double) -#define OPENCV_GPU_IMPLEMENT_VEC_INT_OP(type) \ - OPENCV_GPU_IMPLEMENT_VEC_OP(type) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &, bit_and) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator |, bit_or) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ^, bit_xor) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ~, bit_not) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uchar) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(char) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(ushort) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(short) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(int) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uint) - OPENCV_GPU_IMPLEMENT_VEC_OP(float) - OPENCV_GPU_IMPLEMENT_VEC_OP(double) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double) - #undef OPENCV_GPU_IMPLEMENT_VEC_UNOP - #undef OPENCV_GPU_IMPLEMENT_VEC_BINOP - #undef OPENCV_GPU_IMPLEMENT_VEC_OP - #undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP -}}} // namespace cv { namespace gpu { namespace cudev +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, 
float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float) 
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, 
uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double) + 
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, 
float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double) + +#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC + +// binary operators (vec & vec) + +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \ + { \ + return VecTraits::make(a.x op b.x); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \ + { \ + return VecTraits::make(a.x op b.x, a.y op b.y); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \ + { \ + return VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \ + { \ + return VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \ + } + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int) 
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar) 
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, 
double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP + +// binary operators (vec & scalar) + +#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s); \ + } \ + __device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \ + { \ + return VecTraits::make(s op b.x); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s, a.y op s); \ + } \ + __device__ 
__forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \ + { \ + return VecTraits::make(s op b.x, s op b.y); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s, a.y op s, a.z op s); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \ + { \ + return VecTraits::make(s op b.x, s op b.y, s op b.z); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s, a.y op s, a.z op s, a.w op s); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \ + { \ + return VecTraits::make(s op b.x, s op b.y, s op b.z, s op b.w); \ + } + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP + +// binary function (vec & vec) + +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \ + { \ + return VecTraits::make(func (a.x, b.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \ + { \ + return VecTraits::make(func (a.x, b.x), func (a.y, b.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \ + { \ + return VecTraits::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \ + { \ + return VecTraits::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \ + } + 
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float) 
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double) + +#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC + +// binary function (vec & scalar) + +#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(scalar_type s, 
const input_type ## 4 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \ + } + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, 
float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double) + +#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC + +}}} // namespace cv { namespace gpu { namespace cudev #endif // __OPENCV_GPU_VECMATH_HPP__ diff --git a/modules/core/include/opencv2/core/gpu_private.hpp b/modules/core/include/opencv2/core/gpu_private.hpp index 7692bc20e6..7a45a50f07 100644 --- a/modules/core/include/opencv2/core/gpu_private.hpp +++ 
b/modules/core/include/opencv2/core/gpu_private.hpp @@ -60,6 +60,8 @@ # include "opencv2/core/stream_accessor.hpp" # include "opencv2/core/cuda/common.hpp" +# define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD) + # define CUDART_MINIMUM_REQUIRED_VERSION 4020 # if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION) diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp index 11bb41948a..9637f86489 100644 --- a/modules/core/src/gpumat.cpp +++ b/modules/core/src/gpumat.cpp @@ -1547,48 +1547,90 @@ namespace const ErrorEntry npp_errors [] = { - error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ), - error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ), - error_entry( NPP_RESIZE_NO_OPERATION_ERROR ), - -#if defined (_MSC_VER) + #if defined (_MSC_VER) error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ), -#endif + #endif + #if NPP_VERSION < 5500 error_entry( NPP_BAD_ARG_ERROR ), - error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ), - error_entry( NPP_TEXTURE_BIND_ERROR ), error_entry( NPP_COEFF_ERROR ), error_entry( NPP_RECT_ERROR ), error_entry( NPP_QUAD_ERROR ), + error_entry( NPP_MEMFREE_ERR ), + error_entry( NPP_MEMSET_ERR ), + error_entry( NPP_MEM_ALLOC_ERR ), + error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ), + error_entry( NPP_MIRROR_FLIP_ERR ), + error_entry( NPP_INVALID_INPUT ), + error_entry( NPP_POINTER_ERROR ), + error_entry( NPP_WARNING ), + error_entry( NPP_ODD_ROI_WARNING ), + #else + error_entry( NPP_INVALID_HOST_POINTER_ERROR ), + error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ), + error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ), + error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ), + error_entry( NPP_MEMFREE_ERROR ), + error_entry( NPP_MEMSET_ERROR ), + error_entry( NPP_QUALITY_INDEX_ERROR ), + error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ), + error_entry( NPP_CHANNEL_ORDER_ERROR ), + error_entry( NPP_ZERO_MASK_VALUE_ERROR ), + error_entry( NPP_QUADRANGLE_ERROR ), + error_entry( NPP_RECTANGLE_ERROR ), + error_entry( 
NPP_COEFFICIENT_ERROR ), + error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ), + error_entry( NPP_COI_ERROR ), + error_entry( NPP_DIVISOR_ERROR ), + error_entry( NPP_CHANNEL_ERROR ), + error_entry( NPP_STRIDE_ERROR ), + error_entry( NPP_ANCHOR_ERROR ), + error_entry( NPP_MASK_SIZE_ERROR ), + error_entry( NPP_MIRROR_FLIP_ERROR ), + error_entry( NPP_MOMENT_00_ZERO_ERROR ), + error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ), + error_entry( NPP_THRESHOLD_ERROR ), + error_entry( NPP_CONTEXT_MATCH_ERROR ), + error_entry( NPP_FFT_FLAG_ERROR ), + error_entry( NPP_FFT_ORDER_ERROR ), + error_entry( NPP_SCALE_RANGE_ERROR ), + error_entry( NPP_DATA_TYPE_ERROR ), + error_entry( NPP_OUT_OFF_RANGE_ERROR ), + error_entry( NPP_DIVIDE_BY_ZERO_ERROR ), + error_entry( NPP_MEMORY_ALLOCATION_ERR ), + error_entry( NPP_RANGE_ERROR ), + error_entry( NPP_BAD_ARGUMENT_ERROR ), + error_entry( NPP_NO_MEMORY_ERROR ), + error_entry( NPP_ERROR_RESERVED ), + error_entry( NPP_NO_OPERATION_WARNING ), + error_entry( NPP_DIVIDE_BY_ZERO_WARNING ), + error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ), + #endif + + error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ), + error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ), + error_entry( NPP_RESIZE_NO_OPERATION_ERROR ), + error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ), + error_entry( NPP_TEXTURE_BIND_ERROR ), error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ), error_entry( NPP_NOT_EVEN_STEP_ERROR ), error_entry( NPP_INTERPOLATION_ERROR ), error_entry( NPP_RESIZE_FACTOR_ERROR ), error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ), - error_entry( NPP_MEMFREE_ERR ), - error_entry( NPP_MEMSET_ERR ), error_entry( NPP_MEMCPY_ERROR ), - error_entry( NPP_MEM_ALLOC_ERR ), - error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ), - error_entry( NPP_MIRROR_FLIP_ERR ), - error_entry( NPP_INVALID_INPUT ), error_entry( NPP_ALIGNMENT_ERROR ), error_entry( NPP_STEP_ERROR ), error_entry( NPP_SIZE_ERROR ), - error_entry( NPP_POINTER_ERROR ), error_entry( NPP_NULL_POINTER_ERROR ), error_entry( 
NPP_CUDA_KERNEL_EXECUTION_ERROR ), error_entry( NPP_NOT_IMPLEMENTED_ERROR ), error_entry( NPP_ERROR ), error_entry( NPP_NO_ERROR ), error_entry( NPP_SUCCESS ), - error_entry( NPP_WARNING ), error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ), error_entry( NPP_MISALIGNED_DST_ROI_WARNING ), error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ), - error_entry( NPP_DOUBLE_SIZE_WARNING ), - error_entry( NPP_ODD_ROI_WARNING ) + error_entry( NPP_DOUBLE_SIZE_WARNING ) }; const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]); diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu index 9552f1b06f..58ceb99cd8 100644 --- a/modules/gpu/src/cuda/ccomponetns.cu +++ b/modules/gpu/src/cuda/ccomponetns.cu @@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace cudev template __device__ __forceinline__ bool operator() (const I& a, const I& b) const { - I d = a - b; + I d = saturate_cast(a - b); return lo.x <= d.x && d.x <= hi.x && lo.y <= d.y && d.y <= hi.y && lo.z <= d.z && d.z <= hi.z; @@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace cudev template __device__ __forceinline__ bool operator() (const I& a, const I& b) const { - I d = a - b; + I d = saturate_cast(a - b); return lo.x <= d.x && d.x <= hi.x && lo.y <= d.y && d.y <= hi.y && lo.z <= d.z && d.z <= hi.z && diff --git a/modules/gpuarithm/src/cuda/absdiff_mat.cu b/modules/gpuarithm/src/cuda/absdiff_mat.cu index d47cbdc171..d47068ee03 100644 --- a/modules/gpuarithm/src/cuda/absdiff_mat.cu +++ b/modules/gpuarithm/src/cuda/absdiff_mat.cu @@ -62,8 +62,8 @@ namespace arithm return vabsdiff4(a, b); } - __device__ __forceinline__ VAbsDiff4() {} - __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {} + __host__ __device__ __forceinline__ VAbsDiff4() {} + __host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {} }; struct VAbsDiff2 : binary_function @@ -73,8 +73,8 @@ namespace arithm return vabsdiff2(a, b); } - __device__ __forceinline__ VAbsDiff2() {} - 
__device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {} + __host__ __device__ __forceinline__ VAbsDiff2() {} + __host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {} }; __device__ __forceinline__ int _abs(int a) @@ -97,8 +97,8 @@ namespace arithm return saturate_cast(_abs(a - b)); } - __device__ __forceinline__ AbsDiffMat() {} - __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {} + __host__ __device__ __forceinline__ AbsDiffMat() {} + __host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {} }; } diff --git a/modules/gpuarithm/src/cuda/absdiff_scalar.cu b/modules/gpuarithm/src/cuda/absdiff_scalar.cu index e705609b71..5a89791f86 100644 --- a/modules/gpuarithm/src/cuda/absdiff_scalar.cu +++ b/modules/gpuarithm/src/cuda/absdiff_scalar.cu @@ -59,7 +59,7 @@ namespace arithm { S val; - explicit AbsDiffScalar(S val_) : val(val_) {} + __host__ explicit AbsDiffScalar(S val_) : val(val_) {} __device__ __forceinline__ T operator ()(T a) const { diff --git a/modules/gpuarithm/src/cuda/add_mat.cu b/modules/gpuarithm/src/cuda/add_mat.cu index 511e11d0f8..3b1bc1f385 100644 --- a/modules/gpuarithm/src/cuda/add_mat.cu +++ b/modules/gpuarithm/src/cuda/add_mat.cu @@ -62,8 +62,8 @@ namespace arithm return vadd4(a, b); } - __device__ __forceinline__ VAdd4() {} - __device__ __forceinline__ VAdd4(const VAdd4& other) {} + __host__ __device__ __forceinline__ VAdd4() {} + __host__ __device__ __forceinline__ VAdd4(const VAdd4&) {} }; struct VAdd2 : binary_function @@ -73,8 +73,8 @@ namespace arithm return vadd2(a, b); } - __device__ __forceinline__ VAdd2() {} - __device__ __forceinline__ VAdd2(const VAdd2& other) {} + __host__ __device__ __forceinline__ VAdd2() {} + __host__ __device__ __forceinline__ VAdd2(const VAdd2&) {} }; template struct AddMat : binary_function @@ -84,8 +84,8 @@ namespace arithm return saturate_cast(a + b); } - __device__ __forceinline__ AddMat() {} - __device__ __forceinline__ AddMat(const AddMat& other) {} + __host__ 
__device__ __forceinline__ AddMat() {} + __host__ __device__ __forceinline__ AddMat(const AddMat&) {} }; } diff --git a/modules/gpuarithm/src/cuda/add_scalar.cu b/modules/gpuarithm/src/cuda/add_scalar.cu index 3f43f8d7ad..3362c2b930 100644 --- a/modules/gpuarithm/src/cuda/add_scalar.cu +++ b/modules/gpuarithm/src/cuda/add_scalar.cu @@ -59,7 +59,7 @@ namespace arithm { S val; - explicit AddScalar(S val_) : val(val_) {} + __host__ explicit AddScalar(S val_) : val(val_) {} __device__ __forceinline__ D operator ()(T a) const { diff --git a/modules/gpuarithm/src/cuda/add_weighted.cu b/modules/gpuarithm/src/cuda/add_weighted.cu index 88d8de9513..bf632d68f2 100644 --- a/modules/gpuarithm/src/cuda/add_weighted.cu +++ b/modules/gpuarithm/src/cuda/add_weighted.cu @@ -74,7 +74,7 @@ namespace arithm float beta; float gamma; - AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast(alpha_)), beta(static_cast(beta_)), gamma(static_cast(gamma_)) {} + __host__ AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast(alpha_)), beta(static_cast(beta_)), gamma(static_cast(gamma_)) {} __device__ __forceinline__ D operator ()(T1 a, T2 b) const { @@ -87,7 +87,7 @@ namespace arithm double beta; double gamma; - AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {} + __host__ AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {} __device__ __forceinline__ D operator ()(T1 a, T2 b) const { diff --git a/modules/gpuarithm/src/cuda/cmp_mat.cu b/modules/gpuarithm/src/cuda/cmp_mat.cu index 9cf9787a94..6602edf62f 100644 --- a/modules/gpuarithm/src/cuda/cmp_mat.cu +++ b/modules/gpuarithm/src/cuda/cmp_mat.cu @@ -62,8 +62,8 @@ namespace arithm return vcmpeq4(a, b); } - __device__ __forceinline__ VCmpEq4() {} - __device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {} + __host__ __device__ __forceinline__ VCmpEq4() {} + __host__ __device__ __forceinline__ 
VCmpEq4(const VCmpEq4&) {} }; struct VCmpNe4 : binary_function { @@ -72,8 +72,8 @@ namespace arithm return vcmpne4(a, b); } - __device__ __forceinline__ VCmpNe4() {} - __device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {} + __host__ __device__ __forceinline__ VCmpNe4() {} + __host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {} }; struct VCmpLt4 : binary_function { @@ -82,8 +82,8 @@ namespace arithm return vcmplt4(a, b); } - __device__ __forceinline__ VCmpLt4() {} - __device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {} + __host__ __device__ __forceinline__ VCmpLt4() {} + __host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {} }; struct VCmpLe4 : binary_function { @@ -92,8 +92,8 @@ namespace arithm return vcmple4(a, b); } - __device__ __forceinline__ VCmpLe4() {} - __device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {} + __host__ __device__ __forceinline__ VCmpLe4() {} + __host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {} }; template diff --git a/modules/gpuarithm/src/cuda/countnonzero.cu b/modules/gpuarithm/src/cuda/countnonzero.cu index 8373921490..beab82b4b0 100644 --- a/modules/gpuarithm/src/cuda/countnonzero.cu +++ b/modules/gpuarithm/src/cuda/countnonzero.cu @@ -45,6 +45,7 @@ #include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_math.hpp" +#include "opencv2/core/cuda/functional.hpp" #include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/emulation.hpp" diff --git a/modules/gpuarithm/src/cuda/div_inv.cu b/modules/gpuarithm/src/cuda/div_inv.cu index bda3df30b7..9cfda933c7 100644 --- a/modules/gpuarithm/src/cuda/div_inv.cu +++ b/modules/gpuarithm/src/cuda/div_inv.cu @@ -59,7 +59,7 @@ namespace arithm { S val; - explicit DivInv(S val_) : val(val_) {} + __host__ explicit DivInv(S val_) : val(val_) {} __device__ __forceinline__ D operator ()(T a) const { diff --git a/modules/gpuarithm/src/cuda/div_mat.cu b/modules/gpuarithm/src/cuda/div_mat.cu index 
9d50dc7b6a..aab6638900 100644 --- a/modules/gpuarithm/src/cuda/div_mat.cu +++ b/modules/gpuarithm/src/cuda/div_mat.cu @@ -91,8 +91,8 @@ namespace arithm return b != 0 ? saturate_cast(a / b) : 0; } - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} + __host__ __device__ __forceinline__ Div() {} + __host__ __device__ __forceinline__ Div(const Div&) {} }; template struct Div : binary_function { @@ -101,8 +101,8 @@ namespace arithm return b != 0 ? static_cast(a) / b : 0; } - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} + __host__ __device__ __forceinline__ Div() {} + __host__ __device__ __forceinline__ Div(const Div&) {} }; template struct Div : binary_function { @@ -111,15 +111,15 @@ namespace arithm return b != 0 ? static_cast(a) / b : 0; } - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} + __host__ __device__ __forceinline__ Div() {} + __host__ __device__ __forceinline__ Div(const Div&) {} }; template struct DivScale : binary_function { S scale; - explicit DivScale(S scale_) : scale(scale_) {} + __host__ explicit DivScale(S scale_) : scale(scale_) {} __device__ __forceinline__ D operator ()(T a, T b) const { diff --git a/modules/gpuarithm/src/cuda/div_scalar.cu b/modules/gpuarithm/src/cuda/div_scalar.cu index b176cfa2c2..42ba90cb0c 100644 --- a/modules/gpuarithm/src/cuda/div_scalar.cu +++ b/modules/gpuarithm/src/cuda/div_scalar.cu @@ -59,7 +59,7 @@ namespace arithm { S val; - explicit DivScalar(S val_) : val(val_) {} + __host__ explicit DivScalar(S val_) : val(val_) {} __device__ __forceinline__ D operator ()(T a) const { diff --git a/modules/gpuarithm/src/cuda/math.cu b/modules/gpuarithm/src/cuda/math.cu index 86be98ea0e..1f2e7b8a14 100644 --- a/modules/gpuarithm/src/cuda/math.cu +++ b/modules/gpuarithm/src/cuda/math.cu @@ -94,8 +94,8 @@ namespace arithm return saturate_cast(x * x); } - __device__ __forceinline__ Sqr() {} - __device__ 
__forceinline__ Sqr(const Sqr& other) {} + __host__ __device__ __forceinline__ Sqr() {} + __host__ __device__ __forceinline__ Sqr(const Sqr&) {} }; } @@ -190,8 +190,8 @@ namespace arithm return saturate_cast(f(x)); } - __device__ __forceinline__ Exp() {} - __device__ __forceinline__ Exp(const Exp& other) {} + __host__ __device__ __forceinline__ Exp() {} + __host__ __device__ __forceinline__ Exp(const Exp&) {} }; } @@ -228,7 +228,7 @@ namespace arithm { float power; - PowOp(double power_) : power(static_cast(power_)) {} + __host__ explicit PowOp(double power_) : power(static_cast(power_)) {} __device__ __forceinline__ T operator()(T e) const { @@ -239,7 +239,7 @@ namespace arithm { float power; - PowOp(double power_) : power(static_cast(power_)) {} + __host__ explicit PowOp(double power_) : power(static_cast(power_)) {} __device__ __forceinline__ T operator()(T e) const { @@ -255,7 +255,7 @@ namespace arithm { float power; - PowOp(double power_) : power(static_cast(power_)) {} + __host__ explicit PowOp(double power_) : power(static_cast(power_)) {} __device__ __forceinline__ float operator()(float e) const { @@ -266,7 +266,7 @@ namespace arithm { double power; - PowOp(double power_) : power(power_) {} + __host__ explicit PowOp(double power_) : power(power_) {} __device__ __forceinline__ double operator()(double e) const { diff --git a/modules/gpuarithm/src/cuda/minmax.cu b/modules/gpuarithm/src/cuda/minmax.cu index dd1a1f39d8..1bdeb7ddd6 100644 --- a/modules/gpuarithm/src/cuda/minmax.cu +++ b/modules/gpuarithm/src/cuda/minmax.cu @@ -45,6 +45,7 @@ #include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_math.hpp" +#include "opencv2/core/cuda/functional.hpp" #include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/limits.hpp" diff --git a/modules/gpuarithm/src/cuda/minmax_mat.cu b/modules/gpuarithm/src/cuda/minmax_mat.cu index 6e9c247223..0bf5a468d9 100644 
--- a/modules/gpuarithm/src/cuda/minmax_mat.cu +++ b/modules/gpuarithm/src/cuda/minmax_mat.cu @@ -65,8 +65,8 @@ namespace arithm return vmin4(a, b); } - __device__ __forceinline__ VMin4() {} - __device__ __forceinline__ VMin4(const VMin4& other) {} + __host__ __device__ __forceinline__ VMin4() {} + __host__ __device__ __forceinline__ VMin4(const VMin4&) {} }; struct VMin2 : binary_function @@ -76,8 +76,8 @@ namespace arithm return vmin2(a, b); } - __device__ __forceinline__ VMin2() {} - __device__ __forceinline__ VMin2(const VMin2& other) {} + __host__ __device__ __forceinline__ VMin2() {} + __host__ __device__ __forceinline__ VMin2(const VMin2&) {} }; } @@ -151,8 +151,8 @@ namespace arithm return vmax4(a, b); } - __device__ __forceinline__ VMax4() {} - __device__ __forceinline__ VMax4(const VMax4& other) {} + __host__ __device__ __forceinline__ VMax4() {} + __host__ __device__ __forceinline__ VMax4(const VMax4&) {} }; struct VMax2 : binary_function @@ -162,8 +162,8 @@ namespace arithm return vmax2(a, b); } - __device__ __forceinline__ VMax2() {} - __device__ __forceinline__ VMax2(const VMax2& other) {} + __host__ __device__ __forceinline__ VMax2() {} + __host__ __device__ __forceinline__ VMax2(const VMax2&) {} }; } diff --git a/modules/gpuarithm/src/cuda/minmaxloc.cu b/modules/gpuarithm/src/cuda/minmaxloc.cu index 08c594d3a8..fbd7029861 100644 --- a/modules/gpuarithm/src/cuda/minmaxloc.cu +++ b/modules/gpuarithm/src/cuda/minmaxloc.cu @@ -45,6 +45,7 @@ #include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_math.hpp" +#include "opencv2/core/cuda/functional.hpp" #include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/limits.hpp" diff --git a/modules/gpuarithm/src/cuda/mul_mat.cu b/modules/gpuarithm/src/cuda/mul_mat.cu index cde44ba563..25bc8a5970 100644 --- a/modules/gpuarithm/src/cuda/mul_mat.cu +++ b/modules/gpuarithm/src/cuda/mul_mat.cu @@ -69,8 
+69,8 @@ namespace arithm return res; } - __device__ __forceinline__ Mul_8uc4_32f() {} - __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {} + __host__ __device__ __forceinline__ Mul_8uc4_32f() {} + __host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {} }; struct Mul_16sc4_32f : binary_function @@ -81,8 +81,8 @@ namespace arithm saturate_cast(a.z * b), saturate_cast(a.w * b)); } - __device__ __forceinline__ Mul_16sc4_32f() {} - __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {} + __host__ __device__ __forceinline__ Mul_16sc4_32f() {} + __host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {} }; template struct Mul : binary_function @@ -92,15 +92,15 @@ namespace arithm return saturate_cast(a * b); } - __device__ __forceinline__ Mul() {} - __device__ __forceinline__ Mul(const Mul& other) {} + __host__ __device__ __forceinline__ Mul() {} + __host__ __device__ __forceinline__ Mul(const Mul&) {} }; template struct MulScale : binary_function { S scale; - explicit MulScale(S scale_) : scale(scale_) {} + __host__ explicit MulScale(S scale_) : scale(scale_) {} __device__ __forceinline__ D operator ()(T a, T b) const { diff --git a/modules/gpuarithm/src/cuda/mul_scalar.cu b/modules/gpuarithm/src/cuda/mul_scalar.cu index 208bfc6228..6546550275 100644 --- a/modules/gpuarithm/src/cuda/mul_scalar.cu +++ b/modules/gpuarithm/src/cuda/mul_scalar.cu @@ -59,7 +59,7 @@ namespace arithm { S val; - explicit MulScalar(S val_) : val(val_) {} + __host__ explicit MulScalar(S val_) : val(val_) {} __device__ __forceinline__ D operator ()(T a) const { diff --git a/modules/gpuarithm/src/cuda/reduce.cu b/modules/gpuarithm/src/cuda/reduce.cu index f2056b97be..51c354cf95 100644 --- a/modules/gpuarithm/src/cuda/reduce.cu +++ b/modules/gpuarithm/src/cuda/reduce.cu @@ -46,6 +46,7 @@ #include "opencv2/core/cuda/saturate_cast.hpp" #include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_math.hpp" +#include 
"opencv2/core/cuda/functional.hpp" #include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/limits.hpp" @@ -76,8 +77,8 @@ namespace reduce return r; } - __device__ __forceinline__ Sum() {} - __device__ __forceinline__ Sum(const Sum&) {} + __host__ __device__ __forceinline__ Sum() {} + __host__ __device__ __forceinline__ Sum(const Sum&) {} }; struct Avg @@ -100,8 +101,8 @@ namespace reduce return r / sz; } - __device__ __forceinline__ Avg() {} - __device__ __forceinline__ Avg(const Avg&) {} + __host__ __device__ __forceinline__ Avg() {} + __host__ __device__ __forceinline__ Avg(const Avg&) {} }; struct Min @@ -125,8 +126,8 @@ namespace reduce return r; } - __device__ __forceinline__ Min() {} - __device__ __forceinline__ Min(const Min&) {} + __host__ __device__ __forceinline__ Min() {} + __host__ __device__ __forceinline__ Min(const Min&) {} }; struct Max @@ -150,8 +151,8 @@ namespace reduce return r; } - __device__ __forceinline__ Max() {} - __device__ __forceinline__ Max(const Max&) {} + __host__ __device__ __forceinline__ Max() {} + __host__ __device__ __forceinline__ Max(const Max&) {} }; /////////////////////////////////////////////////////////// diff --git a/modules/gpuarithm/src/cuda/sub_mat.cu b/modules/gpuarithm/src/cuda/sub_mat.cu index adbdb2f501..077eafb356 100644 --- a/modules/gpuarithm/src/cuda/sub_mat.cu +++ b/modules/gpuarithm/src/cuda/sub_mat.cu @@ -62,8 +62,8 @@ namespace arithm return vsub4(a, b); } - __device__ __forceinline__ VSub4() {} - __device__ __forceinline__ VSub4(const VSub4& other) {} + __host__ __device__ __forceinline__ VSub4() {} + __host__ __device__ __forceinline__ VSub4(const VSub4&) {} }; struct VSub2 : binary_function @@ -73,8 +73,8 @@ namespace arithm return vsub2(a, b); } - __device__ __forceinline__ VSub2() {} - __device__ __forceinline__ VSub2(const VSub2& other) {} + __host__ __device__ __forceinline__ VSub2() {} + __host__ __device__ __forceinline__ VSub2(const VSub2&) {} }; template struct SubMat : 
binary_function @@ -84,8 +84,8 @@ namespace arithm return saturate_cast(a - b); } - __device__ __forceinline__ SubMat() {} - __device__ __forceinline__ SubMat(const SubMat& other) {} + __host__ __device__ __forceinline__ SubMat() {} + __host__ __device__ __forceinline__ SubMat(const SubMat&) {} }; } diff --git a/modules/gpuarithm/src/cuda/sub_scalar.cu b/modules/gpuarithm/src/cuda/sub_scalar.cu index ed1c96e002..05c0cc703b 100644 --- a/modules/gpuarithm/src/cuda/sub_scalar.cu +++ b/modules/gpuarithm/src/cuda/sub_scalar.cu @@ -59,7 +59,7 @@ namespace arithm { S val; - explicit SubScalar(S val_) : val(val_) {} + __host__ explicit SubScalar(S val_) : val(val_) {} __device__ __forceinline__ D operator ()(T a) const { diff --git a/modules/gpuarithm/src/cuda/sum.cu b/modules/gpuarithm/src/cuda/sum.cu index 3838a7b6cf..2af7692061 100644 --- a/modules/gpuarithm/src/cuda/sum.cu +++ b/modules/gpuarithm/src/cuda/sum.cu @@ -45,6 +45,7 @@ #include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_math.hpp" +#include "opencv2/core/cuda/functional.hpp" #include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/utility.hpp" diff --git a/modules/gpufilters/perf/perf_filters.cpp b/modules/gpufilters/perf/perf_filters.cpp index 0dc506bc93..64cf4cc5db 100644 --- a/modules/gpufilters/perf/perf_filters.cpp +++ b/modules/gpufilters/perf/perf_filters.cpp @@ -72,7 +72,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur, TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize)); - GPU_SANITY_CHECK(dst); + GPU_SANITY_CHECK(dst, 1); } else { diff --git a/modules/gpuimgproc/src/cuda/hough.cu b/modules/gpuimgproc/src/cuda/hough.cu index 5a4481b6e5..696ed38453 100644 --- a/modules/gpuimgproc/src/cuda/hough.cu +++ b/modules/gpuimgproc/src/cuda/hough.cu @@ -48,6 +48,7 @@ #include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/vec_math.hpp" 
+#include "opencv2/core/cuda/functional.hpp" #include "opencv2/core/cuda/limits.hpp" #include "opencv2/core/cuda/dynamic_smem.hpp" @@ -811,7 +812,7 @@ namespace cv { namespace gpu { namespace cudev const int ind = ::atomicAdd(r_sizes + n, 1); if (ind < maxSize) - r_table(n, ind) = p - templCenter; + r_table(n, ind) = saturate_cast(p - templCenter); } void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, @@ -855,7 +856,7 @@ namespace cv { namespace gpu { namespace cudev for (int j = 0; j < r_row_size; ++j) { - short2 c = p - r_row[j]; + int2 c = p - r_row[j]; c.x = __float2int_rn(c.x * idp); c.y = __float2int_rn(c.y * idp); diff --git a/modules/gpuoptflow/perf/perf_optflow.cpp b/modules/gpuoptflow/perf/perf_optflow.cpp index 6f2527fe9e..545225d62e 100644 --- a/modules/gpuoptflow/perf/perf_optflow.cpp +++ b/modules/gpuoptflow/perf/perf_optflow.cpp @@ -84,7 +84,7 @@ PERF_TEST_P(ImagePair, InterpolateFrames, TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf); - GPU_SANITY_CHECK(newFrame); + GPU_SANITY_CHECK(newFrame, 1e-4); } else { @@ -123,7 +123,7 @@ PERF_TEST_P(ImagePair, CreateOpticalFlowNeedleMap, TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors); - GPU_SANITY_CHECK(vertex); + GPU_SANITY_CHECK(vertex, 1e-6); GPU_SANITY_CHECK(colors); } else @@ -161,8 +161,8 @@ PERF_TEST_P(ImagePair, BroxOpticalFlow, TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v); - GPU_SANITY_CHECK(u); - GPU_SANITY_CHECK(v); + GPU_SANITY_CHECK(u, 1e-1); + GPU_SANITY_CHECK(v, 1e-1); } else { diff --git a/modules/gpuoptflow/test/test_optflow.cpp b/modules/gpuoptflow/test/test_optflow.cpp index cf05ebc249..fce07551dc 100644 --- a/modules/gpuoptflow/test/test_optflow.cpp +++ b/modules/gpuoptflow/test/test_optflow.cpp @@ -103,8 +103,8 @@ GPU_TEST_P(BroxOpticalFlow, Regression) for (int i = 0; i < v_gold.rows; ++i) f.read(v_gold.ptr(i), v_gold.cols * sizeof(float)); - 
EXPECT_MAT_NEAR(u_gold, u, 0); - EXPECT_MAT_NEAR(v_gold, v, 0); + EXPECT_MAT_SIMILAR(u_gold, u, 1e-3); + EXPECT_MAT_SIMILAR(v_gold, v, 1e-3); #else std::ofstream f(fname.c_str(), std::ios_base::binary);