From a893969ec96347e1cdf95cf47716948143859df9 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 3 Nov 2019 10:37:39 +0000 Subject: [PATCH] core(simd): v_setall template --- cmake/templates/opencv_abi.xml.in | 2 + .../core/include/opencv2/core/hal/intrin.hpp | 11 +- .../opencv2/core/hal/simd_utils.impl.hpp | 146 ++++++++++++++++++ modules/core/test/test_intrin_utils.hpp | 34 ++++ 4 files changed, 188 insertions(+), 5 deletions(-) create mode 100644 modules/core/include/opencv2/core/hal/simd_utils.impl.hpp diff --git a/cmake/templates/opencv_abi.xml.in b/cmake/templates/opencv_abi.xml.in index ed142d8917..bb694c79f8 100644 --- a/cmake/templates/opencv_abi.xml.in +++ b/cmake/templates/opencv_abi.xml.in @@ -22,6 +22,8 @@ opencv2/core/hal/intrin* + opencv2/core/hal/*macros.* + opencv2/core/hal/*.impl.* opencv2/core/cuda* opencv2/core/opencl* opencv2/core/private* diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 3bdbf05f2f..2b877f4497 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -455,10 +455,6 @@ namespace CV__SIMD_NAMESPACE { using namespace CV__SIMD_NAMESPACE; #endif -#ifndef CV_DOXYGEN -CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END -#endif - #ifndef CV_SIMD_64F #define CV_SIMD_64F 0 #endif @@ -467,11 +463,16 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END #define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types #endif - #ifndef CV_SIMD #define CV_SIMD 0 #endif +#include "simd_utils.impl.hpp" + +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +#endif + } // cv:: //! @endcond diff --git a/modules/core/include/opencv2/core/hal/simd_utils.impl.hpp b/modules/core/include/opencv2/core/hal/simd_utils.impl.hpp new file mode 100644 index 0000000000..fff8f942b8 --- /dev/null +++ b/modules/core/include/opencv2/core/hal/simd_utils.impl.hpp @@ -0,0 +1,146 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This header is not standalone. Don't include directly, use "intrin.hpp" instead. +#ifdef OPENCV_HAL_INTRIN_HPP // defined in intrin.hpp + + +#if CV_SIMD128 || CV_SIMD128_CPP + +template struct Type2Vec128_Traits; +#define CV_INTRIN_DEF_TYPE2VEC128_TRAITS(type_, vec_type_) \ + template<> struct Type2Vec128_Traits \ + { \ + typedef vec_type_ vec_type; \ + } + +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uchar, v_uint8x16); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(schar, v_int8x16); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(ushort, v_uint16x8); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(short, v_int16x8); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(unsigned, v_uint32x4); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int, v_int32x4); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(float, v_float32x4); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uint64, v_uint64x2); +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int64, v_int64x2); +#if CV_SIMD128_64F +CV_INTRIN_DEF_TYPE2VEC128_TRAITS(double, v_float64x2); +#endif + +template static inline +typename Type2Vec128_Traits<_T>::vec_type v_setall(const _T& a); + +template<> inline Type2Vec128_Traits< uchar>::vec_type v_setall< uchar>(const uchar& a) { return v_setall_u8(a); } +template<> inline Type2Vec128_Traits< schar>::vec_type v_setall< schar>(const schar& a) { return v_setall_s8(a); } +template<> inline Type2Vec128_Traits::vec_type v_setall(const ushort& a) { return v_setall_u16(a); } +template<> inline Type2Vec128_Traits< short>::vec_type v_setall< short>(const short& a) { return v_setall_s16(a); } +template<> inline Type2Vec128_Traits< uint>::vec_type v_setall< uint>(const uint& a) { return v_setall_u32(a); } +template<> inline Type2Vec128_Traits< int>::vec_type v_setall< int>(const int& a) { return v_setall_s32(a); } +template<> inline Type2Vec128_Traits::vec_type v_setall(const uint64& a) { return v_setall_u64(a); } +template<> inline Type2Vec128_Traits< int64>::vec_type v_setall< int64>(const int64& a) { return v_setall_s64(a); } +template<> inline Type2Vec128_Traits< float>::vec_type v_setall< float>(const float& a) { return v_setall_f32(a); } +#if CV_SIMD128_64F +template<> inline Type2Vec128_Traits::vec_type v_setall(const double& a) { return v_setall_f64(a); } +#endif + +#endif // SIMD128 + + +#if CV_SIMD256 + +template struct Type2Vec256_Traits; +#define CV_INTRIN_DEF_TYPE2VEC256_TRAITS(type_, vec_type_) \ + template<> struct Type2Vec256_Traits \ + { \ + typedef vec_type_ vec_type; \ + } + +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uchar, v_uint8x32); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(schar, v_int8x32); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(ushort, v_uint16x16); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(short, v_int16x16); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(unsigned, v_uint32x8); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int, v_int32x8); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(float, v_float32x8); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uint64, v_uint64x4); +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int64, v_int64x4); +#if CV_SIMD256_64F +CV_INTRIN_DEF_TYPE2VEC256_TRAITS(double, v_float64x4); +#endif + +template static inline +typename Type2Vec256_Traits<_T>::vec_type v256_setall(const _T& a); + +template<> inline Type2Vec256_Traits< uchar>::vec_type v256_setall< uchar>(const uchar& a) { return v256_setall_u8(a); } +template<> inline Type2Vec256_Traits< schar>::vec_type v256_setall< schar>(const schar& a) { return v256_setall_s8(a); } +template<> inline Type2Vec256_Traits::vec_type v256_setall(const ushort& a) { return v256_setall_u16(a); } +template<> inline Type2Vec256_Traits< short>::vec_type v256_setall< short>(const short& a) { return v256_setall_s16(a); } +template<> inline Type2Vec256_Traits< uint>::vec_type v256_setall< uint>(const uint& a) { return v256_setall_u32(a); } +template<> inline Type2Vec256_Traits< int>::vec_type v256_setall< int>(const int& a) { return v256_setall_s32(a); } +template<> inline Type2Vec256_Traits::vec_type v256_setall(const uint64& a) { return v256_setall_u64(a); } +template<> inline Type2Vec256_Traits< int64>::vec_type v256_setall< int64>(const int64& a) { return v256_setall_s64(a); } +template<> inline Type2Vec256_Traits< float>::vec_type v256_setall< float>(const float& a) { return v256_setall_f32(a); } +#if CV_SIMD256_64F +template<> inline Type2Vec256_Traits::vec_type v256_setall(const double& a) { return v256_setall_f64(a); } +#endif + +#endif // SIMD256 + + +#if CV_SIMD512 + +template struct Type2Vec512_Traits; +#define CV_INTRIN_DEF_TYPE2VEC512_TRAITS(type_, vec_type_) \ + template<> struct Type2Vec512_Traits \ + { \ + typedef vec_type_ vec_type; \ + } + +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uchar, v_uint8x64); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(schar, v_int8x64); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(ushort, v_uint16x32); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(short, v_int16x32); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(unsigned, v_uint32x16); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int, v_int32x16); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(float, v_float32x16); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uint64, v_uint64x8); +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int64, v_int64x8); +#if CV_SIMD512_64F +CV_INTRIN_DEF_TYPE2VEC512_TRAITS(double, v_float64x8); +#endif + +template static inline +typename Type2Vec512_Traits<_T>::vec_type v512_setall(const _T& a); + +template<> inline Type2Vec512_Traits< uchar>::vec_type v512_setall< uchar>(const uchar& a) { return v512_setall_u8(a); } +template<> inline Type2Vec512_Traits< schar>::vec_type v512_setall< schar>(const schar& a) { return v512_setall_s8(a); } +template<> inline Type2Vec512_Traits::vec_type v512_setall(const ushort& a) { return v512_setall_u16(a); } +template<> inline Type2Vec512_Traits< short>::vec_type v512_setall< short>(const short& a) { return v512_setall_s16(a); } +template<> inline Type2Vec512_Traits< uint>::vec_type v512_setall< uint>(const uint& a) { return v512_setall_u32(a); } +template<> inline Type2Vec512_Traits< int>::vec_type v512_setall< int>(const int& a) { return v512_setall_s32(a); } +template<> inline Type2Vec512_Traits::vec_type v512_setall(const uint64& a) { return v512_setall_u64(a); } +template<> inline Type2Vec512_Traits< int64>::vec_type v512_setall< int64>(const int64& a) { return v512_setall_s64(a); } +template<> inline Type2Vec512_Traits< float>::vec_type v512_setall< float>(const float& a) { return v512_setall_f32(a); } +#if CV_SIMD512_64F +template<> inline Type2Vec512_Traits::vec_type v512_setall(const double& a) { return v512_setall_f64(a); } +#endif + +#endif // SIMD512 + + +#if CV_SIMD_WIDTH == 16 +template static inline +typename Type2Vec128_Traits<_T>::vec_type vx_setall(const _T& a) { return v_setall(a); } +#elif CV_SIMD_WIDTH == 32 +template static inline +typename Type2Vec256_Traits<_T>::vec_type vx_setall(const _T& a) { return v256_setall(a); } +#elif CV_SIMD_WIDTH == 64 +template static inline +typename Type2Vec512_Traits<_T>::vec_type vx_setall(const _T& a) { return v512_setall(a); } +#else +#error "Build configuration error, unsupported CV_SIMD_WIDTH" +#endif + + +#endif // OPENCV_HAL_INTRIN_HPP diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index bd1e24722c..f50eef46f6 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -332,6 +332,40 @@ template struct TheTest v_float64 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a); #endif +#if CV_SIMD_WIDTH == 16 + R setall_res1 = v_setall((LaneType)5); + R setall_res2 = v_setall(6); +#elif CV_SIMD_WIDTH == 32 + R setall_res1 = v256_setall((LaneType)5); + R setall_res2 = v256_setall(6); +#elif CV_SIMD_WIDTH == 64 + R setall_res1 = v512_setall((LaneType)5); + R setall_res2 = v512_setall(6); +#else +#error "Configuration error" +#endif +#if CV_SIMD_WIDTH > 0 + Data setall_res1_; v_store(setall_res1_.d, setall_res1); + Data setall_res2_; v_store(setall_res2_.d, setall_res2); + for (int i = 0; i < R::nlanes; ++i) + { + SCOPED_TRACE(cv::format("i=%d", i)); + EXPECT_EQ((LaneType)5, setall_res1_[i]); + EXPECT_EQ((LaneType)6, setall_res2_[i]); + } +#endif + + R vx_setall_res1 = vx_setall((LaneType)11); + R vx_setall_res2 = vx_setall(12); + Data vx_setall_res1_; v_store(vx_setall_res1_.d, vx_setall_res1); + Data vx_setall_res2_; v_store(vx_setall_res2_.d, vx_setall_res2); + for (int i = 0; i < R::nlanes; ++i) + { + SCOPED_TRACE(cv::format("i=%d", i)); + EXPECT_EQ((LaneType)11, vx_setall_res1_[i]); + EXPECT_EQ((LaneType)12, vx_setall_res2_[i]); + } + return *this; }