Merge pull request #15839 from alalek:core_simd_v_setall_template

2025-07-24 14:06:27 +08:00 · 2019-11-27 19:19:34 +00:00 · 2019-11-27 19:19:34 +00:00 · 70146700aa
commit 70146700aa
parent 5af747c6fd a893969ec9
4 changed files with 188 additions and 5 deletions
--- a/cmake/templates/opencv_abi.xml.in
+++ b/cmake/templates/opencv_abi.xml.in
@ -22,6 +22,8 @@

 <skip_headers>
    opencv2/core/hal/intrin*
+    opencv2/core/hal/*macros.*
+    opencv2/core/hal/*.impl.*
    opencv2/core/cuda*
    opencv2/core/opencl*
    opencv2/core/private*
--- a/modules/core/include/opencv2/core/hal/intrin.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin.hpp
@ -457,10 +457,6 @@ namespace CV__SIMD_NAMESPACE {
 using namespace CV__SIMD_NAMESPACE;
 #endif

-#ifndef CV_DOXYGEN
-CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
-#endif
-
 #ifndef CV_SIMD_64F
 #define CV_SIMD_64F 0
 #endif
@ -469,11 +465,16 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
 #define CV_SIMD_FP16 0  //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types
 #endif

-
 #ifndef CV_SIMD
 #define CV_SIMD 0
 #endif

+#include "simd_utils.impl.hpp"
+
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+#endif
+
 } // cv::

 //! @endcond
--- a/modules/core/include/opencv2/core/hal/simd_utils.impl.hpp
+++ b/modules/core/include/opencv2/core/hal/simd_utils.impl.hpp
@ -0,0 +1,146 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This header is not standalone. Don't include directly, use "intrin.hpp" instead.
+#ifdef OPENCV_HAL_INTRIN_HPP  // defined in intrin.hpp
+
+
+#if CV_SIMD128 || CV_SIMD128_CPP
+
+template<typename _T> struct Type2Vec128_Traits;
+#define CV_INTRIN_DEF_TYPE2VEC128_TRAITS(type_, vec_type_) \
+    template<> struct Type2Vec128_Traits<type_> \
+    { \
+        typedef vec_type_ vec_type; \
+    }
+
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uchar, v_uint8x16);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(schar, v_int8x16);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(ushort, v_uint16x8);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(short, v_int16x8);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(unsigned, v_uint32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int, v_int32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(float, v_float32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uint64, v_uint64x2);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int64, v_int64x2);
+#if CV_SIMD128_64F
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(double, v_float64x2);
+#endif
+
+template<typename _T> static inline
+typename Type2Vec128_Traits<_T>::vec_type v_setall(const _T& a);
+
+template<> inline Type2Vec128_Traits< uchar>::vec_type v_setall< uchar>(const  uchar& a) { return v_setall_u8(a); }
+template<> inline Type2Vec128_Traits< schar>::vec_type v_setall< schar>(const  schar& a) { return v_setall_s8(a); }
+template<> inline Type2Vec128_Traits<ushort>::vec_type v_setall<ushort>(const ushort& a) { return v_setall_u16(a); }
+template<> inline Type2Vec128_Traits< short>::vec_type v_setall< short>(const  short& a) { return v_setall_s16(a); }
+template<> inline Type2Vec128_Traits<  uint>::vec_type v_setall<  uint>(const   uint& a) { return v_setall_u32(a); }
+template<> inline Type2Vec128_Traits<   int>::vec_type v_setall<   int>(const    int& a) { return v_setall_s32(a); }
+template<> inline Type2Vec128_Traits<uint64>::vec_type v_setall<uint64>(const uint64& a) { return v_setall_u64(a); }
+template<> inline Type2Vec128_Traits< int64>::vec_type v_setall< int64>(const  int64& a) { return v_setall_s64(a); }
+template<> inline Type2Vec128_Traits< float>::vec_type v_setall< float>(const  float& a) { return v_setall_f32(a); }
+#if CV_SIMD128_64F
+template<> inline Type2Vec128_Traits<double>::vec_type v_setall<double>(const double& a) { return v_setall_f64(a); }
+#endif
+
+#endif  // SIMD128
+
+
+#if CV_SIMD256
+
+template<typename _T> struct Type2Vec256_Traits;
+#define CV_INTRIN_DEF_TYPE2VEC256_TRAITS(type_, vec_type_) \
+    template<> struct Type2Vec256_Traits<type_> \
+    { \
+        typedef vec_type_ vec_type; \
+    }
+
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uchar, v_uint8x32);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(schar, v_int8x32);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(ushort, v_uint16x16);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(short, v_int16x16);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(unsigned, v_uint32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int, v_int32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(float, v_float32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uint64, v_uint64x4);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int64, v_int64x4);
+#if CV_SIMD256_64F
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(double, v_float64x4);
+#endif
+
+template<typename _T> static inline
+typename Type2Vec256_Traits<_T>::vec_type v256_setall(const _T& a);
+
+template<> inline Type2Vec256_Traits< uchar>::vec_type v256_setall< uchar>(const  uchar& a) { return v256_setall_u8(a); }
+template<> inline Type2Vec256_Traits< schar>::vec_type v256_setall< schar>(const  schar& a) { return v256_setall_s8(a); }
+template<> inline Type2Vec256_Traits<ushort>::vec_type v256_setall<ushort>(const ushort& a) { return v256_setall_u16(a); }
+template<> inline Type2Vec256_Traits< short>::vec_type v256_setall< short>(const  short& a) { return v256_setall_s16(a); }
+template<> inline Type2Vec256_Traits<  uint>::vec_type v256_setall<  uint>(const   uint& a) { return v256_setall_u32(a); }
+template<> inline Type2Vec256_Traits<   int>::vec_type v256_setall<   int>(const    int& a) { return v256_setall_s32(a); }
+template<> inline Type2Vec256_Traits<uint64>::vec_type v256_setall<uint64>(const uint64& a) { return v256_setall_u64(a); }
+template<> inline Type2Vec256_Traits< int64>::vec_type v256_setall< int64>(const  int64& a) { return v256_setall_s64(a); }
+template<> inline Type2Vec256_Traits< float>::vec_type v256_setall< float>(const  float& a) { return v256_setall_f32(a); }
+#if CV_SIMD256_64F
+template<> inline Type2Vec256_Traits<double>::vec_type v256_setall<double>(const double& a) { return v256_setall_f64(a); }
+#endif
+
+#endif  // SIMD256
+
+
+#if CV_SIMD512
+
+template<typename _T> struct Type2Vec512_Traits;
+#define CV_INTRIN_DEF_TYPE2VEC512_TRAITS(type_, vec_type_) \
+    template<> struct Type2Vec512_Traits<type_> \
+    { \
+        typedef vec_type_ vec_type; \
+    }
+
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uchar, v_uint8x64);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(schar, v_int8x64);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(ushort, v_uint16x32);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(short, v_int16x32);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(unsigned, v_uint32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int, v_int32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(float, v_float32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uint64, v_uint64x8);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int64, v_int64x8);
+#if CV_SIMD512_64F
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(double, v_float64x8);
+#endif
+
+template<typename _T> static inline
+typename Type2Vec512_Traits<_T>::vec_type v512_setall(const _T& a);
+
+template<> inline Type2Vec512_Traits< uchar>::vec_type v512_setall< uchar>(const  uchar& a) { return v512_setall_u8(a); }
+template<> inline Type2Vec512_Traits< schar>::vec_type v512_setall< schar>(const  schar& a) { return v512_setall_s8(a); }
+template<> inline Type2Vec512_Traits<ushort>::vec_type v512_setall<ushort>(const ushort& a) { return v512_setall_u16(a); }
+template<> inline Type2Vec512_Traits< short>::vec_type v512_setall< short>(const  short& a) { return v512_setall_s16(a); }
+template<> inline Type2Vec512_Traits<  uint>::vec_type v512_setall<  uint>(const   uint& a) { return v512_setall_u32(a); }
+template<> inline Type2Vec512_Traits<   int>::vec_type v512_setall<   int>(const    int& a) { return v512_setall_s32(a); }
+template<> inline Type2Vec512_Traits<uint64>::vec_type v512_setall<uint64>(const uint64& a) { return v512_setall_u64(a); }
+template<> inline Type2Vec512_Traits< int64>::vec_type v512_setall< int64>(const  int64& a) { return v512_setall_s64(a); }
+template<> inline Type2Vec512_Traits< float>::vec_type v512_setall< float>(const  float& a) { return v512_setall_f32(a); }
+#if CV_SIMD512_64F
+template<> inline Type2Vec512_Traits<double>::vec_type v512_setall<double>(const double& a) { return v512_setall_f64(a); }
+#endif
+
+#endif  // SIMD512
+
+
+#if CV_SIMD_WIDTH == 16
+template<typename _T> static inline
+typename Type2Vec128_Traits<_T>::vec_type vx_setall(const _T& a) { return v_setall(a); }
+#elif CV_SIMD_WIDTH == 32
+template<typename _T> static inline
+typename Type2Vec256_Traits<_T>::vec_type vx_setall(const _T& a) { return v256_setall(a); }
+#elif CV_SIMD_WIDTH == 64
+template<typename _T> static inline
+typename Type2Vec512_Traits<_T>::vec_type vx_setall(const _T& a) { return v512_setall(a); }
+#else
+#error "Build configuration error, unsupported CV_SIMD_WIDTH"
+#endif
+
+
+#endif  // OPENCV_HAL_INTRIN_HPP
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@ -336,6 +336,40 @@ template<typename R> struct TheTest
        v_float64 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a);
 #endif

+#if CV_SIMD_WIDTH == 16
+        R setall_res1 = v_setall((LaneType)5);
+        R setall_res2 = v_setall<LaneType>(6);
+#elif CV_SIMD_WIDTH == 32
+        R setall_res1 = v256_setall((LaneType)5);
+        R setall_res2 = v256_setall<LaneType>(6);
+#elif CV_SIMD_WIDTH == 64
+        R setall_res1 = v512_setall((LaneType)5);
+        R setall_res2 = v512_setall<LaneType>(6);
+#else
+#error "Configuration error"
+#endif
+#if CV_SIMD_WIDTH > 0
+        Data<R> setall_res1_; v_store(setall_res1_.d, setall_res1);
+        Data<R> setall_res2_; v_store(setall_res2_.d, setall_res2);
+        for (int i = 0; i < R::nlanes; ++i)
+        {
+            SCOPED_TRACE(cv::format("i=%d", i));
+            EXPECT_EQ((LaneType)5, setall_res1_[i]);
+            EXPECT_EQ((LaneType)6, setall_res2_[i]);
+        }
+#endif
+
+        R vx_setall_res1 = vx_setall((LaneType)11);
+        R vx_setall_res2 = vx_setall<LaneType>(12);
+        Data<R> vx_setall_res1_; v_store(vx_setall_res1_.d, vx_setall_res1);
+        Data<R> vx_setall_res2_; v_store(vx_setall_res2_.d, vx_setall_res2);
+        for (int i = 0; i < R::nlanes; ++i)
+        {
+            SCOPED_TRACE(cv::format("i=%d", i));
+            EXPECT_EQ((LaneType)11, vx_setall_res1_[i]);
+            EXPECT_EQ((LaneType)12, vx_setall_res2_[i]);
+        }
+
        return *this;
    }