diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 0041030dad..d79c995a27 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -695,236 +695,14 @@ namespace CV__SIMD_NAMESPACE { /** @brief SIMD processing state cleanup call */ inline void vx_cleanup() { VXPREFIX(_cleanup)(); } -#if CV_SIMD +#if !CV_SIMD_SCALABLE // Compatibility layer - #define CV_SIMD_SCALABLE 0 - #define CV_SIMD_SCALABLE_64F 0 - template - struct VTraits; -#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512) - template <> - struct VTraits - { - static inline int vlanes() { return v_uint8::nlanes; } - enum { nlanes = 64, max_nlanes = nlanes }; - using lane_type = uchar; + template struct VTraits { + static inline int vlanes() { return T::nlanes; } + enum { nlanes = T::nlanes, max_nlanes = T::nlanes }; + using lane_type = typename T::lane_type; }; - template <> - struct VTraits - { - static inline int vlanes() { return v_int8::nlanes; } - enum { nlanes = 64, max_nlanes = nlanes }; - using lane_type = schar; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_uint16::nlanes; } - enum { nlanes = 32, max_nlanes = nlanes }; - using lane_type = ushort; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_int16::nlanes; } - enum { nlanes = 32, max_nlanes = nlanes }; - using lane_type = short; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_uint32::nlanes; } - enum { nlanes = 16, max_nlanes = nlanes }; - using lane_type = uint; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_int32::nlanes; } - enum { nlanes = 16, max_nlanes = nlanes }; - using lane_type = int; - }; - - template <> - struct VTraits - { - static inline int vlanes() { return v_float32::nlanes; } - enum { nlanes = 16, max_nlanes = nlanes }; - using lane_type = float; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_uint64::nlanes; } - enum { nlanes = 8, max_nlanes = nlanes }; - using lane_type = uint64; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_int64::nlanes; } - enum { nlanes = 8, max_nlanes = nlanes }; - using lane_type = int64; - }; - #if CV_SIMD_64F - template <> - struct VTraits - { - static inline int vlanes() { return v_float64::nlanes; } - enum { nlanes = 8, max_nlanes = nlanes }; - using lane_type = double; - }; - #endif -#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256) - template <> - struct VTraits - { - static inline int vlanes() { return v_uint8::nlanes; } - enum { nlanes = 32, max_nlanes = nlanes }; - using lane_type = uchar; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_int8::nlanes; } - enum { nlanes = 32, max_nlanes = nlanes }; - using lane_type = schar; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_uint16::nlanes; } - enum { nlanes = 16, max_nlanes = nlanes }; - using lane_type = ushort; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_int16::nlanes; } - enum { nlanes = 16, max_nlanes = nlanes }; - using lane_type = short; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_uint32::nlanes; } - enum { nlanes = 8, max_nlanes = nlanes }; - using lane_type = uint; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_int32::nlanes; } - enum { nlanes = 8, max_nlanes = nlanes }; - using lane_type = int; - }; - - template <> - struct VTraits - { - static inline int vlanes() { return v_float32::nlanes; } - enum { nlanes = 8, max_nlanes = nlanes }; - using lane_type = float; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_uint64::nlanes; } - enum { nlanes = 4, max_nlanes = nlanes }; - using lane_type = uint64; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_int64::nlanes; } - enum { nlanes = 4, max_nlanes = nlanes }; - using lane_type = int64; - }; - #if CV_SIMD_64F - template <> - struct VTraits - { - static inline int vlanes() { return v_float64::nlanes; } - enum { nlanes = 4, max_nlanes = nlanes }; - using lane_type = double; - }; - #endif -#elif CV_SIMD128 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128) - template <> - struct VTraits - { - static inline int vlanes() { return v_uint8::nlanes; } - enum { nlanes = 16, max_nlanes = nlanes }; - using lane_type = uchar; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_int8::nlanes; } - enum { nlanes = 16, max_nlanes = nlanes }; - using lane_type = schar; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_uint16::nlanes; } - enum { nlanes = 8, max_nlanes = nlanes }; - using lane_type = ushort; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_int16::nlanes; } - enum { nlanes = 8, max_nlanes = nlanes }; - using lane_type = short; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_uint32::nlanes; } - enum { nlanes = 4, max_nlanes = nlanes }; - using lane_type = uint; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_int32::nlanes; } - enum { nlanes = 4, max_nlanes = nlanes }; - using lane_type = int; - }; - - template <> - struct VTraits - { - static inline int vlanes() { return v_float32::nlanes; } - enum { nlanes = 4, max_nlanes = nlanes }; - using lane_type = float; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_uint64::nlanes; } - enum { nlanes = 2, max_nlanes = nlanes }; - using lane_type = uint64; - }; - template <> - struct VTraits - { - static inline int vlanes() { return v_int64::nlanes; } - enum { nlanes = 2, max_nlanes = nlanes }; - using lane_type = int64; - }; - #if CV_SIMD_64F - template <> - struct VTraits - { - static inline int vlanes() { return v_float64::nlanes; } - enum { nlanes = 2, max_nlanes = nlanes }; - using lane_type = double; - }; - #endif -#endif #define OPENCV_HAL_WRAP_BIN_OP_ADDSUB(_Tpvec) \ inline _Tpvec v_add(const _Tpvec& a, const _Tpvec& b) \ diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index 46222140e6..633a58bca4 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -48,6 +48,7 @@ #include #include #include +#include "opencv2/core/utility.hpp" #include "opencv2/core/saturate.hpp" //! @cond IGNORED diff --git a/modules/core/include/opencv2/core/saturate.hpp b/modules/core/include/opencv2/core/saturate.hpp index 8127e3d9ef..e0cc965ab6 100644 --- a/modules/core/include/opencv2/core/saturate.hpp +++ b/modules/core/include/opencv2/core/saturate.hpp @@ -46,6 +46,7 @@ #define OPENCV_CORE_SATURATE_HPP #include "opencv2/core/cvdef.h" +#include #include "opencv2/core/fast_math.hpp" namespace cv diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 5d4442f111..7477a004ef 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -25,61 +25,7 @@ void test_hal_intrin_float16(); template struct Data; template struct initializer; -#if CV_SIMD -template <> struct initializer<64> -{ - template static R init(const Data & d) - { - return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15], - d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31], - d[32], d[33], d[34], d[35], d[36], d[37], d[38], d[39], d[40], d[41], d[42], d[43], d[44], d[45], d[46], d[47], - d[48], d[49], d[50], d[51], d[52], d[53], d[54], d[55], d[56], d[57], d[58], d[59], d[60], d[61], d[62], d[63]); - } -}; - -template <> struct initializer<32> -{ - template static R init(const Data & d) - { - return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15], - d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31]); - } -}; - -template <> struct initializer<16> -{ - template static R init(const Data & d) - { - return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]); - } -}; - -template <> struct initializer<8> -{ - template static R init(const Data & d) - { - return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]); - } -}; - -template <> struct initializer<4> -{ - template static R init(const Data & d) - { - return R(d[0], d[1], d[2], d[3]); - } -}; - -template <> struct initializer<2> -{ - template static R init(const Data & d) - { - return R(d[0], d[1]); - } -}; - -#else - +#if CV_SIMD_SCALABLE template <> struct initializer<128> { template static R init(const Data & d) @@ -146,6 +92,59 @@ template <> struct initializer<2> return v_load({d[0], d[1]}); } }; + +#else +template <> struct initializer<64> +{ + template static R init(const Data & d) + { + return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15], + d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31], + d[32], d[33], d[34], d[35], d[36], d[37], d[38], d[39], d[40], d[41], d[42], d[43], d[44], d[45], d[46], d[47], + d[48], d[49], d[50], d[51], d[52], d[53], d[54], d[55], d[56], d[57], d[58], d[59], d[60], d[61], d[62], d[63]); + } +}; + +template <> struct initializer<32> +{ + template static R init(const Data & d) + { + return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15], + d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31]); + } +}; + +template <> struct initializer<16> +{ + template static R init(const Data & d) + { + return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]); + } +}; + +template <> struct initializer<8> +{ + template static R init(const Data & d) + { + return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]); + } +}; + +template <> struct initializer<4> +{ + template static R init(const Data & d) + { + return R(d[0], d[1], d[2], d[3]); + } +}; + +template <> struct initializer<2> +{ + template static R init(const Data & d) + { + return R(d[0], d[1]); + } +}; #endif //================================================================================================== @@ -1726,8 +1725,122 @@ template struct TheTest #endif }; +#if CV_SIMD_SCALABLE //Temporary +#define DUMP_ENTRY(type) printf("SIMD: %s\n", CV__TRACE_FUNCTION); + + +//============= 8-bit integer ===================================================================== + +void test_hal_intrin_uint8() +{ + DUMP_ENTRY(v_uint8); + // typedef v_uint8 R; + TheTest() + .test_loadstore() + .test_min_max() + ; +} + +void test_hal_intrin_int8() +{ + DUMP_ENTRY(v_int8); + // typedef v_int8 R; + TheTest() + .test_loadstore() + .test_min_max() + ; +} + +//============= 16-bit integer ===================================================================== + +void test_hal_intrin_uint16() +{ + DUMP_ENTRY(v_uint16); + // typedef v_uint16 R; + TheTest() + .test_loadstore() + .test_min_max() + ; +} + +void test_hal_intrin_int16() +{ + DUMP_ENTRY(v_int16); + // typedef v_int16 R; + TheTest() + .test_loadstore() + .test_min_max() + ; +} + +//============= 32-bit integer ===================================================================== + +void test_hal_intrin_uint32() +{ + DUMP_ENTRY(v_uint32); + // typedef v_uint32 R; + TheTest() + .test_loadstore() + .test_min_max() + ; +} + +void test_hal_intrin_int32() +{ + DUMP_ENTRY(v_int32); + // typedef v_int32 R; + TheTest() + .test_loadstore() + .test_min_max() + ; +} + +//============= 64-bit integer ===================================================================== + +void test_hal_intrin_uint64() +{ + DUMP_ENTRY(v_uint64); + // typedef v_uint64 R; + TheTest() + .test_loadstore() + ; +} + +void test_hal_intrin_int64() +{ + DUMP_ENTRY(v_int64); + // typedef v_int64 R; + TheTest() + .test_loadstore() + ; +} + +//============= Floating point ===================================================================== +void test_hal_intrin_float32() +{ + DUMP_ENTRY(v_float32); + // typedef v_float32 R; + TheTest() + .test_loadstore() + .test_min_max() + ; +} + +void test_hal_intrin_float64() +{ + DUMP_ENTRY(v_float64); +#if CV_SIMD_64F + // typedef v_float64 R; + TheTest() + .test_loadstore() + .test_min_max() + ; + +#endif +} + +#else -#if CV_SIMD #define DUMP_ENTRY(type) printf("SIMD%d: %s\n", 8*(int)sizeof(v_uint8), CV__TRACE_FUNCTION); //============= 8-bit integer ===================================================================== @@ -2075,119 +2188,6 @@ void test_hal_intrin_float16() std::cout << "SKIP: CV_FP16 is not available" << std::endl; #endif } -#elif CV_SIMD_SCALABLE //Temporary -#define DUMP_ENTRY(type) printf("SIMD: %s\n", CV__TRACE_FUNCTION); - - -//============= 8-bit integer ===================================================================== - -void test_hal_intrin_uint8() -{ - DUMP_ENTRY(v_uint8); - // typedef v_uint8 R; - TheTest() - .test_loadstore() - .test_min_max() - ; -} - -void test_hal_intrin_int8() -{ - DUMP_ENTRY(v_int8); - // typedef v_int8 R; - TheTest() - .test_loadstore() - .test_min_max() - ; -} - -//============= 16-bit integer ===================================================================== - -void test_hal_intrin_uint16() -{ - DUMP_ENTRY(v_uint16); - // typedef v_uint16 R; - TheTest() - .test_loadstore() - .test_min_max() - ; -} - -void test_hal_intrin_int16() -{ - DUMP_ENTRY(v_int16); - // typedef v_int16 R; - TheTest() - .test_loadstore() - .test_min_max() - ; -} - -//============= 32-bit integer ===================================================================== - -void test_hal_intrin_uint32() -{ - DUMP_ENTRY(v_uint32); - // typedef v_uint32 R; - TheTest() - .test_loadstore() - .test_min_max() - ; -} - -void test_hal_intrin_int32() -{ - DUMP_ENTRY(v_int32); - // typedef v_int32 R; - TheTest() - .test_loadstore() - .test_min_max() - ; -} - -//============= 64-bit integer ===================================================================== - -void test_hal_intrin_uint64() -{ - DUMP_ENTRY(v_uint64); - // typedef v_uint64 R; - TheTest() - .test_loadstore() - ; -} - -void test_hal_intrin_int64() -{ - DUMP_ENTRY(v_int64); - // typedef v_int64 R; - TheTest() - .test_loadstore() - ; -} - -//============= Floating point ===================================================================== -void test_hal_intrin_float32() -{ - DUMP_ENTRY(v_float32); - // typedef v_float32 R; - TheTest() - .test_loadstore() - .test_min_max() - ; -} - -void test_hal_intrin_float64() -{ - DUMP_ENTRY(v_float64); -#if CV_SIMD_64F - // typedef v_float64 R; - TheTest() - .test_loadstore() - .test_min_max() - ; - -#endif -} #endif diff --git a/modules/dnn/src/layers/fast_convolution/depthwise_convolution.cpp b/modules/dnn/src/layers/fast_convolution/depthwise_convolution.cpp index c98c3d6549..4eb47c46b2 100644 --- a/modules/dnn/src/layers/fast_convolution/depthwise_convolution.cpp +++ b/modules/dnn/src/layers/fast_convolution/depthwise_convolution.cpp @@ -19,7 +19,7 @@ static void depthWiseBlock(const float *inptr, float *outptr, const float *weigh int dilation_y, int stride_x, int stride_y, int inner_xleft, int inner_xright, int inner_ytop, int inner_ybottom, bool ifMinMaxAct, bool useSIMD, bool is3x3) { -#ifdef CV_SIMD128 +#if CV_SIMD128 v_float32x4 vminval = v_setall_f32(minval), vmaxval = v_setall_f32(maxval); v_float32x4 w0 = v_setall_f32( @@ -44,7 +44,7 @@ static void depthWiseBlock(const float *inptr, float *outptr, const float *weigh int dy0 = 1; for (int y0 = 0; y0 < H0; y0 += dy0, outptr += W0 * dy0) { -#ifdef CV_SIMD128 +#if CV_SIMD128 dy0 = inner_ytop <= y0 && y0 + 3 < inner_ybottom && is3x3 && stride_y == 1 && dilation_y == 1 ? 3 : 1; #endif @@ -103,7 +103,7 @@ static void depthWiseBlock(const float *inptr, float *outptr, const float *weigh if (x0 == W0) break; x1 = inner_xright; -#ifdef CV_SIMD128 +#if CV_SIMD128 if (useSIMD) { if (is3x3)