Merge pull request #15839 from alalek:core_simd_v_setall_template

This commit is contained in:
Alexander Alekhin 2019-11-27 19:19:34 +00:00
commit 70146700aa
4 changed files with 188 additions and 5 deletions

View File

@ -22,6 +22,8 @@
<skip_headers>
opencv2/core/hal/intrin*
opencv2/core/hal/*macros.*
opencv2/core/hal/*.impl.*
opencv2/core/cuda*
opencv2/core/opencl*
opencv2/core/private*

View File

@ -457,10 +457,6 @@ namespace CV__SIMD_NAMESPACE {
using namespace CV__SIMD_NAMESPACE;
#endif
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
#ifndef CV_SIMD_64F
#define CV_SIMD_64F 0
#endif
@ -469,11 +465,16 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types
#endif
#ifndef CV_SIMD
#define CV_SIMD 0
#endif
#include "simd_utils.impl.hpp"
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
} // cv::
//! @endcond

View File

@ -0,0 +1,146 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This header is not standalone. Don't include directly, use "intrin.hpp" instead.
#ifdef OPENCV_HAL_INTRIN_HPP // defined in intrin.hpp
#if CV_SIMD128 || CV_SIMD128_CPP

// Maps a scalar lane type to the 128-bit vector type that holds lanes of that type.
// Only the specializations below are defined; the primary template is declared but
// never defined, so an unregistered lane type is rejected at compile time.
template<typename _Tp> struct Type2Vec128_Traits;

// Registers one (lane type -> vector type) pair. The trailing ';' is supplied at the use site.
#define CV_INTRIN_DEF_TYPE2VEC128_TRAITS(type_, vec_type_) \
    template<> struct Type2Vec128_Traits<type_> \
    { \
        typedef vec_type_ vec_type; \
    }

CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uchar, v_uint8x16);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(schar, v_int8x16);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(ushort, v_uint16x8);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(short, v_int16x8);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(unsigned, v_uint32x4);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int, v_int32x4);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(float, v_float32x4);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uint64, v_uint64x2);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int64, v_int64x2);
#if CV_SIMD128_64F
// double lanes are registered only when CV_SIMD128_64F is enabled.
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(double, v_float64x2);
#endif

// Type-generic front end for the v_setall_<suffix>() family: the lane type (deduced from
// the argument or given explicitly, e.g. v_setall<short>(1)) selects both the returned
// vector type and the suffixed function that is called.
// Only the explicit specializations below provide a definition.
template<typename _Tp> static inline
typename Type2Vec128_Traits<_Tp>::vec_type v_setall(const _Tp& a);

template<> inline Type2Vec128_Traits<   uchar>::vec_type v_setall<   uchar>(const    uchar& a) { return v_setall_u8(a); }
template<> inline Type2Vec128_Traits<   schar>::vec_type v_setall<   schar>(const    schar& a) { return v_setall_s8(a); }
template<> inline Type2Vec128_Traits<  ushort>::vec_type v_setall<  ushort>(const   ushort& a) { return v_setall_u16(a); }
template<> inline Type2Vec128_Traits<   short>::vec_type v_setall<   short>(const    short& a) { return v_setall_s16(a); }
// Spelled 'unsigned' (not the 'uint' typedef) so that the key is exactly the one
// registered in Type2Vec128_Traits above, independent of how 'uint' is defined.
template<> inline Type2Vec128_Traits<unsigned>::vec_type v_setall<unsigned>(const unsigned& a) { return v_setall_u32(a); }
template<> inline Type2Vec128_Traits<     int>::vec_type v_setall<     int>(const      int& a) { return v_setall_s32(a); }
template<> inline Type2Vec128_Traits<  uint64>::vec_type v_setall<  uint64>(const   uint64& a) { return v_setall_u64(a); }
template<> inline Type2Vec128_Traits<   int64>::vec_type v_setall<   int64>(const    int64& a) { return v_setall_s64(a); }
template<> inline Type2Vec128_Traits<   float>::vec_type v_setall<   float>(const    float& a) { return v_setall_f32(a); }
#if CV_SIMD128_64F
template<> inline Type2Vec128_Traits<  double>::vec_type v_setall<  double>(const   double& a) { return v_setall_f64(a); }
#endif

#endif // SIMD128
#if CV_SIMD256

// Maps a scalar lane type to the 256-bit vector type that holds lanes of that type.
// Only the specializations below are defined; the primary template is declared but
// never defined, so an unregistered lane type is rejected at compile time.
template<typename _Tp> struct Type2Vec256_Traits;

// Registers one (lane type -> vector type) pair. The trailing ';' is supplied at the use site.
#define CV_INTRIN_DEF_TYPE2VEC256_TRAITS(type_, vec_type_) \
    template<> struct Type2Vec256_Traits<type_> \
    { \
        typedef vec_type_ vec_type; \
    }

CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uchar, v_uint8x32);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(schar, v_int8x32);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(ushort, v_uint16x16);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(short, v_int16x16);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(unsigned, v_uint32x8);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int, v_int32x8);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(float, v_float32x8);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uint64, v_uint64x4);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int64, v_int64x4);
#if CV_SIMD256_64F
// double lanes are registered only when CV_SIMD256_64F is enabled.
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(double, v_float64x4);
#endif

// Type-generic front end for the v256_setall_<suffix>() family: the lane type (deduced from
// the argument or given explicitly, e.g. v256_setall<short>(1)) selects both the returned
// vector type and the suffixed function that is called.
// Only the explicit specializations below provide a definition.
template<typename _Tp> static inline
typename Type2Vec256_Traits<_Tp>::vec_type v256_setall(const _Tp& a);

template<> inline Type2Vec256_Traits<   uchar>::vec_type v256_setall<   uchar>(const    uchar& a) { return v256_setall_u8(a); }
template<> inline Type2Vec256_Traits<   schar>::vec_type v256_setall<   schar>(const    schar& a) { return v256_setall_s8(a); }
template<> inline Type2Vec256_Traits<  ushort>::vec_type v256_setall<  ushort>(const   ushort& a) { return v256_setall_u16(a); }
template<> inline Type2Vec256_Traits<   short>::vec_type v256_setall<   short>(const    short& a) { return v256_setall_s16(a); }
// Spelled 'unsigned' (not the 'uint' typedef) so that the key is exactly the one
// registered in Type2Vec256_Traits above, independent of how 'uint' is defined.
template<> inline Type2Vec256_Traits<unsigned>::vec_type v256_setall<unsigned>(const unsigned& a) { return v256_setall_u32(a); }
template<> inline Type2Vec256_Traits<     int>::vec_type v256_setall<     int>(const      int& a) { return v256_setall_s32(a); }
template<> inline Type2Vec256_Traits<  uint64>::vec_type v256_setall<  uint64>(const   uint64& a) { return v256_setall_u64(a); }
template<> inline Type2Vec256_Traits<   int64>::vec_type v256_setall<   int64>(const    int64& a) { return v256_setall_s64(a); }
template<> inline Type2Vec256_Traits<   float>::vec_type v256_setall<   float>(const    float& a) { return v256_setall_f32(a); }
#if CV_SIMD256_64F
template<> inline Type2Vec256_Traits<  double>::vec_type v256_setall<  double>(const   double& a) { return v256_setall_f64(a); }
#endif

#endif // SIMD256
#if CV_SIMD512

// Maps a scalar lane type to the 512-bit vector type that holds lanes of that type.
// Only the specializations below are defined; the primary template is declared but
// never defined, so an unregistered lane type is rejected at compile time.
template<typename _Tp> struct Type2Vec512_Traits;

// Registers one (lane type -> vector type) pair. The trailing ';' is supplied at the use site.
#define CV_INTRIN_DEF_TYPE2VEC512_TRAITS(type_, vec_type_) \
    template<> struct Type2Vec512_Traits<type_> \
    { \
        typedef vec_type_ vec_type; \
    }

CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uchar, v_uint8x64);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(schar, v_int8x64);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(ushort, v_uint16x32);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(short, v_int16x32);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(unsigned, v_uint32x16);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int, v_int32x16);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(float, v_float32x16);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uint64, v_uint64x8);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int64, v_int64x8);
#if CV_SIMD512_64F
// double lanes are registered only when CV_SIMD512_64F is enabled.
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(double, v_float64x8);
#endif

// Type-generic front end for the v512_setall_<suffix>() family: the lane type (deduced from
// the argument or given explicitly, e.g. v512_setall<short>(1)) selects both the returned
// vector type and the suffixed function that is called.
// Only the explicit specializations below provide a definition.
template<typename _Tp> static inline
typename Type2Vec512_Traits<_Tp>::vec_type v512_setall(const _Tp& a);

template<> inline Type2Vec512_Traits<   uchar>::vec_type v512_setall<   uchar>(const    uchar& a) { return v512_setall_u8(a); }
template<> inline Type2Vec512_Traits<   schar>::vec_type v512_setall<   schar>(const    schar& a) { return v512_setall_s8(a); }
template<> inline Type2Vec512_Traits<  ushort>::vec_type v512_setall<  ushort>(const   ushort& a) { return v512_setall_u16(a); }
template<> inline Type2Vec512_Traits<   short>::vec_type v512_setall<   short>(const    short& a) { return v512_setall_s16(a); }
// Spelled 'unsigned' (not the 'uint' typedef) so that the key is exactly the one
// registered in Type2Vec512_Traits above, independent of how 'uint' is defined.
template<> inline Type2Vec512_Traits<unsigned>::vec_type v512_setall<unsigned>(const unsigned& a) { return v512_setall_u32(a); }
template<> inline Type2Vec512_Traits<     int>::vec_type v512_setall<     int>(const      int& a) { return v512_setall_s32(a); }
template<> inline Type2Vec512_Traits<  uint64>::vec_type v512_setall<  uint64>(const   uint64& a) { return v512_setall_u64(a); }
template<> inline Type2Vec512_Traits<   int64>::vec_type v512_setall<   int64>(const    int64& a) { return v512_setall_s64(a); }
template<> inline Type2Vec512_Traits<   float>::vec_type v512_setall<   float>(const    float& a) { return v512_setall_f32(a); }
#if CV_SIMD512_64F
template<> inline Type2Vec512_Traits<  double>::vec_type v512_setall<  double>(const   double& a) { return v512_setall_f64(a); }
#endif

#endif // SIMD512
// vx_setall<T>(): width-agnostic counterpart of v_setall / v256_setall / v512_setall.
// CV_SIMD_WIDTH picks which fixed-width helper (and which Type2Vec*_Traits mapping)
// the call is forwarded to; any other width value stops the build.
#if CV_SIMD_WIDTH == 64

template<typename _Tp> static inline
typename Type2Vec512_Traits<_Tp>::vec_type vx_setall(const _Tp& a)
{
    return v512_setall<_Tp>(a);
}

#elif CV_SIMD_WIDTH == 32

template<typename _Tp> static inline
typename Type2Vec256_Traits<_Tp>::vec_type vx_setall(const _Tp& a)
{
    return v256_setall<_Tp>(a);
}

#elif CV_SIMD_WIDTH == 16

template<typename _Tp> static inline
typename Type2Vec128_Traits<_Tp>::vec_type vx_setall(const _Tp& a)
{
    return v_setall<_Tp>(a);
}

#else
#error "Build configuration error, unsupported CV_SIMD_WIDTH"
#endif
#endif // OPENCV_HAL_INTRIN_HPP

View File

@ -336,6 +336,40 @@ template<typename R> struct TheTest
v_float64 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a);
#endif
#if CV_SIMD_WIDTH == 16
R setall_res1 = v_setall((LaneType)5);
R setall_res2 = v_setall<LaneType>(6);
#elif CV_SIMD_WIDTH == 32
R setall_res1 = v256_setall((LaneType)5);
R setall_res2 = v256_setall<LaneType>(6);
#elif CV_SIMD_WIDTH == 64
R setall_res1 = v512_setall((LaneType)5);
R setall_res2 = v512_setall<LaneType>(6);
#else
#error "Configuration error"
#endif
#if CV_SIMD_WIDTH > 0
Data<R> setall_res1_; v_store(setall_res1_.d, setall_res1);
Data<R> setall_res2_; v_store(setall_res2_.d, setall_res2);
for (int i = 0; i < R::nlanes; ++i)
{
SCOPED_TRACE(cv::format("i=%d", i));
EXPECT_EQ((LaneType)5, setall_res1_[i]);
EXPECT_EQ((LaneType)6, setall_res2_[i]);
}
#endif
R vx_setall_res1 = vx_setall((LaneType)11);
R vx_setall_res2 = vx_setall<LaneType>(12);
Data<R> vx_setall_res1_; v_store(vx_setall_res1_.d, vx_setall_res1);
Data<R> vx_setall_res2_; v_store(vx_setall_res2_.d, vx_setall_res2);
for (int i = 0; i < R::nlanes; ++i)
{
SCOPED_TRACE(cv::format("i=%d", i));
EXPECT_EQ((LaneType)11, vx_setall_res1_[i]);
EXPECT_EQ((LaneType)12, vx_setall_res2_[i]);
}
return *this;
}