mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 22:44:02 +08:00
core:test Expand hal_intrin tests to support SIMD256
This commit is contained in:
parent
5336b9ad19
commit
6499263b41
@ -154,7 +154,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
|
||||
// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
|
||||
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
|
||||
// available instruction set) will get vx_ prefix
|
||||
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v245_load())
|
||||
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load())
|
||||
#if CV_AVX2
|
||||
|
||||
#include "opencv2/core/hal/intrin_avx.hpp"
|
||||
@ -214,14 +214,16 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \
|
||||
inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \
|
||||
inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \
|
||||
inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \
|
||||
inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \
|
||||
inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \
|
||||
inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); }
|
||||
|
||||
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
|
||||
inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
|
||||
inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
|
||||
|
||||
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \
|
||||
inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
|
||||
inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
|
||||
|
||||
#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
|
||||
@ -316,7 +318,7 @@ template<typename _Tp> struct V_RegTraits
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256)
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load)
|
||||
inline void vx_cleanup() { v256_cleanup(); }
|
||||
#elif CV_SIMD128
|
||||
#elif CV_SIMD128 || CV_SIMD128_CPP
|
||||
typedef v_uint8x16 v_uint8;
|
||||
typedef v_int8x16 v_int8;
|
||||
typedef v_uint16x8 v_uint16;
|
||||
|
@ -407,6 +407,11 @@ inline v_float16x16 v256_load_f16(const short* ptr)
|
||||
inline v_float16x16 v256_load_f16_aligned(const short* ptr)
|
||||
{ return v_float16x16(_mm256_load_si256((const __m256i*)ptr)); }
|
||||
|
||||
inline v_float16x16 v256_load_f16_low(const short* ptr)
|
||||
{ return v_float16x16(v256_load_low(ptr).val); }
|
||||
inline v_float16x16 v256_load_f16_halves(const short* ptr0, const short* ptr1)
|
||||
{ return v_float16x16(v256_load_halves(ptr0, ptr1).val); }
|
||||
|
||||
inline void v_store(short* ptr, const v_float16x16& a)
|
||||
{ _mm256_storeu_si256((__m256i*)ptr, a.val); }
|
||||
inline void v_store_aligned(short* ptr, const v_float16x16& a)
|
||||
@ -819,94 +824,80 @@ OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float64x4, _mm256_max_pd)
|
||||
template<int imm>
|
||||
inline v_uint8x32 v_rotate_left(const v_uint8x32& a, const v_uint8x32& b)
|
||||
{
|
||||
enum {IMM_R = (16 - imm) & 0xFF};
|
||||
enum {IMM_R2 = (32 - imm) & 0xFF};
|
||||
|
||||
if (imm == 0) return a;
|
||||
if (imm == 32) return b;
|
||||
if (imm > 32) return v_uint8x32();
|
||||
|
||||
__m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x03);
|
||||
|
||||
switch(imm)
|
||||
{
|
||||
case 0: return a;
|
||||
case 32: return b;
|
||||
case 16: return v_uint8x32(swap);
|
||||
}
|
||||
|
||||
if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swap, 16 - imm));
|
||||
if (imm < 32) return v_uint8x32(_mm256_alignr_epi8(swap, b.val, 32 - imm));
|
||||
|
||||
return v_uint8x32();
|
||||
if (imm == 16) return v_uint8x32(swap);
|
||||
if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swap, IMM_R));
|
||||
return v_uint8x32(_mm256_alignr_epi8(swap, b.val, IMM_R2)); // imm < 32
|
||||
}
|
||||
|
||||
template<int imm>
|
||||
inline v_uint8x32 v_rotate_right(const v_uint8x32& a, const v_uint8x32& b)
|
||||
{
|
||||
enum {IMM_L = (imm - 16) & 0xFF};
|
||||
|
||||
if (imm == 0) return a;
|
||||
if (imm == 32) return b;
|
||||
if (imm > 32) return v_uint8x32();
|
||||
|
||||
__m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x21);
|
||||
|
||||
switch(imm)
|
||||
{
|
||||
case 0: return a;
|
||||
case 32: return b;
|
||||
case 16: return v_uint8x32(swap);
|
||||
}
|
||||
|
||||
if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swap, a.val, imm));
|
||||
if (imm < 32) return v_uint8x32(_mm256_alignr_epi8(b.val, swap, imm - 16));
|
||||
|
||||
return v_uint8x32();
|
||||
if (imm == 16) return v_uint8x32(swap);
|
||||
if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swap, a.val, imm));
|
||||
return v_uint8x32(_mm256_alignr_epi8(b.val, swap, IMM_L));
|
||||
}
|
||||
|
||||
template<int imm>
|
||||
inline v_uint8x32 v_rotate_left(const v_uint8x32& a)
|
||||
{
|
||||
v_uint8x32 res;
|
||||
enum {IMM_L = (imm - 16) & 0xFF};
|
||||
enum {IMM_R = (16 - imm) & 0xFF};
|
||||
|
||||
if (imm == 0) return a;
|
||||
if (imm > 32) return v_uint8x32();
|
||||
|
||||
// ESAC control[3] ? [127:0] = 0
|
||||
__m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(0, 0, 2, 0));
|
||||
|
||||
if (imm == 0)
|
||||
return a;
|
||||
if (imm == 16)
|
||||
res.val = swapz;
|
||||
else if (imm < 16)
|
||||
res.val = _mm256_alignr_epi8(a.val, swapz, 16 - imm);
|
||||
else if (imm < 32)
|
||||
res.val = _mm256_slli_si256(swapz, imm - 16);
|
||||
else
|
||||
return v_uint8x32();
|
||||
return res;
|
||||
if (imm == 16) return v_uint8x32(swapz);
|
||||
if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swapz, IMM_R));
|
||||
return v_uint8x32(_mm256_slli_si256(swapz, IMM_L));
|
||||
}
|
||||
|
||||
template<int imm>
|
||||
inline v_uint8x32 v_rotate_right(const v_uint8x32& a)
|
||||
{
|
||||
v_uint8x32 res;
|
||||
enum {IMM_L = (imm - 16) & 0xFF};
|
||||
|
||||
if (imm == 0) return a;
|
||||
if (imm > 32) return v_uint8x32();
|
||||
|
||||
// ESAC control[3] ? [127:0] = 0
|
||||
__m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(2, 0, 0, 1));
|
||||
|
||||
if (imm == 0)
|
||||
return a;
|
||||
if (imm == 16)
|
||||
res.val = swapz;
|
||||
else if (imm < 16)
|
||||
res.val = _mm256_alignr_epi8(swapz, a.val, imm);
|
||||
else if (imm < 32)
|
||||
res.val = _mm256_srli_si256(swapz, imm - 16);
|
||||
else
|
||||
return v_uint8x32();
|
||||
return res;
|
||||
if (imm == 16) return v_uint8x32(swapz);
|
||||
if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swapz, a.val, imm));
|
||||
return v_uint8x32(_mm256_srli_si256(swapz, IMM_L));
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast) \
|
||||
template<int imm> \
|
||||
inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
const int w = sizeof(typename _Tpvec::lane_type); \
|
||||
v_uint8x32 ret = intrin<imm*w>(v_reinterpret_as_u8(a), \
|
||||
v_reinterpret_as_u8(b)); \
|
||||
return _Tpvec(cast(ret.val)); \
|
||||
} \
|
||||
template<int imm> \
|
||||
inline _Tpvec intrin(const _Tpvec& a) \
|
||||
{ \
|
||||
const int w = sizeof(typename _Tpvec::lane_type); \
|
||||
v_uint8x32 ret = intrin<imm*w>(v_reinterpret_as_u8(a)); \
|
||||
return _Tpvec(cast(ret.val)); \
|
||||
#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast) \
|
||||
template<int imm> \
|
||||
inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \
|
||||
v_uint8x32 ret = intrin<IMMxW>(v_reinterpret_as_u8(a), \
|
||||
v_reinterpret_as_u8(b)); \
|
||||
return _Tpvec(cast(ret.val)); \
|
||||
} \
|
||||
template<int imm> \
|
||||
inline _Tpvec intrin(const _Tpvec& a) \
|
||||
{ \
|
||||
enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \
|
||||
v_uint8x32 ret = intrin<IMMxW>(v_reinterpret_as_u8(a)); \
|
||||
return _Tpvec(cast(ret.val)); \
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_AVX_ROTATE(_Tpvec) \
|
||||
|
@ -319,6 +319,9 @@ static inline void cv_vst1_f16(void* ptr, float16x4_t a)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef vdup_n_f16
|
||||
#define vdup_n_f16(v) (float16x4_t){v, v, v, v}
|
||||
#endif
|
||||
|
||||
struct v_float16x8
|
||||
{
|
||||
@ -889,6 +892,11 @@ inline v_float16x8 v_load_f16(const short* ptr)
|
||||
inline v_float16x8 v_load_f16_aligned(const short* ptr)
|
||||
{ return v_float16x8(cv_vld1q_f16(ptr)); }
|
||||
|
||||
inline v_float16x8 v_load_f16_low(const short* ptr)
|
||||
{ return v_float16x8(vcombine_f16(cv_vld1_f16(ptr), vdup_n_f16((float16_t)0))); }
|
||||
inline v_float16x8 v_load_f16_halves(const short* ptr0, const short* ptr1)
|
||||
{ return v_float16x8(vcombine_f16(cv_vld1_f16(ptr0), cv_vld1_f16(ptr1))); }
|
||||
|
||||
inline void v_store(short* ptr, const v_float16x8& a)
|
||||
{ cv_vst1q_f16(ptr, a.val); }
|
||||
inline void v_store_aligned(short* ptr, const v_float16x8& a)
|
||||
|
@ -1308,6 +1308,11 @@ inline v_float16x8 v_load_f16(const short* ptr)
|
||||
inline v_float16x8 v_load_f16_aligned(const short* ptr)
|
||||
{ return v_float16x8(_mm_load_si128((const __m128i*)ptr)); }
|
||||
|
||||
inline v_float16x8 v_load_f16_low(const short* ptr)
|
||||
{ return v_float16x8(v_load_low(ptr).val); }
|
||||
inline v_float16x8 v_load_f16_halves(const short* ptr0, const short* ptr1)
|
||||
{ return v_float16x8(v_load_halves(ptr0, ptr1).val); }
|
||||
|
||||
inline void v_store(short* ptr, const v_float16x8& a)
|
||||
{ _mm_storeu_si128((__m128i*)ptr, a.val); }
|
||||
inline void v_store_aligned(short* ptr, const v_float16x8& a)
|
||||
|
5
modules/core/test/test_intrin.avx2.cpp
Normal file
5
modules/core/test/test_intrin.avx2.cpp
Normal file
@ -0,0 +1,5 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#include "test_precomp.hpp"
|
||||
#include "test_intrin.simd.hpp"
|
@ -2,249 +2,101 @@
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#include "test_precomp.hpp"
|
||||
#include "test_intrin.simd.hpp"
|
||||
|
||||
#include "test_intrin_utils.hpp"
|
||||
|
||||
#define CV_CPU_SIMD_FILENAME "test_intrin_utils.hpp"
|
||||
#define CV_CPU_SIMD_FILENAME "test_intrin.simd.hpp"
|
||||
#define CV_CPU_DISPATCH_MODE FP16
|
||||
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
|
||||
|
||||
|
||||
using namespace cv;
|
||||
#define CV_CPU_DISPATCH_MODE AVX2
|
||||
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
|
||||
|
||||
namespace opencv_test { namespace hal {
|
||||
using namespace CV_CPU_OPTIMIZATION_NAMESPACE;
|
||||
|
||||
//============= 8-bit integer =====================================================================
|
||||
TEST(hal_intrin, uint8x16)
|
||||
{ test_hal_intrin_uint8(); }
|
||||
|
||||
TEST(hal_intrin, uint8x16) {
|
||||
TheTest<v_uint8x16>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_expand_q()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_cmp()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
||||
.test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
|
||||
;
|
||||
}
|
||||
TEST(hal_intrin, int8x16)
|
||||
{ test_hal_intrin_int8(); }
|
||||
|
||||
TEST(hal_intrin, int8x16) {
|
||||
TheTest<v_int8x16>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_expand_q()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_cmp()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_abs()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
|
||||
;
|
||||
}
|
||||
TEST(hal_intrin, uint16x8)
|
||||
{ test_hal_intrin_uint16(); }
|
||||
|
||||
//============= 16-bit integer =====================================================================
|
||||
TEST(hal_intrin, int16x8)
|
||||
{ test_hal_intrin_int16(); }
|
||||
|
||||
TEST(hal_intrin, uint16x8) {
|
||||
TheTest<v_uint16x8>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
||||
.test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
|
||||
;
|
||||
}
|
||||
TEST(hal_intrin, int32x4)
|
||||
{ test_hal_intrin_int32(); }
|
||||
|
||||
TEST(hal_intrin, int16x8) {
|
||||
TheTest<v_int16x8>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
.test_dot_prod()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_abs()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
|
||||
;
|
||||
}
|
||||
TEST(hal_intrin, uint32x4)
|
||||
{ test_hal_intrin_uint32(); }
|
||||
|
||||
//============= 32-bit integer =====================================================================
|
||||
TEST(hal_intrin, uint64x2)
|
||||
{ test_hal_intrin_uint64(); }
|
||||
|
||||
TEST(hal_intrin, uint32x4) {
|
||||
TheTest<v_uint32x4>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||
.test_transpose()
|
||||
;
|
||||
}
|
||||
TEST(hal_intrin, int64x2)
|
||||
{ test_hal_intrin_int64(); }
|
||||
|
||||
TEST(hal_intrin, int32x4) {
|
||||
TheTest<v_int32x4>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_abs()
|
||||
.test_cmp()
|
||||
.test_popcount()
|
||||
.test_shift<1>().test_shift<8>()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||
.test_float_cvt32()
|
||||
.test_float_cvt64()
|
||||
.test_transpose()
|
||||
;
|
||||
}
|
||||
TEST(hal_intrin, float32x4)
|
||||
{ test_hal_intrin_float32(); }
|
||||
|
||||
//============= 64-bit integer =====================================================================
|
||||
TEST(hal_intrin, float64x2)
|
||||
{ test_hal_intrin_float64(); }
|
||||
|
||||
TEST(hal_intrin, uint64x2) {
|
||||
TheTest<v_uint64x2>()
|
||||
.test_loadstore()
|
||||
.test_addsub()
|
||||
.test_shift<1>().test_shift<8>()
|
||||
.test_logic()
|
||||
.test_extract<0>().test_extract<1>()
|
||||
.test_rotate<0>().test_rotate<1>()
|
||||
;
|
||||
}
|
||||
|
||||
TEST(hal_intrin, int64x2) {
|
||||
TheTest<v_int64x2>()
|
||||
.test_loadstore()
|
||||
.test_addsub()
|
||||
.test_shift<1>().test_shift<8>()
|
||||
.test_logic()
|
||||
.test_extract<0>().test_extract<1>()
|
||||
.test_rotate<0>().test_rotate<1>()
|
||||
;
|
||||
}
|
||||
|
||||
//============= Floating point =====================================================================
|
||||
|
||||
TEST(hal_intrin, float32x4) {
|
||||
TheTest<v_float32x4>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_interleave_2channel()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_div()
|
||||
.test_cmp()
|
||||
.test_sqrt_abs()
|
||||
.test_min_max()
|
||||
.test_float_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_unpack()
|
||||
.test_float_math()
|
||||
.test_float_cvt64()
|
||||
.test_matmul()
|
||||
.test_transpose()
|
||||
.test_reduce_sum4()
|
||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||
;
|
||||
}
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
TEST(hal_intrin, float64x2) {
|
||||
TheTest<v_float64x2>()
|
||||
.test_loadstore()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_div()
|
||||
.test_cmp()
|
||||
.test_sqrt_abs()
|
||||
.test_min_max()
|
||||
.test_float_absdiff()
|
||||
.test_mask()
|
||||
.test_unpack()
|
||||
.test_float_math()
|
||||
.test_float_cvt32()
|
||||
.test_extract<0>().test_extract<1>()
|
||||
.test_rotate<0>().test_rotate<1>()
|
||||
;
|
||||
}
|
||||
#endif
|
||||
|
||||
TEST(hal_intrin,float16)
|
||||
TEST(hal_intrin, float16x8)
|
||||
{
|
||||
CV_CPU_CALL_FP16_(test_hal_intrin_float16, ());
|
||||
throw SkipTestException("Unsupported hardware: FP16 is not available");
|
||||
}
|
||||
|
||||
}}
|
||||
#define DISPATCH_SIMD_MODES AVX2
|
||||
#define DISPATCH_SIMD_NAME "SIMD256"
|
||||
#define DISPATCH_SIMD(fun) \
|
||||
do { \
|
||||
CV_CPU_DISPATCH(fun, (), DISPATCH_SIMD_MODES); \
|
||||
throw SkipTestException( \
|
||||
"Unsupported hardware: " \
|
||||
DISPATCH_SIMD_NAME \
|
||||
" is not available" \
|
||||
); \
|
||||
} while(0)
|
||||
|
||||
TEST(hal_intrin256, uint8x32)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_uint8); }
|
||||
|
||||
TEST(hal_intrin256, int8x32)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_int8); }
|
||||
|
||||
TEST(hal_intrin256, uint16x16)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_uint16); }
|
||||
|
||||
TEST(hal_intrin256, int16x16)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_int16); }
|
||||
|
||||
TEST(hal_intrin256, uint32x8)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_uint32); }
|
||||
|
||||
TEST(hal_intrin256, int32x8)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_int32); }
|
||||
|
||||
TEST(hal_intrin256, uint64x4)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_uint64); }
|
||||
|
||||
TEST(hal_intrin256, int64x4)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_int64); }
|
||||
|
||||
TEST(hal_intrin256, float32x8)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_float32); }
|
||||
|
||||
TEST(hal_intrin256, float64x4)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_float64); }
|
||||
|
||||
TEST(hal_intrin256, float16x16)
|
||||
{
|
||||
if (!CV_CPU_HAS_SUPPORT_FP16)
|
||||
throw SkipTestException("Unsupported hardware: FP16 is not available");
|
||||
DISPATCH_SIMD(test_hal_intrin_float16);
|
||||
}
|
||||
|
||||
}} // namespace
|
296
modules/core/test/test_intrin.simd.hpp
Normal file
296
modules/core/test/test_intrin.simd.hpp
Normal file
@ -0,0 +1,296 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#include "test_precomp.hpp"
|
||||
#include "test_intrin_utils.hpp"
|
||||
|
||||
namespace opencv_test { namespace hal {
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
||||
|
||||
void test_hal_intrin_uint8();
|
||||
void test_hal_intrin_int8();
|
||||
void test_hal_intrin_uint16();
|
||||
void test_hal_intrin_int16();
|
||||
void test_hal_intrin_uint32();
|
||||
void test_hal_intrin_int32();
|
||||
void test_hal_intrin_uint64();
|
||||
void test_hal_intrin_int64();
|
||||
void test_hal_intrin_float32();
|
||||
void test_hal_intrin_float64();
|
||||
|
||||
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
//============= 8-bit integer =====================================================================
|
||||
|
||||
void test_hal_intrin_uint8()
|
||||
{
|
||||
TheTest<v_uint8>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_expand_q()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_cmp()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
||||
.test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
|
||||
;
|
||||
|
||||
#if CV_SIMD256
|
||||
TheTest<v_uint8>()
|
||||
.test_pack<9>().test_pack<10>().test_pack<13>().test_pack<15>()
|
||||
.test_pack_u<9>().test_pack_u<10>().test_pack_u<13>().test_pack_u<15>()
|
||||
.test_extract<16>().test_extract<17>().test_extract<23>().test_extract<31>()
|
||||
.test_rotate<16>().test_rotate<17>().test_rotate<23>().test_rotate<31>()
|
||||
;
|
||||
#endif
|
||||
}
|
||||
|
||||
void test_hal_intrin_int8()
|
||||
{
|
||||
TheTest<v_int8>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_expand_q()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_cmp()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_abs()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
|
||||
;
|
||||
}
|
||||
|
||||
//============= 16-bit integer =====================================================================
|
||||
|
||||
void test_hal_intrin_uint16()
|
||||
{
|
||||
TheTest<v_uint16>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
||||
.test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
|
||||
;
|
||||
}
|
||||
|
||||
void test_hal_intrin_int16()
|
||||
{
|
||||
TheTest<v_int16>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
.test_dot_prod()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_abs()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
|
||||
;
|
||||
}
|
||||
|
||||
//============= 32-bit integer =====================================================================
|
||||
|
||||
void test_hal_intrin_uint32()
|
||||
{
|
||||
TheTest<v_uint32>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||
.test_transpose()
|
||||
;
|
||||
}
|
||||
|
||||
void test_hal_intrin_int32()
|
||||
{
|
||||
TheTest<v_int32>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_abs()
|
||||
.test_cmp()
|
||||
.test_popcount()
|
||||
.test_shift<1>().test_shift<8>()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||
.test_float_cvt32()
|
||||
.test_float_cvt64()
|
||||
.test_transpose()
|
||||
;
|
||||
}
|
||||
|
||||
//============= 64-bit integer =====================================================================
|
||||
|
||||
void test_hal_intrin_uint64()
|
||||
{
|
||||
TheTest<v_uint64>()
|
||||
.test_loadstore()
|
||||
.test_addsub()
|
||||
.test_shift<1>().test_shift<8>()
|
||||
.test_logic()
|
||||
.test_extract<0>().test_extract<1>()
|
||||
.test_rotate<0>().test_rotate<1>()
|
||||
;
|
||||
}
|
||||
|
||||
void test_hal_intrin_int64()
|
||||
{
|
||||
TheTest<v_int64>()
|
||||
.test_loadstore()
|
||||
.test_addsub()
|
||||
.test_shift<1>().test_shift<8>()
|
||||
.test_logic()
|
||||
.test_extract<0>().test_extract<1>()
|
||||
.test_rotate<0>().test_rotate<1>()
|
||||
;
|
||||
}
|
||||
|
||||
//============= Floating point =====================================================================
|
||||
void test_hal_intrin_float32()
|
||||
{
|
||||
TheTest<v_float32>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_interleave_2channel()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_div()
|
||||
.test_cmp()
|
||||
.test_sqrt_abs()
|
||||
.test_min_max()
|
||||
.test_float_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_unpack()
|
||||
.test_float_math()
|
||||
.test_float_cvt64()
|
||||
.test_matmul()
|
||||
.test_transpose()
|
||||
.test_reduce_sum4()
|
||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||
;
|
||||
|
||||
#if CV_SIMD256
|
||||
TheTest<v_float32>()
|
||||
.test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>()
|
||||
.test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>()
|
||||
;
|
||||
#endif
|
||||
}
|
||||
|
||||
void test_hal_intrin_float64()
|
||||
{
|
||||
#if CV_SIMD_64F
|
||||
TheTest<v_float64>()
|
||||
.test_loadstore()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_div()
|
||||
.test_cmp()
|
||||
.test_sqrt_abs()
|
||||
.test_min_max()
|
||||
.test_float_absdiff()
|
||||
.test_mask()
|
||||
.test_unpack()
|
||||
.test_float_math()
|
||||
.test_float_cvt32()
|
||||
.test_extract<0>().test_extract<1>()
|
||||
.test_rotate<0>().test_rotate<1>()
|
||||
;
|
||||
|
||||
#if CV_SIMD256
|
||||
TheTest<v_float64>()
|
||||
.test_extract<2>().test_extract<3>()
|
||||
.test_rotate<2>().test_rotate<3>()
|
||||
;
|
||||
#endif //CV_SIMD256
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
#if CV_FP16 && CV_SIMD_WIDTH > 16
|
||||
void test_hal_intrin_float16()
|
||||
{
|
||||
TheTest<v_float16>()
|
||||
.test_loadstore_fp16()
|
||||
.test_float_cvt_fp16()
|
||||
;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||
|
||||
}} //namespace
|
@ -13,6 +13,27 @@ void test_hal_intrin_float16();
|
||||
template <typename R> struct Data;
|
||||
template <int N> struct initializer;
|
||||
|
||||
template <> struct initializer<64>
|
||||
{
|
||||
template <typename R> static R init(const Data<R> & d)
|
||||
{
|
||||
return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15],
|
||||
d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31],
|
||||
d[32], d[33], d[34], d[35], d[36], d[37], d[38], d[39], d[40], d[41], d[42], d[43], d[44], d[45], d[46], d[47],
|
||||
d[48], d[49], d[50], d[51], d[52], d[53], d[54], d[55], d[56], d[57], d[58], d[59], d[50], d[51], d[52], d[53],
|
||||
d[54], d[55], d[56], d[57], d[58], d[59], d[60], d[61], d[62], d[63]);
|
||||
}
|
||||
};
|
||||
|
||||
template <> struct initializer<32>
|
||||
{
|
||||
template <typename R> static R init(const Data<R> & d)
|
||||
{
|
||||
return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15],
|
||||
d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31]);
|
||||
}
|
||||
};
|
||||
|
||||
template <> struct initializer<16>
|
||||
{
|
||||
template <typename R> static R init(const Data<R> & d)
|
||||
@ -125,6 +146,17 @@ template <typename R> struct Data
|
||||
{
|
||||
return d + R::nlanes / 2;
|
||||
}
|
||||
LaneType sum(int s, int c)
|
||||
{
|
||||
LaneType res = 0;
|
||||
for (int i = s; i < s + c; ++i)
|
||||
res += d[i];
|
||||
return res;
|
||||
}
|
||||
LaneType sum()
|
||||
{
|
||||
return sum(0, R::nlanes);
|
||||
}
|
||||
bool operator==(const Data<R> & other) const
|
||||
{
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
@ -147,13 +179,12 @@ template <typename R> struct Data
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
LaneType d[R::nlanes];
|
||||
};
|
||||
|
||||
template<typename R> struct AlignedData
|
||||
{
|
||||
Data<R> CV_DECL_ALIGNED(16) a; // aligned
|
||||
Data<R> CV_DECL_ALIGNED(CV_SIMD_WIDTH) a; // aligned
|
||||
char dummy;
|
||||
Data<R> u; // unaligned
|
||||
};
|
||||
@ -207,22 +238,22 @@ template<typename R> struct TheTest
|
||||
AlignedData<R> out;
|
||||
|
||||
// check if addresses are aligned and unaligned respectively
|
||||
EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
|
||||
EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
|
||||
EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
|
||||
EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
|
||||
EXPECT_EQ((size_t)0, (size_t)&data.a.d % CV_SIMD_WIDTH);
|
||||
EXPECT_NE((size_t)0, (size_t)&data.u.d % CV_SIMD_WIDTH);
|
||||
EXPECT_EQ((size_t)0, (size_t)&out.a.d % CV_SIMD_WIDTH);
|
||||
EXPECT_NE((size_t)0, (size_t)&out.u.d % CV_SIMD_WIDTH);
|
||||
|
||||
// check some initialization methods
|
||||
R r1 = data.a;
|
||||
R r2 = v_load(data.u.d);
|
||||
R r3 = v_load_aligned(data.a.d);
|
||||
R r2 = vx_load(data.u.d);
|
||||
R r3 = vx_load_aligned(data.a.d);
|
||||
R r4(r2);
|
||||
EXPECT_EQ(data.a[0], r1.get0());
|
||||
EXPECT_EQ(data.u[0], r2.get0());
|
||||
EXPECT_EQ(data.a[0], r3.get0());
|
||||
EXPECT_EQ(data.u[0], r4.get0());
|
||||
|
||||
R r_low = v_load_low((LaneType*)data.u.d);
|
||||
R r_low = vx_load_low((LaneType*)data.u.d);
|
||||
EXPECT_EQ(data.u[0], r_low.get0());
|
||||
v_store(out.u.d, r_low);
|
||||
for (int i = 0; i < R::nlanes/2; ++i)
|
||||
@ -230,7 +261,7 @@ template<typename R> struct TheTest
|
||||
EXPECT_EQ((LaneType)data.u[i], (LaneType)out.u[i]);
|
||||
}
|
||||
|
||||
R r_low_align8byte = v_load_low((LaneType*)((char*)data.u.d + 8));
|
||||
R r_low_align8byte = vx_load_low((LaneType*)((char*)data.u.d + (CV_SIMD_WIDTH / 2)));
|
||||
EXPECT_EQ(data.u[R::nlanes/2], r_low_align8byte.get0());
|
||||
v_store(out.u.d, r_low_align8byte);
|
||||
for (int i = 0; i < R::nlanes/2; ++i)
|
||||
@ -255,7 +286,7 @@ template<typename R> struct TheTest
|
||||
|
||||
// check halves load correctness
|
||||
res.clear();
|
||||
R r6 = v_load_halves(d.d, d.mid());
|
||||
R r6 = vx_load_halves(d.d, d.mid());
|
||||
v_store(res.d, r6);
|
||||
EXPECT_EQ(d, res);
|
||||
|
||||
@ -270,17 +301,17 @@ template<typename R> struct TheTest
|
||||
}
|
||||
|
||||
// reinterpret_as
|
||||
v_uint8x16 vu8 = v_reinterpret_as_u8(r1); out.a.clear(); v_store((uchar*)out.a.d, vu8); EXPECT_EQ(data.a, out.a);
|
||||
v_int8x16 vs8 = v_reinterpret_as_s8(r1); out.a.clear(); v_store((schar*)out.a.d, vs8); EXPECT_EQ(data.a, out.a);
|
||||
v_uint16x8 vu16 = v_reinterpret_as_u16(r1); out.a.clear(); v_store((ushort*)out.a.d, vu16); EXPECT_EQ(data.a, out.a);
|
||||
v_int16x8 vs16 = v_reinterpret_as_s16(r1); out.a.clear(); v_store((short*)out.a.d, vs16); EXPECT_EQ(data.a, out.a);
|
||||
v_uint32x4 vu32 = v_reinterpret_as_u32(r1); out.a.clear(); v_store((unsigned*)out.a.d, vu32); EXPECT_EQ(data.a, out.a);
|
||||
v_int32x4 vs32 = v_reinterpret_as_s32(r1); out.a.clear(); v_store((int*)out.a.d, vs32); EXPECT_EQ(data.a, out.a);
|
||||
v_uint64x2 vu64 = v_reinterpret_as_u64(r1); out.a.clear(); v_store((uint64*)out.a.d, vu64); EXPECT_EQ(data.a, out.a);
|
||||
v_int64x2 vs64 = v_reinterpret_as_s64(r1); out.a.clear(); v_store((int64*)out.a.d, vs64); EXPECT_EQ(data.a, out.a);
|
||||
v_float32x4 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32); EXPECT_EQ(data.a, out.a);
|
||||
#if CV_SIMD128_64F
|
||||
v_float64x2 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a);
|
||||
v_uint8 vu8 = v_reinterpret_as_u8(r1); out.a.clear(); v_store((uchar*)out.a.d, vu8); EXPECT_EQ(data.a, out.a);
|
||||
v_int8 vs8 = v_reinterpret_as_s8(r1); out.a.clear(); v_store((schar*)out.a.d, vs8); EXPECT_EQ(data.a, out.a);
|
||||
v_uint16 vu16 = v_reinterpret_as_u16(r1); out.a.clear(); v_store((ushort*)out.a.d, vu16); EXPECT_EQ(data.a, out.a);
|
||||
v_int16 vs16 = v_reinterpret_as_s16(r1); out.a.clear(); v_store((short*)out.a.d, vs16); EXPECT_EQ(data.a, out.a);
|
||||
v_uint32 vu32 = v_reinterpret_as_u32(r1); out.a.clear(); v_store((unsigned*)out.a.d, vu32); EXPECT_EQ(data.a, out.a);
|
||||
v_int32 vs32 = v_reinterpret_as_s32(r1); out.a.clear(); v_store((int*)out.a.d, vs32); EXPECT_EQ(data.a, out.a);
|
||||
v_uint64 vu64 = v_reinterpret_as_u64(r1); out.a.clear(); v_store((uint64*)out.a.d, vu64); EXPECT_EQ(data.a, out.a);
|
||||
v_int64 vs64 = v_reinterpret_as_s64(r1); out.a.clear(); v_store((int64*)out.a.d, vs64); EXPECT_EQ(data.a, out.a);
|
||||
v_float32 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32); EXPECT_EQ(data.a, out.a);
|
||||
#if CV_SIMD_64F
|
||||
v_float64 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a);
|
||||
#endif
|
||||
|
||||
return *this;
|
||||
@ -357,7 +388,7 @@ template<typename R> struct TheTest
|
||||
Data<R> dataA;
|
||||
R a = dataA;
|
||||
|
||||
Data<Rx2> resB = v_load_expand(dataA.d);
|
||||
Data<Rx2> resB = vx_load_expand(dataA.d);
|
||||
|
||||
Rx2 c, d;
|
||||
v_expand(a, c, d);
|
||||
@ -378,7 +409,7 @@ template<typename R> struct TheTest
|
||||
{
|
||||
typedef typename V_RegTraits<R>::q_reg Rx4;
|
||||
Data<R> data;
|
||||
Data<Rx4> out = v_load_expand_q(data.d);
|
||||
Data<Rx4> out = vx_load_expand_q(data.d);
|
||||
const int n = Rx4::nlanes;
|
||||
for (int i = 0; i < n; ++i)
|
||||
EXPECT_EQ(data[i], out[i]);
|
||||
@ -610,7 +641,13 @@ template<typename R> struct TheTest
|
||||
|
||||
TheTest & test_popcount()
|
||||
{
|
||||
static unsigned popcountTable[] = {0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33};
|
||||
static unsigned popcountTable[] = {
|
||||
0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33,
|
||||
35, 37, 40, 42, 45, 48, 52, 54, 57, 60, 64, 67, 71, 75, 80, 81,
|
||||
83, 85, 88, 90, 93, 96, 100, 102, 105, 108, 112, 115, 119, 123,
|
||||
128, 130, 133, 136, 140, 143, 147, 151, 156, 159, 163, 167, 172,
|
||||
176, 181, 186, 192, 193
|
||||
};
|
||||
Data<R> dataA;
|
||||
R a = dataA;
|
||||
|
||||
@ -918,7 +955,7 @@ template<typename R> struct TheTest
|
||||
|
||||
TheTest & test_float_cvt32()
|
||||
{
|
||||
typedef v_float32x4 Rt;
|
||||
typedef v_float32 Rt;
|
||||
Data<R> dataA;
|
||||
dataA *= 1.1;
|
||||
R a = dataA;
|
||||
@ -934,8 +971,8 @@ template<typename R> struct TheTest
|
||||
|
||||
TheTest & test_float_cvt64()
|
||||
{
|
||||
#if CV_SIMD128_64F
|
||||
typedef v_float64x2 Rt;
|
||||
#if CV_SIMD_64F
|
||||
typedef v_float64 Rt;
|
||||
Data<R> dataA;
|
||||
dataA *= 1.1;
|
||||
R a = dataA;
|
||||
@ -965,23 +1002,29 @@ template<typename R> struct TheTest
|
||||
R v = dataV, a = dataA, b = dataB, c = dataC, d = dataD;
|
||||
|
||||
Data<R> res = v_matmul(v, a, b, c, d);
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
for (int i = 0; i < R::nlanes; i += 4)
|
||||
{
|
||||
LaneType val = dataV[0] * dataA[i]
|
||||
+ dataV[1] * dataB[i]
|
||||
+ dataV[2] * dataC[i]
|
||||
+ dataV[3] * dataD[i];
|
||||
EXPECT_DOUBLE_EQ(val, res[i]);
|
||||
for (int j = i; j < i + 4; ++j)
|
||||
{
|
||||
LaneType val = dataV[i] * dataA[j]
|
||||
+ dataV[i + 1] * dataB[j]
|
||||
+ dataV[i + 2] * dataC[j]
|
||||
+ dataV[i + 3] * dataD[j];
|
||||
EXPECT_COMPARE_EQ(val, res[j]);
|
||||
}
|
||||
}
|
||||
|
||||
Data<R> resAdd = v_matmuladd(v, a, b, c, d);
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
for (int i = 0; i < R::nlanes; i += 4)
|
||||
{
|
||||
LaneType val = dataV[0] * dataA[i]
|
||||
+ dataV[1] * dataB[i]
|
||||
+ dataV[2] * dataC[i]
|
||||
+ dataD[i];
|
||||
EXPECT_DOUBLE_EQ(val, resAdd[i]);
|
||||
for (int j = i; j < i + 4; ++j)
|
||||
{
|
||||
LaneType val = dataV[i] * dataA[j]
|
||||
+ dataV[i + 1] * dataB[j]
|
||||
+ dataV[i + 2] * dataC[j]
|
||||
+ dataD[j];
|
||||
EXPECT_COMPARE_EQ(val, resAdd[j]);
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
@ -998,30 +1041,36 @@ template<typename R> struct TheTest
|
||||
e, f, g, h);
|
||||
|
||||
Data<R> res[4] = {e, f, g, h};
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
for (int i = 0; i < R::nlanes; i += 4)
|
||||
{
|
||||
EXPECT_EQ(dataA[i], res[i][0]);
|
||||
EXPECT_EQ(dataB[i], res[i][1]);
|
||||
EXPECT_EQ(dataC[i], res[i][2]);
|
||||
EXPECT_EQ(dataD[i], res[i][3]);
|
||||
for (int j = 0; j < 4; ++j)
|
||||
{
|
||||
EXPECT_EQ(dataA[i + j], res[j][i]);
|
||||
EXPECT_EQ(dataB[i + j], res[j][i + 1]);
|
||||
EXPECT_EQ(dataC[i + j], res[j][i + 2]);
|
||||
EXPECT_EQ(dataD[i + j], res[j][i + 3]);
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
TheTest & test_reduce_sum4()
|
||||
{
|
||||
R a(0.1f, 0.02f, 0.003f, 0.0004f);
|
||||
R b(1, 20, 300, 4000);
|
||||
R c(10, 2, 0.3f, 0.04f);
|
||||
R d(1, 2, 3, 4);
|
||||
Data<R> dataA, dataB, dataC, dataD;
|
||||
dataB *= 0.01f;
|
||||
dataC *= 0.001f;
|
||||
dataD *= 0.002f;
|
||||
|
||||
R sum = v_reduce_sum4(a, b, c, d);
|
||||
R a = dataA, b = dataB, c = dataC, d = dataD;
|
||||
Data<R> res = v_reduce_sum4(a, b, c, d);
|
||||
|
||||
Data<R> res = sum;
|
||||
EXPECT_EQ(0.1234f, res[0]);
|
||||
EXPECT_EQ(4321.0f, res[1]);
|
||||
EXPECT_EQ(12.34f, res[2]);
|
||||
EXPECT_EQ(10.0f, res[3]);
|
||||
for (int i = 0; i < R::nlanes; i += 4)
|
||||
{
|
||||
EXPECT_COMPARE_EQ(dataA.sum(i, 4), res[i]);
|
||||
EXPECT_COMPARE_EQ(dataB.sum(i, 4), res[i + 1]);
|
||||
EXPECT_COMPARE_EQ(dataC.sum(i, 4), res[i + 2]);
|
||||
EXPECT_COMPARE_EQ(dataD.sum(i, 4), res[i + 3]);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -1032,14 +1081,14 @@ template<typename R> struct TheTest
|
||||
AlignedData<R> out;
|
||||
|
||||
// check if addresses are aligned and unaligned respectively
|
||||
EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
|
||||
EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
|
||||
EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
|
||||
EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
|
||||
EXPECT_EQ((size_t)0, (size_t)&data.a.d % CV_SIMD_WIDTH);
|
||||
EXPECT_NE((size_t)0, (size_t)&data.u.d % CV_SIMD_WIDTH);
|
||||
EXPECT_EQ((size_t)0, (size_t)&out.a.d % CV_SIMD_WIDTH);
|
||||
EXPECT_NE((size_t)0, (size_t)&out.u.d % CV_SIMD_WIDTH);
|
||||
|
||||
// check some initialization methods
|
||||
R r1 = data.u;
|
||||
R r2 = v_load_f16(data.a.d);
|
||||
R r2 = vx_load_f16(data.a.d);
|
||||
R r3(r2);
|
||||
EXPECT_EQ(data.u[0], r1.get0());
|
||||
EXPECT_EQ(data.a[0], r2.get0());
|
||||
|
Loading…
Reference in New Issue
Block a user