mirror of
https://github.com/opencv/opencv.git
synced 2024-11-28 21:20:18 +08:00
core(intrin): v_load_low() workaround for aarch64+clang
This commit is contained in:
parent
e8ff5cac1d
commit
e8a703a71d
@ -875,13 +875,27 @@ OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int64x2, s64)
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float64x2, f64)
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) && defined(__aarch64__)
|
||||
// Workaround for clang on AArch64: avoid the LD2 instruction in v_load_low(). Details: https://github.com/opencv/opencv/issues/14863
|
||||
// Defines v_load_low() for one NEON vector type (clang + AArch64 variant).
//   _Tpvec - vector wrapper type returned by the generated function
//   _Tp    - element type the pointer argument refers to
//   suffix - type suffix pasted into v_reinterpret_as_<suffix>()
// The generated function reads a single uint64 from ptr with a scalar load
// and passes it as the first argument of v_uint64x2(); the second argument is
// the arbitrary constant 123456 (not zero), so callers must not rely on the
// contents of that half.
#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ \
    /* uint64 declared with CV_DECL_ALIGNED(1): ptr is not assumed to be 8-byte aligned */ \
    typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; \
    /* cast keeps the const qualifier of ptr instead of silently dropping it */ \
    uint64 v = *(const unaligned_uint64*)ptr; \
    return _Tpvec(v_reinterpret_as_##suffix(v_uint64x2(v, (uint64)123456))); \
}
|
||||
#else
|
||||
// Generic variant: defines v_load_low() for one NEON vector type.
//   _Tpvec - vector wrapper type returned by the generated function
//   _Tp    - element type the pointer argument refers to
//   suffix - NEON type suffix pasted into the vld1_/vdup_n_/vcombine_ intrinsics
// The result combines vld1_<suffix>(ptr) with a second half produced by
// vdup_n_<suffix>((_Tp)0).
#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ \
    return _Tpvec( \
        vcombine_##suffix( \
            vld1_##suffix(ptr),           /* half read from ptr */ \
            vdup_n_##suffix((_Tp)0)));    /* half filled with (_Tp)0 */ \
}
|
||||
#endif
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \
|
||||
inline _Tpvec v_load(const _Tp* ptr) \
|
||||
{ return _Tpvec(vld1q_##suffix(ptr)); } \
|
||||
inline _Tpvec v_load_aligned(const _Tp* ptr) \
|
||||
{ return _Tpvec(vld1q_##suffix(ptr)); } \
|
||||
inline _Tpvec v_load_low(const _Tp* ptr) \
|
||||
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); } \
|
||||
OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
|
||||
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
|
||||
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \
|
||||
inline void v_store(_Tp* ptr, const _Tpvec& a) \
|
||||
|
Loading…
Reference in New Issue
Block a user