mirror of
https://github.com/opencv/opencv.git
synced 2025-07-20 19:17:36 +08:00
Merge pull request #26750 from mshabunin:fix-ppc64-vsx
core: fixed VSX intrinsics implementation
This commit is contained in:
commit
bb79493a89
@@ -262,7 +262,7 @@ OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float64x2, double)
|
|||||||
inline _Tpvec v_setzero_##suffix() { return _Tpvec(vec_splats((_Tp)0)); } \
|
inline _Tpvec v_setzero_##suffix() { return _Tpvec(vec_splats((_Tp)0)); } \
|
||||||
inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));} \
|
inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));} \
|
||||||
template <> inline _Tpvec v_setzero_() { return v_setzero_##suffix(); } \
|
template <> inline _Tpvec v_setzero_() { return v_setzero_##suffix(); } \
|
||||||
template <> inline _Tpvec v_setall_(_Tp v) { return v_setall_##suffix(_Tp v); } \
|
template <> inline _Tpvec v_setall_(_Tp v) { return v_setall_##suffix(v); } \
|
||||||
template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a) \
|
template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a) \
|
||||||
{ return _Tpvec((cast)a.val); }
|
{ return _Tpvec((cast)a.val); }
|
||||||
|
|
||||||
@@ -650,11 +650,11 @@ OPENCV_HAL_IMPL_VSX_SELECT(v_float64x2, vec_bdword2_c)
|
|||||||
#define OPENCV_HAL_IMPL_VSX_INT_CMP_OP(_Tpvec) \
|
#define OPENCV_HAL_IMPL_VSX_INT_CMP_OP(_Tpvec) \
|
||||||
inline _Tpvec v_eq(const _Tpvec& a, const _Tpvec& b) \
|
inline _Tpvec v_eq(const _Tpvec& a, const _Tpvec& b) \
|
||||||
{ return _Tpvec(vec_cmpeq(a.val, b.val)); } \
|
{ return _Tpvec(vec_cmpeq(a.val, b.val)); } \
|
||||||
inline _Tpvec V_ne(const _Tpvec& a, const _Tpvec& b) \
|
inline _Tpvec v_ne(const _Tpvec& a, const _Tpvec& b) \
|
||||||
{ return _Tpvec(vec_cmpne(a.val, b.val)); } \
|
{ return _Tpvec(vec_cmpne(a.val, b.val)); } \
|
||||||
inline _Tpvec v_lt(const _Tpvec& a, const _Tpvec& b) \
|
inline _Tpvec v_lt(const _Tpvec& a, const _Tpvec& b) \
|
||||||
{ return _Tpvec(vec_cmplt(a.val, b.val)); } \
|
{ return _Tpvec(vec_cmplt(a.val, b.val)); } \
|
||||||
inline _Tpvec V_gt(const _Tpvec& a, const _Tpvec& b) \
|
inline _Tpvec v_gt(const _Tpvec& a, const _Tpvec& b) \
|
||||||
{ return _Tpvec(vec_cmpgt(a.val, b.val)); } \
|
{ return _Tpvec(vec_cmpgt(a.val, b.val)); } \
|
||||||
inline _Tpvec v_le(const _Tpvec& a, const _Tpvec& b) \
|
inline _Tpvec v_le(const _Tpvec& a, const _Tpvec& b) \
|
||||||
{ return _Tpvec(vec_cmple(a.val, b.val)); } \
|
{ return _Tpvec(vec_cmple(a.val, b.val)); } \
|
||||||
@@ -1507,7 +1507,7 @@ inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, cons
|
|||||||
inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b)
|
inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b)
|
||||||
{ return v_dotprod(a, b); }
|
{ return v_dotprod(a, b); }
|
||||||
inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
|
inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
|
||||||
{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)) + c; }
|
{ return v_add(v_int32x4(vec_msum(a.val, b.val, vec_int4_z)), c); }
|
||||||
// 32 >> 64
|
// 32 >> 64
|
||||||
inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b)
|
inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b)
|
||||||
{ return v_dotprod(a, b); }
|
{ return v_dotprod(a, b); }
|
||||||
@@ -1518,7 +1518,7 @@ inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_
|
|||||||
inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b)
|
inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b)
|
||||||
{ return v_dotprod_expand(a, b); }
|
{ return v_dotprod_expand(a, b); }
|
||||||
inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
|
inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
|
||||||
{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)) + c; }
|
{ return v_add(v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)), c); }
|
||||||
|
|
||||||
inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b)
|
inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user