mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
hal: vsx: further optimize v_signmask
Use the quadword bit permutation instruction to creatively move the sign bits to create the mask. Note that values above 127 will result in 0.
This commit is contained in:
parent
7295983964
commit
1031b7f4bc
@ -845,36 +845,24 @@ inline v_uint64x2 v_popcount(const v_int64x2& a)
|
||||
/** Mask **/
|
||||
inline int v_signmask(const v_uint8x16& a)
|
||||
{
|
||||
vec_uchar16 sv = vec_sr(a.val, vec_uchar16_sp(7));
|
||||
static const vec_uchar16 slm = {0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7};
|
||||
sv = vec_sl(sv, slm);
|
||||
vec_uint4 sv4 = vec_sum4s(sv, vec_uint4_z);
|
||||
static const vec_uint4 slm4 = {0, 0, 8, 8};
|
||||
sv4 = vec_sl(sv4, slm4);
|
||||
return vec_extract(vec_sums((vec_int4) sv4, vec_int4_z), 3);
|
||||
static const vec_uchar16 qperm = {120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0};
|
||||
return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2);
|
||||
}
|
||||
inline int v_signmask(const v_int8x16& a)
|
||||
{ return v_signmask(v_reinterpret_as_u8(a)); }
|
||||
|
||||
inline int v_signmask(const v_int16x8& a)
|
||||
{
|
||||
static const vec_ushort8 slm = {0, 1, 2, 3, 4, 5, 6, 7};
|
||||
vec_short8 sv = vec_sr(a.val, vec_ushort8_sp(15));
|
||||
sv = vec_sl(sv, slm);
|
||||
vec_int4 svi = vec_int4_z;
|
||||
svi = vec_sums(vec_sum4s(sv, svi), svi);
|
||||
return vec_extract(svi, 3);
|
||||
static const vec_uchar16 qperm = {112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128};
|
||||
return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2);
|
||||
}
|
||||
inline int v_signmask(const v_uint16x8& a)
|
||||
{ return v_signmask(v_reinterpret_as_s16(a)); }
|
||||
|
||||
inline int v_signmask(const v_int32x4& a)
|
||||
{
|
||||
static const vec_uint4 slm = {0, 1, 2, 3};
|
||||
vec_int4 sv = vec_sr(a.val, vec_uint4_sp(31));
|
||||
sv = vec_sl(sv, slm);
|
||||
sv = vec_sums(sv, vec_int4_z);
|
||||
return vec_extract(sv, 3);
|
||||
static const vec_uchar16 qperm = {96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128};
|
||||
return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2);
|
||||
}
|
||||
inline int v_signmask(const v_uint32x4& a)
|
||||
{ return v_signmask(v_reinterpret_as_s32(a)); }
|
||||
|
Loading…
Reference in New Issue
Block a user