Mirror of https://github.com/opencv/opencv.git
build: fix AVX2/AVX512 build failures due to intrinsics operator usage
commit 4a81b4e51f
parent f9a297e52c
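Every hunk below applies the same fix: OpenCV's universal intrinsics do not provide C++ operator overloads for the wide register types in all configurations, so expressions like cxn - cxp on a v_float32x8 fail to compile in the affected AVX2/AVX512 builds; the arithmetic is respelled through the named intrinsics v_sub and v_add. A minimal sketch of the before/after pattern, assuming OpenCV 4.x with CV_SIMD256 enabled (the helper and buffer names are hypothetical, not from the commit):

#include <opencv2/core/hal/intrin.hpp>

#if CV_SIMD256
// Hypothetical helper: subtract eight floats at a time.
static void sub_row8(const float* a, const float* b, float* out)
{
    cv::v_float32x8 va = cv::v256_load(a);
    cv::v_float32x8 vb = cv::v256_load(b);
    // Before the fix: cv::v_float32x8 d = va - vb;  // breaks where the
    //                                               // operator overloads are absent
    cv::v_float32x8 d = cv::v_sub(va, vb);           // portable spelling
    cv::v_store(out, d);
}
#endif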
@@ -799,9 +799,9 @@ Point3f getNormalVoxel(
     v_float32x8 czp = v_lut(vals, v256_load(idxzp));
     v_float32x8 czn = v_lut(vals, v256_load(idxzn));

-    v_float32x8 vcxv = cxn - cxp;
-    v_float32x8 vcyv = cyn - cyp;
-    v_float32x8 vczv = czn - czp;
+    v_float32x8 vcxv = v_sub(cxn, cxp);
+    v_float32x8 vcyv = v_sub(cyn, cyp);
+    v_float32x8 vczv = v_sub(czn, czp);

     v_store(cxv, vcxv);
     v_store(cyv, vcyv);
@@ -942,9 +942,9 @@ Point3f ocl_getNormalVoxel(
     v_float32x8 czp = v_lut(vals, v256_load(idxzp));
     v_float32x8 czn = v_lut(vals, v256_load(idxzn));

-    v_float32x8 vcxv = cxn - cxp;
-    v_float32x8 vcyv = cyn - cyp;
-    v_float32x8 vczv = czn - czp;
+    v_float32x8 vcxv = v_sub(cxn, cxp);
+    v_float32x8 vcyv = v_sub(cyn, cyp);
+    v_float32x8 vczv = v_sub(czn, czp);

     v_store(cxv, vcxv);
     v_store(cyv, vcyv);
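In both getNormalVoxel variants the subtractions appear to be central differences of neighboring voxel values along each axis (the idx*p/idx*n index pairs naming the minus/plus neighbors), which together form the gradient used as the surface normal; the fix changes only the spelling, not the numerics. A scalar view of one lane, with hypothetical index names:

// Hypothetical scalar equivalent of one SIMD lane: per-axis central difference.
float nx = vals[idxxn] - vals[idxxp];   // what v_sub(cxn, cxp) computes per lane
float ny = vals[idxyn] - vals[idxyp];
float nz = vals[idxzn] - vals[idxzp];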
@@ -85,15 +85,15 @@ CV_ALWAYS_INLINE void calcRowLinear32FC1Impl(float *dst[],
     v_deinterleave(low1, high1, s00, s01);

     // v_float32 res0 = s00*alpha0 + s01*alpha1;
-    v_float32x8 res0 = v_fma(s00 - s01, alpha0, s01);
+    v_float32x8 res0 = v_fma(v_sub(s00, s01), alpha0, s01);

     v_gather_pairs(src1[line], &mapsx[x], low2, high2);
     v_deinterleave(low2, high2, s10, s11);

     // v_float32 res1 = s10*alpha0 + s11*alpha1;
-    v_float32x8 res1 = v_fma(s10 - s11, alpha0, s11);
+    v_float32x8 res1 = v_fma(v_sub(s10, s11), alpha0, s11);
     // v_float32 d = res0*beta0 + res1*beta1;
-    v_float32x8 d = v_fma(res0 - res1, v_beta0, res1);
+    v_float32x8 d = v_fma(v_sub(res0, res1), v_beta0, res1);

     v_store(&dst[line][x], d);
 }
@@ -126,7 +126,7 @@ CV_ALWAYS_INLINE void calcRowLinear32FC1Impl(float *dst[],
     v_deinterleave(low, high, s00, s01);

     // v_float32 d = s00*alpha0 + s01*alpha1;
-    v_float32x8 d = v_fma(s00 - s01, alpha0, s01);
+    v_float32x8 d = v_fma(v_sub(s00, s01), alpha0, s01);

     v_store(&dst[line][x], d);
 }
@@ -157,7 +157,7 @@ CV_ALWAYS_INLINE void calcRowLinear32FC1Impl(float *dst[],
     v_float32x8 s1 = v256_load(&src1[line][x]);

     // v_float32 d = s0*beta0 + s1*beta1;
-    v_float32x8 d = v_fma(s0 - s1, v_beta0, s1);
+    v_float32x8 d = v_fma(v_sub(s0, s1), v_beta0, s1);

     v_store(&dst[line][x], d);
 }
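The commented reference lines and the fused replacements in these hunks are algebraically equivalent whenever the interpolation weights are complementary (alpha0 + alpha1 == 1, beta0 + beta1 == 1), since a*w + b*(1 - w) == (a - b)*w + b; the v_fma form trades two multiplies and an add for one fused multiply-add. A scalar check of the identity (a sketch assuming complementary weights, not code from the commit):

#include <cmath>

// Reference two-multiply form vs. the fused form used in the hunks above.
float lerp_ref(float a, float b, float w) { return a * w + b * (1.0f - w); }
float lerp_fma(float a, float b, float w) { return std::fma(a - b, w, b); }
// For any w in [0, 1] the two agree up to rounding; the fused form rounds
// only once, so it can even be slightly more accurate.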
@@ -2535,7 +2535,7 @@ public:
 #elif CV_SIMD_WIDTH == 64
     v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
     v_zip(s0, s3, t0, t1); v_zip(s1, s4, t2, t3); v_zip(s2, s5, t4, t5);
-    bl = t0 + t3; gl = t1 + t4; rl = t2 + t5;
+    bl = v_add(t0, t3); gl = v_add(t1, t4); rl = v_add(t2, t5);
 #endif
     s0 = v_add(vx_load_expand(S0 + 6 * VTraits<v_uint16>::vlanes()), vx_load_expand(S1 + 6 * VTraits<v_uint16>::vlanes()));
     s1 = v_add(vx_load_expand(S0 + 7 * VTraits<v_uint16>::vlanes()), vx_load_expand(S1 + 7 * VTraits<v_uint16>::vlanes()));
@@ -2555,7 +2555,7 @@ public:
 #elif CV_SIMD_WIDTH == 64
     v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
     v_zip(s0, s3, t0, t1); v_zip(s1, s4, t2, t3); v_zip(s2, s5, t4, t5);
-    bh = t0 + t3; gh = t1 + t4; rh = t2 + t5;
+    bh = v_add(t0, t3); gh = v_add(t1, t4); rh = v_add(t2, t5);
 #endif
     v_store_interleave(D, v_rshr_pack<2>(bl, bh), v_rshr_pack<2>(gl, gh), v_rshr_pack<2>(rl, rh));
 }
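In the 512-bit branches, the chained v_zip calls interleave pairs of partial-sum registers so that lanes belonging to the same color channel line up before the per-channel addition; only that final addition changes spelling here. A minimal sketch of a single v_zip round followed by the respelled add, assuming CV_SIMD256 (the helper name is hypothetical):

#include <opencv2/core/hal/intrin.hpp>

#if CV_SIMD256
// v_zip(a, b, lo, hi) interleaves lanes: lo = a0,b0,a1,b1,...; hi holds the
// upper halves. Summing the results must use v_add rather than operator+.
static cv::v_uint16x16 zip_and_add(cv::v_uint16x16 a, cv::v_uint16x16 b)
{
    cv::v_uint16x16 lo, hi;
    cv::v_zip(a, b, lo, hi);
    return cv::v_add(lo, hi);
}
#endif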
@@ -2642,7 +2642,7 @@ public:
     bl = v_add(t0, t3); gl = v_add(t1, t4); rl = v_add(t2, t5);
 #else //CV_SIMD_WIDTH == 64
     v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
-    bl = s0 + s3; gl = s1 + s4; rl = s2 + s5;
+    bl = v_add(s0, s3); gl = v_add(s1, s4); rl = v_add(s2, s5);
 #endif
     s0 = v_add(vx_load_expand(S0 + 6 * VTraits<v_uint32>::vlanes()), vx_load_expand(S1 + 6 * VTraits<v_uint32>::vlanes()));
     s1 = v_add(vx_load_expand(S0 + 7 * VTraits<v_uint32>::vlanes()), vx_load_expand(S1 + 7 * VTraits<v_uint32>::vlanes()));
@@ -2658,7 +2658,7 @@ public:
     bh = v_add(t0, t3); gh = v_add(t1, t4); rh = v_add(t2, t5);
 #else //CV_SIMD_WIDTH == 64
     v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
-    bh = s0 + s3; gh = s1 + s4; rh = s2 + s5;
+    bh = v_add(s0, s3); gh = v_add(s1, s4); rh = v_add(s2, s5);
 #endif
     v_store_interleave(D, v_rshr_pack<2>(bl, bh), v_rshr_pack<2>(gl, gh), v_rshr_pack<2>(rl, rh));
 }
@@ -2696,7 +2696,7 @@ public:
     v_expand(v_reinterpret_as_u16(r01), r01l, r01h);
     v_expand(v_reinterpret_as_u16(r10), r10l, r10h);
     v_expand(v_reinterpret_as_u16(r11), r11l, r11h);
-    v_store(D, v_rshr_pack<2>(r00l + r01l + r10l + r11l, r00h + r01h + r10h + r11h));
+    v_store(D, v_rshr_pack<2>(v_add(r00l, r01l, r10l, r11l), v_add(r00h, r01h, r10h, r11h)));
 }
 #else
 for ( ; dx <= w - VTraits<v_uint32>::vlanes(); dx += VTraits<v_uint32>::vlanes(), S0 += VTraits<v_uint16>::vlanes(), S1 += VTraits<v_uint16>::vlanes(), D += VTraits<v_uint32>::vlanes())
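These v_rshr_pack hunks average a 2x2 block of pixels: the four widened registers are summed (the four-operand v_add is the variadic overload of the newer intrinsics API) and v_rshr_pack<2> performs a rounding shift right by two, i.e. a rounded divide by 4, while packing back to the narrower type. A scalar sketch of one output lane (names hypothetical):

// One lane of the vector code above: average four samples with rounding.
static unsigned char avg4(unsigned char r00, unsigned char r01,
                          unsigned char r10, unsigned char r11)
{
    unsigned sum = r00 + r01 + r10 + r11;   // the v_add chain, in a wider type
    return (unsigned char)((sum + 2) >> 2); // v_rshr_pack<2>: add 2, shift by 2
}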
@@ -2772,7 +2772,7 @@ public:
     bl = v_add(t0, t3); gl = v_add(t1, t4); rl = v_add(t2, t5);
 #else //CV_SIMD_WIDTH == 64
     v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
-    bl = s0 + s3; gl = s1 + s4; rl = s2 + s5;
+    bl = v_add(s0, s3); gl = v_add(s1, s4); rl = v_add(s2, s5);
 #endif
     s0 = v_add(vx_load_expand(S0 + 6 * VTraits<v_int32>::vlanes()), vx_load_expand(S1 + 6 * VTraits<v_int32>::vlanes()));
     s1 = v_add(vx_load_expand(S0 + 7 * VTraits<v_int32>::vlanes()), vx_load_expand(S1 + 7 * VTraits<v_int32>::vlanes()));
@@ -2788,7 +2788,7 @@ public:
     bh = v_add(t0, t3); gh = v_add(t1, t4); rh = v_add(t2, t5);
 #else //CV_SIMD_WIDTH == 64
     v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
-    bh = s0 + s3; gh = s1 + s4; rh = s2 + s5;
+    bh = v_add(s0, s3); gh = v_add(s1, s4); rh = v_add(s2, s5);
 #endif
     v_store_interleave(D, v_rshr_pack<2>(bl, bh), v_rshr_pack<2>(gl, gh), v_rshr_pack<2>(rl, rh));
 }
@@ -2825,7 +2825,7 @@ public:
     v_expand(v_reinterpret_as_s16(r01), r01l, r01h);
     v_expand(v_reinterpret_as_s16(r10), r10l, r10h);
     v_expand(v_reinterpret_as_s16(r11), r11l, r11h);
-    v_store(D, v_rshr_pack<2>(r00l + r01l + r10l + r11l, r00h + r01h + r10h + r11h));
+    v_store(D, v_rshr_pack<2>(v_add(r00l, r01l, r10l, r11l), v_add(r00h, r01h, r10h, r11h)));
 #else
     v_int32 r0, r1, r2, r3;
     r0 = v_add(vx_load_expand(S0), vx_load_expand(S1));