mirror of
https://github.com/opencv/opencv.git
synced 2025-06-11 11:45:30 +08:00
Merge branch 4.x
This commit is contained in:
commit
0213483c18
@ -906,7 +906,7 @@ inline v_uint16x16 v_mul_hi(const v_uint16x16& a, const v_uint16x16& b) { return
|
||||
{ return _Tpuvec(__lasx_xvsll_##suffix(a.val, __lasx_xvreplgr2vr_##suffix(imm))); } \
|
||||
inline _Tpsvec v_shl(const _Tpsvec& a, int imm) \
|
||||
{ return _Tpsvec(__lasx_xvsll_##suffix(a.val, __lasx_xvreplgr2vr_##suffix(imm))); } \
|
||||
inline _Tpuvec V_shr(const _Tpuvec& a, int imm) \
|
||||
inline _Tpuvec v_shr(const _Tpuvec& a, int imm) \
|
||||
{ return _Tpuvec(__lasx_xvsrl_##suffix(a.val, __lasx_xvreplgr2vr_##suffix(imm))); } \
|
||||
inline _Tpsvec v_shr(const _Tpsvec& a, int imm) \
|
||||
{ return _Tpsvec(srai(a.val, __lasx_xvreplgr2vr_##suffix(imm))); } \
|
||||
|
@ -304,35 +304,35 @@ static const unsigned char popCountTable[] =
|
||||
} // namespace
|
||||
|
||||
static v128_t wasm_unpacklo_i8x16(v128_t a, v128_t b) {
|
||||
return wasm_v8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23);
|
||||
return wasm_i8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23);
|
||||
}
|
||||
|
||||
static v128_t wasm_unpacklo_i16x8(v128_t a, v128_t b) {
|
||||
return wasm_v8x16_shuffle(a, b, 0,1,16,17,2,3,18,19,4,5,20,21,6,7,22,23);
|
||||
return wasm_i8x16_shuffle(a, b, 0,1,16,17,2,3,18,19,4,5,20,21,6,7,22,23);
|
||||
}
|
||||
|
||||
static v128_t wasm_unpacklo_i32x4(v128_t a, v128_t b) {
|
||||
return wasm_v8x16_shuffle(a, b, 0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23);
|
||||
return wasm_i8x16_shuffle(a, b, 0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23);
|
||||
}
|
||||
|
||||
static v128_t wasm_unpacklo_i64x2(v128_t a, v128_t b) {
|
||||
return wasm_v8x16_shuffle(a, b, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
|
||||
return wasm_i8x16_shuffle(a, b, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
|
||||
}
|
||||
|
||||
static v128_t wasm_unpackhi_i8x16(v128_t a, v128_t b) {
|
||||
return wasm_v8x16_shuffle(a, b, 8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31);
|
||||
return wasm_i8x16_shuffle(a, b, 8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31);
|
||||
}
|
||||
|
||||
static v128_t wasm_unpackhi_i16x8(v128_t a, v128_t b) {
|
||||
return wasm_v8x16_shuffle(a, b, 8,9,24,25,10,11,26,27,12,13,28,29,14,15,30,31);
|
||||
return wasm_i8x16_shuffle(a, b, 8,9,24,25,10,11,26,27,12,13,28,29,14,15,30,31);
|
||||
}
|
||||
|
||||
static v128_t wasm_unpackhi_i32x4(v128_t a, v128_t b) {
|
||||
return wasm_v8x16_shuffle(a, b, 8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31);
|
||||
return wasm_i8x16_shuffle(a, b, 8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31);
|
||||
}
|
||||
|
||||
static v128_t wasm_unpackhi_i64x2(v128_t a, v128_t b) {
|
||||
return wasm_v8x16_shuffle(a, b, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
|
||||
return wasm_i8x16_shuffle(a, b, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
|
||||
}
|
||||
|
||||
/** Convert **/
|
||||
@ -423,7 +423,7 @@ inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b)
|
||||
v128_t maxval = wasm_i16x8_splat(255);
|
||||
v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
|
||||
v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval));
|
||||
return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
return v_uint8x16(wasm_i8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
}
|
||||
inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
@ -433,14 +433,14 @@ inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b)
|
||||
v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval));
|
||||
v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
|
||||
v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval));
|
||||
return v_int8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
return v_int8x16(wasm_i8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
}
|
||||
inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b)
|
||||
{
|
||||
v128_t maxval = wasm_i32x4_splat(65535);
|
||||
v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval));
|
||||
v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval));
|
||||
return v_uint16x8(wasm_v8x16_shuffle(a1, b1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
|
||||
return v_uint16x8(wasm_i8x16_shuffle(a1, b1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
|
||||
}
|
||||
inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b)
|
||||
{
|
||||
@ -450,15 +450,15 @@ inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b)
|
||||
v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval));
|
||||
v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
|
||||
v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval));
|
||||
return v_int16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
|
||||
return v_int16x8(wasm_i8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
|
||||
}
|
||||
inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b)
|
||||
{
|
||||
return v_uint32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
|
||||
return v_uint32x4(wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
|
||||
}
|
||||
inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b)
|
||||
{
|
||||
return v_int32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
|
||||
return v_int32x4(wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
|
||||
}
|
||||
inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
@ -468,7 +468,7 @@ inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b)
|
||||
v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval));
|
||||
v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
|
||||
v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval));
|
||||
return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
return v_uint8x16(wasm_i8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
}
|
||||
inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b)
|
||||
{
|
||||
@ -478,7 +478,7 @@ inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b)
|
||||
v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval));
|
||||
v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
|
||||
v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval));
|
||||
return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
|
||||
return v_uint16x8(wasm_i8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
|
||||
}
|
||||
|
||||
template<int n>
|
||||
@ -490,7 +490,7 @@ inline v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b)
|
||||
v128_t maxval = wasm_i16x8_splat(255);
|
||||
v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval));
|
||||
v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u16x8_gt(b1, maxval));
|
||||
return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
return v_uint8x16(wasm_i8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
}
|
||||
template<int n>
|
||||
inline v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b)
|
||||
@ -504,7 +504,7 @@ inline v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b)
|
||||
v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval));
|
||||
v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
|
||||
v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval));
|
||||
return v_int8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
return v_int8x16(wasm_i8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
}
|
||||
template<int n>
|
||||
inline v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b)
|
||||
@ -515,7 +515,7 @@ inline v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b)
|
||||
v128_t maxval = wasm_i32x4_splat(65535);
|
||||
v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval));
|
||||
v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u32x4_gt(b1, maxval));
|
||||
return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
|
||||
return v_uint16x8(wasm_i8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
|
||||
}
|
||||
template<int n>
|
||||
inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b)
|
||||
@ -529,7 +529,7 @@ inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b)
|
||||
v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval));
|
||||
v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
|
||||
v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval));
|
||||
return v_int16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
|
||||
return v_int16x8(wasm_i8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
|
||||
}
|
||||
template<int n>
|
||||
inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
|
||||
@ -537,7 +537,7 @@ inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
|
||||
v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
|
||||
v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n);
|
||||
v128_t b1 = wasm_u64x2_shr(wasm_i64x2_add(b.val, delta), n);
|
||||
return v_uint32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
|
||||
return v_uint32x4(wasm_i8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
|
||||
}
|
||||
template<int n>
|
||||
inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
|
||||
@ -545,7 +545,7 @@ inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
|
||||
v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
|
||||
v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n);
|
||||
v128_t b1 = wasm_i64x2_shr(wasm_i64x2_add(b.val, delta), n);
|
||||
return v_int32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
|
||||
return v_int32x4(wasm_i8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
|
||||
}
|
||||
template<int n>
|
||||
inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b)
|
||||
@ -559,7 +559,7 @@ inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b)
|
||||
v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval));
|
||||
v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
|
||||
v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval));
|
||||
return v_uint8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
return v_uint8x16(wasm_i8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
}
|
||||
template<int n>
|
||||
inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b)
|
||||
@ -573,14 +573,14 @@ inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b)
|
||||
v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval));
|
||||
v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
|
||||
v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval));
|
||||
return v_uint16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
|
||||
return v_uint16x8(wasm_i8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
|
||||
}
|
||||
|
||||
inline void v_pack_store(uchar* ptr, const v_uint16x8& a)
|
||||
{
|
||||
v128_t maxval = wasm_i16x8_splat(255);
|
||||
v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
|
||||
v128_t r = wasm_v8x16_shuffle(a1, a1, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
|
||||
v128_t r = wasm_i8x16_shuffle(a1, a1, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
|
||||
uchar t_ptr[16];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<8; ++i) {
|
||||
@ -593,7 +593,7 @@ inline void v_pack_store(schar* ptr, const v_int16x8& a)
|
||||
v128_t minval = wasm_i16x8_splat(-128);
|
||||
v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval));
|
||||
v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
|
||||
v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
|
||||
v128_t r = wasm_i8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
|
||||
schar t_ptr[16];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<8; ++i) {
|
||||
@ -604,7 +604,7 @@ inline void v_pack_store(ushort* ptr, const v_uint32x4& a)
|
||||
{
|
||||
v128_t maxval = wasm_i32x4_splat(65535);
|
||||
v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval));
|
||||
v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
|
||||
v128_t r = wasm_i8x16_shuffle(a1, a1, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
|
||||
ushort t_ptr[8];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<4; ++i) {
|
||||
@ -617,7 +617,7 @@ inline void v_pack_store(short* ptr, const v_int32x4& a)
|
||||
v128_t minval = wasm_i32x4_splat(-32768);
|
||||
v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval));
|
||||
v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
|
||||
v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
|
||||
v128_t r = wasm_i8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
|
||||
short t_ptr[8];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<4; ++i) {
|
||||
@ -626,7 +626,7 @@ inline void v_pack_store(short* ptr, const v_int32x4& a)
|
||||
}
|
||||
inline void v_pack_store(unsigned* ptr, const v_uint64x2& a)
|
||||
{
|
||||
v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
|
||||
v128_t r = wasm_i8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
|
||||
unsigned t_ptr[4];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<2; ++i) {
|
||||
@ -635,7 +635,7 @@ inline void v_pack_store(unsigned* ptr, const v_uint64x2& a)
|
||||
}
|
||||
inline void v_pack_store(int* ptr, const v_int64x2& a)
|
||||
{
|
||||
v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
|
||||
v128_t r = wasm_i8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
|
||||
int t_ptr[4];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<2; ++i) {
|
||||
@ -648,7 +648,7 @@ inline void v_pack_u_store(uchar* ptr, const v_int16x8& a)
|
||||
v128_t minval = wasm_i16x8_splat(0);
|
||||
v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval));
|
||||
v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
|
||||
v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
|
||||
v128_t r = wasm_i8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
|
||||
uchar t_ptr[16];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<8; ++i) {
|
||||
@ -661,7 +661,7 @@ inline void v_pack_u_store(ushort* ptr, const v_int32x4& a)
|
||||
v128_t minval = wasm_i32x4_splat(0);
|
||||
v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval));
|
||||
v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
|
||||
v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
|
||||
v128_t r = wasm_i8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
|
||||
ushort t_ptr[8];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<4; ++i) {
|
||||
@ -676,7 +676,7 @@ inline void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a)
|
||||
v128_t a1 = wasm_u16x8_shr(wasm_i16x8_add(a.val, delta), n);
|
||||
v128_t maxval = wasm_i16x8_splat(255);
|
||||
v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval));
|
||||
v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
|
||||
v128_t r = wasm_i8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
|
||||
uchar t_ptr[16];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<8; ++i) {
|
||||
@ -692,7 +692,7 @@ inline void v_rshr_pack_store(schar* ptr, const v_int16x8& a)
|
||||
v128_t minval = wasm_i16x8_splat(-128);
|
||||
v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
|
||||
v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
|
||||
v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
|
||||
v128_t r = wasm_i8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
|
||||
schar t_ptr[16];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<8; ++i) {
|
||||
@ -706,7 +706,7 @@ inline void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a)
|
||||
v128_t a1 = wasm_u32x4_shr(wasm_i32x4_add(a.val, delta), n);
|
||||
v128_t maxval = wasm_i32x4_splat(65535);
|
||||
v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval));
|
||||
v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
|
||||
v128_t r = wasm_i8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
|
||||
ushort t_ptr[8];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<4; ++i) {
|
||||
@ -722,7 +722,7 @@ inline void v_rshr_pack_store(short* ptr, const v_int32x4& a)
|
||||
v128_t minval = wasm_i32x4_splat(-32768);
|
||||
v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
|
||||
v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
|
||||
v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
|
||||
v128_t r = wasm_i8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
|
||||
short t_ptr[8];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<4; ++i) {
|
||||
@ -734,7 +734,7 @@ inline void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a)
|
||||
{
|
||||
v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
|
||||
v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n);
|
||||
v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
|
||||
v128_t r = wasm_i8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
|
||||
unsigned t_ptr[4];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<2; ++i) {
|
||||
@ -746,7 +746,7 @@ inline void v_rshr_pack_store(int* ptr, const v_int64x2& a)
|
||||
{
|
||||
v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
|
||||
v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n);
|
||||
v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
|
||||
v128_t r = wasm_i8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
|
||||
int t_ptr[4];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<2; ++i) {
|
||||
@ -762,7 +762,7 @@ inline void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a)
|
||||
v128_t minval = wasm_i16x8_splat(0);
|
||||
v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
|
||||
v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
|
||||
v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
|
||||
v128_t r = wasm_i8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
|
||||
uchar t_ptr[16];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<8; ++i) {
|
||||
@ -778,7 +778,7 @@ inline void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a)
|
||||
v128_t minval = wasm_i32x4_splat(0);
|
||||
v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
|
||||
v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
|
||||
v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
|
||||
v128_t r = wasm_i8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
|
||||
ushort t_ptr[8];
|
||||
wasm_v128_store(t_ptr, r);
|
||||
for (int i=0; i<4; ++i) {
|
||||
@ -791,7 +791,7 @@ inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
|
||||
v128_t maxval = wasm_i16x8_splat(255);
|
||||
v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
|
||||
v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval));
|
||||
return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
return v_uint8x16(wasm_i8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
|
||||
}
|
||||
|
||||
inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
|
||||
@ -802,9 +802,9 @@ inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
|
||||
v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval));
|
||||
v128_t c1 = wasm_v128_bitselect(maxval, c.val, wasm_u32x4_gt(c.val, maxval));
|
||||
v128_t d1 = wasm_v128_bitselect(maxval, d.val, wasm_u32x4_gt(d.val, maxval));
|
||||
v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
|
||||
v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
|
||||
return v_uint8x16(wasm_v8x16_shuffle(ab, cd, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23));
|
||||
v128_t ab = wasm_i8x16_shuffle(a1, b1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
|
||||
v128_t cd = wasm_i8x16_shuffle(c1, d1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
|
||||
return v_uint8x16(wasm_i8x16_shuffle(ab, cd, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23));
|
||||
}
|
||||
|
||||
inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
|
||||
@ -820,13 +820,13 @@ inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uin
|
||||
v128_t f1 = wasm_v128_bitselect(maxval, f.val, ((__u64x2)(f.val) > (__u64x2)maxval));
|
||||
v128_t g1 = wasm_v128_bitselect(maxval, g.val, ((__u64x2)(g.val) > (__u64x2)maxval));
|
||||
v128_t h1 = wasm_v128_bitselect(maxval, h.val, ((__u64x2)(h.val) > (__u64x2)maxval));
|
||||
v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
|
||||
v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
|
||||
v128_t ef = wasm_v8x16_shuffle(e1, f1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
|
||||
v128_t gh = wasm_v8x16_shuffle(g1, h1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
|
||||
v128_t abcd = wasm_v8x16_shuffle(ab, cd, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19);
|
||||
v128_t efgh = wasm_v8x16_shuffle(ef, gh, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19);
|
||||
return v_uint8x16(wasm_v8x16_shuffle(abcd, efgh, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23));
|
||||
v128_t ab = wasm_i8x16_shuffle(a1, b1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
|
||||
v128_t cd = wasm_i8x16_shuffle(c1, d1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
|
||||
v128_t ef = wasm_i8x16_shuffle(e1, f1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
|
||||
v128_t gh = wasm_i8x16_shuffle(g1, h1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
|
||||
v128_t abcd = wasm_i8x16_shuffle(ab, cd, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19);
|
||||
v128_t efgh = wasm_i8x16_shuffle(ef, gh, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19);
|
||||
return v_uint8x16(wasm_i8x16_shuffle(abcd, efgh, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23));
|
||||
}
|
||||
|
||||
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
|
||||
@ -964,7 +964,7 @@ inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
|
||||
v_expand(b, b0, b1);
|
||||
v128_t c = wasm_i32x4_mul(a0.val, b0.val);
|
||||
v128_t d = wasm_i32x4_mul(a1.val, b1.val);
|
||||
return v_int16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31));
|
||||
return v_int16x8(wasm_i8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31));
|
||||
}
|
||||
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
|
||||
{
|
||||
@ -973,7 +973,7 @@ inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
|
||||
v_expand(b, b0, b1);
|
||||
v128_t c = wasm_i32x4_mul(a0.val, b0.val);
|
||||
v128_t d = wasm_i32x4_mul(a1.val, b1.val);
|
||||
return v_uint16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31));
|
||||
return v_uint16x8(wasm_i8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31));
|
||||
}
|
||||
|
||||
//////// Dot Product ////////
|
||||
@ -1398,7 +1398,7 @@ inline _Tpsvec v_shl(const _Tpsvec& a, int imm) \
|
||||
{ \
|
||||
return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \
|
||||
} \
|
||||
inline _Tpuvec V_shr(const _Tpuvec& a, int imm) \
|
||||
inline _Tpuvec v_shr(const _Tpuvec& a, int imm) \
|
||||
{ \
|
||||
return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \
|
||||
} \
|
||||
@ -1471,7 +1471,7 @@ namespace hal_wasm_internal
|
||||
inline v128_t operator()(const v128_t& a, const v128_t& b) const
|
||||
{
|
||||
enum { imm2 = (sizeof(v128_t) - imm) };
|
||||
return wasm_v8x16_shuffle(a, b,
|
||||
return wasm_i8x16_shuffle(a, b,
|
||||
imm, imm+1, imm+2, imm+3,
|
||||
imm+4, imm+5, imm+6, imm+7,
|
||||
imm+8, imm+9, imm+10, imm+11,
|
||||
@ -1582,19 +1582,19 @@ OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float64x2, double)
|
||||
|
||||
/** Reverse **/
|
||||
inline v_uint8x16 v_reverse(const v_uint8x16 &a)
|
||||
{ return v_uint8x16(wasm_v8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); }
|
||||
{ return v_uint8x16(wasm_i8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); }
|
||||
|
||||
inline v_int8x16 v_reverse(const v_int8x16 &a)
|
||||
{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
|
||||
|
||||
inline v_uint16x8 v_reverse(const v_uint16x8 &a)
|
||||
{ return v_uint16x8(wasm_v8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); }
|
||||
{ return v_uint16x8(wasm_i8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); }
|
||||
|
||||
inline v_int16x8 v_reverse(const v_int16x8 &a)
|
||||
{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
|
||||
|
||||
inline v_uint32x4 v_reverse(const v_uint32x4 &a)
|
||||
{ return v_uint32x4(wasm_v8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); }
|
||||
{ return v_uint32x4(wasm_i8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); }
|
||||
|
||||
inline v_int32x4 v_reverse(const v_int32x4 &a)
|
||||
{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
|
||||
@ -1603,7 +1603,7 @@ inline v_float32x4 v_reverse(const v_float32x4 &a)
|
||||
{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
|
||||
|
||||
inline v_uint64x2 v_reverse(const v_uint64x2 &a)
|
||||
{ return v_uint64x2(wasm_v8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); }
|
||||
{ return v_uint64x2(wasm_i8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); }
|
||||
|
||||
inline v_int64x2 v_reverse(const v_int64x2 &a)
|
||||
{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
|
||||
@ -1616,8 +1616,8 @@ inline v_float64x2 v_reverse(const v_float64x2 &a)
|
||||
inline scalartype v_reduce_sum(const _Tpvec& a) \
|
||||
{ \
|
||||
regtype val = a.val; \
|
||||
val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \
|
||||
val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3)); \
|
||||
val = wasm_##suffix##_add(val, wasm_i8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \
|
||||
val = wasm_##suffix##_add(val, wasm_i8x16_shuffle(val, val, 4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3)); \
|
||||
return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \
|
||||
}
|
||||
|
||||
@ -1649,7 +1649,7 @@ OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int16x8, int)
|
||||
inline scalartype v_reduce_sum(const _Tpvec& a) \
|
||||
{ \
|
||||
regtype val = a.val; \
|
||||
val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \
|
||||
val = wasm_##suffix##_add(val, wasm_i8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \
|
||||
return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \
|
||||
}
|
||||
OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_uint64x2, uint64, v128_t, i64x2, i64x2)
|
||||
@ -1996,8 +1996,8 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b)
|
||||
v128_t t00 = wasm_v128_load(ptr);
|
||||
v128_t t01 = wasm_v128_load(ptr + 16);
|
||||
|
||||
a.val = wasm_v8x16_shuffle(t00, t01, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30);
|
||||
b.val = wasm_v8x16_shuffle(t00, t01, 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31);
|
||||
a.val = wasm_i8x16_shuffle(t00, t01, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30);
|
||||
b.val = wasm_i8x16_shuffle(t00, t01, 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31);
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
|
||||
@ -2006,13 +2006,13 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
|
||||
v128_t t01 = wasm_v128_load(ptr + 16);
|
||||
v128_t t02 = wasm_v128_load(ptr + 32);
|
||||
|
||||
v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,3,6,9,12,15,18,21,24,27,30,1,2,4,5,7);
|
||||
v128_t t11 = wasm_v8x16_shuffle(t00, t01, 1,4,7,10,13,16,19,22,25,28,31,0,2,3,5,6);
|
||||
v128_t t12 = wasm_v8x16_shuffle(t00, t01, 2,5,8,11,14,17,20,23,26,29,0,1,3,4,6,7);
|
||||
v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,3,6,9,12,15,18,21,24,27,30,1,2,4,5,7);
|
||||
v128_t t11 = wasm_i8x16_shuffle(t00, t01, 1,4,7,10,13,16,19,22,25,28,31,0,2,3,5,6);
|
||||
v128_t t12 = wasm_i8x16_shuffle(t00, t01, 2,5,8,11,14,17,20,23,26,29,0,1,3,4,6,7);
|
||||
|
||||
a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,17,20,23,26,29);
|
||||
b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,18,21,24,27,30);
|
||||
c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,16,19,22,25,28,31);
|
||||
a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,17,20,23,26,29);
|
||||
b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,18,21,24,27,30);
|
||||
c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,16,19,22,25,28,31);
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d)
|
||||
@ -2022,15 +2022,15 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
|
||||
v128_t u2 = wasm_v128_load(ptr + 32); // a8 b8 c8 d8 ...
|
||||
v128_t u3 = wasm_v128_load(ptr + 48); // a12 b12 c12 d12 ...
|
||||
|
||||
v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29);
|
||||
v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29);
|
||||
v128_t v2 = wasm_v8x16_shuffle(u0, u1, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31);
|
||||
v128_t v3 = wasm_v8x16_shuffle(u2, u3, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31);
|
||||
v128_t v0 = wasm_i8x16_shuffle(u0, u1, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29);
|
||||
v128_t v1 = wasm_i8x16_shuffle(u2, u3, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29);
|
||||
v128_t v2 = wasm_i8x16_shuffle(u0, u1, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31);
|
||||
v128_t v3 = wasm_i8x16_shuffle(u2, u3, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31);
|
||||
|
||||
a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
|
||||
b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
|
||||
c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
|
||||
d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
|
||||
a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
|
||||
b.val = wasm_i8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
|
||||
c.val = wasm_i8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
|
||||
d.val = wasm_i8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
|
||||
@ -2038,8 +2038,8 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
|
||||
v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1 a2 b2 a3 b3
|
||||
v128_t v1 = wasm_v128_load(ptr + 8); // a4 b4 a5 b5 a6 b6 a7 b7
|
||||
|
||||
a.val = wasm_v8x16_shuffle(v0, v1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29); // a0 a1 a2 a3 a4 a5 a6 a7
|
||||
b.val = wasm_v8x16_shuffle(v0, v1, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31); // b0 b1 b2 b3 b4 b5 b6 b7
|
||||
a.val = wasm_i8x16_shuffle(v0, v1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29); // a0 a1 a2 a3 a4 a5 a6 a7
|
||||
b.val = wasm_i8x16_shuffle(v0, v1, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31); // b0 b1 b2 b3 b4 b5 b6 b7
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c)
|
||||
@ -2048,13 +2048,13 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
|
||||
v128_t t01 = wasm_v128_load(ptr + 8); // c2 a3 b3 c3 a4 b4 c4 a5
|
||||
v128_t t02 = wasm_v128_load(ptr + 16); // b5 c5 a6 b6 c6 a7 b7 c7
|
||||
|
||||
v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,6,7,12,13,18,19,24,25,30,31,2,3,4,5);
|
||||
v128_t t11 = wasm_v8x16_shuffle(t00, t01, 2,3,8,9,14,15,20,21,26,27,0,1,4,5,6,7);
|
||||
v128_t t12 = wasm_v8x16_shuffle(t00, t01, 4,5,10,11,16,17,22,23,28,29,0,1,2,3,6,7);
|
||||
v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,1,6,7,12,13,18,19,24,25,30,31,2,3,4,5);
|
||||
v128_t t11 = wasm_i8x16_shuffle(t00, t01, 2,3,8,9,14,15,20,21,26,27,0,1,4,5,6,7);
|
||||
v128_t t12 = wasm_i8x16_shuffle(t00, t01, 4,5,10,11,16,17,22,23,28,29,0,1,2,3,6,7);
|
||||
|
||||
a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,26,27);
|
||||
b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,16,17,22,23,28,29);
|
||||
c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,18,19,24,25,30,31);
|
||||
a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,26,27);
|
||||
b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,16,17,22,23,28,29);
|
||||
c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,18,19,24,25,30,31);
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d)
|
||||
@ -2064,15 +2064,15 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
|
||||
v128_t u2 = wasm_v128_load(ptr + 16); // a4 b4 c4 d4 ...
|
||||
v128_t u3 = wasm_v128_load(ptr + 24); // a6 b6 c6 d6 ...
|
||||
|
||||
v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a0 a1 a2 a3 b0 b1 b2 b3
|
||||
v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a4 a5 a6 a7 b4 b5 b6 b7
|
||||
v128_t v2 = wasm_v8x16_shuffle(u0, u1, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c0 c1 c2 c3 d0 d1 d2 d3
|
||||
v128_t v3 = wasm_v8x16_shuffle(u2, u3, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c4 c5 c6 c7 d4 d5 d6 d7
|
||||
v128_t v0 = wasm_i8x16_shuffle(u0, u1, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a0 a1 a2 a3 b0 b1 b2 b3
|
||||
v128_t v1 = wasm_i8x16_shuffle(u2, u3, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a4 a5 a6 a7 b4 b5 b6 b7
|
||||
v128_t v2 = wasm_i8x16_shuffle(u0, u1, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c0 c1 c2 c3 d0 d1 d2 d3
|
||||
v128_t v3 = wasm_i8x16_shuffle(u2, u3, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c4 c5 c6 c7 d4 d5 d6 d7
|
||||
|
||||
a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
|
||||
b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
|
||||
c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
|
||||
d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
|
||||
a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
|
||||
b.val = wasm_i8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
|
||||
c.val = wasm_i8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
|
||||
d.val = wasm_i8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b)
|
||||
@ -2080,8 +2080,8 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
|
||||
v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1
|
||||
v128_t v1 = wasm_v128_load(ptr + 4); // a2 b2 a3 b3
|
||||
|
||||
a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
|
||||
b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
|
||||
a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
|
||||
b.val = wasm_i8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c)
|
||||
@ -2090,13 +2090,13 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
|
||||
v128_t t01 = wasm_v128_load(ptr + 4); // b2 c2 a3 b3
|
||||
v128_t t02 = wasm_v128_load(ptr + 8); // c3 a4 b4 c4
|
||||
|
||||
v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
|
||||
v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
|
||||
v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
|
||||
v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
|
||||
v128_t t11 = wasm_i8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
|
||||
v128_t t12 = wasm_i8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
|
||||
|
||||
a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
|
||||
b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
|
||||
c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
|
||||
a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
|
||||
b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
|
||||
c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d)
|
||||
@ -2114,8 +2114,8 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b
|
||||
v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1
|
||||
v128_t v1 = wasm_v128_load((ptr + 4)); // a2 b2 a3 b3
|
||||
|
||||
a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
|
||||
b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
|
||||
a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
|
||||
b.val = wasm_i8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c)
|
||||
@ -2124,13 +2124,13 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b
|
||||
v128_t t01 = wasm_v128_load(ptr + 4); // b2 c2 a3 b3
|
||||
v128_t t02 = wasm_v128_load(ptr + 8); // c3 a4 b4 c4
|
||||
|
||||
v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
|
||||
v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
|
||||
v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
|
||||
v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
|
||||
v128_t t11 = wasm_i8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
|
||||
v128_t t12 = wasm_i8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
|
||||
|
||||
a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
|
||||
b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
|
||||
c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
|
||||
a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
|
||||
b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
|
||||
c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d)
|
||||
@ -2158,9 +2158,9 @@ inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b,
|
||||
v128_t t1 = wasm_v128_load(ptr + 2); // c0, a1
|
||||
v128_t t2 = wasm_v128_load(ptr + 4); // b1, c1
|
||||
|
||||
a.val = wasm_v8x16_shuffle(t0, t1, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
|
||||
b.val = wasm_v8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23);
|
||||
c.val = wasm_v8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
|
||||
a.val = wasm_i8x16_shuffle(t0, t1, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
|
||||
b.val = wasm_i8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23);
|
||||
c.val = wasm_i8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a,
|
||||
@ -2192,13 +2192,13 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1
|
||||
inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
|
||||
const v_uint8x16& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
|
||||
{
|
||||
v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5);
|
||||
v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26);
|
||||
v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0);
|
||||
v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5);
|
||||
v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26);
|
||||
v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0);
|
||||
|
||||
v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15);
|
||||
v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15);
|
||||
v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31);
|
||||
v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15);
|
||||
v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15);
|
||||
v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31);
|
||||
|
||||
wasm_v128_store(ptr, t10);
|
||||
wasm_v128_store(ptr + 16, t11);
|
||||
@ -2243,13 +2243,13 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
|
||||
const v_uint16x8& b, const v_uint16x8& c,
|
||||
hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
|
||||
{
|
||||
v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21);
|
||||
v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11);
|
||||
v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0);
|
||||
v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21);
|
||||
v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11);
|
||||
v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0);
|
||||
|
||||
v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15);
|
||||
v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15);
|
||||
v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31);
|
||||
v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15);
|
||||
v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15);
|
||||
v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31);
|
||||
|
||||
wasm_v128_store(ptr, t10);
|
||||
wasm_v128_store(ptr + 8, t11);
|
||||
@ -2293,13 +2293,13 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint
|
||||
inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
|
||||
const v_uint32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
|
||||
{
|
||||
v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
|
||||
v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
|
||||
v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
|
||||
v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
|
||||
v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
|
||||
v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
|
||||
|
||||
v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
|
||||
v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
|
||||
v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
|
||||
v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
|
||||
v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
|
||||
v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
|
||||
|
||||
wasm_v128_store(ptr, t10);
|
||||
wasm_v128_store(ptr + 4, t11);
|
||||
@ -2333,13 +2333,13 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32
|
||||
inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
|
||||
const v_float32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
|
||||
{
|
||||
v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
|
||||
v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
|
||||
v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
|
||||
v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
|
||||
v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
|
||||
v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
|
||||
|
||||
v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
|
||||
v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
|
||||
v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
|
||||
v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
|
||||
v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
|
||||
v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
|
||||
|
||||
wasm_v128_store(ptr, t10);
|
||||
wasm_v128_store(ptr + 4, t11);
|
||||
@ -2372,9 +2372,9 @@ inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x
|
||||
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b,
|
||||
const v_uint64x2& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
|
||||
{
|
||||
v128_t v0 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
|
||||
v128_t v1 = wasm_v8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15);
|
||||
v128_t v2 = wasm_v8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
|
||||
v128_t v0 = wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
|
||||
v128_t v1 = wasm_i8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15);
|
||||
v128_t v2 = wasm_i8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
|
||||
|
||||
wasm_v128_store(ptr, v0);
|
||||
wasm_v128_store(ptr + 2, v1);
|
||||
@ -2687,45 +2687,45 @@ inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_flo
|
||||
|
||||
inline v_int8x16 v_interleave_pairs(const v_int8x16& vec)
|
||||
{
|
||||
return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15));
|
||||
return v_int8x16(wasm_i8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15));
|
||||
}
|
||||
inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); }
|
||||
inline v_int8x16 v_interleave_quads(const v_int8x16& vec)
|
||||
{
|
||||
return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15));
|
||||
return v_int8x16(wasm_i8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15));
|
||||
}
|
||||
inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); }
|
||||
|
||||
inline v_int16x8 v_interleave_pairs(const v_int16x8& vec)
|
||||
{
|
||||
return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15));
|
||||
return v_int16x8(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15));
|
||||
}
|
||||
inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); }
|
||||
inline v_int16x8 v_interleave_quads(const v_int16x8& vec)
|
||||
{
|
||||
return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15));
|
||||
return v_int16x8(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15));
|
||||
}
|
||||
inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); }
|
||||
|
||||
inline v_int32x4 v_interleave_pairs(const v_int32x4& vec)
|
||||
{
|
||||
return v_int32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
|
||||
return v_int32x4(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
|
||||
}
|
||||
inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
|
||||
inline v_float32x4 v_interleave_pairs(const v_float32x4& vec)
|
||||
{
|
||||
return v_float32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
|
||||
return v_float32x4(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
|
||||
}
|
||||
|
||||
inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
|
||||
{
|
||||
return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16));
|
||||
return v_int8x16(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16));
|
||||
}
|
||||
inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); }
|
||||
|
||||
inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
|
||||
{
|
||||
return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7));
|
||||
return v_int16x8(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7));
|
||||
}
|
||||
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
|
||||
|
||||
|
@ -728,7 +728,7 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/ , RotateTest,
|
||||
testing::Combine(
|
||||
testing::Values(szVGA, sz720p, sz1080p),
|
||||
testing::Values(ROTATE_180, ROTATE_90_CLOCKWISE, ROTATE_90_COUNTERCLOCKWISE),
|
||||
testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_8SC1, CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4, CV_32SC1, CV_32FC1)
|
||||
testing::Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4, CV_8SC1, CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4, CV_32SC1, CV_32FC1)
|
||||
)
|
||||
);
|
||||
|
||||
|
@ -962,7 +962,11 @@ static ushort LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B];
|
||||
|
||||
static const bool enableBitExactness = true;
|
||||
static const bool enableRGB2LabInterpolation = true;
|
||||
|
||||
#if CV_SIMD
|
||||
static const bool enablePackedLab = true;
|
||||
#endif
|
||||
|
||||
enum
|
||||
{
|
||||
lab_lut_shift = 5,
|
||||
@ -979,8 +983,12 @@ static const int minABvalue = -8145;
|
||||
static const int *abToXZ_b;
|
||||
// Luv constants
|
||||
static const bool enableRGB2LuvInterpolation = true;
|
||||
|
||||
#if CV_SIMD
|
||||
static const bool enablePackedRGB2Luv = true;
|
||||
static const bool enablePackedLuv2RGB = true;
|
||||
#endif
|
||||
|
||||
static const softfloat uLow(-134), uHigh(220), uRange(uHigh-uLow);
|
||||
static const softfloat vLow(-140), vHigh(122), vRange(vHigh-vLow);
|
||||
|
||||
@ -1381,7 +1389,7 @@ static inline void trilinearInterpolate(int cx, int cy, int cz, const int16_t* L
|
||||
c = CV_DESCALE(c, trilinear_shift*3);
|
||||
}
|
||||
|
||||
#if CV_SIMD_WIDTH == 16
|
||||
#if (CV_SIMD && CV_SIMD_WIDTH == 16)
|
||||
|
||||
// 8 inValues are in [0; LAB_BASE]
|
||||
static inline void trilinearPackedInterpolate(const v_uint16x8& inX, const v_uint16x8& inY, const v_uint16x8& inZ,
|
||||
|
@ -299,8 +299,13 @@ public:
|
||||
*/
|
||||
CV_WRAP Status stitch(InputArrayOfArrays images, InputArrayOfArrays masks, OutputArray pano);
|
||||
|
||||
std::vector<int> component() const { return indices_; }
|
||||
std::vector<detail::CameraParams> cameras() const { return cameras_; }
|
||||
/** @brief Returns indices of input images used in panorama stitching
|
||||
*/
|
||||
CV_WRAP std::vector<int> component() const { return indices_; }
|
||||
|
||||
/** @brief Returns estimated camera parameters for all stitched images
|
||||
*/
|
||||
CV_WRAP std::vector<cv::detail::CameraParams> cameras() const { return cameras_; }
|
||||
CV_WRAP double workScale() const { return work_scale_; }
|
||||
|
||||
/** @brief Return the mask of the panorama.
|
||||
|
@ -98,10 +98,12 @@ class Builder:
|
||||
"-DWITH_GTK=OFF",
|
||||
"-DWITH_GTK_2_X=OFF",
|
||||
"-DWITH_IPP=OFF",
|
||||
"-DWITH_AVIF=OFF",
|
||||
"-DWITH_JASPER=OFF",
|
||||
"-DWITH_JPEG=OFF",
|
||||
"-DWITH_WEBP=OFF",
|
||||
"-DWITH_OPENEXR=OFF",
|
||||
"-DWITH_OPENJPEG=OFF",
|
||||
"-DWITH_OPENGL=OFF",
|
||||
"-DWITH_OPENNI=OFF",
|
||||
"-DWITH_OPENNI2=OFF",
|
||||
|
Loading…
Reference in New Issue
Block a user