mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge pull request #11332 from alalek:v_select_x86
This commit is contained in:
commit
909a25571e
@ -1042,13 +1042,16 @@ template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @brief Bitwise select
|
/** @brief Per-element select (blend operation)
|
||||||
|
|
||||||
Return value will be built by combining values a and b using the following scheme:
|
Return value will be built by combining values _a_ and _b_ using the following scheme:
|
||||||
If the i-th bit in _mask_ is 1
|
result[i] = mask[i] ? a[i] : b[i];
|
||||||
select i-th bit from _a_
|
|
||||||
else
|
@note _mask_ element values are restricted to these values:
|
||||||
select i-th bit from _b_ */
|
- 0: select element from _b_
|
||||||
|
- 0xff/0xffff/etc: select element from _a_
|
||||||
|
(fully compatible with bitwise-based operator)
|
||||||
|
*/
|
||||||
template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
|
template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
|
||||||
const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
|
const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
|
||||||
{
|
{
|
||||||
@ -1058,8 +1061,8 @@ template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>&
|
|||||||
for( int i = 0; i < n; i++ )
|
for( int i = 0; i < n; i++ )
|
||||||
{
|
{
|
||||||
int_type m = Traits::reinterpret_int(mask.s[i]);
|
int_type m = Traits::reinterpret_int(mask.s[i]);
|
||||||
c.s[i] = Traits::reinterpret_from_int((Traits::reinterpret_int(a.s[i]) & m)
|
CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc
|
||||||
| (Traits::reinterpret_int(b.s[i]) & ~m));
|
c.s[i] = m ? a.s[i] : b.s[i];
|
||||||
}
|
}
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
@ -438,10 +438,14 @@ void v_rshr_pack_store(schar* ptr, const v_int16x8& a)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// bit-wise "mask ? a : b"
|
// byte-wise "mask ? a : b"
|
||||||
inline __m128i v_select_si128(__m128i mask, __m128i a, __m128i b)
|
inline __m128i v_select_si128(__m128i mask, __m128i a, __m128i b)
|
||||||
{
|
{
|
||||||
|
#if CV_SSE4_1
|
||||||
|
return _mm_blendv_epi8(b, a, mask);
|
||||||
|
#else
|
||||||
return _mm_xor_si128(b, _mm_and_si128(_mm_xor_si128(a, b), mask));
|
return _mm_xor_si128(b, _mm_and_si128(_mm_xor_si128(a, b), mask));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b)
|
inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b)
|
||||||
@ -1403,6 +1407,26 @@ OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, epi8, v_packq_epi32, OPENCV_HAL_AND,
|
|||||||
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 15, 15)
|
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 15, 15)
|
||||||
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 3, 3)
|
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 3, 3)
|
||||||
|
|
||||||
|
#if CV_SSE4_1
|
||||||
|
#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, cast_ret, cast, suffix) \
|
||||||
|
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
|
||||||
|
{ \
|
||||||
|
return _Tpvec(cast_ret(_mm_blendv_##suffix(cast(b.val), cast(a.val), cast(mask.val)))); \
|
||||||
|
}
|
||||||
|
|
||||||
|
OPENCV_HAL_IMPL_SSE_SELECT(v_uint8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
|
||||||
|
OPENCV_HAL_IMPL_SSE_SELECT(v_int8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
|
||||||
|
OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
|
||||||
|
OPENCV_HAL_IMPL_SSE_SELECT(v_int16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
|
||||||
|
OPENCV_HAL_IMPL_SSE_SELECT(v_uint32x4, _mm_castps_si128, _mm_castsi128_ps, ps)
|
||||||
|
OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, _mm_castps_si128, _mm_castsi128_ps, ps)
|
||||||
|
// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, TBD, TBD, pd)
|
||||||
|
// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, TBD, TBD, ps)
|
||||||
|
OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, OPENCV_HAL_NOP, OPENCV_HAL_NOP, ps)
|
||||||
|
OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, OPENCV_HAL_NOP, OPENCV_HAL_NOP, pd)
|
||||||
|
|
||||||
|
#else // CV_SSE4_1
|
||||||
|
|
||||||
#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, suffix) \
|
#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, suffix) \
|
||||||
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
|
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
|
||||||
{ \
|
{ \
|
||||||
@ -1419,6 +1443,7 @@ OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, si128)
|
|||||||
// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128)
|
// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128)
|
||||||
OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, ps)
|
OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, ps)
|
||||||
OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, pd)
|
OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, pd)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpuvec, _Tpwuvec, _Tpu, _Tpsvec, _Tpwsvec, _Tps, suffix, wsuffix, shift) \
|
#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpuvec, _Tpwuvec, _Tpu, _Tpsvec, _Tpwsvec, _Tps, suffix, wsuffix, shift) \
|
||||||
inline void v_expand(const _Tpuvec& a, _Tpwuvec& b0, _Tpwuvec& b1) \
|
inline void v_expand(const _Tpuvec& a, _Tpwuvec& b0, _Tpwuvec& b1) \
|
||||||
|
@ -96,8 +96,8 @@ PERF_TEST_P(Size_MatType, Mat_Clone_Roi,
|
|||||||
}
|
}
|
||||||
|
|
||||||
PERF_TEST_P(Size_MatType, Mat_CopyToWithMask,
|
PERF_TEST_P(Size_MatType, Mat_CopyToWithMask,
|
||||||
testing::Combine(testing::Values(TYPICAL_MAT_SIZES),
|
testing::Combine(testing::Values(::perf::sz1080p, ::perf::szODD),
|
||||||
testing::Values(CV_8UC1, CV_8UC2))
|
testing::Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_16UC1, CV_32SC1, CV_32FC4))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
const Size_MatType_t params = GetParam();
|
const Size_MatType_t params = GetParam();
|
||||||
|
@ -91,11 +91,7 @@ copyMask_<uchar>(const uchar* _src, size_t sstep, const uchar* mask, size_t mste
|
|||||||
uchar* dst = (uchar*)_dst;
|
uchar* dst = (uchar*)_dst;
|
||||||
int x = 0;
|
int x = 0;
|
||||||
#if CV_SIMD128
|
#if CV_SIMD128
|
||||||
if( hasSIMD128()
|
{
|
||||||
#if CV_SSE4_2
|
|
||||||
&& USE_SSE4_2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
v_uint8x16 v_zero = v_setzero_u8();
|
v_uint8x16 v_zero = v_setzero_u8();
|
||||||
|
|
||||||
for( ; x <= size.width - 16; x += 16 )
|
for( ; x <= size.width - 16; x += 16 )
|
||||||
@ -104,11 +100,7 @@ copyMask_<uchar>(const uchar* _src, size_t sstep, const uchar* mask, size_t mste
|
|||||||
v_dst = v_load(dst + x),
|
v_dst = v_load(dst + x),
|
||||||
v_nmask = v_load(mask + x) == v_zero;
|
v_nmask = v_load(mask + x) == v_zero;
|
||||||
|
|
||||||
#if CV_SSE4_2
|
|
||||||
v_dst = v_uint8x16(_mm_blendv_epi8(v_src.val, v_dst.val, v_nmask.val));
|
|
||||||
#else
|
|
||||||
v_dst = v_select(v_nmask, v_dst, v_src);
|
v_dst = v_select(v_nmask, v_dst, v_src);
|
||||||
#endif
|
|
||||||
v_store(dst + x, v_dst);
|
v_store(dst + x, v_dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -130,11 +122,7 @@ copyMask_<ushort>(const uchar* _src, size_t sstep, const uchar* mask, size_t mst
|
|||||||
ushort* dst = (ushort*)_dst;
|
ushort* dst = (ushort*)_dst;
|
||||||
int x = 0;
|
int x = 0;
|
||||||
#if CV_SIMD128
|
#if CV_SIMD128
|
||||||
if( hasSIMD128()
|
{
|
||||||
#if CV_SSE4_2
|
|
||||||
&& USE_SSE4_2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
v_uint8x16 v_zero = v_setzero_u8();
|
v_uint8x16 v_zero = v_setzero_u8();
|
||||||
|
|
||||||
for( ; x <= size.width - 16; x += 16 )
|
for( ; x <= size.width - 16; x += 16 )
|
||||||
@ -146,13 +134,8 @@ copyMask_<ushort>(const uchar* _src, size_t sstep, const uchar* mask, size_t mst
|
|||||||
v_uint8x16 v_nmask = v_load(mask + x) == v_zero;
|
v_uint8x16 v_nmask = v_load(mask + x) == v_zero;
|
||||||
v_zip(v_nmask, v_nmask, v_nmask1, v_nmask2);
|
v_zip(v_nmask, v_nmask, v_nmask1, v_nmask2);
|
||||||
|
|
||||||
#if CV_SSE4_2
|
|
||||||
v_dst1 = v_uint16x8(_mm_blendv_epi8(v_src1.val, v_dst1.val, v_nmask1.val));
|
|
||||||
v_dst2 = v_uint16x8(_mm_blendv_epi8(v_src2.val, v_dst2.val, v_nmask2.val));
|
|
||||||
#else
|
|
||||||
v_dst1 = v_select(v_reinterpret_as_u16(v_nmask1), v_dst1, v_src1);
|
v_dst1 = v_select(v_reinterpret_as_u16(v_nmask1), v_dst1, v_src1);
|
||||||
v_dst2 = v_select(v_reinterpret_as_u16(v_nmask2), v_dst2, v_src2);
|
v_dst2 = v_select(v_reinterpret_as_u16(v_nmask2), v_dst2, v_src2);
|
||||||
#endif
|
|
||||||
v_store(dst + x, v_dst1);
|
v_store(dst + x, v_dst1);
|
||||||
v_store(dst + x + 8, v_dst2);
|
v_store(dst + x + 8, v_dst2);
|
||||||
}
|
}
|
||||||
|
@ -657,8 +657,15 @@ template<typename R> struct TheTest
|
|||||||
|
|
||||||
TheTest & test_mask()
|
TheTest & test_mask()
|
||||||
{
|
{
|
||||||
Data<R> dataA, dataB, dataC, dataD(1), dataE(2);
|
typedef V_TypeTraits<LaneType> Traits;
|
||||||
|
typedef typename Traits::int_type int_type;
|
||||||
|
|
||||||
|
Data<R> dataA, dataB(0), dataC, dataD(1), dataE(2);
|
||||||
dataA[1] *= (LaneType)-1;
|
dataA[1] *= (LaneType)-1;
|
||||||
|
const LaneType mask_one = Traits::reinterpret_from_int(~(typename Traits::uint_type)(0));
|
||||||
|
dataB[1] = mask_one;
|
||||||
|
dataB[R::nlanes / 2] = mask_one;
|
||||||
|
dataB[R::nlanes - 1] = mask_one;
|
||||||
dataC *= (LaneType)-1;
|
dataC *= (LaneType)-1;
|
||||||
R a = dataA, b = dataB, c = dataC, d = dataD, e = dataE;
|
R a = dataA, b = dataB, c = dataC, d = dataD, e = dataE;
|
||||||
|
|
||||||
@ -670,12 +677,9 @@ template<typename R> struct TheTest
|
|||||||
EXPECT_EQ(true, v_check_all(c));
|
EXPECT_EQ(true, v_check_all(c));
|
||||||
|
|
||||||
EXPECT_EQ(true, v_check_any(a));
|
EXPECT_EQ(true, v_check_any(a));
|
||||||
EXPECT_EQ(false, v_check_any(b));
|
EXPECT_EQ(true, v_check_any(b));
|
||||||
EXPECT_EQ(true, v_check_any(c));
|
EXPECT_EQ(true, v_check_any(c));
|
||||||
|
|
||||||
typedef V_TypeTraits<LaneType> Traits;
|
|
||||||
typedef typename Traits::int_type int_type;
|
|
||||||
|
|
||||||
R f = v_select(b, d, e);
|
R f = v_select(b, d, e);
|
||||||
Data<R> resF = f;
|
Data<R> resF = f;
|
||||||
for (int i = 0; i < R::nlanes; ++i)
|
for (int i = 0; i < R::nlanes; ++i)
|
||||||
|
Loading…
Reference in New Issue
Block a user