mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 06:26:29 +08:00
Merge pull request #9024 from tomoaki0705:featureDispatchAccumulate
This commit is contained in:
commit
10e6491c22
@ -899,6 +899,15 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
|
||||
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps)
|
||||
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd)
|
||||
|
||||
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast) \
|
||||
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \
|
||||
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); }
|
||||
|
||||
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64);
|
||||
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64);
|
||||
|
||||
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_add_wrap, _mm_add_epi16)
|
||||
@ -1520,6 +1529,35 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
|
||||
v_transpose4x4(u0, u1, u2, u3, a, b, c, d);
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c)
|
||||
{
|
||||
__m128i t0 = _mm_loadu_si128((const __m128i*)ptr);
|
||||
__m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2));
|
||||
__m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4));
|
||||
|
||||
a = v_uint64x2(_mm_unpacklo_epi64(t0, _mm_unpackhi_epi64(t1, t1)));
|
||||
b = v_uint64x2(_mm_unpacklo_epi64(_mm_unpackhi_epi64(t0, t0), t2));
|
||||
c = v_uint64x2(_mm_unpacklo_epi64(t1, _mm_unpackhi_epi64(t2, t2)));
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const int64 *ptr, v_int64x2& a, v_int64x2& b, v_int64x2& c)
|
||||
{
|
||||
v_uint64x2 t0, t1, t2;
|
||||
v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
|
||||
a = v_reinterpret_as_s64(t0);
|
||||
b = v_reinterpret_as_s64(t1);
|
||||
c = v_reinterpret_as_s64(t2);
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const double *ptr, v_float64x2& a, v_float64x2& b, v_float64x2& c)
|
||||
{
|
||||
v_uint64x2 t0, t1, t2;
|
||||
v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
|
||||
a = v_reinterpret_as_f64(t0);
|
||||
b = v_reinterpret_as_f64(t1);
|
||||
c = v_reinterpret_as_f64(t2);
|
||||
}
|
||||
|
||||
// 2-channel, float only
|
||||
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
|
||||
{
|
||||
@ -1717,6 +1755,27 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32
|
||||
_mm_storeu_ps((ptr + 4), u1);
|
||||
}
|
||||
|
||||
// Interleaves three vectors of 64-bit lanes and writes six values with unaligned stores:
//   ptr[0..5] = a[0], b[0], c[0], a[1], b[1], c[1].
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c)
{
    // Upper lane of a moved into lane 0 so it can follow c[0] in the middle register.
    __m128i aHigh = _mm_unpackhi_epi64(a.val, a.val);

    __m128i head   = _mm_unpacklo_epi64(a.val, b.val); // { a[0], b[0] }
    __m128i middle = _mm_unpacklo_epi64(c.val, aHigh); // { c[0], a[1] }
    __m128i tail   = _mm_unpackhi_epi64(b.val, c.val); // { b[1], c[1] }

    _mm_storeu_si128((__m128i*)ptr, head);
    _mm_storeu_si128((__m128i*)(ptr + 2), middle);
    _mm_storeu_si128((__m128i*)(ptr + 4), tail);
}
|
||||
|
||||
// Signed 64-bit variant: reinterprets the inputs as v_uint64x2 and forwards
// to the uint64 overload.
inline void v_store_interleave(int64 *ptr, const v_int64x2& a, const v_int64x2& b, const v_int64x2& c)
{
    const v_uint64x2 ua = v_reinterpret_as_u64(a);
    const v_uint64x2 ub = v_reinterpret_as_u64(b);
    const v_uint64x2 uc = v_reinterpret_as_u64(c);
    v_store_interleave(reinterpret_cast<uint64*>(ptr), ua, ub, uc);
}
|
||||
|
||||
inline void v_store_interleave(double *ptr, const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
|
||||
{
|
||||
v_store_interleave((uint64*)ptr, v_reinterpret_as_u64(a), v_reinterpret_as_u64(b), v_reinterpret_as_u64(c));
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec, _Tp, suffix, _Tpuvec, _Tpu, usuffix) \
|
||||
inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
|
||||
_Tpvec& b0, _Tpvec& c0 ) \
|
||||
|
@ -1,2 +1,3 @@
|
||||
# Description string for this module.
set(the_description "Image Processing")
|
||||
# Passes "accum" together with the SSE2, AVX and NEON mode names to ocv_add_dispatched_file.
# NOTE(review): presumably this is what produces accum.simd_declarations.hpp for
# accum.dispatch.cpp - confirm against the ocv_add_dispatched_file definition.
ocv_add_dispatched_file(accum SSE2 AVX NEON)
|
||||
# Declares the imgproc module, listing opencv_core and the java/python wrappers.
# NOTE(review): exact meaning of the WRAP arguments is defined by ocv_define_module - confirm there.
ocv_define_module(imgproc opencv_core WRAP java python)
|
||||
|
File diff suppressed because it is too large
Load Diff
20
modules/imgproc/src/accum.dispatch.cpp
Normal file
20
modules/imgproc/src/accum.dispatch.cpp
Normal file
@ -0,0 +1,20 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#include "accum.simd.hpp"
|
||||
#include "accum.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
|
||||
|
||||
// Instantiates the accumulate function families inside namespace cv.
// DEF_ACC_INT_FUNCS / DEF_ACC_FLT_FUNCS are defined outside this view.
// NOTE(review): the arguments look like (function-name suffix, source element type,
// accumulator element type) - confirm against the macro definitions.
namespace cv {
|
||||
|
||||
// Integer sources (uchar / ushort) accumulated into float or double.
DEF_ACC_INT_FUNCS(8u32f, uchar, float)
|
||||
DEF_ACC_INT_FUNCS(8u64f, uchar, double)
|
||||
DEF_ACC_INT_FUNCS(16u32f, ushort, float)
|
||||
DEF_ACC_INT_FUNCS(16u64f, ushort, double)
|
||||
// Floating-point sources (float / double) accumulated into float or double.
DEF_ACC_FLT_FUNCS(32f, float, float)
|
||||
DEF_ACC_FLT_FUNCS(32f64f, float, double)
|
||||
DEF_ACC_FLT_FUNCS(64f, double, double)
|
||||
|
||||
} // namespace cv
|
3187
modules/imgproc/src/accum.simd.hpp
Normal file
3187
modules/imgproc/src/accum.simd.hpp
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user