From 6ad8a9c09d9187136a397336b95b74a4b8585152 Mon Sep 17 00:00:00 2001
From: Vitaly Tuzov
Date: Tue, 28 Aug 2018 13:45:50 +0300
Subject: [PATCH] Replaced core module calls to universal intrinsics with wide
 universal intrinsics

---
 modules/core/src/arithm.cpp | 80 ++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 37 deletions(-)

diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
index e1fc23840f..3d1376dc8b 100644
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -1379,7 +1379,7 @@ struct InRange_SIMD
     }
 };
 
-#if CV_SIMD128
+#if CV_SIMD
 
 template <>
 struct InRange_SIMD<uchar>
@@ -1388,16 +1388,17 @@ struct InRange_SIMD<uchar>
                      uchar * dst, int len) const
     {
         int x = 0;
-        const int width = v_uint8x16::nlanes;
+        const int width = v_uint8::nlanes;
 
         for (; x <= len - width; x += width)
         {
-            v_uint8x16 values = v_load(src1 + x);
-            v_uint8x16 low = v_load(src2 + x);
-            v_uint8x16 high = v_load(src3 + x);
+            v_uint8 values = vx_load(src1 + x);
+            v_uint8 low = vx_load(src2 + x);
+            v_uint8 high = vx_load(src3 + x);
 
             v_store(dst + x, (values >= low) & (high >= values));
         }
+        vx_cleanup();
         return x;
     }
 };
@@ -1409,16 +1410,17 @@ struct InRange_SIMD<schar>
                      uchar * dst, int len) const
     {
         int x = 0;
-        const int width = v_int8x16::nlanes;
+        const int width = v_int8::nlanes;
 
         for (; x <= len - width; x += width)
        {
-            v_int8x16 values = v_load(src1 + x);
-            v_int8x16 low = v_load(src2 + x);
-            v_int8x16 high = v_load(src3 + x);
+            v_int8 values = vx_load(src1 + x);
+            v_int8 low = vx_load(src2 + x);
+            v_int8 high = vx_load(src3 + x);
 
             v_store((schar*)(dst + x), (values >= low) & (high >= values));
         }
+        vx_cleanup();
         return x;
     }
 };
@@ -1430,20 +1432,21 @@ struct InRange_SIMD<ushort>
                      uchar * dst, int len) const
     {
         int x = 0;
-        const int width = v_uint16x8::nlanes * 2;
+        const int width = v_uint16::nlanes * 2;
 
         for (; x <= len - width; x += width)
         {
-            v_uint16x8 values1 = v_load(src1 + x);
-            v_uint16x8 low1 = v_load(src2 + x);
-            v_uint16x8 high1 = v_load(src3 + x);
+            v_uint16 values1 = vx_load(src1 + x);
+            v_uint16 low1 = vx_load(src2 + x);
+            v_uint16 high1 = vx_load(src3 + x);
 
-            v_uint16x8 values2 = v_load(src1 + x + v_uint16x8::nlanes);
-            v_uint16x8 low2 = v_load(src2 + x + v_uint16x8::nlanes);
-            v_uint16x8 high2 = v_load(src3 + x + v_uint16x8::nlanes);
+            v_uint16 values2 = vx_load(src1 + x + v_uint16::nlanes);
+            v_uint16 low2 = vx_load(src2 + x + v_uint16::nlanes);
+            v_uint16 high2 = vx_load(src3 + x + v_uint16::nlanes);
 
             v_store(dst + x, v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2)));
         }
+        vx_cleanup();
         return x;
     }
 };
@@ -1455,20 +1458,21 @@ struct InRange_SIMD<short>
                      uchar * dst, int len) const
     {
         int x = 0;
-        const int width = (int)v_int16x8::nlanes * 2;
+        const int width = (int)v_int16::nlanes * 2;
 
         for (; x <= len - width; x += width)
         {
-            v_int16x8 values1 = v_load(src1 + x);
-            v_int16x8 low1 = v_load(src2 + x);
-            v_int16x8 high1 = v_load(src3 + x);
+            v_int16 values1 = vx_load(src1 + x);
+            v_int16 low1 = vx_load(src2 + x);
+            v_int16 high1 = vx_load(src3 + x);
 
-            v_int16x8 values2 = v_load(src1 + x + v_int16x8::nlanes);
-            v_int16x8 low2 = v_load(src2 + x + v_int16x8::nlanes);
-            v_int16x8 high2 = v_load(src3 + x + v_int16x8::nlanes);
+            v_int16 values2 = vx_load(src1 + x + v_int16::nlanes);
+            v_int16 low2 = vx_load(src2 + x + v_int16::nlanes);
+            v_int16 high2 = vx_load(src3 + x + v_int16::nlanes);
 
             v_store((schar*)(dst + x), v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2)));
         }
+        vx_cleanup();
         return x;
     }
 };
@@ -1480,20 +1484,21 @@ struct InRange_SIMD<int>
                      uchar * dst, int len) const
     {
         int x = 0;
-        const int width = (int)v_int32x4::nlanes * 2;
+        const int width = (int)v_int32::nlanes * 2;
 
         for (; x <= len - width; x += width)
         {
-            v_int32x4 values1 = v_load(src1 + x);
-            v_int32x4 low1 = v_load(src2 + x);
-            v_int32x4 high1 = v_load(src3 + x);
+            v_int32 values1 = vx_load(src1 + x);
+            v_int32 low1 = vx_load(src2 + x);
+            v_int32 high1 = vx_load(src3 + x);
 
-            v_int32x4 values2 = v_load(src1 + x + v_int32x4::nlanes);
-            v_int32x4 low2 = v_load(src2 + x + v_int32x4::nlanes);
-            v_int32x4 high2 = v_load(src3 + x + v_int32x4::nlanes);
+            v_int32 values2 = vx_load(src1 + x + v_int32::nlanes);
+            v_int32 low2 = vx_load(src2 + x + v_int32::nlanes);
+            v_int32 high2 = vx_load(src3 + x + v_int32::nlanes);
 
             v_pack_store(dst + x, v_reinterpret_as_u16(v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2))));
         }
+        vx_cleanup();
         return x;
     }
 };
@@ -1505,20 +1510,21 @@ struct InRange_SIMD<float>
                      uchar * dst, int len) const
     {
         int x = 0;
-        const int width = (int)v_float32x4::nlanes * 2;
+        const int width = (int)v_float32::nlanes * 2;
 
         for (; x <= len - width; x += width)
         {
-            v_float32x4 values1 = v_load(src1 + x);
-            v_float32x4 low1 = v_load(src2 + x);
-            v_float32x4 high1 = v_load(src3 + x);
+            v_float32 values1 = vx_load(src1 + x);
+            v_float32 low1 = vx_load(src2 + x);
+            v_float32 high1 = vx_load(src3 + x);
 
-            v_float32x4 values2 = v_load(src1 + x + v_float32x4::nlanes);
-            v_float32x4 low2 = v_load(src2 + x + v_float32x4::nlanes);
-            v_float32x4 high2 = v_load(src3 + x + v_float32x4::nlanes);
+            v_float32 values2 = vx_load(src1 + x + v_float32::nlanes);
+            v_float32 low2 = vx_load(src2 + x + v_float32::nlanes);
+            v_float32 high2 = vx_load(src3 + x + v_float32::nlanes);
 
             v_pack_store(dst + x, v_pack(v_reinterpret_as_u32((values1 >= low1) & (high1 >= values1)), v_reinterpret_as_u32((values2 >= low2) & (high2 >= values2))));
         }
+        vx_cleanup();
        return x;
     }
 };
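
Note (not part of the patch): a minimal standalone sketch of the width-agnostic pattern the hunks above migrate to, assuming an OpenCV build of this era where CV_SIMD is enabled, the wide types and loads (v_uint8, vx_load, vx_cleanup) from opencv2/core/hal/intrin.hpp are available, and the per-lane comparison/bitwise operators are overloaded as in the code above. The helper name inRangeU8_wide is hypothetical and not introduced by this patch.

    #include <opencv2/core/hal/intrin.hpp>

    using namespace cv;

    // Hypothetical helper: writes 255 to dst[i] where low[i] <= src[i] <= high[i], else 0.
    static void inRangeU8_wide(const uchar* src, const uchar* low, const uchar* high,
                               uchar* dst, int len)
    {
        int x = 0;
    #if CV_SIMD
        // v_uint8::nlanes follows the widest SIMD extension enabled at build time
        // (16 lanes for SSE/NEON, 32 for AVX2, 64 for AVX-512), unlike fixed v_uint8x16.
        const int width = v_uint8::nlanes;
        for (; x <= len - width; x += width)
        {
            v_uint8 values = vx_load(src + x);   // vx_load returns the wide register type
            v_uint8 lo = vx_load(low + x);
            v_uint8 hi = vx_load(high + x);
            v_store(dst + x, (values >= lo) & (hi >= values));  // per-lane 0xFF/0x00 mask
        }
        vx_cleanup();  // restore SIMD state after the wide loop (e.g. zero upper AVX lanes)
    #endif
        for (; x < len; x++)  // scalar tail for the leftover elements
            dst[x] = (src[x] >= low[x] && high[x] >= src[x]) ? (uchar)255 : (uchar)0;
    }

The structure mirrors each hunk above: only the type names, the loads, and the added vx_cleanup() call differ from the fixed-width version, so the same loop body serves SSE, NEON, AVX2, or AVX-512 builds without further changes.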