From 1c17b3281a5f02c86bd28c41bb63d9656d2323fa Mon Sep 17 00:00:00 2001
From: Vitaly Tuzov
Date: Tue, 24 Sep 2019 18:41:39 +0300
Subject: [PATCH] Fixed OOB reading in pyrDown

---
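Note for reviewers (kept below the cut so it stays out of the commit
message): pyrDown's horizontal pass applies the 5-tap kernel 1-4-6-4-1
at every second pixel. As a reference for the indexing, a scalar sketch
of the single-channel case follows; the helper name pyrDownRow1 is
illustrative only and does not exist in pyramids.cpp:

    // Output x reads the five taps src[2*x] .. src[2*x + 4].
    static void pyrDownRow1(const float* src, float* row, int width)
    {
        for (int x = 0; x < width; x++)
            row[x] = src[2*x] + 4.f*src[2*x + 1] + 6.f*src[2*x + 2]
                   + 4.f*src[2*x + 3] + src[2*x + 4];
    }

The single-channel float and double paths kept the last-tap pointer at
src + 4 and fetched tap 4 as the even lanes of a pairwise
v_load_deinterleave, so every load window ended one element to the right
of the last tap actually used; on the final iteration that extra element
can lie past the end of the buffer the caller prepares, which is, by all
appearances, the OOB read named in the subject. Starting the load at
src + 3 and taking the second deinterleaved vector (src[4], src[6], ...)
yields the same tap values from a window that ends one element earlier;
the single-channel integer paths already read the last tap from src + 3
this way. Everywhere else the patch only renames src0/src2 to
src01/src23, reflecting that each of those pointers feeds two
interleaved taps.
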
 modules/imgproc/src/pyramids.cpp | 84 ++++++++++++++++----------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp
index 5a229b61f0..7b817d8887 100644
--- a/modules/imgproc/src/pyramids.cpp
+++ b/modules/imgproc/src/pyramids.cpp
@@ -87,13 +87,13 @@ template<typename T1, typename T2> int PyrUpVecV(T1**, T2**, int) { return 0; }
 template<> int PyrDownVecH<uchar, int, 1>(const uchar* src, int* row, int width)
 {
     int x = 0;
-    const uchar *src0 = src, *src2 = src + 2, *src4 = src + 3;
+    const uchar *src01 = src, *src23 = src + 2, *src4 = src + 3;
 
     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(v_reinterpret_as_s16(vx_load_expand(src0)), v_1_4) +
-                     v_dotprod(v_reinterpret_as_s16(vx_load_expand(src2)), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(v_reinterpret_as_s16(vx_load_expand(src01)), v_1_4) +
+                     v_dotprod(v_reinterpret_as_s16(vx_load_expand(src23)), v_6_4) +
                      (v_reinterpret_as_s32(vx_load_expand(src4)) >> 16));
 
     vx_cleanup();
@@ -102,13 +102,13 @@ template<> int PyrDownVecH<uchar, int, 1>(const uchar* src, int* row, int width)
 template<> int PyrDownVecH<uchar, int, 2>(const uchar* src, int* row, int width)
 {
     int x = 0;
-    const uchar *src0 = src, *src2 = src + 4, *src4 = src + 6;
+    const uchar *src01 = src, *src23 = src + 4, *src4 = src + 6;
 
     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(vx_load_expand(src0))), v_1_4) +
-                     v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(vx_load_expand(src2))), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(vx_load_expand(src01))), v_1_4) +
+                     v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(vx_load_expand(src23))), v_6_4) +
                      (v_reinterpret_as_s32(v_interleave_pairs(vx_load_expand(src4))) >> 16));
 
     vx_cleanup();
@@ -150,13 +150,13 @@ template<> int PyrDownVecH<uchar, int, 3>(const uchar* src, int* row, int width)
 template<> int PyrDownVecH<uchar, int, 4>(const uchar* src, int* row, int width)
 {
     int x = 0;
-    const uchar *src0 = src, *src2 = src + 8, *src4 = src + 12;
+    const uchar *src01 = src, *src23 = src + 8, *src4 = src + 12;
 
     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(v_interleave_quads(v_reinterpret_as_s16(vx_load_expand(src0))), v_1_4) +
-                     v_dotprod(v_interleave_quads(v_reinterpret_as_s16(vx_load_expand(src2))), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(v_interleave_quads(v_reinterpret_as_s16(vx_load_expand(src01))), v_1_4) +
+                     v_dotprod(v_interleave_quads(v_reinterpret_as_s16(vx_load_expand(src23))), v_6_4) +
                      (v_reinterpret_as_s32(v_interleave_quads(vx_load_expand(src4))) >> 16));
 
     vx_cleanup();
@@ -166,13 +166,13 @@ template<> int PyrDownVecH<uchar, int, 4>(const uchar* src, int* row, int width)
 template<> int PyrDownVecH<short, int, 1>(const short* src, int* row, int width)
 {
     int x = 0;
-    const short *src0 = src, *src2 = src + 2, *src4 = src + 3;
+    const short *src01 = src, *src23 = src + 2, *src4 = src + 3;
 
     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(vx_load(src0), v_1_4) +
-                     v_dotprod(vx_load(src2), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(vx_load(src01), v_1_4) +
+                     v_dotprod(vx_load(src23), v_6_4) +
                      (v_reinterpret_as_s32(vx_load(src4)) >> 16));
 
     vx_cleanup();
@@ -181,13 +181,13 @@ template<> int PyrDownVecH<short, int, 1>(const short* src, int* row, int width)
 template<> int PyrDownVecH<short, int, 2>(const short* src, int* row, int width)
 {
     int x = 0;
-    const short *src0 = src, *src2 = src + 4, *src4 = src + 6;
+    const short *src01 = src, *src23 = src + 4, *src4 = src + 6;
 
     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(v_interleave_pairs(vx_load(src0)), v_1_4) +
-                     v_dotprod(v_interleave_pairs(vx_load(src2)), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(v_interleave_pairs(vx_load(src01)), v_1_4) +
+                     v_dotprod(v_interleave_pairs(vx_load(src23)), v_6_4) +
                      (v_reinterpret_as_s32(v_interleave_pairs(vx_load(src4))) >> 16));
 
     vx_cleanup();
@@ -247,15 +247,15 @@ template<> int PyrDownVecH<short, int, 4>(const short* src, int* row, int width)
 template<> int PyrDownVecH<ushort, int, 1>(const ushort* src, int* row, int width)
 {
     int x = 0;
-    const ushort *src0 = src, *src2 = src + 2, *src4 = src + 3;
+    const ushort *src01 = src, *src23 = src + 2, *src4 = src + 3;
 
     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
     v_uint16 v_half = vx_setall_u16(0x8000);
     v_int32 v_half15 = vx_setall_s32(0x00078000);
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(v_reinterpret_as_s16(v_sub_wrap(vx_load(src0), v_half)), v_1_4) +
-                     v_dotprod(v_reinterpret_as_s16(v_sub_wrap(vx_load(src2), v_half)), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(v_reinterpret_as_s16(v_sub_wrap(vx_load(src01), v_half)), v_1_4) +
+                     v_dotprod(v_reinterpret_as_s16(v_sub_wrap(vx_load(src23), v_half)), v_6_4) +
                      v_reinterpret_as_s32(v_reinterpret_as_u32(vx_load(src4)) >> 16) + v_half15);
 
     vx_cleanup();
@@ -264,15 +264,15 @@ template<> int PyrDownVecH<ushort, int, 1>(const ushort* src, int* row, int widt
 template<> int PyrDownVecH<ushort, int, 2>(const ushort* src, int* row, int width)
 {
     int x = 0;
-    const ushort *src0 = src, *src2 = src + 4, *src4 = src + 6;
+    const ushort *src01 = src, *src23 = src + 4, *src4 = src + 6;
 
     v_int16 v_1_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040001));
     v_int16 v_6_4 = v_reinterpret_as_s16(vx_setall_u32(0x00040006));
     v_uint16 v_half = vx_setall_u16(0x8000);
     v_int32 v_half15 = vx_setall_s32(0x00078000);
-    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src0 += v_int16::nlanes, src2 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
-        v_store(row, v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(v_sub_wrap(vx_load(src0), v_half))), v_1_4) +
-                     v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(v_sub_wrap(vx_load(src2), v_half))), v_6_4) +
+    for (; x <= width - v_int32::nlanes; x += v_int32::nlanes, src01 += v_int16::nlanes, src23 += v_int16::nlanes, src4 += v_int16::nlanes, row += v_int32::nlanes)
+        v_store(row, v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(v_sub_wrap(vx_load(src01), v_half))), v_1_4) +
+                     v_dotprod(v_interleave_pairs(v_reinterpret_as_s16(v_sub_wrap(vx_load(src23), v_half))), v_6_4) +
                      v_reinterpret_as_s32(v_reinterpret_as_u32(v_interleave_pairs(vx_load(src4))) >> 16) + v_half15);
 
     vx_cleanup();
@@ -344,15 +344,15 @@ template<> int PyrDownVecH<ushort, int, 4>(const ushort* src, int* row, int widt
 template<> int PyrDownVecH<float, float, 1>(const float* src, float* row, int width)
 {
     int x = 0;
-    const float *src0 = src, *src2 = src + 2, *src4 = src + 4;
+    const float *src01 = src, *src23 = src + 2, *src4 = src + 3;
 
     v_float32 _4 = vx_setall_f32(4.f), _6 = vx_setall_f32(6.f);
-    for (; x <= width - v_float32::nlanes; x += v_float32::nlanes, src0 += 2*v_float32::nlanes, src2 += 2*v_float32::nlanes, src4 += 2*v_float32::nlanes, row+=v_float32::nlanes)
+    for (; x <= width - v_float32::nlanes; x += v_float32::nlanes, src01 += 2*v_float32::nlanes, src23 += 2*v_float32::nlanes, src4 += 2*v_float32::nlanes, row+=v_float32::nlanes)
     {
         v_float32 r0, r1, r2, r3, r4, rtmp;
-        v_load_deinterleave(src0, r0, r1);
-        v_load_deinterleave(src2, r2, r3);
-        v_load_deinterleave(src4, r4, rtmp);
+        v_load_deinterleave(src01, r0, r1);
+        v_load_deinterleave(src23, r2, r3);
+        v_load_deinterleave(src4, rtmp, r4);
         v_store(row, v_muladd(r2, _6, v_muladd(r1 + r3, _4, r0 + r4)));
     }
     vx_cleanup();
@@ -362,14 +362,14 @@ template<> int PyrDownVecH<float, float, 1>(const float* src, float* row, int wi
 template<> int PyrDownVecH<float, float, 2>(const float* src, float* row, int width)
 {
     int x = 0;
-    const float *src0 = src, *src2 = src + 4, *src4 = src + 6;
+    const float *src01 = src, *src23 = src + 4, *src4 = src + 6;
 
     v_float32 _4 = vx_setall_f32(4.f), _6 = vx_setall_f32(6.f);
-    for (; x <= width - 2*v_float32::nlanes; x += 2*v_float32::nlanes, src0 += 4*v_float32::nlanes, src2 += 4*v_float32::nlanes, src4 += 4*v_float32::nlanes, row += 2*v_float32::nlanes)
+    for (; x <= width - 2*v_float32::nlanes; x += 2*v_float32::nlanes, src01 += 4*v_float32::nlanes, src23 += 4*v_float32::nlanes, src4 += 4*v_float32::nlanes, row += 2*v_float32::nlanes)
     {
         v_float32 r0a, r0b, r1a, r1b, r2a, r2b, r3a, r3b, r4a, r4b, rtmpa, rtmpb;
-        v_load_deinterleave(src0, r0a, r0b, r1a, r1b);
-        v_load_deinterleave(src2, r2a, r2b, r3a, r3b);
+        v_load_deinterleave(src01, r0a, r0b, r1a, r1b);
+        v_load_deinterleave(src23, r2a, r2b, r3a, r3b);
         v_load_deinterleave(src4, rtmpa, rtmpb, r4a, r4b);
         v_store_interleave(row, v_muladd(r2a, _6, v_muladd(r1a + r3a, _4, r0a + r4a)), v_muladd(r2b, _6, v_muladd(r1b + r3b, _4, r0b + r4b)));
     }
@@ -430,15 +430,15 @@ template<> int PyrDownVecH<float, float, 4>(const float* src, float* row, int wi
 template<> int PyrDownVecH<double, double, 1>(const double* src, double* row, int width)
 {
     int x = 0;
-    const double *src0 = src, *src2 = src + 2, *src4 = src + 4;
+    const double *src01 = src, *src23 = src + 2, *src4 = src + 3;
 
     v_float64 _4 = vx_setall_f64(4.f), _6 = vx_setall_f64(6.f);
-    for (; x <= width - v_float64::nlanes; x += v_float64::nlanes, src0 += 2*v_float64::nlanes, src2 += 2*v_float64::nlanes, src4 += 2*v_float64::nlanes, row += v_float64::nlanes)
+    for (; x <= width - v_float64::nlanes; x += v_float64::nlanes, src01 += 2*v_float64::nlanes, src23 += 2*v_float64::nlanes, src4 += 2*v_float64::nlanes, row += v_float64::nlanes)
     {
         v_float64 r0, r1, r2, r3, r4, rtmp;
-        v_load_deinterleave(src0, r0, r1);
-        v_load_deinterleave(src2, r2, r3);
-        v_load_deinterleave(src4, r4, rtmp);
+        v_load_deinterleave(src01, r0, r1);
+        v_load_deinterleave(src23, r2, r3);
+        v_load_deinterleave(src4, rtmp, r4);
         v_store(row, v_muladd(r2, _6, v_muladd(r1 + r3, _4, r0 + r4)));
     }
     vx_cleanup();
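
A note on verification (not part of the patch): the over-read is silent
in normal builds, so a sanitizer is the practical way to observe it. A
minimal sketch, assuming an AddressSanitizer build of OpenCV; whether
the final vector iteration actually touches the out-of-bounds element
depends on the image width and the SIMD width, so sweeping a range of
widths is more reliable than any single size:

    #include <opencv2/core.hpp>
    #include <opencv2/imgproc.hpp>

    int main()
    {
        for (int w = 64; w <= 512; w++)
        {
            cv::Mat src(64, w, CV_32FC1), dst;
            cv::randu(src, cv::Scalar::all(0), cv::Scalar::all(1)); // arbitrary data
            cv::pyrDown(src, dst); // exercises the single-channel float path
        }
        return 0;
    }

Running the stock imgproc test binary (opencv_test_imgproc) under the
same sanitizer build covers the remaining element types.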