diff --git a/modules/core/src/matmul.simd.hpp b/modules/core/src/matmul.simd.hpp
index a64cd7ba6a..09847d4a4b 100644
--- a/modules/core/src/matmul.simd.hpp
+++ b/modules/core/src/matmul.simd.hpp
@@ -2544,7 +2544,7 @@ double dotProd_32s(const int* src1, const int* src2, int len)
 #if CV_SIMD_WIDTH == 16
     const int wstep = step * 2;
     v_float64 v_sum1 = vx_setzero_f64();
-    for (; i < len - wstep; i += wstep, src1 += wstep, src2 += wstep)
+    for (; i <= len - wstep; i += wstep, src1 += wstep, src2 += wstep)
     {
         v_int32 v_src10 = vx_load(src1);
         v_int32 v_src20 = vx_load(src2);
@@ -2555,7 +2555,7 @@ double dotProd_32s(const int* src1, const int* src2, int len)
     }
     v_sum0 = v_add(v_sum0, v_sum1);
 #endif
-    for (; i < len - step; i += step, src1 += step, src2 += step)
+    for (; i <= len - step; i += step, src1 += step, src2 += step)
     {
         v_int32 v_src1 = vx_load(src1);
         v_int32 v_src2 = vx_load(src2);
diff --git a/modules/imgproc/src/color_lab.cpp b/modules/imgproc/src/color_lab.cpp
index dd6fb52949..fe9888e381 100644
--- a/modules/imgproc/src/color_lab.cpp
+++ b/modules/imgproc/src/color_lab.cpp
@@ -1953,7 +1953,7 @@ struct RGB2Lab_f
         {
             const int vsize = VTraits::vlanes();
             static const int nPixels = vsize*2;
-            for(; i < n - 3*nPixels; i += 3*nPixels, src += scn*nPixels)
+            for(; i <= n - 3*nPixels; i += 3*nPixels, src += scn*nPixels)
             {
                 v_float32 rvec0, gvec0, bvec0, rvec1, gvec1, bvec1;
                 if(scn == 3)
@@ -3297,7 +3297,7 @@ struct RGB2Luvinterpolate
         {
             const int vsize = VTraits::vlanes();
             static const int nPixels = vsize*2;
-            for(; i < n - 3*nPixels; i += 3*nPixels, src += scn*nPixels)
+            for(; i <= n - 3*nPixels; i += 3*nPixels, src += scn*nPixels)
             {
                 /*
                     int R = src[bIdx], G = src[1], B = src[bIdx^2];
diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp
index 4b317c6a5a..92af9cacc4 100644
--- a/modules/imgproc/src/resize.cpp
+++ b/modules/imgproc/src/resize.cpp
@@ -1325,7 +1325,7 @@ struct VResizeLinearVec_32s8u
             v_store(dst + x, v_rshr_pack_u<2>(v_add(v_mul_hi(v_pack(v_shr<4>(vx_load(S0 + x)), v_shr<4>(vx_load(S0 + x + VTraits::vlanes()))), b0), v_mul_hi(v_pack(v_shr<4>(vx_load(S1 + x)), v_shr<4>(vx_load(S1 + x + VTraits::vlanes()))), b1)),
                                               v_add(v_mul_hi(v_pack(v_shr<4>(vx_load(S0 + x + 2 * VTraits::vlanes())), v_shr<4>(vx_load(S0 + x + 3 * VTraits::vlanes()))), b0), v_mul_hi(v_pack(v_shr<4>(vx_load(S1 + x + 2 * VTraits::vlanes())), v_shr<4>(vx_load(S1 + x + 3 * VTraits::vlanes()))), b1))));
-        for( ; x < width - VTraits::vlanes(); x += VTraits::vlanes())
+        for( ; x <= width - VTraits::vlanes(); x += VTraits::vlanes())
             v_rshr_pack_u_store<2>(dst + x, v_add(v_mul_hi(v_pack(v_shr<4>(vx_load(S0 + x)), v_shr<4>(vx_load(S0 + x + VTraits::vlanes()))), b0), v_mul_hi(v_pack(v_shr<4>(vx_load(S1 + x)), v_shr<4>(vx_load(S1 + x + VTraits::vlanes()))), b1)));
         return x;
@@ -1349,7 +1349,7 @@ struct VResizeLinearVec_32f16u
         for (; x <= width - VTraits::vlanes(); x += VTraits::vlanes())
             v_store(dst + x, v_pack_u(v_round(v_muladd(vx_load(S0 + x ), b0, v_mul(vx_load(S1 + x), b1))),
                                       v_round(v_muladd(vx_load(S0 + x + VTraits::vlanes()), b0, v_mul(vx_load(S1 + x + VTraits::vlanes()), b1)))));
-        for( ; x < width - VTraits::vlanes(); x += VTraits::vlanes())
+        for( ; x <= width - VTraits::vlanes(); x += VTraits::vlanes())
         {
             v_int32 t0 = v_round(v_muladd(vx_load(S0 + x), b0, v_mul(vx_load(S1 + x), b1)));
             v_store_low(dst + x, v_pack_u(t0, t0));
@@ -1376,7 +1376,7 @@ struct VResizeLinearVec_32f16s
         for (; x <= width - VTraits::vlanes(); x += VTraits::vlanes())
             v_store(dst + x, v_pack(v_round(v_muladd(vx_load(S0 + x ), b0,
                 v_mul(vx_load(S1 + x), b1))), v_round(v_muladd(vx_load(S0 + x + VTraits::vlanes()), b0, v_mul(vx_load(S1 + x + VTraits::vlanes()), b1)))));
-        for( ; x < width - VTraits::vlanes(); x += VTraits::vlanes())
+        for( ; x <= width - VTraits::vlanes(); x += VTraits::vlanes())
         {
             v_int32 t0 = v_round(v_muladd(vx_load(S0 + x), b0, v_mul(vx_load(S1 + x), b1)));
             v_store_low(dst + x, v_pack(t0, t0));
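
Note (reviewer context, not part of the patch): every hunk applies the same off-by-one fix to a vectorized loop bound. With the old condition "i < len - step", the vector loop exits while exactly one full vector of elements still remains (len - i == step), and those elements fall through to the slower scalar tail that typically follows these loops; "i <= len - step" lets the vector body consume them. Below is a minimal standalone sketch of the boundary behavior; it uses nothing from OpenCV, and "step" and "len" are stand-ins for the vector width and element count, with empty loop bodies in place of the intrinsics.

    // Illustrative sketch only -- counts how many elements each loop
    // bound leaves to the scalar tail when len is an exact multiple
    // of the vector step.
    #include <cstdio>

    int main()
    {
        const int step = 4;   // stand-in for VTraits::vlanes()
        const int len  = 8;   // exact multiple of step: the interesting case

        // Old bound: stops at i == len - step, leaving one full vector unprocessed.
        int i = 0;
        for (; i < len - step; i += step)
            ;                 // vector body would run here
        std::printf("old bound: %d elements left to the scalar tail\n", len - i);

        // New bound: runs the vector body once more, consuming the whole buffer.
        int j = 0;
        for (; j <= len - step; j += step)
            ;
        std::printf("new bound: %d elements left to the scalar tail\n", len - j);
        return 0;
    }

Since the counters and lengths are signed ints here, as in dotProd_32s, "len - step" is negative for buffers shorter than one vector, so the "<=" form still skips the vector loop cleanly on tiny inputs.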