Merge pull request #16504 from alalek:issue_16501

2025-07-24 14:06:27 +08:00 · 2020-02-04 16:39:17 +00:00 · 2020-02-04 16:39:17 +00:00 · d917f889b1
commit d917f889b1
parent e50acb923e f67c8e37d6
1 changed file with 1 addition and 85 deletions
--- a/modules/imgproc/src/resize.cpp
+++ b/modules/imgproc/src/resize.cpp
@@ -1674,93 +1674,9 @@ struct HResizeLinearVecU8_X4
                }
            }
        }
-        else if(cn < 9)
-        {
-            const int step = 8;
-            const int len0 = xmax & -step;
-            for( ; k <= (count - 2); k+=2 )
-            {
-                const uchar *S0 = src[k];
-                int *D0 = dst[k];
-                const uchar *S1 = src[k+1];
-                int *D1 = dst[k+1];
-
-                for( dx = 0; dx < len0; dx += cn )
-                {
-                    v_int16x8 a0 = v_load(alpha+dx*2);
-                    v_int16x8 a1 = v_load(alpha+dx*2 + 8);
-                    v_uint16x8 s0, s1;
-                    v_zip(v_load_expand(S0+xofs[dx]), v_load_expand(S0+xofs[dx]+cn), s0, s1);
-                    v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(s0), a0));
-                    v_store(&D0[dx+4], v_dotprod(v_reinterpret_as_s16(s1), a1));
-                    v_zip(v_load_expand(S1+xofs[dx]), v_load_expand(S1+xofs[dx]+cn), s0, s1);
-                    v_store(&D1[dx], v_dotprod(v_reinterpret_as_s16(s0), a0));
-                    v_store(&D1[dx+4], v_dotprod(v_reinterpret_as_s16(s1), a1));
-                }
-            }
-            for( ; k < count; k++ )
-            {
-                const uchar *S = src[k];
-                int *D = dst[k];
-                for( dx = 0; dx < len0; dx += cn )
-                {
-                    v_int16x8 a0 = v_load(alpha+dx*2);
-                    v_int16x8 a1 = v_load(alpha+dx*2 + 8);
-                    v_uint16x8 s0, s1;
-                    v_zip(v_load_expand(S+xofs[dx]), v_load_expand(S+xofs[dx]+cn), s0, s1);
-                    v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(s0), a0));
-                    v_store(&D[dx+4], v_dotprod(v_reinterpret_as_s16(s1), a1));
-                }
-            }
-        }
        else
        {
-            const int step = 16;
-            const int len0 = (xmax - cn) & -step;
-            for( ; k <= (count - 2); k+=2 )
-            {
-                const uchar *S0 = src[k];
-                int *D0 = dst[k];
-                const uchar *S1 = src[k+1];
-                int *D1 = dst[k+1];
-
-                for( dx = 0; dx < len0; dx += step )
-                {
-                    v_int16x8 a0 = v_load(alpha+dx*2);
-                    v_int16x8 a1 = v_load(alpha+dx*2 + 8);
-                    v_int16x8 a2 = v_load(alpha+dx*2 + 16);
-                    v_int16x8 a3 = v_load(alpha+dx*2 + 24);
-                    v_uint8x16 s01, s23;
-                    v_zip(v_lut(S0, xofs+dx), v_lut(S0+cn, xofs+dx), s01, s23);
-                    v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(v_expand_low(s01)), a0));
-                    v_store(&D0[dx+4], v_dotprod(v_reinterpret_as_s16(v_expand_high(s01)), a1));
-                    v_store(&D0[dx+8], v_dotprod(v_reinterpret_as_s16(v_expand_low(s23)), a2));
-                    v_store(&D0[dx+12], v_dotprod(v_reinterpret_as_s16(v_expand_high(s23)), a3));
-                    v_zip(v_lut(S1, xofs+dx), v_lut(S1+cn, xofs+dx), s01, s23);
-                    v_store(&D1[dx], v_dotprod(v_reinterpret_as_s16(v_expand_low(s01)), a0));
-                    v_store(&D1[dx+4], v_dotprod(v_reinterpret_as_s16(v_expand_high(s01)), a1));
-                    v_store(&D1[dx+8], v_dotprod(v_reinterpret_as_s16(v_expand_low(s23)), a2));
-                    v_store(&D1[dx+12], v_dotprod(v_reinterpret_as_s16(v_expand_high(s23)), a3));
-                }
-            }
-            for( ; k < count; k++ )
-            {
-                const uchar *S = src[k];
-                int *D = dst[k];
-                for( dx = 0; dx < len0; dx += step )
-                {
-                    v_int16x8 a0 = v_load(alpha+dx*2);
-                    v_int16x8 a1 = v_load(alpha+dx*2 + 8);
-                    v_int16x8 a2 = v_load(alpha+dx*2 + 16);
-                    v_int16x8 a3 = v_load(alpha+dx*2 + 24);
-                    v_uint8x16 s01, s23;
-                    v_zip(v_lut(S, xofs+dx), v_lut(S+cn, xofs+dx), s01, s23);
-                    v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(v_expand_low(s01)), a0));
-                    v_store(&D[dx+4], v_dotprod(v_reinterpret_as_s16(v_expand_high(s01)), a1));
-                    v_store(&D[dx+8], v_dotprod(v_reinterpret_as_s16(v_expand_low(s23)), a2));
-                    v_store(&D[dx+12], v_dotprod(v_reinterpret_as_s16(v_expand_high(s23)), a3));
-                }
-            }
+            return 0;  // images with channels >4 are out of optimization scope
        }
        return dx;
    }