Merge pull request #26071 from tingboliao:4.x

Remove redundant code in cv::convertMaps and mRGBA2RGBA<uchar> #26071

(1) cv::convertMaps: the branch `else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 ) { if( nninterpolate ) ... }` is unreachable,
    because this case is already handled at lines 1959 to 1961, where the result is computed up front and the function returns immediately (see the sketch after these two points).
(2) mRGBA2RGBA<uchar>: dst[0], dst[1], dst[2] and dst[3] are computed twice; the first, non-saturating assignments are dead stores that are immediately overwritten. The duplication was introduced in https://github.com/opencv/opencv/pull/13440 (a stand-alone restatement of the kept computation follows the first diff hunk below).
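
Below is a minimal sketch of why the later `if( nninterpolate )` test is dead. It is not the actual OpenCV source, and `convertMapsSketch` is a hypothetical name with a simplified signature, but it mirrors the compute-early-and-return pattern described in point (1).

```cpp
#include <opencv2/core.hpp>

// Hypothetical, simplified stand-in for cv::convertMaps.
void convertMapsSketch(const cv::Mat& map1, cv::Mat& dstmap1, int dstm1type, bool nninterpolate)
{
    if( nninterpolate && map1.type() == CV_32FC2 && dstm1type == CV_16SC2 )
    {
        // Fast path: round and saturate each float coordinate to short and return.
        map1.convertTo(dstmap1, CV_16S);
        return;  // because of this early return ...
    }

    // ... a second "if( nninterpolate )" check down here, inside the
    // CV_32FC2 -> CV_16SC2 branch, can never be taken; that is the code the PR removes.
}
```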

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
tingboliao 2024-09-03 12:56:37 +08:00 committed by GitHub
parent e9c3e1acb5
commit 88f99edc65
2 changed files with 33 additions and 57 deletions


@@ -1088,11 +1088,6 @@ struct mRGBA2RGBA<uchar>
         uchar v3_half = v3 / 2;
-        dst[0] = (v3==0)? 0 : (v0 * max_val + v3_half) / v3;
-        dst[1] = (v3==0)? 0 : (v1 * max_val + v3_half) / v3;
-        dst[2] = (v3==0)? 0 : (v2 * max_val + v3_half) / v3;
-        dst[3] = v3;
         dst[0] = (v3==0)? 0 : saturate_cast<uchar>((v0 * max_val + v3_half) / v3);
         dst[1] = (v3==0)? 0 : saturate_cast<uchar>((v1 * max_val + v3_half) / v3);
         dst[2] = (v3==0)? 0 : saturate_cast<uchar>((v2 * max_val + v3_half) / v3);
         dst[3] = v3;

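For reference, a stand-alone restatement of the computation that survives in this struct (the helper name `unpremultiplyRGBA` is made up for illustration): each premultiplied channel is divided by alpha with rounding and saturated to `uchar`, while alpha is copied through. The deleted lines produced the same quotients without `saturate_cast` and were immediately overwritten, so dropping them does not change the output.

```cpp
#include <opencv2/core.hpp>  // uchar, cv::saturate_cast

// Illustrative helper mirroring the lines kept in the hunk above.
static void unpremultiplyRGBA(const uchar src[4], uchar dst[4])
{
    const int max_val = 255;   // full-scale value for 8-bit channels
    uchar v0 = src[0], v1 = src[1], v2 = src[2], v3 = src[3];
    uchar v3_half = v3 / 2;    // adding alpha/2 rounds the integer division
    dst[0] = (v3 == 0) ? 0 : cv::saturate_cast<uchar>((v0 * max_val + v3_half) / v3);
    dst[1] = (v3 == 0) ? 0 : cv::saturate_cast<uchar>((v1 * max_val + v3_half) / v3);
    dst[2] = (v3 == 0) ? 0 : cv::saturate_cast<uchar>((v2 * max_val + v3_half) / v3);
    dst[3] = v3;               // alpha is passed through unchanged
}
```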

@@ -2082,65 +2082,46 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
}
else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
{
if( nninterpolate )
#if CV_TRY_SSE4_1
if( useSSE4_1 )
opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
else
#endif
{
#if CV_SIMD128
int span = VTraits<v_float32x4>::vlanes();
{
for( ; x <= (size.width << 1) - span * 2; x += span * 2 )
v_store(dst1 + x, v_pack(v_round(v_load(src1f + x)),
v_round(v_load(src1f + x + span))));
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
int span = VTraits<v_uint16x8>::vlanes();
for (; x <= size.width - span; x += span )
{
v_float32x4 v_src0[2], v_src1[2];
v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale));
v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale));
v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale));
v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale));
v_int16x8 v_dst[2];
v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
v_store(dst2 + x, v_pack_u(
v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))),
v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask)))));
}
}
#endif
for( ; x < size.width; x++ )
{
dst1[x*2] = saturate_cast<short>(src1f[x*2]);
dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
}
}
else
{
#if CV_TRY_SSE4_1
if( useSSE4_1 )
opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
else
#endif
{
#if CV_SIMD128
{
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
int span = VTraits<v_uint16x8>::vlanes();
for (; x <= size.width - span; x += span )
{
v_float32x4 v_src0[2], v_src1[2];
v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale));
v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale));
v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale));
v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale));
v_int16x8 v_dst[2];
v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
v_store(dst2 + x, v_pack_u(
v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))),
v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask)))));
}
}
#endif
for( ; x < size.width; x++ )
{
int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
}
int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
}
}
}
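
For context, the scalar tail that remains in this hunk encodes each floating-point map entry into the packed fixed-point form consumed by cv::remap: the integer parts of x and y go into the CV_16SC2 map, and the fractional parts of both axes are combined into a single CV_16UC1 index into the interpolation table (INTER_TAB_SIZE = 1 << INTER_BITS = 32). A stand-alone restatement, with the hypothetical helper name `encodeMapEntry`:

```cpp
#include <opencv2/imgproc.hpp>  // cv::INTER_BITS, cv::INTER_TAB_SIZE

// Illustrative per-entry version of the scalar loop kept above.
static void encodeMapEntry(float mapx, float mapy, short xy[2], ushort& tab)
{
    int ix = cv::saturate_cast<int>(mapx * cv::INTER_TAB_SIZE);  // fixed point, INTER_BITS fractional bits
    int iy = cv::saturate_cast<int>(mapy * cv::INTER_TAB_SIZE);
    xy[0] = cv::saturate_cast<short>(ix >> cv::INTER_BITS);      // integer x coordinate
    xy[1] = cv::saturate_cast<short>(iy >> cv::INTER_BITS);      // integer y coordinate
    tab = (ushort)((iy & (cv::INTER_TAB_SIZE - 1)) * cv::INTER_TAB_SIZE
                 + (ix & (cv::INTER_TAB_SIZE - 1)));             // packed fractional offsets
}
```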