Merge pull request #26071 from tingboliao:4.x
Remove the redundant code of cv::convertMaps and mRGBA2RGBA&lt;uchar&gt; #26071

(1) cv::convertMaps: the branch [else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 ) ... if( nninterpolate )] is unreachable, because that case is already handled at lines 1959 to 1961, where the result is computed and the function returns early.

(2) mRGBA2RGBA&lt;uchar&gt;: dst[0], dst[1], dst[2] and dst[3] are computed twice in a row, so the first set of assignments is dead code. Introduced in https://github.com/opencv/opencv/pull/13440

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
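For context, a minimal sketch (not part of this patch) of how the affected `cv::convertMaps` path is typically exercised: converting an absolute `CV_32FC2` map to the fixed-point `CV_16SC2`/`CV_16UC1` representation and feeding it to `cv::remap`. The function and variable names below are illustrative.

```cpp
#include <opencv2/imgproc.hpp>

// Illustrative only: exercises the CV_32FC2 -> CV_16SC2 branch this PR simplifies.
void remapWithFixedPointMaps(const cv::Mat& src, cv::Mat& dst,
                             const cv::Mat& mapXY /* CV_32FC2 absolute map */)
{
    cv::Mat map1, map2;
    // nninterpolation = false: map2 receives the CV_16UC1 interpolation-table indices.
    // The nninterpolate case for this type combination is handled earlier in convertMaps,
    // which is why the branch removed by this PR was unreachable.
    cv::convertMaps(mapXY, cv::noArray(), map1, map2, CV_16SC2, false);
    cv::remap(src, dst, map1, map2, cv::INTER_LINEAR);
}
```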
parent e9c3e1acb5
commit 88f99edc65
@@ -1088,11 +1088,6 @@ struct mRGBA2RGBA<uchar>
         uchar v3_half = v3 / 2;

-        dst[0] = (v3==0)? 0 : (v0 * max_val + v3_half) / v3;
-        dst[1] = (v3==0)? 0 : (v1 * max_val + v3_half) / v3;
-        dst[2] = (v3==0)? 0 : (v2 * max_val + v3_half) / v3;
-        dst[3] = v3;
-
         dst[0] = (v3==0)? 0 : saturate_cast<uchar>((v0 * max_val + v3_half) / v3);
         dst[1] = (v3==0)? 0 : saturate_cast<uchar>((v1 * max_val + v3_half) / v3);
         dst[2] = (v3==0)? 0 : saturate_cast<uchar>((v2 * max_val + v3_half) / v3);
         dst[3] = v3;
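A scalar sketch of what the surviving lines above compute for one pixel: un-premultiplying RGB by alpha with round-to-nearest integer division. The helper name is made up for illustration, and `max_val` is 255 for the `uchar` specialization.

```cpp
#include <opencv2/core.hpp>

// Illustrative per-pixel version of the kept code: un-premultiply RGB by alpha.
// Adding v3_half before dividing makes the integer division round to nearest.
static inline void unpremultiplyPixel(const uchar src[4], uchar dst[4])
{
    const int max_val = 255;  // channel maximum for 8-bit images
    uchar v0 = src[0], v1 = src[1], v2 = src[2], v3 = src[3];
    uchar v3_half = v3 / 2;
    dst[0] = (v3 == 0) ? 0 : cv::saturate_cast<uchar>((v0 * max_val + v3_half) / v3);
    dst[1] = (v3 == 0) ? 0 : cv::saturate_cast<uchar>((v1 * max_val + v3_half) / v3);
    dst[2] = (v3 == 0) ? 0 : cv::saturate_cast<uchar>((v2 * max_val + v3_half) / v3);
    dst[3] = v3;
}
```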
@@ -2082,65 +2082,46 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
     }
     else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
     {
-        if( nninterpolate )
-        {
-#if CV_SIMD128
-            int span = VTraits<v_float32x4>::vlanes();
-            {
-                for( ; x <= (size.width << 1) - span * 2; x += span * 2 )
-                    v_store(dst1 + x, v_pack(v_round(v_load(src1f + x)),
-                                             v_round(v_load(src1f + x + span))));
-            }
-#endif
-            for( ; x < size.width; x++ )
-            {
-                dst1[x*2] = saturate_cast<short>(src1f[x*2]);
-                dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
-            }
-        }
-        else
-        {
 #if CV_TRY_SSE4_1
         if( useSSE4_1 )
             opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
         else
 #endif
         {
 #if CV_SIMD128
             {
                 v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
                 v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
                 v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
                 int span = VTraits<v_uint16x8>::vlanes();
                 for (; x <= size.width - span; x += span )
                 {
                     v_float32x4 v_src0[2], v_src1[2];
                     v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
                     v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
                     v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale));
                     v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale));
                     v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale));
                     v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale));

                     v_int16x8 v_dst[2];
                     v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
                     v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
                     v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);

                     v_store(dst2 + x, v_pack_u(
                         v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))),
                         v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask)))));
                 }
             }
 #endif
             for( ; x < size.width; x++ )
             {
                 int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
                 int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
                 dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
                 dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
                 dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
             }
         }
-        }
     }
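The scalar tail loop kept above encodes each floating-point map entry into a fixed-point pair: a `short` integer coordinate written to `dst1` and a `CV_16UC1` index into the INTER_TAB_SIZE x INTER_TAB_SIZE interpolation table written to `dst2`. A standalone restatement of that encoding (helper name illustrative):

```cpp
#include <opencv2/imgproc.hpp>

// Illustrative re-statement of the scalar tail loop above for a single map entry.
// In OpenCV, INTER_BITS == 5 and INTER_TAB_SIZE == (1 << INTER_BITS) == 32.
static inline void encodeMapEntry(float mapx, float mapy, short xy[2], ushort& fxy)
{
    int ix = cv::saturate_cast<int>(mapx * cv::INTER_TAB_SIZE);
    int iy = cv::saturate_cast<int>(mapy * cv::INTER_TAB_SIZE);
    xy[0] = cv::saturate_cast<short>(ix >> cv::INTER_BITS);  // integer pixel coordinate
    xy[1] = cv::saturate_cast<short>(iy >> cv::INTER_BITS);
    // fractional parts combined into one index of the bilinear coefficient table
    fxy = (ushort)((iy & (cv::INTER_TAB_SIZE - 1)) * cv::INTER_TAB_SIZE
                   + (ix & (cv::INTER_TAB_SIZE - 1)));
}
```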