From 47c792ecc0d797097b048c8643cde439c48a7b18 Mon Sep 17 00:00:00 2001
From: k-shinotsuka
Date: Tue, 4 Oct 2016 22:27:48 +0900
Subject: [PATCH] add simd code when scn equals 4 at RGB2Luv_b().

---
/*
 * Reviewer notes. This region (after the three-dash line, before the
 * diffstat) is commentary only and is not part of the commit or the diff.
 * Leading whitespace inside the hunk below was reconstructed after the
 * patch had been collapsed onto one line; verify it against the target
 * file (or apply with whitespace-insensitive matching).
 *
 * What the hunk adds:
 *   - A second SIMD branch in RGB2Luv_b, taken when scn == 4 && haveSIMD,
 *     placed after the branch that ends in the context lines above it and
 *     before the generic `for( ; j < dn*3; j += 3, src += scn )` loop,
 *     which still handles whatever the SIMD loop leaves over.
 *   - Each iteration loads 16 bytes from src, writes 12 floats to
 *     buf[j .. j+11], then advances src by 16 and j by 12. It runs while
 *     j <= dn*3 - 12, i.e. while 12 more floats fit below dn*3.
 *   - The bytes are widened 8 -> 16 -> 32 bits by interleaving with v_zero
 *     (presumably an all-zero register; it is defined outside this hunk),
 *     converted to float and multiplied by v_scale_inv (also defined
 *     outside this hunk).
 *
 * Why the stores overlap:
 *   - Every _mm_storeu_ps writes 4 floats, but only 3 per 4-byte group are
 *     kept. The stores at buf+j, buf+j+3 and buf+j+6 are issued in
 *     ascending order so that each one overwrites the 4th float of the
 *     previous store. The discarded 4th component is presumably alpha --
 *     confirm against the caller's channel layout.
 *   - The last group cannot use the same trick without writing buf[j+12].
 *     Instead, _mm_shuffle_epi32(..., 0x90) rearranges the lanes to
 *     [e0, e0, e1, e2], the vector is stored at buf+j+8 (covering
 *     buf[j+8 .. j+11]), and buf[j+8] is restored from `tmp`, which was
 *     read right after the third store. The statement order here must not
 *     be changed.
 *
 * NOTE(review): the loop only ever adds 12 to j, so if j is a multiple of 3
 * on entry (its initialisation is outside this hunk), `j % 3` is 0 after the
 * loop and the `src -= jr, j -= jr` adjustment never fires. It looks like a
 * carry-over from the preceding branch -- confirm before simplifying.
 */
 modules/imgproc/src/color.cpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp
index 6d0d0f8fad..47b66f96f9 100644
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@@ -5917,6 +5917,26 @@ struct RGB2Luv_b
                 if (jr)
                     src -= jr, j -= jr;
             }
+            else if (scn == 4 && haveSIMD)
+            {
+                for ( ; j <= (dn * 3 - 12); j += 12, src += 16)
+                {
+                    __m128i v_src = _mm_loadu_si128((__m128i const *)src);
+
+                    __m128i v_src_lo = _mm_unpacklo_epi8(v_src, v_zero);
+                    __m128i v_src_hi = _mm_unpackhi_epi8(v_src, v_zero);
+                    _mm_storeu_ps(buf + j, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_lo, v_zero)), v_scale_inv));
+                    _mm_storeu_ps(buf + j + 3, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_lo, v_zero)), v_scale_inv));
+                    _mm_storeu_ps(buf + j + 6, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_hi, v_zero)), v_scale_inv));
+                    float tmp = buf[j + 8];
+                    _mm_storeu_ps(buf + j + 8, _mm_mul_ps(_mm_cvtepi32_ps(_mm_shuffle_epi32(_mm_unpackhi_epi16(v_src_hi, v_zero), 0x90)), v_scale_inv));
+                    buf[j + 8] = tmp;
+                }
+
+                int jr = j % 3;
+                if (jr)
+                    src -= jr, j -= jr;
+            }
             #endif
             for( ; j < dn*3; j += 3, src += scn )
             {