From c4c86a899094116cc23a94dbc70dcb9b46af2575 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 12 Jan 2015 10:59:29 +0300 Subject: [PATCH] cvtColor XYZ 2 RGB f32 --- modules/imgproc/perf/perf_cvt_color.cpp | 6 +- modules/imgproc/src/color.cpp | 107 ++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 3 deletions(-) diff --git a/modules/imgproc/perf/perf_cvt_color.cpp b/modules/imgproc/perf/perf_cvt_color.cpp index 9682cfc5ee..e01257eed1 100644 --- a/modules/imgproc/perf/perf_cvt_color.cpp +++ b/modules/imgproc/perf/perf_cvt_color.cpp @@ -204,12 +204,12 @@ PERF_TEST_P(Size_CvtMode, cvtColor8u, ) { Size sz = GetParam(); - int mode = COLOR_YCrCb2RGB; + int mode = COLOR_XYZ2RGB; ChPair ch = getConversionInfo(mode); mode %= COLOR_COLORCVT_MAX; - Mat src(sz, CV_8UC(3)); - Mat dst(sz, CV_8UC(3)); + Mat src(sz, CV_32FC(3)); + Mat dst(sz, CV_32FC(3)); declare.time(100); declare.in(src, WARMUP_RNG).out(dst); diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 8dd5e2cfee..292a6df3a2 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -3421,6 +3421,7 @@ template struct RGB2XYZ_i C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; n *= 3; + for(int i = 0; i < n; i += 3, src += scn) { int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift); @@ -3714,6 +3715,112 @@ template struct XYZ2RGB_f float coeffs[9]; }; +#if CV_SSE2 + +template <> +struct XYZ2RGB_f +{ + typedef float channel_type; + + XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs) + : dstcn(_dstcn), blueIdx(_blueIdx) + { + memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0])); + if(blueIdx == 0) + { + std::swap(coeffs[0], coeffs[6]); + std::swap(coeffs[1], coeffs[7]); + std::swap(coeffs[2], coeffs[8]); + } + + v_c0 = _mm_set1_ps(coeffs[0]); + v_c1 = _mm_set1_ps(coeffs[1]); + v_c2 = _mm_set1_ps(coeffs[2]); + v_c3 = _mm_set1_ps(coeffs[3]); + v_c4 = _mm_set1_ps(coeffs[4]); + v_c5 = _mm_set1_ps(coeffs[5]); + v_c6 = _mm_set1_ps(coeffs[6]); + v_c7 = _mm_set1_ps(coeffs[7]); + v_c8 = _mm_set1_ps(coeffs[8]); + } + + void process(__m128 v_x, __m128 v_y, __m128 v_z, + __m128 & v_r, __m128 & v_g, __m128 & v_b) const + { + v_b = _mm_mul_ps(v_x, v_c0); + v_b = _mm_add_ps(v_b, _mm_mul_ps(v_y, v_c1)); + v_b = _mm_add_ps(v_b, _mm_mul_ps(v_z, v_c2)); + + v_g = _mm_mul_ps(v_x, v_c3); + v_g = _mm_add_ps(v_g, _mm_mul_ps(v_y, v_c4)); + v_g = _mm_add_ps(v_g, _mm_mul_ps(v_z, v_c5)); + + v_r = _mm_mul_ps(v_x, v_c6); + v_r = _mm_add_ps(v_r, _mm_mul_ps(v_y, v_c7)); + v_r = _mm_add_ps(v_r, _mm_mul_ps(v_z, v_c8)); + } + + void operator()(const float* src, float* dst, int n) const + { + int dcn = dstcn; + float alpha = ColorChannel::max(); + float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], + C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], + C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; + n *= 3; + int i = 0; + + if (dcn == 3) + { + for ( ; i <= n - 24; i += 24, dst += 24) + { + __m128 v_x0 = _mm_loadu_ps(src + i); + __m128 v_x1 = _mm_loadu_ps(src + i + 4); + __m128 v_y0 = _mm_loadu_ps(src + i + 8); + __m128 v_y1 = _mm_loadu_ps(src + i + 12); + __m128 v_z0 = _mm_loadu_ps(src + i + 16); + __m128 v_z1 = _mm_loadu_ps(src + i + 20); + + _MM_DEINTERLIV_PS(v_x0, v_x1, v_y0, v_y1, v_z0, v_z1) + + __m128 v_r0, v_g0, v_b0; + process(v_x0, v_y0, v_z0, + v_r0, v_g0, v_b0); + + __m128 v_r1, v_g1, v_b1; + process(v_x1, v_y1, v_z1, + v_r1, v_g1, v_b1); + + _MM_INTERLIV_PS(v_b0, v_b1, v_g0, v_g1, v_r0, v_r1) + + _mm_storeu_ps(dst, v_b0); + _mm_storeu_ps(dst + 4, v_b1); + _mm_storeu_ps(dst + 8, v_g0); + _mm_storeu_ps(dst + 12, v_g1); + _mm_storeu_ps(dst + 16, v_r0); + _mm_storeu_ps(dst + 20, v_r1); + } + + } + + for( ; i < n; i += 3, dst += dcn) + { + float B = src[i]*C0 + src[i+1]*C1 + src[i+2]*C2; + float G = src[i]*C3 + src[i+1]*C4 + src[i+2]*C5; + float R = src[i]*C6 + src[i+1]*C7 + src[i+2]*C8; + dst[0] = B; dst[1] = G; dst[2] = R; + if( dcn == 4 ) + dst[3] = alpha; + } + } + int dstcn, blueIdx; + float coeffs[9]; + + __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; +}; + +#endif // CV_SSE2 + template struct XYZ2RGB_i {