Added optimized loading to YUV2RGB_422 kernel

This commit is contained in:
Alexander Karsakov 2014-10-28 15:07:51 +03:00
parent 1466621f99
commit 643c906f3d
2 changed files with 16 additions and 4 deletions

View File

@ -5060,7 +5060,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
CV_Assert( scn == 2 && depth == CV_8U );
k.create("YUV2RGB_422", ocl::imgproc::cvtcolor_oclsrc,
opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d", dcn, bidx, uidx, yidx));
opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d%s", dcn, bidx, uidx, yidx,
src.offset % 4 == 0 && src.step % 4 == 0 ? " -D USE_OPTIMIZED_LOAD" : ""));
break;
}
case COLOR_BGR2YCrCb:

View File

@ -573,22 +573,33 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of
{
if (y < rows )
{
__constant float* coeffs = c_YUV2RGBCoeffs_420;
#ifndef USE_OPTIMIZED_LOAD
float U = ((float) src[uidx]) - HALF_MAX;
float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX;
float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0];
float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0];
#else
int load_src = *((__global int*) src);
float vec_src[4] = { load_src & 0xff, (load_src >> 8) & 0xff, (load_src >> 16) & 0xff, (load_src >> 24) & 0xff};
float U = vec_src[uidx] - HALF_MAX;
float V = vec_src[(2 + uidx) % 4] - HALF_MAX;
float y00 = max(0.f, vec_src[yidx] - 16.f) * coeffs[0];
float y01 = max(0.f, vec_src[yidx + 2] - 16.f) * coeffs[0];
#endif
__constant float* coeffs = c_YUV2RGBCoeffs_420;
float ruv = fma(coeffs[4], V, 0.5f);
float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f));
float buv = fma(coeffs[1], U, 0.5f);
float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0];
dst[2 - bidx] = convert_uchar_sat(y00 + ruv);
dst[1] = convert_uchar_sat(y00 + guv);
dst[bidx] = convert_uchar_sat(y00 + buv);
#if dcn == 4
dst[3] = 255;
#endif
float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0];
dst[dcn + 2 - bidx] = convert_uchar_sat(y01 + ruv);
dst[dcn + 1] = convert_uchar_sat(y01 + guv);
dst[dcn + bidx] = convert_uchar_sat(y01 + buv);