fixed wrong equivalence in YUV conversion (#7481)

* fixed wrong equivalence in YUV conversion

* fixed channel order from YVU to YUV
This commit is contained in:
Rostislav Vasilikhin 2016-11-23 18:39:18 +04:00 committed by Vadim Pisarevsky
parent b8c875d4a1
commit 7db43f9fff
2 changed files with 272 additions and 153 deletions

View File

@ -105,6 +105,39 @@
namespace cv
{
//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601
const float B2YF = 0.114f;
const float G2YF = 0.587f;
const float R2YF = 0.299f;
//to YCbCr
const float YCBF = 0.564f; // == 1/2/(1-B2YF)
const float YCRF = 0.713f; // == 1/2/(1-R2YF)
const int YCBI = 9241; // == YCBF*16384
const int YCRI = 11682; // == YCRF*16384
//to YUV
const float B2UF = 0.492f;
const float R2VF = 0.877f;
const int B2UI = 8061; // == B2UF*16384
const int R2VI = 14369; // == R2VF*16384
//from YUV
const float U2BF = 2.032f;
const float U2GF = -0.395f;
const float V2GF = -0.581f;
const float V2RF = 1.140f;
const int U2BI = 33292;
const int U2GI = -6472;
const int V2GI = -9519;
const int V2RI = 18678;
//from YCrCb
const float CB2BF = 1.773f;
const float CB2GF = -0.344f;
const float CR2GF = -0.714f;
const float CR2RF = 1.403f;
const int CB2BI = 29049;
const int CB2GI = -5636;
const int CR2GI = -11698;
const int CR2RI = 22987;
// computes cubic spline coefficients for a function: (xi=i, yi=f[i]), i=0..n
template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab)
@ -499,9 +532,9 @@ struct IPPColor2GrayFunctor
IPPColor2GrayFunctor(ippiColor2GrayFunc _func) :
ippiColorToGray(_func)
{
coeffs[0] = 0.114f;
coeffs[1] = 0.587f;
coeffs[2] = 0.299f;
coeffs[0] = B2YF;
coeffs[1] = G2YF;
coeffs[2] = R2YF;
}
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
{
@ -1143,9 +1176,9 @@ enum
{
yuv_shift = 14,
xyz_shift = 12,
R2Y = 4899,
G2Y = 9617,
B2Y = 1868,
R2Y = 4899, // == R2YF*16384
G2Y = 9617, // == G2YF*16384
B2Y = 1868, // == B2YF*16384
BLOCK_SIZE = 256
};
@ -1313,7 +1346,7 @@ template<typename _Tp> struct RGB2Gray
RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
{
static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
static const float coeffs0[] = { R2YF, G2YF, B2YF };
memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
if(blueIdx == 0)
std::swap(coeffs[0], coeffs[2]);
@ -1460,7 +1493,7 @@ struct RGB2Gray<float>
RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
{
static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
static const float coeffs0[] = { R2YF, G2YF, B2YF };
memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
if(blueIdx == 0)
std::swap(coeffs[0], coeffs[2]);
@ -1650,7 +1683,7 @@ struct RGB2Gray<float>
RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
{
static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
static const float coeffs0[] = { R2YF, G2YF, B2YF };
memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
if(blueIdx == 0)
std::swap(coeffs[0], coeffs[2]);
@ -1772,16 +1805,18 @@ template<typename _Tp> struct RGB2YCrCb_f
{
typedef _Tp channel_type;
RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : srccn(_srccn), blueIdx(_blueIdx)
RGB2YCrCb_f(int _srccn, int _blueIdx, bool _isCrCb) : srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
static const float coeffs_crb[] = { R2YF, G2YF, B2YF, YCRF, YCBF };
static const float coeffs_yuv[] = { R2YF, G2YF, B2YF, R2VF, B2UF };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
}
void operator()(const _Tp* src, _Tp* dst, int n) const
{
int scn = srccn, bidx = blueIdx;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const _Tp delta = ColorChannel<_Tp>::half();
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
n *= 3;
@ -1790,10 +1825,11 @@ template<typename _Tp> struct RGB2YCrCb_f
_Tp Y = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
_Tp Cr = saturate_cast<_Tp>((src[bidx^2] - Y)*C3 + delta);
_Tp Cb = saturate_cast<_Tp>((src[bidx] - Y)*C4 + delta);
dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
dst[i] = Y; dst[i+1+yuvOrder] = Cr; dst[i+2-yuvOrder] = Cb;
}
}
int srccn, blueIdx;
bool isCrCb;
float coeffs[5];
};
@ -1804,11 +1840,12 @@ struct RGB2YCrCb_f<float>
{
typedef float channel_type;
RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) :
srccn(_srccn), blueIdx(_blueIdx)
RGB2YCrCb_f(int _srccn, int _blueIdx, bool _isCrCb) :
srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
static const float coeffs_crb[] = { R2YF, G2YF, B2YF, YCRF, YCBF };
static const float coeffs_yuv[] = { R2YF, G2YF, B2YF, R2VF, B2UF };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if(blueIdx==0)
std::swap(coeffs[0], coeffs[2]);
@ -1823,6 +1860,7 @@ struct RGB2YCrCb_f<float>
void operator()(const float * src, float * dst, int n) const
{
int scn = srccn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const float delta = ColorChannel<float>::half();
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
n *= 3;
@ -1832,8 +1870,8 @@ struct RGB2YCrCb_f<float>
{
float32x4x3_t v_src = vld3q_f32(src), v_dst;
v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);
v_dst.val[1+yuvOrder] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
v_dst.val[2-yuvOrder] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);
vst3q_f32(dst + i, v_dst);
}
@ -1843,8 +1881,8 @@ struct RGB2YCrCb_f<float>
float32x4x4_t v_src = vld4q_f32(src);
float32x4x3_t v_dst;
v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);
v_dst.val[1+yuvOrder] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
v_dst.val[2-yuvOrder] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);
vst3q_f32(dst + i, v_dst);
}
@ -1854,10 +1892,11 @@ struct RGB2YCrCb_f<float>
float Y = src[0]*C0 + src[1]*C1 + src[2]*C2;
float Cr = (src[bidx^2] - Y)*C3 + delta;
float Cb = (src[bidx] - Y)*C4 + delta;
dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
dst[i] = Y; dst[i+1+yuvOrder] = Cr; dst[i+2-yuvOrder] = Cb;
}
}
int srccn, blueIdx;
bool isCrCb;
float coeffs[5];
float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta;
};
@ -1869,11 +1908,12 @@ struct RGB2YCrCb_f<float>
{
typedef float channel_type;
RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) :
srccn(_srccn), blueIdx(_blueIdx)
RGB2YCrCb_f(int _srccn, int _blueIdx, bool _isCrCb) :
srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
static const float coeffs_crb[] = { R2YF, G2YF, B2YF, YCRF, YCBF };
static const float coeffs_yuv[] = { R2YF, G2YF, B2YF, R2VF, B2UF };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if (blueIdx==0)
std::swap(coeffs[0], coeffs[2]);
@ -1901,6 +1941,7 @@ struct RGB2YCrCb_f<float>
void operator()(const float * src, float * dst, int n) const
{
int scn = srccn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const float delta = ColorChannel<float>::half();
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
n *= 3;
@ -1934,14 +1975,19 @@ struct RGB2YCrCb_f<float>
process(v_r1, v_g1, v_b1,
v_y1, v_cr1, v_cb1);
_mm_interleave_ps(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);
if(isCrCb)
_mm_interleave_ps(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);
else //YUV
{
_mm_interleave_ps(v_y0, v_y1, v_cb0, v_cb1, v_cr0, v_cr1);
}
_mm_storeu_ps(dst + i, v_y0);
_mm_storeu_ps(dst + i + 4, v_y1);
_mm_storeu_ps(dst + i + 8, v_cr0);
_mm_storeu_ps(dst + i + 12, v_cr1);
_mm_storeu_ps(dst + i + 16, v_cb0);
_mm_storeu_ps(dst + i + 20, v_cb1);
_mm_storeu_ps(dst + i + 8 + yuvOrder*8, v_cr0);
_mm_storeu_ps(dst + i + 12 + yuvOrder*8, v_cr1);
_mm_storeu_ps(dst + i + 16 - yuvOrder*8, v_cb0);
_mm_storeu_ps(dst + i + 20 - yuvOrder*8, v_cb1);
}
}
@ -1950,10 +1996,11 @@ struct RGB2YCrCb_f<float>
float Y = src[0]*C0 + src[1]*C1 + src[2]*C2;
float Cr = (src[bidx^2] - Y)*C3 + delta;
float Cb = (src[bidx] - Y)*C4 + delta;
dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
dst[i] = Y; dst[i+1+yuvOrder] = Cr; dst[i+2-yuvOrder] = Cb;
}
}
int srccn, blueIdx;
bool isCrCb;
float coeffs[5];
__m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_delta;
bool haveSIMD;
@ -1965,16 +2012,18 @@ template<typename _Tp> struct RGB2YCrCb_i
{
typedef _Tp channel_type;
RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
: srccn(_srccn), blueIdx(_blueIdx)
RGB2YCrCb_i(int _srccn, int _blueIdx, bool _isCrCb)
: srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
static const int coeffs_crb[] = { R2Y, G2Y, B2Y, YCRI, YCBI };
static const int coeffs_yuv[] = { R2Y, G2Y, B2Y, R2VI, B2UI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
}
void operator()(const _Tp* src, _Tp* dst, int n) const
{
int scn = srccn, bidx = blueIdx;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
int delta = ColorChannel<_Tp>::half()*(1 << yuv_shift);
n *= 3;
@ -1984,11 +2033,12 @@ template<typename _Tp> struct RGB2YCrCb_i
int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
dst[i] = saturate_cast<_Tp>(Y);
dst[i+1] = saturate_cast<_Tp>(Cr);
dst[i+2] = saturate_cast<_Tp>(Cb);
dst[i+1+yuvOrder] = saturate_cast<_Tp>(Cr);
dst[i+2-yuvOrder] = saturate_cast<_Tp>(Cb);
}
}
int srccn, blueIdx;
bool isCrCb;
int coeffs[5];
};
@ -1999,11 +2049,12 @@ struct RGB2YCrCb_i<uchar>
{
typedef uchar channel_type;
RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
: srccn(_srccn), blueIdx(_blueIdx)
RGB2YCrCb_i(int _srccn, int _blueIdx, bool _isCrCb)
: srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
static const int coeffs_crb[] = { R2Y, G2Y, B2Y, YCRI, YCBI };
static const int coeffs_yuv[] = { R2Y, G2Y, B2Y, R2VI, B2UI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if (blueIdx==0)
std::swap(coeffs[0], coeffs[2]);
@ -2019,6 +2070,7 @@ struct RGB2YCrCb_i<uchar>
void operator()(const uchar * src, uchar * dst, int n) const
{
int scn = srccn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
int delta = ColorChannel<uchar>::half()*(1 << yuv_shift);
n *= 3;
@ -2067,8 +2119,8 @@ struct RGB2YCrCb_i<uchar>
v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);
v_dst.val[0] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1)));
v_dst.val[1] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cr0), vqmovn_s32(v_Cr1)));
v_dst.val[2] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cb0), vqmovn_s32(v_Cb1)));
v_dst.val[1+yuvOrder] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cr0), vqmovn_s32(v_Cr1)));
v_dst.val[2-yuvOrder] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cb0), vqmovn_s32(v_Cb1)));
vst3_u8(dst + i, v_dst);
}
@ -2079,11 +2131,12 @@ struct RGB2YCrCb_i<uchar>
int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
dst[i] = saturate_cast<uchar>(Y);
dst[i+1] = saturate_cast<uchar>(Cr);
dst[i+2] = saturate_cast<uchar>(Cb);
dst[i+1+yuvOrder] = saturate_cast<uchar>(Cr);
dst[i+2-yuvOrder] = saturate_cast<uchar>(Cb);
}
}
int srccn, blueIdx, coeffs[5];
bool isCrCb;
int16x4_t v_c0, v_c1, v_c2;
int32x4_t v_c3, v_c4, v_delta, v_delta2;
};
@ -2093,11 +2146,12 @@ struct RGB2YCrCb_i<ushort>
{
typedef ushort channel_type;
RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
: srccn(_srccn), blueIdx(_blueIdx)
RGB2YCrCb_i(int _srccn, int _blueIdx, bool _isCrCb)
: srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
static const int coeffs_crb[] = { R2Y, G2Y, B2Y, YCRI, YCBI };
static const int coeffs_yuv[] = { R2Y, G2Y, B2Y, R2VI, B2UI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if (blueIdx==0)
std::swap(coeffs[0], coeffs[2]);
@ -2113,6 +2167,7 @@ struct RGB2YCrCb_i<ushort>
void operator()(const ushort * src, ushort * dst, int n) const
{
int scn = srccn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
int delta = ColorChannel<ushort>::half()*(1 << yuv_shift);
n *= 3;
@ -2155,8 +2210,8 @@ struct RGB2YCrCb_i<ushort>
v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);
v_dst.val[0] = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1));
v_dst.val[1] = vcombine_u16(vqmovun_s32(v_Cr0), vqmovun_s32(v_Cr1));
v_dst.val[2] = vcombine_u16(vqmovun_s32(v_Cb0), vqmovun_s32(v_Cb1));
v_dst.val[1+yuvOrder] = vcombine_u16(vqmovun_s32(v_Cr0), vqmovun_s32(v_Cr1));
v_dst.val[2-yuvOrder] = vcombine_u16(vqmovun_s32(v_Cb0), vqmovun_s32(v_Cb1));
vst3q_u16(dst + i, v_dst);
}
@ -2189,8 +2244,8 @@ struct RGB2YCrCb_i<ushort>
v_Cb = vshrq_n_s32(vaddq_s32(v_Cb, v_delta2), yuv_shift);
v_dst.val[0] = vqmovun_s32(v_Y);
v_dst.val[1] = vqmovun_s32(v_Cr);
v_dst.val[2] = vqmovun_s32(v_Cb);
v_dst.val[1+yuvOrder] = vqmovun_s32(v_Cr);
v_dst.val[2-yuvOrder] = vqmovun_s32(v_Cb);
vst3_u16(dst + i, v_dst);
}
@ -2201,11 +2256,12 @@ struct RGB2YCrCb_i<ushort>
int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
dst[i] = saturate_cast<ushort>(Y);
dst[i+1] = saturate_cast<ushort>(Cr);
dst[i+2] = saturate_cast<ushort>(Cb);
dst[i+1+yuvOrder] = saturate_cast<ushort>(Cr);
dst[i+2-yuvOrder] = saturate_cast<ushort>(Cb);
}
}
int srccn, blueIdx, coeffs[5];
bool isCrCb;
int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta, v_delta2;
};
@ -2216,11 +2272,12 @@ struct RGB2YCrCb_i<uchar>
{
typedef uchar channel_type;
RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
: srccn(_srccn), blueIdx(_blueIdx)
RGB2YCrCb_i(int _srccn, int _blueIdx, bool _isCrCb)
: srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
static const int coeffs_crb[] = { R2Y, G2Y, B2Y, YCRI, YCBI };
static const int coeffs_yuv[] = { R2Y, G2Y, B2Y, R2VI, B2UI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if (blueIdx==0)
std::swap(coeffs[0], coeffs[2]);
@ -2229,7 +2286,10 @@ struct RGB2YCrCb_i<uchar>
v_delta_32 = _mm_set1_epi32(delta);
short delta2 = 1 + ColorChannel<uchar>::half() * 2;
v_coeff = _mm_set_epi16(delta2, (short)coeffs[4], delta2, (short)coeffs[3], delta2, (short)coeffs[4], delta2, (short)coeffs[3]);
v_shuffle2 = _mm_set_epi8(0x0, 0x0, 0x0, 0x0, 0xf, 0xe, 0xc, 0xb, 0xa, 0x8, 0x7, 0x6, 0x4, 0x3, 0x2, 0x0);
if(isCrCb)
v_shuffle2 = _mm_set_epi8(0x0, 0x0, 0x0, 0x0, 0xf, 0xe, 0xc, 0xb, 0xa, 0x8, 0x7, 0x6, 0x4, 0x3, 0x2, 0x0);
else //if YUV
v_shuffle2 = _mm_set_epi8(0x0, 0x0, 0x0, 0x0, 0xe, 0xf, 0xc, 0xa, 0xb, 0x8, 0x6, 0x7, 0x4, 0x2, 0x3, 0x0);
haveSIMD = checkHardwareSupport(CV_CPU_SSE4_1);
}
@ -2283,6 +2343,7 @@ struct RGB2YCrCb_i<uchar>
void operator()(const uchar * src, uchar * dst, int n) const
{
int scn = srccn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
int delta = ColorChannel<uchar>::half()*(1 << yuv_shift);
n *= 3;
@ -2359,8 +2420,8 @@ struct RGB2YCrCb_i<uchar>
int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
dst[i] = saturate_cast<uchar>(Y);
dst[i+1] = saturate_cast<uchar>(Cr);
dst[i+2] = saturate_cast<uchar>(Cb);
dst[i+1+yuvOrder] = saturate_cast<uchar>(Cr);
dst[i+2-yuvOrder] = saturate_cast<uchar>(Cb);
}
}
@ -2368,6 +2429,7 @@ struct RGB2YCrCb_i<uchar>
__m128i v_coeff;
__m128i v_shuffle2;
int srccn, blueIdx, coeffs[5];
bool isCrCb;
bool haveSIMD;
};
@ -2376,11 +2438,12 @@ struct RGB2YCrCb_i<ushort>
{
typedef ushort channel_type;
RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
: srccn(_srccn), blueIdx(_blueIdx)
RGB2YCrCb_i(int _srccn, int _blueIdx, bool _isCrCb)
: srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
static const int coeffs_crb[] = { R2Y, G2Y, B2Y, YCRI, YCBI };
static const int coeffs_yuv[] = { R2Y, G2Y, B2Y, R2VI, B2UI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if (blueIdx==0)
std::swap(coeffs[0], coeffs[2]);
@ -2437,6 +2500,7 @@ struct RGB2YCrCb_i<ushort>
void operator()(const ushort * src, ushort * dst, int n) const
{
int scn = srccn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
int delta = ColorChannel<ushort>::half()*(1 << yuv_shift);
n *= 3;
@ -2471,14 +2535,17 @@ struct RGB2YCrCb_i<ushort>
process(v_r1, v_g1, v_b1,
v_y1, v_cr1, v_cb1);
_mm_interleave_epi16(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);
if(isCrCb)
_mm_interleave_epi16(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);
else //YUV
_mm_interleave_epi16(v_y0, v_y1, v_cb0, v_cb1, v_cr0, v_cr1);
_mm_storeu_si128((__m128i *)(dst + i), v_y0);
_mm_storeu_si128((__m128i *)(dst + i + 8), v_y1);
_mm_storeu_si128((__m128i *)(dst + i + 16), v_cr0);
_mm_storeu_si128((__m128i *)(dst + i + 24), v_cr1);
_mm_storeu_si128((__m128i *)(dst + i + 32), v_cb0);
_mm_storeu_si128((__m128i *)(dst + i + 40), v_cb1);
_mm_storeu_si128((__m128i *)(dst + i + 16 + yuvOrder*16), v_cr0);
_mm_storeu_si128((__m128i *)(dst + i + 24 + yuvOrder*16), v_cr1);
_mm_storeu_si128((__m128i *)(dst + i + 32 - yuvOrder*16), v_cb0);
_mm_storeu_si128((__m128i *)(dst + i + 40 - yuvOrder*16), v_cb1);
}
}
@ -2488,12 +2555,13 @@ struct RGB2YCrCb_i<ushort>
int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
dst[i] = saturate_cast<ushort>(Y);
dst[i+1] = saturate_cast<ushort>(Cr);
dst[i+2] = saturate_cast<ushort>(Cb);
dst[i+1+yuvOrder] = saturate_cast<ushort>(Cr);
dst[i+2-yuvOrder] = saturate_cast<ushort>(Cb);
}
}
int srccn, blueIdx, coeffs[5];
bool isCrCb;
__m128i v_c0, v_c1, v_c2;
__m128i v_c3, v_c4, v_delta, v_delta2;
__m128i v_zero;
@ -2506,23 +2574,25 @@ template<typename _Tp> struct YCrCb2RGB_f
{
typedef _Tp channel_type;
YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
: dstcn(_dstcn), blueIdx(_blueIdx)
YCrCb2RGB_f(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
static const float coeffs_cbr[] = {CR2RF, CR2GF, CB2GF, CB2BF};
static const float coeffs_yuv[] = { V2RF, V2GF, U2GF, U2BF};
memcpy(coeffs, isCrCb ? coeffs_cbr : coeffs_yuv, 4*sizeof(coeffs[0]));
}
void operator()(const _Tp* src, _Tp* dst, int n) const
{
int dcn = dstcn, bidx = blueIdx;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3;
for(int i = 0; i < n; i += 3, dst += dcn)
{
_Tp Y = src[i];
_Tp Cr = src[i+1];
_Tp Cb = src[i+2];
_Tp Cr = src[i+1+yuvOrder];
_Tp Cb = src[i+2-yuvOrder];
_Tp b = saturate_cast<_Tp>(Y + (Cb - delta)*C3);
_Tp g = saturate_cast<_Tp>(Y + (Cb - delta)*C2 + (Cr - delta)*C1);
@ -2534,6 +2604,7 @@ template<typename _Tp> struct YCrCb2RGB_f
}
}
int dstcn, blueIdx;
bool isCrCb;
float coeffs[4];
};
@ -2544,11 +2615,12 @@ struct YCrCb2RGB_f<float>
{
typedef float channel_type;
YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
: dstcn(_dstcn), blueIdx(_blueIdx)
YCrCb2RGB_f(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
static const float coeffs_cbr[] = {CR2RF, CR2GF, CB2GF, CB2BF};
static const float coeffs_yuv[] = { V2RF, V2GF, U2GF, U2BF};
memcpy(coeffs, isCrCb ? coeffs_cbr : coeffs_yuv, 4*sizeof(coeffs[0]));
v_c0 = vdupq_n_f32(coeffs[0]);
v_c1 = vdupq_n_f32(coeffs[1]);
@ -2561,6 +2633,7 @@ struct YCrCb2RGB_f<float>
void operator()(const float* src, float* dst, int n) const
{
int dcn = dstcn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max();
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3;
@ -2569,7 +2642,7 @@ struct YCrCb2RGB_f<float>
for ( ; i <= n - 12; i += 12, dst += 12)
{
float32x4x3_t v_src = vld3q_f32(src + i), v_dst;
float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2];
float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1+yuvOrder], v_Cb = v_src.val[2-yuvOrder];
v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
@ -2582,7 +2655,7 @@ struct YCrCb2RGB_f<float>
{
float32x4x3_t v_src = vld3q_f32(src + i);
float32x4x4_t v_dst;
float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2];
float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1+yuvOrder], v_Cb = v_src.val[2-yuvOrder];
v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
@ -2594,7 +2667,7 @@ struct YCrCb2RGB_f<float>
for ( ; i < n; i += 3, dst += dcn)
{
float Y = src[i], Cr = src[i+1], Cb = src[i+2];
float Y = src[i], Cr = src[i+1+yuvOrder], Cb = src[i+2-yuvOrder];
float b = Y + (Cb - delta)*C3;
float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
@ -2606,6 +2679,7 @@ struct YCrCb2RGB_f<float>
}
}
int dstcn, blueIdx;
bool isCrCb;
float coeffs[4];
float32x4_t v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta;
};
@ -2617,11 +2691,12 @@ struct YCrCb2RGB_f<float>
{
typedef float channel_type;
YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
: dstcn(_dstcn), blueIdx(_blueIdx)
YCrCb2RGB_f(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
static const float coeffs_cbr[] = {CR2RF, CR2GF, CB2GF, CB2BF};
static const float coeffs_yuv[] = { V2RF, V2GF, U2GF, U2BF};
memcpy(coeffs, isCrCb ? coeffs_cbr : coeffs_yuv, 4*sizeof(coeffs[0]));
v_c0 = _mm_set1_ps(coeffs[0]);
v_c1 = _mm_set1_ps(coeffs[1]);
@ -2639,6 +2714,9 @@ struct YCrCb2RGB_f<float>
v_cb = _mm_sub_ps(v_cb, v_delta);
v_cr = _mm_sub_ps(v_cr, v_delta);
if (!isCrCb)
std::swap(v_cb, v_cr);
v_b = _mm_mul_ps(v_cb, v_c3);
v_g = _mm_add_ps(_mm_mul_ps(v_cb, v_c2), _mm_mul_ps(v_cr, v_c1));
v_r = _mm_mul_ps(v_cr, v_c0);
@ -2654,6 +2732,7 @@ struct YCrCb2RGB_f<float>
void operator()(const float* src, float* dst, int n) const
{
int dcn = dstcn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max();
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3;
@ -2704,7 +2783,7 @@ struct YCrCb2RGB_f<float>
for ( ; i < n; i += 3, dst += dcn)
{
float Y = src[i], Cr = src[i+1], Cb = src[i+2];
float Y = src[i], Cr = src[i+1+yuvOrder], Cb = src[i+2-yuvOrder];
float b = Y + (Cb - delta)*C3;
float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
@ -2716,6 +2795,7 @@ struct YCrCb2RGB_f<float>
}
}
int dstcn, blueIdx;
bool isCrCb;
float coeffs[4];
__m128 v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta;
@ -2728,24 +2808,26 @@ template<typename _Tp> struct YCrCb2RGB_i
{
typedef _Tp channel_type;
YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
: dstcn(_dstcn), blueIdx(_blueIdx)
YCrCb2RGB_i(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const int coeffs0[] = {22987, -11698, -5636, 29049};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
static const int coeffs_crb[] = { CR2RI, CR2GI, CB2GI, CB2BI};
static const int coeffs_yuv[] = { V2RI, V2GI, U2GI, U2BI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 4*sizeof(coeffs[0]));
}
void operator()(const _Tp* src, _Tp* dst, int n) const
{
int dcn = dstcn, bidx = blueIdx;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3;
for(int i = 0; i < n; i += 3, dst += dcn)
{
_Tp Y = src[i];
_Tp Cr = src[i+1];
_Tp Cb = src[i+2];
_Tp Cr = src[i+1+yuvOrder];
_Tp Cb = src[i+2-yuvOrder];
int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
@ -2759,6 +2841,7 @@ template<typename _Tp> struct YCrCb2RGB_i
}
}
int dstcn, blueIdx;
bool isCrCb;
int coeffs[4];
};
@ -2769,11 +2852,12 @@ struct YCrCb2RGB_i<uchar>
{
typedef uchar channel_type;
YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
: dstcn(_dstcn), blueIdx(_blueIdx)
YCrCb2RGB_i(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const int coeffs0[] = {22987, -11698, -5636, 29049};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
static const int coeffs_crb[] = { CR2RI, CR2GI, CB2GI, CB2BI};
static const int coeffs_yuv[] = { V2RI, V2GI, U2GI, U2BI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 4*sizeof(coeffs[0]));
v_c0 = vdupq_n_s32(coeffs[0]);
v_c1 = vdupq_n_s32(coeffs[1]);
@ -2787,6 +2871,7 @@ struct YCrCb2RGB_i<uchar>
void operator()(const uchar* src, uchar* dst, int n) const
{
int dcn = dstcn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max();
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3;
@ -2800,8 +2885,8 @@ struct YCrCb2RGB_i<uchar>
v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
int16x4_t v_Y = vget_low_s16(v_src16.val[0]),
v_Cr = vget_low_s16(v_src16.val[1]),
v_Cb = vget_low_s16(v_src16.val[2]);
v_Cr = vget_low_s16(v_src16.val[1+yuvOrder]),
v_Cb = vget_low_s16(v_src16.val[2-yuvOrder]);
int32x4_t v_b0 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
v_b0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
@ -2811,8 +2896,8 @@ struct YCrCb2RGB_i<uchar>
v_r0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);
v_Y = vget_high_s16(v_src16.val[0]);
v_Cr = vget_high_s16(v_src16.val[1]);
v_Cb = vget_high_s16(v_src16.val[2]);
v_Cr = vget_high_s16(v_src16.val[1+yuvOrder]);
v_Cb = vget_high_s16(v_src16.val[2-yuvOrder]);
int32x4_t v_b1 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
v_b1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
@ -2847,8 +2932,8 @@ struct YCrCb2RGB_i<uchar>
for ( ; i < n; i += 3, dst += dcn)
{
uchar Y = src[i];
uchar Cr = src[i+1];
uchar Cb = src[i+2];
uchar Cr = src[i+1+yuvOrder];
uchar Cb = src[i+2-yuvOrder];
int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
@ -2862,6 +2947,7 @@ struct YCrCb2RGB_i<uchar>
}
}
int dstcn, blueIdx;
bool isCrCb;
int coeffs[4];
int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2;
@ -2874,11 +2960,12 @@ struct YCrCb2RGB_i<ushort>
{
typedef ushort channel_type;
YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
: dstcn(_dstcn), blueIdx(_blueIdx)
YCrCb2RGB_i(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const int coeffs0[] = {22987, -11698, -5636, 29049};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
static const int coeffs_crb[] = { CR2RI, CR2GI, CB2GI, CB2BI};
static const int coeffs_yuv[] = { V2RI, V2GI, U2GI, U2BI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 4*sizeof(coeffs[0]));
v_c0 = vdupq_n_s32(coeffs[0]);
v_c1 = vdupq_n_s32(coeffs[1]);
@ -2893,6 +2980,7 @@ struct YCrCb2RGB_i<ushort>
void operator()(const ushort* src, ushort* dst, int n) const
{
int dcn = dstcn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const ushort delta = ColorChannel<ushort>::half(), alpha = ColorChannel<ushort>::max();
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3;
@ -2902,8 +2990,8 @@ struct YCrCb2RGB_i<ushort>
uint16x8x3_t v_src = vld3q_u16(src + i);
int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))),
v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))),
v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));
v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1+yuvOrder]))),
v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2-yuvOrder])));
int32x4_t v_b0 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
v_b0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
@ -2913,8 +3001,8 @@ struct YCrCb2RGB_i<ushort>
v_r0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);
v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0]))),
v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1]))),
v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));
v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1+yuvOrder]))),
v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2-yuvOrder])));
int32x4_t v_b1 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
v_b1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
@ -2951,8 +3039,8 @@ struct YCrCb2RGB_i<ushort>
uint16x4x3_t v_src = vld3_u16(src + i);
int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])),
v_Cr = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])),
v_Cb = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
v_Cr = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1+yuvOrder])),
v_Cb = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2-yuvOrder]));
int32x4_t v_b = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
v_b = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b, v_delta2), yuv_shift), v_Y);
@ -2987,8 +3075,8 @@ struct YCrCb2RGB_i<ushort>
for ( ; i < n; i += 3, dst += dcn)
{
ushort Y = src[i];
ushort Cr = src[i+1];
ushort Cb = src[i+2];
ushort Cr = src[i+1+yuvOrder];
ushort Cb = src[i+2-yuvOrder];
int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
@ -3002,6 +3090,7 @@ struct YCrCb2RGB_i<ushort>
}
}
int dstcn, blueIdx;
bool isCrCb;
int coeffs[4];
int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2, v_delta;
@ -3016,11 +3105,12 @@ struct YCrCb2RGB_i<uchar>
{
typedef uchar channel_type;
YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
: dstcn(_dstcn), blueIdx(_blueIdx)
YCrCb2RGB_i(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{
static const int coeffs0[] = {22987, -11698, -5636, 29049};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
static const int coeffs_crb[] = { CR2RI, CR2GI, CB2GI, CB2BI};
static const int coeffs_yuv[] = { V2RI, V2GI, U2GI, U2BI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 4*sizeof(coeffs[0]));
v_c0 = _mm_set1_epi16((short)coeffs[0]);
v_c1 = _mm_set1_epi16((short)coeffs[1]);
@ -3033,7 +3123,9 @@ struct YCrCb2RGB_i<uchar>
uchar alpha = ColorChannel<uchar>::max();
v_alpha = _mm_set1_epi8(*(char *)&alpha);
useSSE = coeffs[0] <= std::numeric_limits<short>::max();
// when using YUV, one of coefficients is bigger than std::numeric_limits<short>::max(),
//which is not appropriate for SSE
useSSE = isCrCb;
haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
}
@ -3152,6 +3244,7 @@ struct YCrCb2RGB_i<uchar>
void operator()(const uchar* src, uchar* dst, int n) const
{
int dcn = dstcn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max();
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3;
@ -3316,8 +3409,8 @@ struct YCrCb2RGB_i<uchar>
for ( ; i < n; i += 3, dst += dcn)
{
uchar Y = src[i];
uchar Cr = src[i+1];
uchar Cb = src[i+2];
uchar Cr = src[i+1+yuvOrder];
uchar Cb = src[i+2-yuvOrder];
int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
@ -3332,6 +3425,7 @@ struct YCrCb2RGB_i<uchar>
}
int dstcn, blueIdx;
int coeffs[4];
bool isCrCb;
bool useSSE, haveSIMD;
__m128i v_c0, v_c1, v_c2, v_c3, v_delta2;
@ -7992,7 +8086,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
case COLOR_RGB2YUV:
{
CV_Assert(scn == 3 || scn == 4);
bidx = code == COLOR_RGB2YUV ? 0 : 2;
bidx = code == COLOR_RGB2YUV ? 2 : 0;
dcn = 3;
k.create("RGB2YUV", ocl::imgproc::cvtcolor_oclsrc,
opts + format("-D dcn=3 -D bidx=%d", bidx));
@ -8001,9 +8095,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
case COLOR_YUV2BGR:
case COLOR_YUV2RGB:
{
if(dcn < 0) dcn = 3;
if(dcn <= 0) dcn = 3;
CV_Assert(dcn == 3 || dcn == 4);
bidx = code == COLOR_YUV2RGB ? 0 : 2;
bidx = code == COLOR_YUV2RGB ? 2 : 0;
k.create("YUV2RGB", ocl::imgproc::cvtcolor_oclsrc,
opts + format("-D dcn=%d -D bidx=%d", dcn, bidx));
break;
@ -8858,17 +8952,13 @@ void cvtBGRtoYUV(const uchar * src_data, size_t src_step,
}
#endif
static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 };
const float* coeffs_f = isCbCr ? 0 : yuv_f;
const int* coeffs_i = isCbCr ? 0 : yuv_i;
int blueIdx = swapBlue ? 2 : 0;
if( depth == CV_8U )
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_i<uchar>(scn, blueIdx, coeffs_i));
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_i<uchar>(scn, blueIdx, isCbCr));
else if( depth == CV_16U )
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_i<ushort>(scn, blueIdx, coeffs_i));
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_i<ushort>(scn, blueIdx, isCbCr));
else
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_f<float>(scn, blueIdx, coeffs_f));
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_f<float>(scn, blueIdx, isCbCr));
}
void cvtYUVtoBGR(const uchar * src_data, size_t src_step,
@ -8914,17 +9004,13 @@ void cvtYUVtoBGR(const uchar * src_data, size_t src_step,
}
#endif
static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f };
static const int yuv_i[] = { 33292, -6472, -9519, 18678 };
const float* coeffs_f = isCbCr ? 0 : yuv_f;
const int* coeffs_i = isCbCr ? 0 : yuv_i;
int blueIdx = swapBlue ? 2 : 0;
if( depth == CV_8U )
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_i<uchar>(dcn, blueIdx, coeffs_i));
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_i<uchar>(dcn, blueIdx, isCbCr));
else if( depth == CV_16U )
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_i<ushort>(dcn, blueIdx, coeffs_i));
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_i<ushort>(dcn, blueIdx, isCbCr));
else
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_f<float>(dcn, blueIdx, coeffs_f));
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_f<float>(dcn, blueIdx, isCbCr));
}
void cvtBGRtoXYZ(const uchar * src_data, size_t src_step,

View File

@ -84,6 +84,39 @@ enum
BLOCK_SIZE = 256
};
//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601
#define B2YF 0.114f
#define G2YF 0.587f
#define R2YF 0.299f
//to YCbCr
#define YCBF 0.564f
#define YCRF 0.713f
#define YCBI 9241
#define YCRI 11682
//to YUV
#define B2UF 0.492f
#define R2VF 0.877f
#define B2UI 8061
#define R2VI 14369
//from YUV
#define U2BF 2.032f
#define U2GF -0.395f
#define V2GF -0.581f
#define V2RF 1.140f
#define U2BI 33292
#define U2GI -6472
#define V2GI -9519
#define V2RI 18678
//from YCrCb
#define CR2RF 1.403f
#define CB2GF -0.344f
#define CR2GF -0.714f
#define CB2BF 1.773f
#define CR2RI 22987
#define CB2GI -5636
#define CR2GI -11698
#define CB2BI 29049
#define scnbytes ((int)sizeof(DATA_TYPE)*scn)
#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn)
@ -151,7 +184,7 @@ __kernel void RGB2Gray(__global const uchar * srcptr, int src_step, int src_offs
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + dst_index);
DATA_TYPE_4 src_pix = vload4(0, src);
#ifdef DEPTH_5
dst[0] = fma(src_pix.B_COMP, 0.114f, fma(src_pix.G_COMP, 0.587f, src_pix.R_COMP * 0.299f));
dst[0] = fma(src_pix.B_COMP, B2YF, fma(src_pix.G_COMP, G2YF, src_pix.R_COMP * R2YF));
#else
dst[0] = (DATA_TYPE)CV_DESCALE(mad24(src_pix.B_COMP, B2Y, mad24(src_pix.G_COMP, G2Y, mul24(src_pix.R_COMP, R2Y))), yuv_shift);
#endif
@ -201,8 +234,8 @@ __kernel void Gray2RGB(__global const uchar * srcptr, int src_step, int src_offs
///////////////////////////////////// RGB <-> YUV //////////////////////////////////////
__constant float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 };
__constant float c_RGB2YUVCoeffs_f[5] = { B2YF, G2YF, R2YF, B2UF, R2VF };
__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, B2UI, R2VI };
__kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dt_offset,
@ -251,8 +284,8 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset
}
}
__constant float c_YUV2RGBCoeffs_f[4] = { 2.032f, -0.395f, -0.581f, 1.140f };
__constant int c_YUV2RGBCoeffs_i[4] = { 33292, -6472, -9519, 18678 };
__constant float c_YUV2RGBCoeffs_f[4] = { U2BF, U2GF, V2GF, V2RF };
__constant int c_YUV2RGBCoeffs_i[4] = { U2BI, U2GI, V2GI, V2RI };
__kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dt_offset,
@ -624,8 +657,8 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
__constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
__constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};
__constant float c_RGB2YCrCbCoeffs_f[5] = {R2YF, G2YF, B2YF, YCRF, YCBF};
__constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, YCRI, YCBI};
__kernel void RGB2YCrCb(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dt_offset,
@ -674,8 +707,8 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int src_step, int src_offs
}
}
__constant float c_YCrCb2RGBCoeffs_f[4] = { 1.403f, -0.714f, -0.344f, 1.773f };
__constant int c_YCrCb2RGBCoeffs_i[4] = { 22987, -11698, -5636, 29049 };
__constant float c_YCrCb2RGBCoeffs_f[4] = { CR2RF, CR2GF, CB2GF, CB2BF };
__constant int c_YCrCb2RGBCoeffs_i[4] = { CR2RI, CR2GI, CB2GI, CB2BI };
__kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
__global uchar* dst, int dst_step, int dst_offset,