mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 14:36:36 +08:00
Bit-exact version of Luv2RGB_b (#9470)
* lab_tetra squashed * initial version is almost written * unfinished work * compilation fixed, to be debugged * Lab test removed * more fixes * Luv2RGBinteger: channels order fixed * Lab structs removed * good trilinear interpolation added * several fixes * removed Luv2RGB interpolations, XYZ tables; 8-cell LUT added * no_interpolate made 8-cell * interpolations rewritten to 8-cell, minor fixes * packed interpolation added for RGB2Luv * tetra implemented * removing unnecessary code * LUT building merged * changes ported to color.cpp * minor fixes; try to suppress warnings * fixed v range of Luv * fixed incorrect src channel number * minor fixes * preliminary version of Luv2RGBinteger is done * Luv2RGB_b is in progress * XYZ color constants converted to softfloat * Luv test: precision fixed * Luv bit-exactness test added * warnings fixed * compilation fixed, error message fixed * Luv check is limited to [0-2,0-2,0-2] by XYZ * L->Y generation moved to LUT * LUTs added for up and vp of Luv2RGB_b * still works * fixed-point is done, works at maxerr 2 * vectorized code is done, 2x slower than original * perf improved by 10% * extra comments removed * code moved to color.cpp * test_lab.cpp updated * minor refactoring * test added for Luv2RGB * OCL Luv2RGB_b: XYZ are limited to [0, 2]; docs updated * Luv2RGB_b rewritten to universal intrinsics * test_lab.cpp moved to luv_tetra branch
This commit is contained in:
parent
bb679f978d
commit
cc547e8260
@ -136,6 +136,8 @@ The values are then converted to the destination data type:
|
|||||||
- 16-bit images: (currently not supported)
|
- 16-bit images: (currently not supported)
|
||||||
- 32-bit images: L, u, and v are left as is
|
- 32-bit images: L, u, and v are left as is
|
||||||
|
|
||||||
|
Note that when converting integer Luv images to RGB the intermediate X, Y and Z values are truncated to \f$ [0, 2] \f$ range to fit white point limitations. It may lead to incorrect representation of colors with odd XYZ values.
|
||||||
|
|
||||||
The above formulae for converting RGB to/from various color spaces have been taken from multiple
|
The above formulae for converting RGB to/from various color spaces have been taken from multiple
|
||||||
sources on the web, primarily from the Charles Poynton site <http://www.poynton.com/ColorFAQ.html>
|
sources on the web, primarily from the Charles Poynton site <http://www.poynton.com/ColorFAQ.html>
|
||||||
|
|
||||||
|
@ -5881,9 +5881,13 @@ static int abToXZ_b[LAB_BASE*9/4];
|
|||||||
// Luv constants
|
// Luv constants
|
||||||
static const bool enableRGB2LuvInterpolation = true;
|
static const bool enableRGB2LuvInterpolation = true;
|
||||||
static const bool enablePackedRGB2Luv = true;
|
static const bool enablePackedRGB2Luv = true;
|
||||||
|
static const bool enablePackedLuv2RGB = true;
|
||||||
static int16_t RGB2LuvLUT_s16[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8];
|
static int16_t RGB2LuvLUT_s16[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8];
|
||||||
static const softfloat uLow(-134), uHigh(220), uRange(uHigh-uLow);
|
static const softfloat uLow(-134), uHigh(220), uRange(uHigh-uLow);
|
||||||
static const softfloat vLow(-140), vHigh(122), vRange(vHigh-vLow);
|
static const softfloat vLow(-140), vHigh(122), vRange(vHigh-vLow);
|
||||||
|
static int LuToUp_b[256*256];
|
||||||
|
static int LvToVp_b[256*256];
|
||||||
|
static long long int LvToVpl_b[256*256];
|
||||||
|
|
||||||
#define clip(value) \
|
#define clip(value) \
|
||||||
value < 0.0f ? 0.0f : value > 1.0f ? 1.0f : value;
|
value < 0.0f ? 0.0f : value > 1.0f ? 1.0f : value;
|
||||||
@ -6013,6 +6017,43 @@ static void initLabTabs()
|
|||||||
abToXZ_b[i-minABvalue] = v; // -1335 <= v <= 88231
|
abToXZ_b[i-minABvalue] = v; // -1335 <= v <= 88231
|
||||||
}
|
}
|
||||||
|
|
||||||
|
softfloat dd = D65[0] + D65[1]*softdouble(15) + D65[2]*softdouble(3);
|
||||||
|
dd = softfloat::one()/max(dd, softfloat::eps());
|
||||||
|
softfloat un = dd*softfloat(13*4)*D65[0];
|
||||||
|
softfloat vn = dd*softfloat(13*9)*D65[1];
|
||||||
|
softfloat oneof4 = softfloat::one()/softfloat(4);
|
||||||
|
|
||||||
|
//when XYZ are limited to [0, 2]
|
||||||
|
/*
|
||||||
|
up: [-402, 1431.57]
|
||||||
|
min abs diff up: 0.010407
|
||||||
|
vp: [-0.25, 0.25]
|
||||||
|
min abs(vp): 0.00034207
|
||||||
|
*/
|
||||||
|
|
||||||
|
//Luv LUT
|
||||||
|
for(int LL = 0; LL < 256; LL++)
|
||||||
|
{
|
||||||
|
softfloat L = softfloat(LL*100)/f255;
|
||||||
|
for(int uu = 0; uu < 256; uu++)
|
||||||
|
{
|
||||||
|
softfloat u = softfloat(uu)*uRange/f255 + uLow;
|
||||||
|
softfloat up = softfloat(9)*(u + L*un);
|
||||||
|
LuToUp_b[LL*256+uu] = cvRound(up*softfloat(BASE/1024));//1024 is OK, 2048 gave maxerr 3
|
||||||
|
}
|
||||||
|
for(int vv = 0; vv < 256; vv++)
|
||||||
|
{
|
||||||
|
softfloat v = softfloat(vv)*vRange/f255 + vLow;
|
||||||
|
softfloat vp = oneof4/(v + L*vn);
|
||||||
|
if(vp > oneof4) vp = oneof4;
|
||||||
|
if(vp < -oneof4) vp = -oneof4;
|
||||||
|
int ivp = cvRound(vp*softfloat(BASE*1024));
|
||||||
|
LvToVp_b[LL*256+vv] = ivp;
|
||||||
|
int vpl = ivp*LL;
|
||||||
|
LvToVpl_b[LL*256+vv] = (12*13*100*(BASE/1024))*(long long)vpl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//try to suppress warning
|
//try to suppress warning
|
||||||
static const bool calcLUT = enableRGB2LabInterpolation || enableRGB2LuvInterpolation;
|
static const bool calcLUT = enableRGB2LabInterpolation || enableRGB2LuvInterpolation;
|
||||||
if(calcLUT)
|
if(calcLUT)
|
||||||
@ -6041,11 +6082,6 @@ static void initLabTabs()
|
|||||||
C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
|
C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
|
||||||
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
|
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
|
||||||
|
|
||||||
softfloat dd = D65[0] + D65[1]*softdouble(15) + D65[2]*softdouble(3);
|
|
||||||
dd = softfloat::one()/max(dd, softfloat(FLT_EPSILON));
|
|
||||||
softfloat un = dd*softfloat(13*4)*D65[0];
|
|
||||||
softfloat vn = dd*softfloat(13*9)*D65[1];
|
|
||||||
|
|
||||||
//u, v: [-134.0, 220.0], [-140.0, 122.0]
|
//u, v: [-134.0, 220.0], [-140.0, 122.0]
|
||||||
static const softfloat lld(LAB_LUT_DIM - 1), f116(116), f16(16), f500(500), f200(200);
|
static const softfloat lld(LAB_LUT_DIM - 1), f116(116), f16(16), f500(500), f200(200);
|
||||||
static const softfloat f100(100), f128(128), f256(256), lbase((int)LAB_BASE);
|
static const softfloat f100(100), f128(128), f256(256), lbase((int)LAB_BASE);
|
||||||
@ -7472,7 +7508,7 @@ struct RGB2Luvfloat
|
|||||||
softfloat d = whitePt[0] +
|
softfloat d = whitePt[0] +
|
||||||
whitePt[1]*softdouble(15) +
|
whitePt[1]*softdouble(15) +
|
||||||
whitePt[2]*softdouble(3);
|
whitePt[2]*softdouble(3);
|
||||||
d = softfloat::one()/max(d, softfloat(FLT_EPSILON));
|
d = softfloat::one()/max(d, softfloat::eps());
|
||||||
un = d*softfloat(13*4)*whitePt[0];
|
un = d*softfloat(13*4)*whitePt[0];
|
||||||
vn = d*softfloat(13*9)*whitePt[1];
|
vn = d*softfloat(13*9)*whitePt[1];
|
||||||
|
|
||||||
@ -7764,13 +7800,13 @@ struct RGB2Luv_f
|
|||||||
int srccn;
|
int srccn;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Luv2RGB_f
|
struct Luv2RGBfloat
|
||||||
{
|
{
|
||||||
typedef float channel_type;
|
typedef float channel_type;
|
||||||
|
|
||||||
Luv2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
|
Luv2RGBfloat( int _dstcn, int blueIdx, const float* _coeffs,
|
||||||
const float* whitept, bool _srgb )
|
const float* whitept, bool _srgb )
|
||||||
: dstcn(_dstcn), srgb(_srgb)
|
: dstcn(_dstcn), srgb(_srgb)
|
||||||
{
|
{
|
||||||
initLabTabs();
|
initLabTabs();
|
||||||
|
|
||||||
@ -7798,7 +7834,7 @@ struct Luv2RGB_f
|
|||||||
softfloat d = whitePt[0] +
|
softfloat d = whitePt[0] +
|
||||||
whitePt[1]*softdouble(15) +
|
whitePt[1]*softdouble(15) +
|
||||||
whitePt[2]*softdouble(3);
|
whitePt[2]*softdouble(3);
|
||||||
d = softfloat::one()/max(d, softfloat(FLT_EPSILON));
|
d = softfloat::one()/max(d, softfloat::eps());
|
||||||
un = softfloat(4*13)*d*whitePt[0];
|
un = softfloat(4*13)*d*whitePt[0];
|
||||||
vn = softfloat(9*13)*d*whitePt[1];
|
vn = softfloat(9*13)*d*whitePt[1];
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
@ -8010,6 +8046,25 @@ struct Luv2RGB_f
|
|||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct Luv2RGB_f
|
||||||
|
{
|
||||||
|
typedef float channel_type;
|
||||||
|
|
||||||
|
Luv2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
|
||||||
|
const float* whitept, bool _srgb )
|
||||||
|
: fcvt(_dstcn, blueIdx, _coeffs, whitept, _srgb), dstcn(_dstcn)
|
||||||
|
{ }
|
||||||
|
|
||||||
|
void operator()(const float* src, float* dst, int n) const
|
||||||
|
{
|
||||||
|
fcvt(src, dst, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
Luv2RGBfloat fcvt;
|
||||||
|
int dstcn;
|
||||||
|
};
|
||||||
|
|
||||||
struct RGB2Luvinterpolate
|
struct RGB2Luvinterpolate
|
||||||
{
|
{
|
||||||
typedef uchar channel_type;
|
typedef uchar channel_type;
|
||||||
@ -8329,217 +8384,308 @@ struct RGB2Luv_b
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct Luv2RGBinteger
|
||||||
|
{
|
||||||
|
typedef uchar channel_type;
|
||||||
|
|
||||||
|
static const int base_shift = 14;
|
||||||
|
static const int BASE = (1 << base_shift);
|
||||||
|
static const int shift = lab_shift+(base_shift-inv_gamma_shift);
|
||||||
|
|
||||||
|
// whitept is fixed for int calculations
|
||||||
|
Luv2RGBinteger( int _dstcn, int blueIdx, const float* _coeffs,
|
||||||
|
const float* /*_whitept*/, bool _srgb )
|
||||||
|
: dstcn(_dstcn)
|
||||||
|
{
|
||||||
|
initLabTabs();
|
||||||
|
|
||||||
|
static const softdouble lshift(1 << lab_shift);
|
||||||
|
for(int i = 0; i < 3; i++)
|
||||||
|
{
|
||||||
|
softdouble c[3];
|
||||||
|
for(int j = 0; j < 3; j++)
|
||||||
|
if(_coeffs)
|
||||||
|
c[j] = softdouble(_coeffs[i + j*3]);
|
||||||
|
else
|
||||||
|
c[j] = XYZ2sRGB_D65[i + j*3];
|
||||||
|
|
||||||
|
coeffs[i+blueIdx*3] = cvRound(lshift*c[0]);
|
||||||
|
coeffs[i+3] = cvRound(lshift*c[1]);
|
||||||
|
coeffs[i+(blueIdx^2)*3] = cvRound(lshift*c[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
tab = _srgb ? sRGBInvGammaTab_b : linearInvGammaTab_b;
|
||||||
|
}
|
||||||
|
|
||||||
|
// L, u, v should be in their natural range
|
||||||
|
inline void process(const uchar LL, const uchar uu, const uchar vv, int& ro, int& go, int& bo) const
|
||||||
|
{
|
||||||
|
ushort y = LabToYF_b[LL*2];
|
||||||
|
|
||||||
|
// y : [0, BASE]
|
||||||
|
// up: [-402, 1431.57]*(BASE/1024)
|
||||||
|
// vp: +/- 0.25*BASE*1024
|
||||||
|
int up = LuToUp_b[LL*256+uu];
|
||||||
|
int vp = LvToVp_b[LL*256+vv];
|
||||||
|
//X = y*3.f* up/((float)BASE/1024) *vp/((float)BASE*1024);
|
||||||
|
//Z = y*(((12.f*13.f)*((float)LL)*100.f/255.f - up/((float)BASE))*vp/((float)BASE*1024) - 5.f);
|
||||||
|
|
||||||
|
long long int xv = ((int)up)*(long long)vp;
|
||||||
|
int x = (int)(xv/BASE);
|
||||||
|
x = y*x/BASE;
|
||||||
|
|
||||||
|
long long int vpl = LvToVpl_b[LL*256+vv];
|
||||||
|
long long int zp = vpl - xv*(255/3);
|
||||||
|
zp /= BASE;
|
||||||
|
long long int zq = zp - (long long)(5*255*BASE);
|
||||||
|
int zm = (int)(y*zq/BASE);
|
||||||
|
int z = zm/256 + zm/65536;
|
||||||
|
|
||||||
|
//limit X, Y, Z to [0, 2] to fit white point
|
||||||
|
x = max(0, min(2*BASE, x)); z = max(0, min(2*BASE, z));
|
||||||
|
|
||||||
|
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2];
|
||||||
|
int C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5];
|
||||||
|
int C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
|
||||||
|
|
||||||
|
ro = CV_DESCALE(C0 * x + C1 * y + C2 * z, shift);
|
||||||
|
go = CV_DESCALE(C3 * x + C4 * y + C5 * z, shift);
|
||||||
|
bo = CV_DESCALE(C6 * x + C7 * y + C8 * z, shift);
|
||||||
|
|
||||||
|
ro = max(0, min((int)INV_GAMMA_TAB_SIZE-1, ro));
|
||||||
|
go = max(0, min((int)INV_GAMMA_TAB_SIZE-1, go));
|
||||||
|
bo = max(0, min((int)INV_GAMMA_TAB_SIZE-1, bo));
|
||||||
|
|
||||||
|
ro = tab[ro];
|
||||||
|
go = tab[go];
|
||||||
|
bo = tab[bo];
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void processLuvToXYZ(const v_uint8x16& lv, const v_uint8x16& uv, const v_uint8x16& vv,
|
||||||
|
int32_t* xyz) const
|
||||||
|
{
|
||||||
|
uint8_t CV_DECL_ALIGNED(16) lvstore[16], uvstore[16], vvstore[16];
|
||||||
|
v_store_aligned(lvstore, lv); v_store_aligned(uvstore, uv); v_store_aligned(vvstore, vv);
|
||||||
|
|
||||||
|
for(int i = 0; i < 16; i++)
|
||||||
|
{
|
||||||
|
int LL = lvstore[i];
|
||||||
|
int u = uvstore[i];
|
||||||
|
int v = vvstore[i];
|
||||||
|
int y = LabToYF_b[LL*2];
|
||||||
|
|
||||||
|
int up = LuToUp_b[LL*256+u];
|
||||||
|
int vp = LvToVp_b[LL*256+v];
|
||||||
|
|
||||||
|
long long int xv = up*(long long int)vp;
|
||||||
|
long long int vpl = LvToVpl_b[LL*256+v];
|
||||||
|
long long int zp = vpl - xv*(255/3);
|
||||||
|
zp = zp >> base_shift;
|
||||||
|
long long int zq = zp - (5*255*BASE);
|
||||||
|
int zm = (int)((y*zq) >> base_shift);
|
||||||
|
|
||||||
|
int x = (int)(xv >> base_shift);
|
||||||
|
x = (y*x) >> base_shift;
|
||||||
|
|
||||||
|
int z = zm/256 + zm/65536;
|
||||||
|
x = max(0, min(2*BASE, x)); z = max(0, min(2*BASE, z));
|
||||||
|
|
||||||
|
xyz[i] = x; xyz[i + 16] = y; xyz[i + 32] = z;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator()(const uchar* src, uchar* dst, int n) const
|
||||||
|
{
|
||||||
|
int i, dcn = dstcn;
|
||||||
|
uchar alpha = ColorChannel<uchar>::max();
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
if(enablePackedLuv2RGB)
|
||||||
|
{
|
||||||
|
static const int nPixels = 16;
|
||||||
|
for (; i < n*3-3*nPixels; i += 3*nPixels, dst += dcn*nPixels)
|
||||||
|
{
|
||||||
|
v_uint8x16 u8l, u8u, u8v;
|
||||||
|
v_load_deinterleave(src + i, u8l, u8u, u8v);
|
||||||
|
|
||||||
|
int32_t CV_DECL_ALIGNED(16) xyz[48];
|
||||||
|
processLuvToXYZ(u8l, u8u, u8v, xyz);
|
||||||
|
|
||||||
|
v_int32x4 xiv[4], yiv[4], ziv[4];
|
||||||
|
for(int k = 0; k < 4; k++)
|
||||||
|
{
|
||||||
|
xiv[k] = v_load_aligned(xyz + 4*k);
|
||||||
|
yiv[k] = v_load_aligned(xyz + 4*k + 16);
|
||||||
|
ziv[k] = v_load_aligned(xyz + 4*k + 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
ro = CV_DESCALE(C0 * x + C1 * y + C2 * z, shift);
|
||||||
|
go = CV_DESCALE(C3 * x + C4 * y + C5 * z, shift);
|
||||||
|
bo = CV_DESCALE(C6 * x + C7 * y + C8 * z, shift);
|
||||||
|
*/
|
||||||
|
v_int32x4 C0 = v_setall_s32(coeffs[0]), C1 = v_setall_s32(coeffs[1]), C2 = v_setall_s32(coeffs[2]);
|
||||||
|
v_int32x4 C3 = v_setall_s32(coeffs[3]), C4 = v_setall_s32(coeffs[4]), C5 = v_setall_s32(coeffs[5]);
|
||||||
|
v_int32x4 C6 = v_setall_s32(coeffs[6]), C7 = v_setall_s32(coeffs[7]), C8 = v_setall_s32(coeffs[8]);
|
||||||
|
v_int32x4 descaleShift = v_setall_s32(1 << (shift-1));
|
||||||
|
v_int32x4 tabsz = v_setall_s32((int)INV_GAMMA_TAB_SIZE-1);
|
||||||
|
v_uint32x4 r_vecs[4], g_vecs[4], b_vecs[4];
|
||||||
|
for(int k = 0; k < 4; k++)
|
||||||
|
{
|
||||||
|
v_int32x4 i_r, i_g, i_b;
|
||||||
|
i_r = (xiv[k]*C0 + yiv[k]*C1 + ziv[k]*C2 + descaleShift) >> shift;
|
||||||
|
i_g = (xiv[k]*C3 + yiv[k]*C4 + ziv[k]*C5 + descaleShift) >> shift;
|
||||||
|
i_b = (xiv[k]*C6 + yiv[k]*C7 + ziv[k]*C8 + descaleShift) >> shift;
|
||||||
|
|
||||||
|
//limit indices in table and then substitute
|
||||||
|
//ro = tab[ro]; go = tab[go]; bo = tab[bo];
|
||||||
|
int32_t CV_DECL_ALIGNED(16) rshifts[4], gshifts[4], bshifts[4];
|
||||||
|
v_int32x4 rs = v_max(v_setzero_s32(), v_min(tabsz, i_r));
|
||||||
|
v_int32x4 gs = v_max(v_setzero_s32(), v_min(tabsz, i_g));
|
||||||
|
v_int32x4 bs = v_max(v_setzero_s32(), v_min(tabsz, i_b));
|
||||||
|
|
||||||
|
v_store_aligned(rshifts, rs);
|
||||||
|
v_store_aligned(gshifts, gs);
|
||||||
|
v_store_aligned(bshifts, bs);
|
||||||
|
|
||||||
|
r_vecs[k] = v_uint32x4(tab[rshifts[0]], tab[rshifts[1]], tab[rshifts[2]], tab[rshifts[3]]);
|
||||||
|
g_vecs[k] = v_uint32x4(tab[gshifts[0]], tab[gshifts[1]], tab[gshifts[2]], tab[gshifts[3]]);
|
||||||
|
b_vecs[k] = v_uint32x4(tab[bshifts[0]], tab[bshifts[1]], tab[bshifts[2]], tab[bshifts[3]]);
|
||||||
|
}
|
||||||
|
|
||||||
|
v_uint16x8 u_rvec0 = v_pack(r_vecs[0], r_vecs[1]), u_rvec1 = v_pack(r_vecs[2], r_vecs[3]);
|
||||||
|
v_uint16x8 u_gvec0 = v_pack(g_vecs[0], g_vecs[1]), u_gvec1 = v_pack(g_vecs[2], g_vecs[3]);
|
||||||
|
v_uint16x8 u_bvec0 = v_pack(b_vecs[0], b_vecs[1]), u_bvec1 = v_pack(b_vecs[2], b_vecs[3]);
|
||||||
|
|
||||||
|
v_uint8x16 u8_b, u8_g, u8_r;
|
||||||
|
u8_b = v_pack(u_bvec0, u_bvec1);
|
||||||
|
u8_g = v_pack(u_gvec0, u_gvec1);
|
||||||
|
u8_r = v_pack(u_rvec0, u_rvec1);
|
||||||
|
|
||||||
|
if(dcn == 4)
|
||||||
|
{
|
||||||
|
v_store_interleave(dst, u8_b, u8_g, u8_r, v_setall_u8(alpha));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
v_store_interleave(dst, u8_b, u8_g, u8_r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; i < n*3; i += 3, dst += dcn)
|
||||||
|
{
|
||||||
|
int ro, go, bo;
|
||||||
|
process(src[i + 0], src[i + 1], src[i + 2], ro, go, bo);
|
||||||
|
|
||||||
|
dst[0] = saturate_cast<uchar>(bo);
|
||||||
|
dst[1] = saturate_cast<uchar>(go);
|
||||||
|
dst[2] = saturate_cast<uchar>(ro);
|
||||||
|
if( dcn == 4 )
|
||||||
|
dst[3] = alpha;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int dstcn;
|
||||||
|
int coeffs[9];
|
||||||
|
ushort* tab;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
struct Luv2RGB_b
|
struct Luv2RGB_b
|
||||||
{
|
{
|
||||||
typedef uchar channel_type;
|
typedef uchar channel_type;
|
||||||
|
|
||||||
Luv2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
|
Luv2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
|
||||||
const float* _whitept, bool _srgb )
|
const float* _whitept, bool _srgb )
|
||||||
: dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
|
: dstcn(_dstcn),
|
||||||
|
fcvt(_dstcn, blueIdx, _coeffs, _whitept, _srgb),
|
||||||
|
icvt(_dstcn, blueIdx, _coeffs, _whitept, _srgb)
|
||||||
{
|
{
|
||||||
// 1.388235294117647 = (220+134)/255
|
// whitept is fixed for int calculations
|
||||||
// 1.027450980392157 = (140+122)/255
|
useBitExactness = (!_whitept && enableBitExactness);
|
||||||
#if CV_NEON
|
|
||||||
v_scale_inv = vdupq_n_f32(100.f/255.f);
|
|
||||||
v_coeff1 = vdupq_n_f32(1.388235294117647f);
|
|
||||||
v_coeff2 = vdupq_n_f32(1.027450980392157f);
|
|
||||||
v_134 = vdupq_n_f32(134.f);
|
|
||||||
v_140 = vdupq_n_f32(140.f);
|
|
||||||
v_scale = vdupq_n_f32(255.f);
|
|
||||||
v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
|
|
||||||
#elif CV_SSE2
|
|
||||||
v_scale = _mm_set1_ps(255.f);
|
|
||||||
v_zero = _mm_setzero_si128();
|
|
||||||
v_alpha = _mm_set1_ps(ColorChannel<uchar>::max());
|
|
||||||
haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CV_SSE2
|
|
||||||
// 16s x 8
|
|
||||||
void process(__m128i v_l, __m128i v_u, __m128i v_v,
|
|
||||||
const __m128& v_coeffs_, const __m128& v_res_,
|
|
||||||
float * buf) const
|
|
||||||
{
|
|
||||||
__m128 v_l0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_l, v_zero));
|
|
||||||
__m128 v_u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_u, v_zero));
|
|
||||||
__m128 v_v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_v, v_zero));
|
|
||||||
|
|
||||||
__m128 v_l1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_l, v_zero));
|
|
||||||
__m128 v_u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_u, v_zero));
|
|
||||||
__m128 v_v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_v, v_zero));
|
|
||||||
|
|
||||||
__m128 v_coeffs = v_coeffs_;
|
|
||||||
__m128 v_res = v_res_;
|
|
||||||
|
|
||||||
v_l0 = _mm_mul_ps(v_l0, v_coeffs);
|
|
||||||
v_u1 = _mm_mul_ps(v_u1, v_coeffs);
|
|
||||||
v_l0 = _mm_sub_ps(v_l0, v_res);
|
|
||||||
v_u1 = _mm_sub_ps(v_u1, v_res);
|
|
||||||
|
|
||||||
v_coeffs = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v_coeffs), 0x49));
|
|
||||||
v_res = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v_res), 0x49));
|
|
||||||
|
|
||||||
v_l1 = _mm_mul_ps(v_l1, v_coeffs);
|
|
||||||
v_v0 = _mm_mul_ps(v_v0, v_coeffs);
|
|
||||||
v_l1 = _mm_sub_ps(v_l1, v_res);
|
|
||||||
v_v0 = _mm_sub_ps(v_v0, v_res);
|
|
||||||
|
|
||||||
v_coeffs = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v_coeffs), 0x49));
|
|
||||||
v_res = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v_res), 0x49));
|
|
||||||
|
|
||||||
v_u0 = _mm_mul_ps(v_u0, v_coeffs);
|
|
||||||
v_v1 = _mm_mul_ps(v_v1, v_coeffs);
|
|
||||||
v_u0 = _mm_sub_ps(v_u0, v_res);
|
|
||||||
v_v1 = _mm_sub_ps(v_v1, v_res);
|
|
||||||
|
|
||||||
_mm_store_ps(buf, v_l0);
|
|
||||||
_mm_store_ps(buf + 4, v_l1);
|
|
||||||
_mm_store_ps(buf + 8, v_u0);
|
|
||||||
_mm_store_ps(buf + 12, v_u1);
|
|
||||||
_mm_store_ps(buf + 16, v_v0);
|
|
||||||
_mm_store_ps(buf + 20, v_v1);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void operator()(const uchar* src, uchar* dst, int n) const
|
void operator()(const uchar* src, uchar* dst, int n) const
|
||||||
{
|
{
|
||||||
|
if(useBitExactness)
|
||||||
|
{
|
||||||
|
icvt(src, dst, n);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
int i, j, dcn = dstcn;
|
int i, j, dcn = dstcn;
|
||||||
uchar alpha = ColorChannel<uchar>::max();
|
uchar alpha = ColorChannel<uchar>::max();
|
||||||
float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];
|
float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];
|
||||||
|
|
||||||
#if CV_SSE2
|
static const softfloat f255(255);
|
||||||
__m128 v_coeffs = _mm_set_ps(100.f/255.f, 1.027450980392157f, 1.388235294117647f, 100.f/255.f);
|
static const softfloat fl = softfloat(100)/f255;
|
||||||
__m128 v_res = _mm_set_ps(0.f, 140.f, 134.f, 0.f);
|
static const softfloat fu = uRange/f255;
|
||||||
#endif
|
static const softfloat fv = vRange/f255;
|
||||||
|
|
||||||
for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
|
for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
|
||||||
{
|
{
|
||||||
int dn = std::min(n - i, (int)BLOCK_SIZE);
|
int dn = std::min(n - i, (int)BLOCK_SIZE);
|
||||||
j = 0;
|
j = 0;
|
||||||
|
|
||||||
#if CV_NEON
|
v_float32x4 luvlm(fl, fu, fv, fl), uvlum(fu, fv, fl, fu), vluvm(fv, fl, fu, fv);
|
||||||
for ( ; j <= (dn - 8) * 3; j += 24)
|
v_float32x4 luvla(0, uLow, vLow, 0), uvlua(uLow, vLow, 0, uLow), vluva(vLow, 0, uLow, vLow);
|
||||||
|
|
||||||
|
static const int nPixBlock = 16;
|
||||||
|
for( ; j < (dn-nPixBlock)*3; j += nPixBlock*3)
|
||||||
{
|
{
|
||||||
uint8x8x3_t v_src = vld3_u8(src + j);
|
v_uint8x16 src8;
|
||||||
uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
|
v_uint16x8 src16_0, src16_1;
|
||||||
v_t1 = vmovl_u8(v_src.val[1]),
|
v_int32x4 src32_00, src32_01, src32_10, src32_11;
|
||||||
v_t2 = vmovl_u8(v_src.val[2]);
|
v_float32x4 m00, m01, m10, m11, a00, a01, a10, a11;
|
||||||
|
|
||||||
float32x4x3_t v_dst;
|
int bufp = 0, srcp = 0;
|
||||||
v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
|
|
||||||
v_dst.val[1] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_coeff1), v_134);
|
|
||||||
v_dst.val[2] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_coeff2), v_140);
|
|
||||||
vst3q_f32(buf + j, v_dst);
|
|
||||||
|
|
||||||
v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
|
#define CVTSTORE(n) v_store_aligned(buf + j + (bufp++)*4, v_muladd(v_cvt_f32(src32_##n), m##n, a##n))
|
||||||
v_dst.val[1] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_coeff1), v_134);
|
#define LOADSTORE(seq1, seq2, seq3, seq4) \
|
||||||
v_dst.val[2] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_coeff2), v_140);
|
do{\
|
||||||
vst3q_f32(buf + j + 12, v_dst);
|
m00 = seq1##m, m01 = seq2##m, m10 = seq3##m, m11 = seq4##m;\
|
||||||
|
a00 = seq1##a, a01 = seq2##a, a10 = seq3##a, a11 = seq4##a;\
|
||||||
|
src8 = v_load(src + j + (srcp++)*16);\
|
||||||
|
v_expand(src8, src16_0, src16_1);\
|
||||||
|
v_expand(v_reinterpret_as_s16(src16_0), src32_00, src32_01);\
|
||||||
|
v_expand(v_reinterpret_as_s16(src16_1), src32_10, src32_11);\
|
||||||
|
CVTSTORE(00); CVTSTORE(01); CVTSTORE(10); CVTSTORE(11);\
|
||||||
|
}while(0)
|
||||||
|
|
||||||
|
LOADSTORE(luvl, uvlu, vluv, luvl);
|
||||||
|
LOADSTORE(uvlu, vluv, luvl, uvlu);
|
||||||
|
LOADSTORE(vluv, luvl, uvlu, vluv);
|
||||||
|
|
||||||
|
#undef CVTSTORE
|
||||||
|
#undef LOADSTORE
|
||||||
}
|
}
|
||||||
#elif CV_SSE2
|
|
||||||
if (haveSIMD)
|
|
||||||
{
|
|
||||||
for ( ; j <= (dn - 8) * 3; j += 24)
|
|
||||||
{
|
|
||||||
__m128i v_src0 = _mm_loadu_si128((__m128i const *)(src + j));
|
|
||||||
__m128i v_src1 = _mm_loadl_epi64((__m128i const *)(src + j + 16));
|
|
||||||
|
|
||||||
process(_mm_unpacklo_epi8(v_src0, v_zero),
|
|
||||||
_mm_unpackhi_epi8(v_src0, v_zero),
|
|
||||||
_mm_unpacklo_epi8(v_src1, v_zero),
|
|
||||||
v_coeffs, v_res,
|
|
||||||
buf + j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
for( ; j < dn*3; j += 3 )
|
for( ; j < dn*3; j += 3 )
|
||||||
{
|
{
|
||||||
buf[j] = src[j]*(100.f/255.f);
|
buf[j] = src[j]*((float)fl);
|
||||||
buf[j+1] = (float)(src[j+1]*1.388235294117647f - 134.f);
|
buf[j+1] = (float)(src[j+1]*(float)fu + (float)uLow);
|
||||||
buf[j+2] = (float)(src[j+2]*1.027450980392157f - 140.f);
|
buf[j+2] = (float)(src[j+2]*(float)fv + (float)vLow);
|
||||||
}
|
}
|
||||||
cvt(buf, buf, dn);
|
|
||||||
|
fcvt(buf, buf, dn);
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
#if CV_NEON
|
|
||||||
for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
|
//assume that fcvt returns 1.f as alpha value in case of 4 channels
|
||||||
|
static const int nBlock = 16;
|
||||||
|
v_float32x4 m255(255.f, 255.f, 255.f, 255.f);
|
||||||
|
v_float32x4 f00, f01, f10, f11;
|
||||||
|
v_int32x4 i00, i01, i10, i11;
|
||||||
|
for(; j < dn*3 - nBlock; j += nBlock, dst += nBlock)
|
||||||
{
|
{
|
||||||
float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
|
f00 = v_load_aligned(buf + j + 0); f01 = v_load_aligned(buf + j + 4);
|
||||||
uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
|
f10 = v_load_aligned(buf + j + 8); f11 = v_load_aligned(buf + j + 12);
|
||||||
vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
|
i00 = v_round(f00*m255); i01 = v_round(f01*m255);
|
||||||
uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
|
i10 = v_round(f10*m255); i11 = v_round(f11*m255);
|
||||||
vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
|
v_store(dst, v_pack(v_reinterpret_as_u16(v_pack(i00, i01)),
|
||||||
uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
|
v_reinterpret_as_u16(v_pack(i10, i11))));
|
||||||
vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
|
|
||||||
|
|
||||||
if (dcn == 4)
|
|
||||||
{
|
|
||||||
uint8x8x4_t v_dst;
|
|
||||||
v_dst.val[0] = v_dst0;
|
|
||||||
v_dst.val[1] = v_dst1;
|
|
||||||
v_dst.val[2] = v_dst2;
|
|
||||||
v_dst.val[3] = v_alpha;
|
|
||||||
vst4_u8(dst, v_dst);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
uint8x8x3_t v_dst;
|
|
||||||
v_dst.val[0] = v_dst0;
|
|
||||||
v_dst.val[1] = v_dst1;
|
|
||||||
v_dst.val[2] = v_dst2;
|
|
||||||
vst3_u8(dst, v_dst);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#elif CV_SSE2
|
|
||||||
if (dcn == 3 && haveSIMD)
|
|
||||||
{
|
|
||||||
for ( ; j <= (dn * 3 - 16); j += 16, dst += 16)
|
|
||||||
{
|
|
||||||
__m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
|
|
||||||
__m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
|
|
||||||
__m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
|
|
||||||
__m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale);
|
|
||||||
|
|
||||||
__m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0),
|
|
||||||
_mm_cvtps_epi32(v_src1));
|
|
||||||
__m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2),
|
|
||||||
_mm_cvtps_epi32(v_src3));
|
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
|
|
||||||
}
|
|
||||||
|
|
||||||
int jr = j % 3;
|
|
||||||
if (jr)
|
|
||||||
dst -= jr, j -= jr;
|
|
||||||
}
|
|
||||||
else if (dcn == 4 && haveSIMD)
|
|
||||||
{
|
|
||||||
for ( ; j <= (dn * 3 - 12); j += 12, dst += 16)
|
|
||||||
{
|
|
||||||
__m128 v_buf0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
|
|
||||||
__m128 v_buf1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
|
|
||||||
__m128 v_buf2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
|
|
||||||
|
|
||||||
__m128 v_ba0 = _mm_unpackhi_ps(v_buf0, v_alpha);
|
|
||||||
__m128 v_ba1 = _mm_unpacklo_ps(v_buf2, v_alpha);
|
|
||||||
|
|
||||||
__m128i v_src0 = _mm_cvtps_epi32(_mm_shuffle_ps(v_buf0, v_ba0, 0x44));
|
|
||||||
__m128i v_src1 = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_shuffle_ps(v_ba0, v_buf1, 0x4e)), 0x78);
|
|
||||||
__m128i v_src2 = _mm_cvtps_epi32(_mm_shuffle_ps(v_buf1, v_ba1, 0x4e));
|
|
||||||
__m128i v_src3 = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_shuffle_ps(v_ba1, v_buf2, 0xee)), 0x78);
|
|
||||||
|
|
||||||
__m128i v_dst0 = _mm_packs_epi32(v_src0, v_src1);
|
|
||||||
__m128i v_dst1 = _mm_packs_epi32(v_src2, v_src3);
|
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
|
|
||||||
}
|
|
||||||
|
|
||||||
int jr = j % 3;
|
|
||||||
if (jr)
|
|
||||||
dst -= jr, j -= jr;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for( ; j < dn*3; j += 3, dst += dcn )
|
for( ; j < dn*3; j += 3, dst += dcn )
|
||||||
{
|
{
|
||||||
@ -8553,17 +8699,10 @@ struct Luv2RGB_b
|
|||||||
}
|
}
|
||||||
|
|
||||||
int dstcn;
|
int dstcn;
|
||||||
Luv2RGB_f cvt;
|
Luv2RGBfloat fcvt;
|
||||||
|
Luv2RGBinteger icvt;
|
||||||
|
|
||||||
#if CV_NEON
|
bool useBitExactness;
|
||||||
float32x4_t v_scale, v_scale_inv, v_coeff1, v_coeff2, v_134, v_140;
|
|
||||||
uint8x8_t v_alpha;
|
|
||||||
#elif CV_SSE2
|
|
||||||
__m128 v_scale;
|
|
||||||
__m128 v_alpha;
|
|
||||||
__m128i v_zero;
|
|
||||||
bool haveSIMD;
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#undef clip
|
#undef clip
|
||||||
|
@ -2160,6 +2160,9 @@ __kernel void Luv2BGR(__global const uchar * src, int src_step, int src_offset,
|
|||||||
X = 3.f*Y*up*vp;
|
X = 3.f*Y*up*vp;
|
||||||
Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);
|
Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);
|
||||||
|
|
||||||
|
//limit X, Y, Z to [0, 2] to fit white point
|
||||||
|
X = clamp(X, 0.f, 2.f); Z = clamp(Z, 0.f, 2.f);
|
||||||
|
|
||||||
float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
|
float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
|
||||||
float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));
|
float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));
|
||||||
float B = fma(X, coeffs[6], fma(Y, coeffs[7], Z * coeffs[8]));
|
float B = fma(X, coeffs[6], fma(Y, coeffs[7], Z * coeffs[8]));
|
||||||
|
@ -2155,6 +2155,9 @@ static int16_t trilinearLUT[TRILINEAR_BASE*TRILINEAR_BASE*TRILINEAR_BASE*8];
|
|||||||
static int16_t RGB2LuvLUT_s16[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8];
|
static int16_t RGB2LuvLUT_s16[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8];
|
||||||
static const softfloat uLow(-134), uHigh(220), uRange(uHigh-uLow);
|
static const softfloat uLow(-134), uHigh(220), uRange(uHigh-uLow);
|
||||||
static const softfloat vLow(-140), vHigh(122), vRange(vHigh-vLow);
|
static const softfloat vLow(-140), vHigh(122), vRange(vHigh-vLow);
|
||||||
|
static int LuToUp_b[256*256];
|
||||||
|
static int LvToVp_b[256*256];
|
||||||
|
static long long int LvToVpl_b[256*256];
|
||||||
|
|
||||||
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
|
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
|
||||||
|
|
||||||
@ -2243,8 +2246,37 @@ static void initLabTabs()
|
|||||||
}
|
}
|
||||||
|
|
||||||
softdouble D65[] = { Xn, softdouble::one(), Zn };
|
softdouble D65[] = { Xn, softdouble::one(), Zn };
|
||||||
softfloat coeffs[9];
|
softfloat dd = (D65[0] + D65[1]*softdouble(15) + D65[2]*softdouble(3));
|
||||||
|
dd = softfloat::one()/max(dd, softfloat::eps());
|
||||||
|
softfloat un = dd*softfloat(13*4)*D65[0];
|
||||||
|
softfloat vn = dd*softfloat(13*9)*D65[1];
|
||||||
|
|
||||||
|
//Luv LUT
|
||||||
|
softfloat oneof4 = softfloat::one()/softfloat(4);
|
||||||
|
|
||||||
|
for(int LL = 0; LL < 256; LL++)
|
||||||
|
{
|
||||||
|
softfloat L = softfloat(LL*100)/f255;
|
||||||
|
for(int uu = 0; uu < 256; uu++)
|
||||||
|
{
|
||||||
|
softfloat u = softfloat(uu)*uRange/f255 + uLow;
|
||||||
|
softfloat up = softfloat(9)*(u + L*un);
|
||||||
|
LuToUp_b[LL*256+uu] = cvRound(up*softfloat(BASE/1024));//1024 is OK, 2048 gave maxerr 3
|
||||||
|
}
|
||||||
|
for(int vv = 0; vv < 256; vv++)
|
||||||
|
{
|
||||||
|
softfloat v = softfloat(vv)*vRange/f255 + vLow;
|
||||||
|
softfloat vp = oneof4/(v + L*vn);
|
||||||
|
if(vp > oneof4) vp = oneof4;
|
||||||
|
if(vp < -oneof4) vp = -oneof4;
|
||||||
|
int ivp = cvRound(vp*softfloat(BASE*1024));
|
||||||
|
LvToVp_b[LL*256+vv] = ivp;
|
||||||
|
int vpl = ivp*LL;
|
||||||
|
LvToVpl_b[LL*256+vv] = (12*13*100*(BASE/1024))*(long long)vpl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
softfloat coeffs[9];
|
||||||
for(int i = 0; i < 3; i++ )
|
for(int i = 0; i < 3; i++ )
|
||||||
{
|
{
|
||||||
coeffs[i*3+2] = RGB2XYZ[i*3 ];
|
coeffs[i*3+2] = RGB2XYZ[i*3 ];
|
||||||
@ -2256,11 +2288,6 @@ static void initLabTabs()
|
|||||||
C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
|
C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
|
||||||
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
|
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
|
||||||
|
|
||||||
softfloat dd = (D65[0] + D65[1]*softdouble(15) + D65[2]*softdouble(3));
|
|
||||||
dd = softfloat::one()/max(dd, softfloat::eps());
|
|
||||||
softfloat un = dd*softfloat(13*4)*D65[0];
|
|
||||||
softfloat vn = dd*softfloat(13*9)*D65[1];
|
|
||||||
|
|
||||||
//u, v: [-134.0, 220.0], [-140.0, 122.0]
|
//u, v: [-134.0, 220.0], [-140.0, 122.0]
|
||||||
static const softfloat lld(LAB_LUT_DIM - 1), f116(116), f16(16);
|
static const softfloat lld(LAB_LUT_DIM - 1), f116(116), f16(16);
|
||||||
static const softfloat f100(100), lbase((int)LAB_BASE);
|
static const softfloat f100(100), lbase((int)LAB_BASE);
|
||||||
@ -2509,6 +2536,74 @@ int row8uRGB2Luv(const uchar* src_row, uchar *dst_row, int n, int cn, int blue_i
|
|||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int row8uLuv2RGB(const uchar* src_row, uchar *dst_row, int n, int cn, int blue_idx, bool srgb)
|
||||||
|
{
|
||||||
|
static const int base_shift = 14;
|
||||||
|
static const int BASE = (1 << base_shift);
|
||||||
|
static const int shift = lab_shift+(base_shift-inv_gamma_shift);
|
||||||
|
int coeffs[9];
|
||||||
|
|
||||||
|
static const softdouble lshift(1 << lab_shift);
|
||||||
|
for(int i = 0; i < 3; i++)
|
||||||
|
{
|
||||||
|
coeffs[i+(blue_idx )*3] = cvRound(lshift*XYZ2RGB[i ]);
|
||||||
|
coeffs[i+ 1*3] = cvRound(lshift*XYZ2RGB[i+3]);
|
||||||
|
coeffs[i+(blue_idx^2)*3] = cvRound(lshift*XYZ2RGB[i+6]);
|
||||||
|
}
|
||||||
|
|
||||||
|
ushort *tab = srgb ? sRGBInvGammaTab_b : linearInvGammaTab_b;
|
||||||
|
|
||||||
|
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2];
|
||||||
|
int C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5];
|
||||||
|
int C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
|
||||||
|
|
||||||
|
for(int xx = 0; xx < n; xx++)
|
||||||
|
{
|
||||||
|
uchar LL = src_row[xx*3 ];
|
||||||
|
uchar uu = src_row[xx*3 + 1];
|
||||||
|
uchar vv = src_row[xx*3 + 2];
|
||||||
|
|
||||||
|
ushort y = LabToYF_b[LL*2];
|
||||||
|
|
||||||
|
int up = LuToUp_b[LL*256+uu];
|
||||||
|
int vp = LvToVp_b[LL*256+vv];
|
||||||
|
|
||||||
|
long long int xv = ((int)up)*(long long)vp;
|
||||||
|
int x = (int)(xv/BASE);
|
||||||
|
x = y*x/BASE;
|
||||||
|
|
||||||
|
long long int vpl = LvToVpl_b[LL*256+vv];
|
||||||
|
long long int zp = vpl - xv*(255/3);
|
||||||
|
zp /= BASE;
|
||||||
|
long long int zq = zp - (long long)(5*255*BASE);
|
||||||
|
int zm = (int)(y*zq/BASE);
|
||||||
|
int z = zm/256 + zm/65536;
|
||||||
|
|
||||||
|
//limit X, Y, Z to [0, 2] to fit white point
|
||||||
|
x = max(0, min(2*BASE, x)); z = max(0, min(2*BASE, z));
|
||||||
|
|
||||||
|
int ro, go, bo;
|
||||||
|
ro = CV_DESCALE(C0 * x + C1 * y + C2 * z, shift);
|
||||||
|
go = CV_DESCALE(C3 * x + C4 * y + C5 * z, shift);
|
||||||
|
bo = CV_DESCALE(C6 * x + C7 * y + C8 * z, shift);
|
||||||
|
|
||||||
|
ro = max(0, min((int)INV_GAMMA_TAB_SIZE-1, ro));
|
||||||
|
go = max(0, min((int)INV_GAMMA_TAB_SIZE-1, go));
|
||||||
|
bo = max(0, min((int)INV_GAMMA_TAB_SIZE-1, bo));
|
||||||
|
|
||||||
|
ro = tab[ro];
|
||||||
|
go = tab[go];
|
||||||
|
bo = tab[bo];
|
||||||
|
|
||||||
|
dst_row[xx*cn ] = saturate_cast<uchar>(bo);
|
||||||
|
dst_row[xx*cn + 1] = saturate_cast<uchar>(go);
|
||||||
|
dst_row[xx*cn + 2] = saturate_cast<uchar>(ro);
|
||||||
|
if(cn == 4) dst_row[xx*cn + 3] = 255;
|
||||||
|
}
|
||||||
|
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int row8uLabChoose(const uchar* src_row, uchar *dst_row, int n, bool forward, int blue_idx, bool srgb)
|
int row8uLabChoose(const uchar* src_row, uchar *dst_row, int n, bool forward, int blue_idx, bool srgb)
|
||||||
{
|
{
|
||||||
@ -2518,6 +2613,14 @@ int row8uLabChoose(const uchar* src_row, uchar *dst_row, int n, bool forward, in
|
|||||||
return row8uLab2RGB(src_row, dst_row, n, 3, blue_idx, srgb);
|
return row8uLab2RGB(src_row, dst_row, n, 3, blue_idx, srgb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int row8uLuvChoose(const uchar* src_row, uchar *dst_row, int n, bool forward, int blue_idx, bool srgb)
|
||||||
|
{
|
||||||
|
if(forward)
|
||||||
|
return row8uRGB2Luv(src_row, dst_row, n, 3, blue_idx);
|
||||||
|
else
|
||||||
|
return row8uLuv2RGB(src_row, dst_row, n, 3, blue_idx, srgb);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
TEST(Imgproc_ColorLab_Full, bitExactness)
|
TEST(Imgproc_ColorLab_Full, bitExactness)
|
||||||
{
|
{
|
||||||
@ -2605,9 +2708,13 @@ TEST(Imgproc_ColorLab_Full, bitExactness)
|
|||||||
|
|
||||||
TEST(Imgproc_ColorLuv_Full, bitExactness)
|
TEST(Imgproc_ColorLuv_Full, bitExactness)
|
||||||
{
|
{
|
||||||
/* to be expanded by more codes when bit-exactness is done for them */
|
int codes[] = { CV_BGR2Luv, CV_RGB2Luv, CV_LBGR2Luv, CV_LRGB2Luv,
|
||||||
int codes[] = { CV_BGR2Luv, CV_RGB2Luv };
|
CV_Luv2BGR, CV_Luv2RGB, CV_Luv2LBGR, CV_Luv2LRGB};
|
||||||
string names[] = { "CV_BGR2Luv", "CV_RGB2Luv" };
|
string names[] = { "CV_BGR2Luv", "CV_RGB2Luv", "CV_LBGR2Luv", "CV_LRGB2Luv",
|
||||||
|
"CV_Luv2BGR", "CV_Luv2RGB", "CV_Luv2LBGR", "CV_Luv2LRGB" };
|
||||||
|
/* to be enabled when bit-exactness is done for other codes */
|
||||||
|
bool codeEnabled[] = { true, true, false, false, true, true, true, true };
|
||||||
|
|
||||||
size_t nCodes = sizeof(codes)/sizeof(codes[0]);
|
size_t nCodes = sizeof(codes)/sizeof(codes[0]);
|
||||||
|
|
||||||
// need to be recalculated each time we change Luv algorithms, RNG or test system
|
// need to be recalculated each time we change Luv algorithms, RNG or test system
|
||||||
@ -2615,6 +2722,15 @@ TEST(Imgproc_ColorLuv_Full, bitExactness)
|
|||||||
uint32_t hashes[] = {
|
uint32_t hashes[] = {
|
||||||
0x9d4d983a, 0xd3d7b220, 0xd503b661, 0x73581d9b, 0x3beec8a6, 0xea6dfc16, 0xc867f4cd, 0x2c97f43a,
|
0x9d4d983a, 0xd3d7b220, 0xd503b661, 0x73581d9b, 0x3beec8a6, 0xea6dfc16, 0xc867f4cd, 0x2c97f43a,
|
||||||
0x8152fbc9, 0xd7e764a6, 0x5e01f9a3, 0x53e8961e, 0x6a64f1f7, 0x4fa89a44, 0x67096871, 0x4f3bce87,
|
0x8152fbc9, 0xd7e764a6, 0x5e01f9a3, 0x53e8961e, 0x6a64f1f7, 0x4fa89a44, 0x67096871, 0x4f3bce87,
|
||||||
|
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
|
||||||
|
0xec311a14, 0x995efefc, 0xf71b590b, 0xc1edfce7, 0x67b2b2e2, 0xe6d7f90d, 0xbcbaff5c, 0xd86ae19c,
|
||||||
|
0x3e8e4647, 0x53f1a5e3, 0x60dfb6ca, 0xcda851fe, 0xd91084b3, 0xe361bf6f, 0x90fe66ed, 0xb19c5b89,
|
||||||
|
|
||||||
|
0x190508ec, 0xc7764e22, 0x19b042a8, 0x2db4c5d8, 0x6e1cfd1d, 0x39bddd51, 0x942714ed, 0x19444d39,
|
||||||
|
0xed16e206, 0xc4102784, 0x590075fe, 0xaaef2ec6, 0xbeb84149, 0x8da31e4f, 0x7cbe7d77, 0x1c90b30a,
|
||||||
};
|
};
|
||||||
|
|
||||||
RNG rng(0);
|
RNG rng(0);
|
||||||
@ -2622,10 +2738,11 @@ TEST(Imgproc_ColorLuv_Full, bitExactness)
|
|||||||
bool next = true;
|
bool next = true;
|
||||||
for(size_t c = 0; next && c < nCodes; c++)
|
for(size_t c = 0; next && c < nCodes; c++)
|
||||||
{
|
{
|
||||||
|
if(!codeEnabled[c]) continue;
|
||||||
size_t v = c;
|
size_t v = c;
|
||||||
int blueIdx = (v % 2 != 0) ? 2 : 0; v /=2;
|
int blueIdx = (v % 2 != 0) ? 2 : 0; v /=2;
|
||||||
/* bool srgb = (v % 2 == 0); v /= 2; */
|
bool srgb = (v % 2 == 0); v /= 2;
|
||||||
/* bool forward = (v % 2 == 0); */
|
bool forward = (v % 2 == 0);
|
||||||
|
|
||||||
for(int iter = 0; next && iter < nIterations; iter++)
|
for(int iter = 0; next && iter < nIterations; iter++)
|
||||||
{
|
{
|
||||||
@ -2648,7 +2765,7 @@ TEST(Imgproc_ColorLuv_Full, bitExactness)
|
|||||||
uchar* probeRow = probe.ptr(y);
|
uchar* probeRow = probe.ptr(y);
|
||||||
uchar* resultRow = result.ptr(y);
|
uchar* resultRow = result.ptr(y);
|
||||||
|
|
||||||
row8uRGB2Luv(probeRow, goldRow, probe.cols, 3, blueIdx);
|
row8uLuvChoose(probeRow, goldRow, probe.cols, forward, blueIdx, srgb);
|
||||||
|
|
||||||
for(int x = 0; next && x < probe.cols; x++)
|
for(int x = 0; next && x < probe.cols; x++)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user