From 51fc891a5cb0c178c1c9825c8ac29edc758a41e9 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Mon, 23 Oct 2017 13:48:14 +0300 Subject: [PATCH 1/7] cvtColor: fixed tables init, moved some tables to heap --- modules/imgproc/src/color.cpp | 58 +++++++++++++---------------------- 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 7a28300f77..c3f82af86a 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -140,31 +140,6 @@ const int CB2GI = -5636; const int CR2GI = -11698; const int CR2RI = 22987; -// computes cubic spline coefficients for a function: (xi=i, yi=f[i]), i=0..n -template static void splineBuild(const _Tp* f, int n, _Tp* tab) -{ - _Tp cn = 0; - int i; - tab[0] = tab[1] = (_Tp)0; - - for(i = 1; i < n-1; i++) - { - _Tp t = 3*(f[i+1] - 2*f[i] + f[i-1]); - _Tp l = 1/(4 - tab[(i-1)*4]); - tab[i*4] = l; tab[i*4+1] = (t - tab[(i-1)*4+1])*l; - } - - for(i = n-1; i >= 0; i--) - { - _Tp c = tab[i*4+1] - tab[i*4]*cn; - _Tp b = f[i+1] - f[i] - (cn + c*2)*(_Tp)0.3333333333333333; - _Tp d = (cn - c)*(_Tp)0.3333333333333333; - tab[i*4] = f[i]; tab[i*4+1] = b; - tab[i*4+2] = c; tab[i*4+3] = d; - cn = c; - } -} - static void splineBuild(const softfloat* f, int n, float* tab) { const softfloat f2(2), f3(3), f4(4); @@ -173,7 +148,7 @@ static void splineBuild(const softfloat* f, int n, float* tab) int i; tab[0] = tab[1] = 0.0f; - for(i = 1; i < n-1; i++) + for(i = 1; i <= n-1; i++) { softfloat t = (f[i+1] - f[i]*f2 + f[i-1])*f3; softfloat l = softfloat::one()/(f4 - sftab[(i-1)*4]); @@ -5845,10 +5820,11 @@ static const softdouble D65[] = {softdouble::fromRaw(0x3fee6a22b3892ee8), softdouble::fromRaw(0x3ff16b8950763a19)}; enum { LAB_CBRT_TAB_SIZE = 1024, GAMMA_TAB_SIZE = 1024 }; -static float LabCbrtTab[LAB_CBRT_TAB_SIZE*4]; +static float *LabCbrtTab; static const float LabCbrtTabScale = softfloat(LAB_CBRT_TAB_SIZE*2)/softfloat(3); -static float sRGBGammaTab[GAMMA_TAB_SIZE*4], sRGBInvGammaTab[GAMMA_TAB_SIZE*4]; +static float *sRGBGammaTab; +static float *sRGBInvGammaTab; static const float GammaTabScale((int)GAMMA_TAB_SIZE); static ushort sRGBGammaTab_b[256], linearGammaTab_b[256]; @@ -5873,21 +5849,21 @@ enum trilinear_shift = 8 - lab_lut_shift + 1, TRILINEAR_BASE = (1 << trilinear_shift) }; -static int16_t RGB2LabLUT_s16[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8]; +static int16_t *RGB2LabLUT_s16; static int16_t trilinearLUT[TRILINEAR_BASE*TRILINEAR_BASE*TRILINEAR_BASE*8]; static ushort LabToYF_b[256*2]; static const int minABvalue = -8145; -static int abToXZ_b[LAB_BASE*9/4]; +static int *abToXZ_b; // Luv constants static const bool enableRGB2LuvInterpolation = true; static const bool enablePackedRGB2Luv = true; static const bool enablePackedLuv2RGB = true; -static int16_t RGB2LuvLUT_s16[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8]; +static int16_t *RGB2LuvLUT_s16; static const softfloat uLow(-134), uHigh(220), uRange(uHigh-uLow); static const softfloat vLow(-140), vHigh(122), vRange(vHigh-vLow); -static int LuToUp_b[256*256]; -static int LvToVp_b[256*256]; -static long long int LvToVpl_b[256*256]; +static int *LuToUp_b; +static int *LvToVp_b; +static long long int *LvToVpl_b; #define clip(value) \ value < 0.0f ? 0.0f : value > 1.0f ? 1.0f : value; @@ -5935,6 +5911,7 @@ static void initLabTabs() softfloat x = scale*softfloat(i); f[i] = x < lthresh ? mulAdd(x, lscale, lbias) : cbrt(x); } + LabCbrtTab = new float[LAB_CBRT_TAB_SIZE*4]; splineBuild(f, LAB_CBRT_TAB_SIZE, LabCbrtTab); scale = softfloat::one()/softfloat(GammaTabScale); @@ -5944,6 +5921,9 @@ static void initLabTabs() g[i] = applyGamma(x); ig[i] = applyInvGamma(x); } + + sRGBGammaTab = new float[GAMMA_TAB_SIZE*4]; + sRGBInvGammaTab = new float[GAMMA_TAB_SIZE*4]; splineBuild(g, GAMMA_TAB_SIZE, sRGBGammaTab); splineBuild(ig, GAMMA_TAB_SIZE, sRGBInvGammaTab); @@ -5999,6 +5979,7 @@ static void initLabTabs() } //Lookup table for a,b to x,z conversion + abToXZ_b = new int[LAB_BASE*9/4]; for(i = minABvalue; i < LAB_BASE*9/4+minABvalue; i++) { int v; @@ -6032,6 +6013,9 @@ static void initLabTabs() */ //Luv LUT + LuToUp_b = new int[256*256]; + LvToVp_b = new int[256*256]; + LvToVpl_b = new long long int[256*256]; for(int LL = 0; LL < 256; LL++) { softfloat L = softfloat(LL*100)/f255; @@ -6145,6 +6129,8 @@ static void initLabTabs() } } } + RGB2LabLUT_s16 = new int16_t[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8]; + RGB2LuvLUT_s16 = new int16_t[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8]; for(int p = 0; p < LAB_LUT_DIM; p++) { for(int q = 0; q < LAB_LUT_DIM; q++) @@ -6199,7 +6185,7 @@ static void initLabTabs() // cx, cy, cz are in [0; LAB_BASE] -static inline void trilinearInterpolate(int cx, int cy, int cz, int16_t* LUT, +static inline void trilinearInterpolate(int cx, int cy, int cz, const int16_t* LUT, int& a, int& b, int& c) { //LUT idx of origin pt of cube @@ -6207,7 +6193,7 @@ static inline void trilinearInterpolate(int cx, int cy, int cz, int16_t* LUT, int ty = cy >> (lab_base_shift - lab_lut_shift); int tz = cz >> (lab_base_shift - lab_lut_shift); - int16_t* baseLUT = &LUT[3*8*tx + (3*8*LAB_LUT_DIM)*ty + (3*8*LAB_LUT_DIM*LAB_LUT_DIM)*tz]; + const int16_t* baseLUT = &LUT[3*8*tx + (3*8*LAB_LUT_DIM)*ty + (3*8*LAB_LUT_DIM*LAB_LUT_DIM)*tz]; int aa[8], bb[8], cc[8]; for(int i = 0; i < 8; i++) { From e75056a084b88ba6d8fb19f0bd4354c0c3b8b399 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 21 Nov 2017 16:49:58 +0300 Subject: [PATCH 2/7] static init --- modules/core/include/opencv2/core/private.hpp | 4 +++ modules/imgproc/src/color.cpp | 26 +++++++++---------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 247cf99e16..eb1b307f24 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -159,6 +159,10 @@ static inline cv::Size cvGetMatSize( const CvMat* mat ) namespace cv { CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int unroll_to = 0); + +//! Allocate all memory buffers which will not be freed, ease filtering memcheck issues +template +CV_EXPORTS T* allocSingleton(size_t count) { return new T[count]; } } // property implementation macros diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index c3f82af86a..a5eda83892 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -140,23 +140,24 @@ const int CB2GI = -5636; const int CR2GI = -11698; const int CR2RI = 22987; -static void splineBuild(const softfloat* f, int n, float* tab) +static const float * splineBuild(const softfloat* f, size_t n) { + float* tab = cv::allocSingleton(n * 4); const softfloat f2(2), f3(3), f4(4); softfloat cn(0); softfloat* sftab = reinterpret_cast(tab); - int i; tab[0] = tab[1] = 0.0f; - for(i = 1; i <= n-1; i++) + for(size_t i = 1; i < n; i++) { softfloat t = (f[i+1] - f[i]*f2 + f[i-1])*f3; softfloat l = softfloat::one()/(f4 - sftab[(i-1)*4]); sftab[i*4] = l; sftab[i*4+1] = (t - sftab[(i-1)*4+1])*l; } - for(i = n-1; i >= 0; i--) + for(size_t j = 0; j < n; ++j) { + size_t i = n - j - 1; softfloat c = sftab[i*4+1] - sftab[i*4]*cn; softfloat b = f[i+1] - f[i] - (cn + c*f2)/f3; softfloat d = (cn - c)/f3; @@ -164,8 +165,10 @@ static void splineBuild(const softfloat* f, int n, float* tab) sftab[i*4+2] = c; sftab[i*4+3] = d; cn = c; } + return tab; } + // interpolates value of a function at x, 0 <= x <= n using a cubic spline. template static inline _Tp splineInterpolate(_Tp x, const _Tp* tab, int n) { @@ -5820,11 +5823,11 @@ static const softdouble D65[] = {softdouble::fromRaw(0x3fee6a22b3892ee8), softdouble::fromRaw(0x3ff16b8950763a19)}; enum { LAB_CBRT_TAB_SIZE = 1024, GAMMA_TAB_SIZE = 1024 }; -static float *LabCbrtTab; +static const float *LabCbrtTab = 0; static const float LabCbrtTabScale = softfloat(LAB_CBRT_TAB_SIZE*2)/softfloat(3); -static float *sRGBGammaTab; -static float *sRGBInvGammaTab; +static const float *sRGBGammaTab = 0; +static const float *sRGBInvGammaTab = 0; static const float GammaTabScale((int)GAMMA_TAB_SIZE); static ushort sRGBGammaTab_b[256], linearGammaTab_b[256]; @@ -5911,8 +5914,7 @@ static void initLabTabs() softfloat x = scale*softfloat(i); f[i] = x < lthresh ? mulAdd(x, lscale, lbias) : cbrt(x); } - LabCbrtTab = new float[LAB_CBRT_TAB_SIZE*4]; - splineBuild(f, LAB_CBRT_TAB_SIZE, LabCbrtTab); + LabCbrtTab = splineBuild(f, LAB_CBRT_TAB_SIZE); scale = softfloat::one()/softfloat(GammaTabScale); for(i = 0; i <= GAMMA_TAB_SIZE; i++) @@ -5922,10 +5924,8 @@ static void initLabTabs() ig[i] = applyInvGamma(x); } - sRGBGammaTab = new float[GAMMA_TAB_SIZE*4]; - sRGBInvGammaTab = new float[GAMMA_TAB_SIZE*4]; - splineBuild(g, GAMMA_TAB_SIZE, sRGBGammaTab); - splineBuild(ig, GAMMA_TAB_SIZE, sRGBInvGammaTab); + sRGBGammaTab = splineBuild(g, GAMMA_TAB_SIZE); + sRGBInvGammaTab = splineBuild(ig, GAMMA_TAB_SIZE); static const softfloat intScale(255*(1 << gamma_shift)); for(i = 0; i < 256; i++) From b3018ba89e0c7e0da95bcd30cd1a0d65e20b4003 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 21 Nov 2017 17:28:45 +0300 Subject: [PATCH 3/7] LUV tables --- modules/imgproc/src/color.cpp | 88 ++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 37 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index a5eda83892..4f1d6636ed 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -5864,9 +5864,12 @@ static const bool enablePackedLuv2RGB = true; static int16_t *RGB2LuvLUT_s16; static const softfloat uLow(-134), uHigh(220), uRange(uHigh-uLow); static const softfloat vLow(-140), vHigh(122), vRange(vHigh-vLow); -static int *LuToUp_b; -static int *LvToVp_b; -static long long int *LvToVpl_b; + +static struct LUVLUT_T { + const int *LuToUp_b; + const int *LvToVp_b; + const long long int *LvToVpl_b; +} LUVLUT; #define clip(value) \ value < 0.0f ? 0.0f : value > 1.0f ? 1.0f : value; @@ -5896,6 +5899,41 @@ static inline softfloat applyInvGamma(softfloat x) pow(xd, softdouble::one()/gammaPower)*(softdouble::one()+gammaXshift) - gammaXshift); } +static LUVLUT_T initLUTforLUV(int BASE, const softfloat &un, const softfloat &vn) +{ + const softfloat oneof4 = softfloat::one()/softfloat(4); + const softfloat f255(255); + int *LuToUp_b = new int[256*256]; + int *LvToVp_b = new int[256*256]; + long long int *LvToVpl_b = new long long int[256*256]; + for(int LL = 0; LL < 256; LL++) + { + softfloat L = softfloat(LL*100)/f255; + for(int uu = 0; uu < 256; uu++) + { + softfloat u = softfloat(uu)*uRange/f255 + uLow; + softfloat up = softfloat(9)*(u + L*un); + LuToUp_b[LL*256+uu] = cvRound(up*softfloat(BASE/1024));//1024 is OK, 2048 gave maxerr 3 + } + for(int vv = 0; vv < 256; vv++) + { + softfloat v = softfloat(vv)*vRange/f255 + vLow; + softfloat vp = oneof4/(v + L*vn); + if(vp > oneof4) vp = oneof4; + if(vp < -oneof4) vp = -oneof4; + int ivp = cvRound(vp*softfloat(BASE*1024)); + LvToVp_b[LL*256+vv] = ivp; + int vpl = ivp*LL; + LvToVpl_b[LL*256+vv] = (12*13*100*(BASE/1024))*(long long)vpl; + } + } + LUVLUT_T res; + res.LuToUp_b = LuToUp_b; + res.LvToVp_b = LvToVp_b; + res.LvToVpl_b = LvToVpl_b; + return res; +} + static void initLabTabs() { static bool initialized = false; @@ -6002,7 +6040,6 @@ static void initLabTabs() dd = softfloat::one()/max(dd, softfloat::eps()); softfloat un = dd*softfloat(13*4)*D65[0]; softfloat vn = dd*softfloat(13*9)*D65[1]; - softfloat oneof4 = softfloat::one()/softfloat(4); //when XYZ are limited to [0, 2] /* @@ -6013,30 +6050,7 @@ static void initLabTabs() */ //Luv LUT - LuToUp_b = new int[256*256]; - LvToVp_b = new int[256*256]; - LvToVpl_b = new long long int[256*256]; - for(int LL = 0; LL < 256; LL++) - { - softfloat L = softfloat(LL*100)/f255; - for(int uu = 0; uu < 256; uu++) - { - softfloat u = softfloat(uu)*uRange/f255 + uLow; - softfloat up = softfloat(9)*(u + L*un); - LuToUp_b[LL*256+uu] = cvRound(up*softfloat(BASE/1024));//1024 is OK, 2048 gave maxerr 3 - } - for(int vv = 0; vv < 256; vv++) - { - softfloat v = softfloat(vv)*vRange/f255 + vLow; - softfloat vp = oneof4/(v + L*vn); - if(vp > oneof4) vp = oneof4; - if(vp < -oneof4) vp = -oneof4; - int ivp = cvRound(vp*softfloat(BASE*1024)); - LvToVp_b[LL*256+vv] = ivp; - int vpl = ivp*LL; - LvToVpl_b[LL*256+vv] = (12*13*100*(BASE/1024))*(long long)vpl; - } - } + LUVLUT = initLUTforLUV(BASE, un, vn); //try to suppress warning static const bool calcLUT = enableRGB2LabInterpolation || enableRGB2LuvInterpolation; @@ -8411,8 +8425,8 @@ struct Luv2RGBinteger // y : [0, BASE] // up: [-402, 1431.57]*(BASE/1024) // vp: +/- 0.25*BASE*1024 - int up = LuToUp_b[LL*256+uu]; - int vp = LvToVp_b[LL*256+vv]; + int up = LUVLUT.LuToUp_b[LL*256+uu]; + int vp = LUVLUT.LvToVp_b[LL*256+vv]; //X = y*3.f* up/((float)BASE/1024) *vp/((float)BASE*1024); //Z = y*(((12.f*13.f)*((float)LL)*100.f/255.f - up/((float)BASE))*vp/((float)BASE*1024) - 5.f); @@ -8420,7 +8434,7 @@ struct Luv2RGBinteger int x = (int)(xv/BASE); x = y*x/BASE; - long long int vpl = LvToVpl_b[LL*256+vv]; + long long int vpl = LUVLUT.LvToVpl_b[LL*256+vv]; long long int zp = vpl - xv*(255/3); zp /= BASE; long long int zq = zp - (long long)(5*255*BASE); @@ -8460,11 +8474,11 @@ struct Luv2RGBinteger int v = vvstore[i]; int y = LabToYF_b[LL*2]; - int up = LuToUp_b[LL*256+u]; - int vp = LvToVp_b[LL*256+v]; + int up = LUVLUT.LuToUp_b[LL*256+u]; + int vp = LUVLUT.LvToVp_b[LL*256+v]; long long int xv = up*(long long int)vp; - long long int vpl = LvToVpl_b[LL*256+v]; + long long int vpl = LUVLUT.LvToVpl_b[LL*256+v]; long long int zp = vpl - xv*(255/3); zp = zp >> base_shift; long long int zq = zp - (5*255*BASE); @@ -9828,9 +9842,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) static UMat usRGBGammaTab, ucoeffs, uLabCbrtTab; if (srgb && usRGBGammaTab.empty()) - Mat(1, GAMMA_TAB_SIZE * 4, CV_32FC1, sRGBGammaTab).copyTo(usRGBGammaTab); + Mat(1, GAMMA_TAB_SIZE * 4, CV_32FC1, const_cast(sRGBGammaTab)).copyTo(usRGBGammaTab); if (!lab && uLabCbrtTab.empty()) - Mat(1, LAB_CBRT_TAB_SIZE * 4, CV_32FC1, LabCbrtTab).copyTo(uLabCbrtTab); + Mat(1, LAB_CBRT_TAB_SIZE * 4, CV_32FC1, const_cast(LabCbrtTab)).copyTo(uLabCbrtTab); { float coeffs[9]; @@ -9916,7 +9930,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) static UMat ucoeffs, usRGBInvGammaTab; if (srgb && usRGBInvGammaTab.empty()) - Mat(1, GAMMA_TAB_SIZE*4, CV_32FC1, sRGBInvGammaTab).copyTo(usRGBInvGammaTab); + Mat(1, GAMMA_TAB_SIZE*4, CV_32FC1, const_cast(sRGBInvGammaTab)).copyTo(usRGBInvGammaTab); { float coeffs[9]; From 2178c5e95e5b2211515ca984e838dfc27ef1d187 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 21 Nov 2017 17:34:10 +0300 Subject: [PATCH 4/7] init ABtoXZ --- modules/imgproc/src/color.cpp | 50 ++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 4f1d6636ed..4a382644b6 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -5856,7 +5856,7 @@ static int16_t *RGB2LabLUT_s16; static int16_t trilinearLUT[TRILINEAR_BASE*TRILINEAR_BASE*TRILINEAR_BASE*8]; static ushort LabToYF_b[256*2]; static const int minABvalue = -8145; -static int *abToXZ_b; +static const int *abToXZ_b; // Luv constants static const bool enableRGB2LuvInterpolation = true; static const bool enablePackedRGB2Luv = true; @@ -5903,9 +5903,9 @@ static LUVLUT_T initLUTforLUV(int BASE, const softfloat &un, const softfloat &vn { const softfloat oneof4 = softfloat::one()/softfloat(4); const softfloat f255(255); - int *LuToUp_b = new int[256*256]; - int *LvToVp_b = new int[256*256]; - long long int *LvToVpl_b = new long long int[256*256]; + int *LuToUp_b = cv::allocSingleton(256*256); + int *LvToVp_b = cv::allocSingleton(256*256); + long long int *LvToVpl_b = cv::allocSingleton(256*256); for(int LL = 0; LL < 256; LL++) { softfloat L = softfloat(LL*100)/f255; @@ -5934,6 +5934,29 @@ static LUVLUT_T initLUTforLUV(int BASE, const softfloat &un, const softfloat &vn return res; } +static int * initLUTforABXZ(int BASE) +{ + int * res = cv::allocSingleton(LAB_BASE*9/4); + for(int i = minABvalue; i < LAB_BASE*9/4+minABvalue; i++) + { + int v; + //6.f/29.f*BASE = 3389.730 + if(i <= 3390) + { + //fxz[k] = (fxz[k] - 16.0f / 116.0f) / 7.787f; + // 7.787f = (29/3)^3/(29*4) + v = i*108/841 - BASE*16/116*108/841; + } + else + { + //fxz[k] = fxz[k] * fxz[k] * fxz[k]; + v = i*i/BASE*i/BASE; + } + res[i-minABvalue] = v; // -1335 <= v <= 88231 + } + return res; +} + static void initLabTabs() { static bool initialized = false; @@ -6017,24 +6040,7 @@ static void initLabTabs() } //Lookup table for a,b to x,z conversion - abToXZ_b = new int[LAB_BASE*9/4]; - for(i = minABvalue; i < LAB_BASE*9/4+minABvalue; i++) - { - int v; - //6.f/29.f*BASE = 3389.730 - if(i <= 3390) - { - //fxz[k] = (fxz[k] - 16.0f / 116.0f) / 7.787f; - // 7.787f = (29/3)^3/(29*4) - v = i*108/841 - BASE*16/116*108/841; - } - else - { - //fxz[k] = fxz[k] * fxz[k] * fxz[k]; - v = i*i/BASE*i/BASE; - } - abToXZ_b[i-minABvalue] = v; // -1335 <= v <= 88231 - } + abToXZ_b = initLUTforABXZ(BASE); softfloat dd = D65[0] + D65[1]*softdouble(15) + D65[2]*softdouble(3); dd = softfloat::one()/max(dd, softfloat::eps()); From 1c46034166904f2d553f864802ff2adcee3d2059 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 21 Nov 2017 17:54:43 +0300 Subject: [PATCH 5/7] Other buffers --- modules/imgproc/src/color.cpp | 277 +++++++++++++++++----------------- 1 file changed, 142 insertions(+), 135 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 4a382644b6..e8912ba616 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -5852,7 +5852,6 @@ enum trilinear_shift = 8 - lab_lut_shift + 1, TRILINEAR_BASE = (1 << trilinear_shift) }; -static int16_t *RGB2LabLUT_s16; static int16_t trilinearLUT[TRILINEAR_BASE*TRILINEAR_BASE*TRILINEAR_BASE*8]; static ushort LabToYF_b[256*2]; static const int minABvalue = -8145; @@ -5861,15 +5860,19 @@ static const int *abToXZ_b; static const bool enableRGB2LuvInterpolation = true; static const bool enablePackedRGB2Luv = true; static const bool enablePackedLuv2RGB = true; -static int16_t *RGB2LuvLUT_s16; static const softfloat uLow(-134), uHigh(220), uRange(uHigh-uLow); static const softfloat vLow(-140), vHigh(122), vRange(vHigh-vLow); +static struct LABLUVLUT_s16_t { + const int16_t *RGB2LabLUT_s16; + const int16_t *RGB2LuvLUT_s16; +} LABLUVLUTs16 = {0, 0}; + static struct LUVLUT_T { const int *LuToUp_b; const int *LvToVp_b; const long long int *LvToVpl_b; -} LUVLUT; +} LUVLUT = {0, 0, 0}; #define clip(value) \ value < 0.0f ? 0.0f : value > 1.0f ? 1.0f : value; @@ -5881,6 +5884,12 @@ static const softdouble gammaLowScale = softdouble(323)/softdouble(25); static const softdouble gammaPower = softdouble(12)/softdouble(5); // 2.4 static const softdouble gammaXshift = softdouble(11)/softdouble(200); // 0.055 +static const softfloat lthresh = softfloat(216) / softfloat(24389); // 0.008856f = (6/29)^3 +static const softfloat lscale = softfloat(841) / softfloat(108); // 7.787f = (29/3)^3/(29*4) +static const softfloat lbias = softfloat(16) / softfloat(116); +static const softfloat f255(255); + + static inline softfloat applyGamma(softfloat x) { //return x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4); @@ -5902,7 +5911,6 @@ static inline softfloat applyInvGamma(softfloat x) static LUVLUT_T initLUTforLUV(int BASE, const softfloat &un, const softfloat &vn) { const softfloat oneof4 = softfloat::one()/softfloat(4); - const softfloat f255(255); int *LuToUp_b = cv::allocSingleton(256*256); int *LvToVp_b = cv::allocSingleton(256*256); long long int *LvToVpl_b = cv::allocSingleton(256*256); @@ -5957,16 +5965,135 @@ static int * initLUTforABXZ(int BASE) return res; } +inline void fill_one(int16_t *LAB, const int16_t *LAB_prev, int16_t *LUV, const int16_t *LUV_prev, int p, int q, int r, int _p, int _q, int _r) +{ + do { + int idxold = 0; + idxold += min(p+(_p), (int)(LAB_LUT_DIM-1))*3; + idxold += min(q+(_q), (int)(LAB_LUT_DIM-1))*LAB_LUT_DIM*3; + idxold += min(r+(_r), (int)(LAB_LUT_DIM-1))*LAB_LUT_DIM*LAB_LUT_DIM*3; + int idxnew = p*3*8 + q*LAB_LUT_DIM*3*8 + r*LAB_LUT_DIM*LAB_LUT_DIM*3*8+4*(_p)+2*(_q)+(_r); + LAB[idxnew] = LAB_prev[idxold]; + LAB[idxnew+8] = LAB_prev[idxold+1]; + LAB[idxnew+16] = LAB_prev[idxold+2]; + LUV[idxnew] = LUV_prev[idxold]; + LUV[idxnew+8] = LUV_prev[idxold+1]; + LUV[idxnew+16] = LUV_prev[idxold+2]; + } while(0); +} + +static LABLUVLUT_s16_t initLUTforLABLUVs16(const softfloat & un, const softfloat & vn) +{ + int i; + softfloat scaledCoeffs[9], coeffs[9]; + + //RGB2Lab coeffs + softdouble scaleWhite[] = { softdouble::one()/D65[0], + softdouble::one(), + softdouble::one()/D65[2] }; + + for(i = 0; i < 3; i++ ) + { + coeffs[i*3+2] = sRGB2XYZ_D65[i*3+0]; + coeffs[i*3+1] = sRGB2XYZ_D65[i*3+1]; + coeffs[i*3+0] = sRGB2XYZ_D65[i*3+2]; + scaledCoeffs[i*3+0] = sRGB2XYZ_D65[i*3+2] * scaleWhite[i]; + scaledCoeffs[i*3+1] = sRGB2XYZ_D65[i*3+1] * scaleWhite[i]; + scaledCoeffs[i*3+2] = sRGB2XYZ_D65[i*3+0] * scaleWhite[i]; + } + + softfloat S0 = scaledCoeffs[0], S1 = scaledCoeffs[1], S2 = scaledCoeffs[2], + S3 = scaledCoeffs[3], S4 = scaledCoeffs[4], S5 = scaledCoeffs[5], + S6 = scaledCoeffs[6], S7 = scaledCoeffs[7], S8 = scaledCoeffs[8]; + softfloat C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], + C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], + C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; + + //u, v: [-134.0, 220.0], [-140.0, 122.0] + static const softfloat lld(LAB_LUT_DIM - 1), f116(116), f16(16), f500(500), f200(200); + static const softfloat f100(100), f128(128), f256(256), lbase((int)LAB_BASE); + //903.3f = (29/3)^3 + static const softfloat f9033 = softfloat(29*29*29)/softfloat(27); + static const softfloat f9of4 = softfloat(9)/softfloat(4); + static const softfloat f15(15), f3(3); + + AutoBuffer RGB2Labprev(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3); + AutoBuffer RGB2Luvprev(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3); + for(int p = 0; p < LAB_LUT_DIM; p++) + { + for(int q = 0; q < LAB_LUT_DIM; q++) + { + for(int r = 0; r < LAB_LUT_DIM; r++) + { + int idx = p*3 + q*LAB_LUT_DIM*3 + r*LAB_LUT_DIM*LAB_LUT_DIM*3; + softfloat R = softfloat(p)/lld; + softfloat G = softfloat(q)/lld; + softfloat B = softfloat(r)/lld; + + R = applyGamma(R); + G = applyGamma(G); + B = applyGamma(B); + + //RGB 2 Lab LUT building + { + softfloat X = R*S0 + G*S1 + B*S2; + softfloat Y = R*S3 + G*S4 + B*S5; + softfloat Z = R*S6 + G*S7 + B*S8; + + softfloat FX = X > lthresh ? cbrt(X) : mulAdd(X, lscale, lbias); + softfloat FY = Y > lthresh ? cbrt(Y) : mulAdd(Y, lscale, lbias); + softfloat FZ = Z > lthresh ? cbrt(Z) : mulAdd(Z, lscale, lbias); + + softfloat L = Y > lthresh ? (f116*FY - f16) : (f9033*Y); + softfloat a = f500 * (FX - FY); + softfloat b = f200 * (FY - FZ); + + RGB2Labprev[idx] = (int16_t)(cvRound(lbase*L/f100)); + RGB2Labprev[idx+1] = (int16_t)(cvRound(lbase*(a + f128)/f256)); + RGB2Labprev[idx+2] = (int16_t)(cvRound(lbase*(b + f128)/f256)); + } + + //RGB 2 Luv LUT building + { + softfloat X = R*C0 + G*C1 + B*C2; + softfloat Y = R*C3 + G*C4 + B*C5; + softfloat Z = R*C6 + G*C7 + B*C8; + + softfloat L = Y < lthresh ? mulAdd(Y, lscale, lbias) : cbrt(Y); + L = L*f116 - f16; + + softfloat d = softfloat(4*13)/max(X + f15 * Y + f3 * Z, softfloat(FLT_EPSILON)); + softfloat u = L*(X*d - un); + softfloat v = L*(f9of4*Y*d - vn); + + RGB2Luvprev[idx ] = (int16_t)cvRound(lbase*L/f100); + RGB2Luvprev[idx+1] = (int16_t)cvRound(lbase*(u-uLow)/uRange); + RGB2Luvprev[idx+2] = (int16_t)cvRound(lbase*(v-vLow)/vRange); + } + } + } + } + + int16_t *RGB2LabLUT_s16 = cv::allocSingleton(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8); + int16_t *RGB2LuvLUT_s16 = cv::allocSingleton(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8); + for(int p = 0; p < LAB_LUT_DIM; p++) + for(int q = 0; q < LAB_LUT_DIM; q++) + for(int r = 0; r < LAB_LUT_DIM; r++) + for (int p_ = 0; p_ < 2; ++p_) + for (int q_ = 0; q_ < 2; ++q_) + for (int r_ = 0; r_ < 2; ++r_) + fill_one(RGB2LabLUT_s16, RGB2Labprev, RGB2LuvLUT_s16, RGB2Luvprev, p, q, r, p_, q_, r_); + LABLUVLUT_s16_t res; + res.RGB2LabLUT_s16 = RGB2LabLUT_s16; + res.RGB2LuvLUT_s16 = RGB2LuvLUT_s16; + return res; +} + static void initLabTabs() { static bool initialized = false; if(!initialized) { - static const softfloat lthresh = softfloat(216) / softfloat(24389); // 0.008856f = (6/29)^3 - static const softfloat lscale = softfloat(841) / softfloat(108); // 7.787f = (29/3)^3/(29*4) - static const softfloat lbias = softfloat(16) / softfloat(116); - static const softfloat f255(255); - softfloat f[LAB_CBRT_TAB_SIZE+1], g[GAMMA_TAB_SIZE+1], ig[GAMMA_TAB_SIZE+1]; softfloat scale = softfloat::one()/softfloat(LabCbrtTabScale); int i; @@ -6062,125 +6189,8 @@ static void initLabTabs() static const bool calcLUT = enableRGB2LabInterpolation || enableRGB2LuvInterpolation; if(calcLUT) { - softfloat scaledCoeffs[9], coeffs[9]; - //RGB2Lab coeffs - softdouble scaleWhite[] = { softdouble::one()/D65[0], - softdouble::one(), - softdouble::one()/D65[2] }; - - for(i = 0; i < 3; i++ ) - { - coeffs[i*3+2] = sRGB2XYZ_D65[i*3+0]; - coeffs[i*3+1] = sRGB2XYZ_D65[i*3+1]; - coeffs[i*3+0] = sRGB2XYZ_D65[i*3+2]; - scaledCoeffs[i*3+0] = sRGB2XYZ_D65[i*3+2] * scaleWhite[i]; - scaledCoeffs[i*3+1] = sRGB2XYZ_D65[i*3+1] * scaleWhite[i]; - scaledCoeffs[i*3+2] = sRGB2XYZ_D65[i*3+0] * scaleWhite[i]; - } - - softfloat S0 = scaledCoeffs[0], S1 = scaledCoeffs[1], S2 = scaledCoeffs[2], - S3 = scaledCoeffs[3], S4 = scaledCoeffs[4], S5 = scaledCoeffs[5], - S6 = scaledCoeffs[6], S7 = scaledCoeffs[7], S8 = scaledCoeffs[8]; - softfloat C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], - C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], - C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; - - //u, v: [-134.0, 220.0], [-140.0, 122.0] - static const softfloat lld(LAB_LUT_DIM - 1), f116(116), f16(16), f500(500), f200(200); - static const softfloat f100(100), f128(128), f256(256), lbase((int)LAB_BASE); - //903.3f = (29/3)^3 - static const softfloat f9033 = softfloat(29*29*29)/softfloat(27); - static const softfloat f9of4 = softfloat(9)/softfloat(4); - static const softfloat f15(15), f3(3); - AutoBuffer RGB2Labprev(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3); - AutoBuffer RGB2Luvprev(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3); - for(int p = 0; p < LAB_LUT_DIM; p++) - { - for(int q = 0; q < LAB_LUT_DIM; q++) - { - for(int r = 0; r < LAB_LUT_DIM; r++) - { - int idx = p*3 + q*LAB_LUT_DIM*3 + r*LAB_LUT_DIM*LAB_LUT_DIM*3; - softfloat R = softfloat(p)/lld; - softfloat G = softfloat(q)/lld; - softfloat B = softfloat(r)/lld; - - R = applyGamma(R); - G = applyGamma(G); - B = applyGamma(B); - - //RGB 2 Lab LUT building - { - softfloat X = R*S0 + G*S1 + B*S2; - softfloat Y = R*S3 + G*S4 + B*S5; - softfloat Z = R*S6 + G*S7 + B*S8; - - softfloat FX = X > lthresh ? cbrt(X) : mulAdd(X, lscale, lbias); - softfloat FY = Y > lthresh ? cbrt(Y) : mulAdd(Y, lscale, lbias); - softfloat FZ = Z > lthresh ? cbrt(Z) : mulAdd(Z, lscale, lbias); - - softfloat L = Y > lthresh ? (f116*FY - f16) : (f9033*Y); - softfloat a = f500 * (FX - FY); - softfloat b = f200 * (FY - FZ); - - RGB2Labprev[idx] = (int16_t)(cvRound(lbase*L/f100)); - RGB2Labprev[idx+1] = (int16_t)(cvRound(lbase*(a + f128)/f256)); - RGB2Labprev[idx+2] = (int16_t)(cvRound(lbase*(b + f128)/f256)); - } - - //RGB 2 Luv LUT building - { - softfloat X = R*C0 + G*C1 + B*C2; - softfloat Y = R*C3 + G*C4 + B*C5; - softfloat Z = R*C6 + G*C7 + B*C8; - - softfloat L = Y < lthresh ? mulAdd(Y, lscale, lbias) : cbrt(Y); - L = L*f116 - f16; - - softfloat d = softfloat(4*13)/max(X + f15 * Y + f3 * Z, softfloat(FLT_EPSILON)); - softfloat u = L*(X*d - un); - softfloat v = L*(f9of4*Y*d - vn); - - RGB2Luvprev[idx ] = (int16_t)cvRound(lbase*L/f100); - RGB2Luvprev[idx+1] = (int16_t)cvRound(lbase*(u-uLow)/uRange); - RGB2Luvprev[idx+2] = (int16_t)cvRound(lbase*(v-vLow)/vRange); - } - } - } - } - RGB2LabLUT_s16 = new int16_t[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8]; - RGB2LuvLUT_s16 = new int16_t[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8]; - for(int p = 0; p < LAB_LUT_DIM; p++) - { - for(int q = 0; q < LAB_LUT_DIM; q++) - { - for(int r = 0; r < LAB_LUT_DIM; r++) - { - #define FILL(_p, _q, _r) \ - do {\ - int idxold = 0;\ - idxold += min(p+(_p), (int)(LAB_LUT_DIM-1))*3;\ - idxold += min(q+(_q), (int)(LAB_LUT_DIM-1))*LAB_LUT_DIM*3;\ - idxold += min(r+(_r), (int)(LAB_LUT_DIM-1))*LAB_LUT_DIM*LAB_LUT_DIM*3;\ - int idxnew = p*3*8 + q*LAB_LUT_DIM*3*8 + r*LAB_LUT_DIM*LAB_LUT_DIM*3*8+4*(_p)+2*(_q)+(_r);\ - RGB2LabLUT_s16[idxnew] = RGB2Labprev[idxold];\ - RGB2LabLUT_s16[idxnew+8] = RGB2Labprev[idxold+1];\ - RGB2LabLUT_s16[idxnew+16] = RGB2Labprev[idxold+2];\ - RGB2LuvLUT_s16[idxnew] = RGB2Luvprev[idxold];\ - RGB2LuvLUT_s16[idxnew+8] = RGB2Luvprev[idxold+1];\ - RGB2LuvLUT_s16[idxnew+16] = RGB2Luvprev[idxold+2];\ - } while(0) - - FILL(0, 0, 0); FILL(0, 0, 1); - FILL(0, 1, 0); FILL(0, 1, 1); - FILL(1, 0, 0); FILL(1, 0, 1); - FILL(1, 1, 0); FILL(1, 1, 1); - - #undef FILL - } - } - } + LABLUVLUTs16 = initLUTforLABLUVs16(un, vn); for(int16_t p = 0; p < TRILINEAR_BASE; p++) { @@ -6506,7 +6516,7 @@ struct RGB2Lab_f v_uint16x8 uibvec = v_reinterpret_as_u16(ibvec); v_uint16x8 ui_lvec, ui_avec, ui_bvec; - trilinearPackedInterpolate(uirvec, uigvec, uibvec, RGB2LabLUT_s16, ui_lvec, ui_avec, ui_bvec); + trilinearPackedInterpolate(uirvec, uigvec, uibvec, LABLUVLUTs16.RGB2LabLUT_s16, ui_lvec, ui_avec, ui_bvec); v_int16x8 i_lvec = v_reinterpret_as_s16(ui_lvec); v_int16x8 i_avec = v_reinterpret_as_s16(ui_avec); v_int16x8 i_bvec = v_reinterpret_as_s16(ui_bvec); @@ -6547,7 +6557,7 @@ struct RGB2Lab_f int iR = cvRound(R*LAB_BASE), iG = cvRound(G*LAB_BASE), iB = cvRound(B*LAB_BASE); int iL, ia, ib; - trilinearInterpolate(iR, iG, iB, RGB2LabLUT_s16, iL, ia, ib); + trilinearInterpolate(iR, iG, iB, LABLUVLUTs16.RGB2LabLUT_s16, iL, ia, ib); float L = iL*1.0f/LAB_BASE, a = ia*1.0f/LAB_BASE, b = ib*1.0f/LAB_BASE; dst[i] = L*100.0f; @@ -8127,8 +8137,8 @@ struct RGB2Luvinterpolate trilinearInterpolate(R, G, B, RGB2LuvLUT_s16, L, u, v); */ v_uint16x8 l80, u80, v80, l81, u81, v81; - trilinearPackedInterpolate(r80, g80, b80, RGB2LuvLUT_s16, l80, u80, v80); - trilinearPackedInterpolate(r81, g81, b81, RGB2LuvLUT_s16, l81, u81, v81); + trilinearPackedInterpolate(r80, g80, b80, LABLUVLUTs16.RGB2LuvLUT_s16, l80, u80, v80); + trilinearPackedInterpolate(r81, g81, b81, LABLUVLUTs16.RGB2LuvLUT_s16, l81, u81, v81); /* dst[i] = saturate_cast(L/baseDiv); @@ -8154,7 +8164,7 @@ struct RGB2Luvinterpolate R = R*baseDiv, G = G*baseDiv, B = B*baseDiv; int L, u, v; - trilinearInterpolate(R, G, B, RGB2LuvLUT_s16, L, u, v); + trilinearInterpolate(R, G, B, LABLUVLUTs16.RGB2LuvLUT_s16, L, u, v); dst[i] = saturate_cast(L/baseDiv); dst[i+1] = saturate_cast(u/baseDiv); @@ -8182,7 +8192,6 @@ struct RGB2Luv_b && enableBitExactness && enableRGB2LuvInterpolation); - static const softfloat f255(255); #if CV_NEON v_scale_inv = vdupq_n_f32(softfloat::one()/f255); v_scale = vdupq_n_f32(f255/softfloat(100)); @@ -8235,7 +8244,6 @@ struct RGB2Luv_b int i, j, scn = srccn; float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE]; - static const softfloat f255(255); #if CV_SSE2 __m128 v_coeffs = _mm_set_ps(f255/softfloat(100), f255/vRange, f255/uRange, f255/softfloat(100)); __m128 v_res = _mm_set_ps(0.f, -vLow*f255/vRange, -uLow*f255/uRange, 0.f); @@ -8625,7 +8633,6 @@ struct Luv2RGB_b uchar alpha = ColorChannel::max(); float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE]; - static const softfloat f255(255); static const softfloat fl = softfloat(100)/f255; static const softfloat fu = uRange/f255; static const softfloat fv = vRange/f255; From 12662e064b34a042ef75d4adb5cb61b509b51090 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 21 Nov 2017 17:54:57 +0300 Subject: [PATCH 6/7] align singleton malloc --- modules/core/include/opencv2/core/private.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index eb1b307f24..232214adef 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -162,7 +162,7 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un //! Allocate all memory buffers which will not be freed, ease filtering memcheck issues template -CV_EXPORTS T* allocSingleton(size_t count) { return new T[count]; } +CV_EXPORTS T* allocSingleton(size_t count) { return fastMalloc(sizeof(T) * count); } } // property implementation macros From e57f22a3860b25f58b18ff4af7ca89e0459f0b7f Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 21 Nov 2017 18:07:30 +0300 Subject: [PATCH 7/7] Fixed allocSingleton --- modules/core/include/opencv2/core/private.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 232214adef..a849040f94 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -162,7 +162,7 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un //! Allocate all memory buffers which will not be freed, ease filtering memcheck issues template -CV_EXPORTS T* allocSingleton(size_t count) { return fastMalloc(sizeof(T) * count); } +CV_EXPORTS T* allocSingleton(size_t count) { return static_cast(fastMalloc(sizeof(T) * count)); } } // property implementation macros