Merge pull request #9931 from mshabunin:bss-cleanup

2025-06-12 04:12:52 +08:00 · 2017-11-22 08:13:35 +00:00 · 2017-11-22 08:13:35 +00:00 · a9b5233f67
commit a9b5233f67
parent c3cced0f89 e57f22a386
2 changed files with 236 additions and 219 deletions
--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@ -159,6 +159,10 @@ static inline cv::Size cvGetMatSize( const CvMat* mat )
 namespace cv
 {
 CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int unroll_to = 0);
+
+//! Allocate all memory buffers which will not be freed, ease filtering memcheck issues
+template <typename T>
+CV_EXPORTS T* allocSingleton(size_t count) { return static_cast<T*>(fastMalloc(sizeof(T) * count)); }
 }

 // property implementation macros
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@ -140,48 +140,24 @@ const int CB2GI = -5636;
 const int CR2GI = -11698;
 const int CR2RI = 22987;

-// computes cubic spline coefficients for a function: (xi=i, yi=f[i]), i=0..n
-template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab)
-{
-    _Tp cn = 0;
-    int i;
-    tab[0] = tab[1] = (_Tp)0;
-
-    for(i = 1; i < n-1; i++)
-    {
-        _Tp t = 3*(f[i+1] - 2*f[i] + f[i-1]);
-        _Tp l = 1/(4 - tab[(i-1)*4]);
-        tab[i*4] = l; tab[i*4+1] = (t - tab[(i-1)*4+1])*l;
-    }
-
-    for(i = n-1; i >= 0; i--)
-    {
-        _Tp c = tab[i*4+1] - tab[i*4]*cn;
-        _Tp b = f[i+1] - f[i] - (cn + c*2)*(_Tp)0.3333333333333333;
-        _Tp d = (cn - c)*(_Tp)0.3333333333333333;
-        tab[i*4] = f[i]; tab[i*4+1] = b;
-        tab[i*4+2] = c; tab[i*4+3] = d;
-        cn = c;
-    }
-}
-
-static void splineBuild(const softfloat* f, int n, float* tab)
+static const float * splineBuild(const softfloat* f, size_t n)
 {
+    float* tab = cv::allocSingleton<float>(n * 4);
    const softfloat f2(2), f3(3), f4(4);
    softfloat cn(0);
    softfloat* sftab = reinterpret_cast<softfloat*>(tab);
-    int i;
    tab[0] = tab[1] = 0.0f;

-    for(i = 1; i < n-1; i++)
+    for(size_t i = 1; i < n; i++)
    {
        softfloat t = (f[i+1] - f[i]*f2 + f[i-1])*f3;
        softfloat l = softfloat::one()/(f4 - sftab[(i-1)*4]);
        sftab[i*4] = l; sftab[i*4+1] = (t - sftab[(i-1)*4+1])*l;
    }

-    for(i = n-1; i >= 0; i--)
+    for(size_t j = 0; j < n; ++j)
    {
+        size_t i = n - j - 1;
        softfloat c = sftab[i*4+1] - sftab[i*4]*cn;
        softfloat b = f[i+1] - f[i] - (cn + c*f2)/f3;
        softfloat d = (cn - c)/f3;
@ -189,8 +165,10 @@ static void splineBuild(const softfloat* f, int n, float* tab)
        sftab[i*4+2] = c; sftab[i*4+3] = d;
        cn = c;
    }
+    return tab;
 }

+
 // interpolates value of a function at x, 0 <= x <= n using a cubic spline.
 template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab, int n)
 {
@ -5845,10 +5823,11 @@ static const softdouble D65[] = {softdouble::fromRaw(0x3fee6a22b3892ee8),
                                 softdouble::fromRaw(0x3ff16b8950763a19)};

 enum { LAB_CBRT_TAB_SIZE = 1024, GAMMA_TAB_SIZE = 1024 };
-static float LabCbrtTab[LAB_CBRT_TAB_SIZE*4];
+static const float *LabCbrtTab = 0;
 static const float LabCbrtTabScale = softfloat(LAB_CBRT_TAB_SIZE*2)/softfloat(3);

-static float sRGBGammaTab[GAMMA_TAB_SIZE*4], sRGBInvGammaTab[GAMMA_TAB_SIZE*4];
+static const float *sRGBGammaTab = 0;
+static const float *sRGBInvGammaTab = 0;
 static const float GammaTabScale((int)GAMMA_TAB_SIZE);

 static ushort sRGBGammaTab_b[256], linearGammaTab_b[256];
@ -5873,21 +5852,27 @@ enum
    trilinear_shift = 8 - lab_lut_shift + 1,
    TRILINEAR_BASE = (1 << trilinear_shift)
 };
-static int16_t RGB2LabLUT_s16[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8];
 static int16_t trilinearLUT[TRILINEAR_BASE*TRILINEAR_BASE*TRILINEAR_BASE*8];
 static ushort LabToYF_b[256*2];
 static const int minABvalue = -8145;
-static int abToXZ_b[LAB_BASE*9/4];
+static const int *abToXZ_b;
 // Luv constants
 static const bool enableRGB2LuvInterpolation = true;
 static const bool enablePackedRGB2Luv = true;
 static const bool enablePackedLuv2RGB = true;
-static int16_t RGB2LuvLUT_s16[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8];
 static const softfloat uLow(-134), uHigh(220), uRange(uHigh-uLow);
 static const softfloat vLow(-140), vHigh(122), vRange(vHigh-vLow);
-static int LuToUp_b[256*256];
-static int LvToVp_b[256*256];
-static long long int LvToVpl_b[256*256];
+
+static struct LABLUVLUT_s16_t {
+    const int16_t *RGB2LabLUT_s16;
+    const int16_t *RGB2LuvLUT_s16;
+} LABLUVLUTs16 = {0, 0};
+
+static struct LUVLUT_T {
+    const int *LuToUp_b;
+    const int *LvToVp_b;
+    const long long int *LvToVpl_b;
+} LUVLUT = {0, 0, 0};

 #define clip(value) \
    value < 0.0f ? 0.0f : value > 1.0f ? 1.0f : value;
@ -5899,6 +5884,12 @@ static const softdouble gammaLowScale     = softdouble(323)/softdouble(25);
 static const softdouble gammaPower        = softdouble(12)/softdouble(5);         //  2.4
 static const softdouble gammaXshift       = softdouble(11)/softdouble(200);       // 0.055

+static const softfloat lthresh = softfloat(216) / softfloat(24389); // 0.008856f = (6/29)^3
+static const softfloat lscale  = softfloat(841) / softfloat(108); // 7.787f = (29/3)^3/(29*4)
+static const softfloat lbias = softfloat(16) / softfloat(116);
+static const softfloat f255(255);
+
+
 static inline softfloat applyGamma(softfloat x)
 {
    //return x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4);
@ -5917,16 +5908,192 @@ static inline softfloat applyInvGamma(softfloat x)
                pow(xd, softdouble::one()/gammaPower)*(softdouble::one()+gammaXshift) - gammaXshift);
 }

+static LUVLUT_T initLUTforLUV(int BASE, const softfloat &un, const softfloat &vn)
+{
+    const softfloat oneof4 = softfloat::one()/softfloat(4);
+    int *LuToUp_b = cv::allocSingleton<int>(256*256);
+    int *LvToVp_b = cv::allocSingleton<int>(256*256);
+    long long int *LvToVpl_b = cv::allocSingleton<long long int>(256*256);
+    for(int LL = 0; LL < 256; LL++)
+    {
+        softfloat L = softfloat(LL*100)/f255;
+        for(int uu = 0; uu < 256; uu++)
+        {
+            softfloat u = softfloat(uu)*uRange/f255 + uLow;
+            softfloat up = softfloat(9)*(u + L*un);
+            LuToUp_b[LL*256+uu] = cvRound(up*softfloat(BASE/1024));//1024 is OK, 2048 gave maxerr 3
+        }
+        for(int vv = 0; vv < 256; vv++)
+        {
+            softfloat v = softfloat(vv)*vRange/f255 + vLow;
+            softfloat vp = oneof4/(v + L*vn);
+            if(vp >  oneof4) vp =  oneof4;
+            if(vp < -oneof4) vp = -oneof4;
+            int ivp = cvRound(vp*softfloat(BASE*1024));
+            LvToVp_b[LL*256+vv] = ivp;
+            int vpl = ivp*LL;
+            LvToVpl_b[LL*256+vv] = (12*13*100*(BASE/1024))*(long long)vpl;
+        }
+    }
+    LUVLUT_T res;
+    res.LuToUp_b = LuToUp_b;
+    res.LvToVp_b = LvToVp_b;
+    res.LvToVpl_b = LvToVpl_b;
+    return res;
+}
+
+static int * initLUTforABXZ(int BASE)
+{
+    int * res = cv::allocSingleton<int>(LAB_BASE*9/4);
+    for(int i = minABvalue; i < LAB_BASE*9/4+minABvalue; i++)
+    {
+        int v;
+        //6.f/29.f*BASE = 3389.730
+        if(i <= 3390)
+        {
+            //fxz[k] = (fxz[k] - 16.0f / 116.0f) / 7.787f;
+            // 7.787f = (29/3)^3/(29*4)
+            v = i*108/841 - BASE*16/116*108/841;
+        }
+        else
+        {
+            //fxz[k] = fxz[k] * fxz[k] * fxz[k];
+            v = i*i/BASE*i/BASE;
+        }
+        res[i-minABvalue] = v; // -1335 <= v <= 88231
+    }
+    return res;
+}
+
+inline void fill_one(int16_t *LAB, const int16_t *LAB_prev, int16_t *LUV, const int16_t *LUV_prev, int p, int q, int r, int _p, int _q, int _r)
+{
+    do {
+        int idxold = 0;
+        idxold += min(p+(_p), (int)(LAB_LUT_DIM-1))*3;
+        idxold += min(q+(_q), (int)(LAB_LUT_DIM-1))*LAB_LUT_DIM*3;
+        idxold += min(r+(_r), (int)(LAB_LUT_DIM-1))*LAB_LUT_DIM*LAB_LUT_DIM*3;
+        int idxnew = p*3*8 + q*LAB_LUT_DIM*3*8 + r*LAB_LUT_DIM*LAB_LUT_DIM*3*8+4*(_p)+2*(_q)+(_r);
+        LAB[idxnew]    = LAB_prev[idxold];
+        LAB[idxnew+8]  = LAB_prev[idxold+1];
+        LAB[idxnew+16] = LAB_prev[idxold+2];
+        LUV[idxnew]    = LUV_prev[idxold];
+        LUV[idxnew+8]  = LUV_prev[idxold+1];
+        LUV[idxnew+16] = LUV_prev[idxold+2];
+    } while(0);
+}
+
+static LABLUVLUT_s16_t initLUTforLABLUVs16(const softfloat & un, const softfloat & vn)
+{
+    int i;
+    softfloat scaledCoeffs[9], coeffs[9];
+
+    //RGB2Lab coeffs
+    softdouble scaleWhite[] = { softdouble::one()/D65[0],
+                                softdouble::one(),
+                                softdouble::one()/D65[2] };
+
+    for(i = 0; i < 3; i++ )
+    {
+        coeffs[i*3+2] = sRGB2XYZ_D65[i*3+0];
+        coeffs[i*3+1] = sRGB2XYZ_D65[i*3+1];
+        coeffs[i*3+0] = sRGB2XYZ_D65[i*3+2];
+        scaledCoeffs[i*3+0] = sRGB2XYZ_D65[i*3+2] * scaleWhite[i];
+        scaledCoeffs[i*3+1] = sRGB2XYZ_D65[i*3+1] * scaleWhite[i];
+        scaledCoeffs[i*3+2] = sRGB2XYZ_D65[i*3+0] * scaleWhite[i];
+    }
+
+    softfloat S0 = scaledCoeffs[0], S1 = scaledCoeffs[1], S2 = scaledCoeffs[2],
+              S3 = scaledCoeffs[3], S4 = scaledCoeffs[4], S5 = scaledCoeffs[5],
+              S6 = scaledCoeffs[6], S7 = scaledCoeffs[7], S8 = scaledCoeffs[8];
+    softfloat C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
+              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
+              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
+
+    //u, v: [-134.0, 220.0], [-140.0, 122.0]
+    static const softfloat lld(LAB_LUT_DIM - 1), f116(116), f16(16), f500(500), f200(200);
+    static const softfloat f100(100), f128(128), f256(256), lbase((int)LAB_BASE);
+    //903.3f = (29/3)^3
+    static const softfloat f9033 = softfloat(29*29*29)/softfloat(27);
+    static const softfloat f9of4 = softfloat(9)/softfloat(4);
+    static const softfloat f15(15), f3(3);
+
+    AutoBuffer<int16_t> RGB2Labprev(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3);
+    AutoBuffer<int16_t> RGB2Luvprev(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3);
+    for(int p = 0; p < LAB_LUT_DIM; p++)
+    {
+        for(int q = 0; q < LAB_LUT_DIM; q++)
+        {
+            for(int r = 0; r < LAB_LUT_DIM; r++)
+            {
+                int idx = p*3 + q*LAB_LUT_DIM*3 + r*LAB_LUT_DIM*LAB_LUT_DIM*3;
+                softfloat R = softfloat(p)/lld;
+                softfloat G = softfloat(q)/lld;
+                softfloat B = softfloat(r)/lld;
+
+                R = applyGamma(R);
+                G = applyGamma(G);
+                B = applyGamma(B);
+
+                //RGB 2 Lab LUT building
+                {
+                    softfloat X = R*S0 + G*S1 + B*S2;
+                    softfloat Y = R*S3 + G*S4 + B*S5;
+                    softfloat Z = R*S6 + G*S7 + B*S8;
+
+                    softfloat FX = X > lthresh ? cbrt(X) : mulAdd(X, lscale, lbias);
+                    softfloat FY = Y > lthresh ? cbrt(Y) : mulAdd(Y, lscale, lbias);
+                    softfloat FZ = Z > lthresh ? cbrt(Z) : mulAdd(Z, lscale, lbias);
+
+                    softfloat L = Y > lthresh ? (f116*FY - f16) : (f9033*Y);
+                    softfloat a = f500 * (FX - FY);
+                    softfloat b = f200 * (FY - FZ);
+
+                    RGB2Labprev[idx]   = (int16_t)(cvRound(lbase*L/f100));
+                    RGB2Labprev[idx+1] = (int16_t)(cvRound(lbase*(a + f128)/f256));
+                    RGB2Labprev[idx+2] = (int16_t)(cvRound(lbase*(b + f128)/f256));
+                }
+
+                //RGB 2 Luv LUT building
+                {
+                    softfloat X = R*C0 + G*C1 + B*C2;
+                    softfloat Y = R*C3 + G*C4 + B*C5;
+                    softfloat Z = R*C6 + G*C7 + B*C8;
+
+                    softfloat L = Y < lthresh ? mulAdd(Y, lscale, lbias) : cbrt(Y);
+                    L = L*f116 - f16;
+
+                    softfloat d = softfloat(4*13)/max(X + f15 * Y + f3 * Z, softfloat(FLT_EPSILON));
+                    softfloat u = L*(X*d - un);
+                    softfloat v = L*(f9of4*Y*d - vn);
+
+                    RGB2Luvprev[idx  ] = (int16_t)cvRound(lbase*L/f100);
+                    RGB2Luvprev[idx+1] = (int16_t)cvRound(lbase*(u-uLow)/uRange);
+                    RGB2Luvprev[idx+2] = (int16_t)cvRound(lbase*(v-vLow)/vRange);
+                }
+            }
+        }
+    }
+
+    int16_t *RGB2LabLUT_s16 = cv::allocSingleton<int16_t>(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8);
+    int16_t *RGB2LuvLUT_s16 = cv::allocSingleton<int16_t>(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8);
+    for(int p = 0; p < LAB_LUT_DIM; p++)
+        for(int q = 0; q < LAB_LUT_DIM; q++)
+            for(int r = 0; r < LAB_LUT_DIM; r++)
+                for (int p_ = 0; p_ < 2; ++p_)
+                    for (int q_ = 0; q_ < 2; ++q_)
+                        for (int r_ = 0; r_ < 2; ++r_)
+                            fill_one(RGB2LabLUT_s16, RGB2Labprev, RGB2LuvLUT_s16, RGB2Luvprev, p, q, r, p_, q_, r_);
+    LABLUVLUT_s16_t res;
+    res.RGB2LabLUT_s16 = RGB2LabLUT_s16;
+    res.RGB2LuvLUT_s16 = RGB2LuvLUT_s16;
+    return res;
+}
+
 static void initLabTabs()
 {
    static bool initialized = false;
    if(!initialized)
    {
-        static const softfloat lthresh = softfloat(216) / softfloat(24389); // 0.008856f = (6/29)^3
-        static const softfloat lscale  = softfloat(841) / softfloat(108); // 7.787f = (29/3)^3/(29*4)
-        static const softfloat lbias = softfloat(16) / softfloat(116);
-        static const softfloat f255(255);
-
        softfloat f[LAB_CBRT_TAB_SIZE+1], g[GAMMA_TAB_SIZE+1], ig[GAMMA_TAB_SIZE+1];
        softfloat scale = softfloat::one()/softfloat(LabCbrtTabScale);
        int i;
@ -5935,7 +6102,7 @@ static void initLabTabs()
            softfloat x = scale*softfloat(i);
            f[i] = x < lthresh ? mulAdd(x, lscale, lbias) : cbrt(x);
        }
-        splineBuild(f, LAB_CBRT_TAB_SIZE, LabCbrtTab);
+        LabCbrtTab = splineBuild(f, LAB_CBRT_TAB_SIZE);

        scale = softfloat::one()/softfloat(GammaTabScale);
        for(i = 0; i <= GAMMA_TAB_SIZE; i++)
@ -5944,8 +6111,9 @@ static void initLabTabs()
            g[i] = applyGamma(x);
            ig[i] = applyInvGamma(x);
        }
-        splineBuild(g, GAMMA_TAB_SIZE, sRGBGammaTab);
-        splineBuild(ig, GAMMA_TAB_SIZE, sRGBInvGammaTab);
+
+        sRGBGammaTab = splineBuild(g, GAMMA_TAB_SIZE);
+        sRGBInvGammaTab = splineBuild(ig, GAMMA_TAB_SIZE);

        static const softfloat intScale(255*(1 << gamma_shift));
        for(i = 0; i < 256; i++)
@ -5999,29 +6167,12 @@ static void initLabTabs()
        }

        //Lookup table for a,b to x,z conversion
-        for(i = minABvalue; i < LAB_BASE*9/4+minABvalue; i++)
-        {
-            int v;
-            //6.f/29.f*BASE = 3389.730
-            if(i <= 3390)
-            {
-                //fxz[k] = (fxz[k] - 16.0f / 116.0f) / 7.787f;
-                // 7.787f = (29/3)^3/(29*4)
-                v = i*108/841 - BASE*16/116*108/841;
-            }
-            else
-            {
-                //fxz[k] = fxz[k] * fxz[k] * fxz[k];
-                v = i*i/BASE*i/BASE;
-            }
-            abToXZ_b[i-minABvalue] = v; // -1335 <= v <= 88231
-        }
+        abToXZ_b = initLUTforABXZ(BASE);

        softfloat dd = D65[0] + D65[1]*softdouble(15) + D65[2]*softdouble(3);
        dd = softfloat::one()/max(dd, softfloat::eps());
        softfloat un = dd*softfloat(13*4)*D65[0];
        softfloat vn = dd*softfloat(13*9)*D65[1];
-        softfloat oneof4 = softfloat::one()/softfloat(4);

        //when XYZ are limited to [0, 2]
        /*
@ -6032,149 +6183,14 @@ static void initLabTabs()
        */

        //Luv LUT
-        for(int LL = 0; LL < 256; LL++)
-        {
-            softfloat L = softfloat(LL*100)/f255;
-            for(int uu = 0; uu < 256; uu++)
-            {
-                softfloat u = softfloat(uu)*uRange/f255 + uLow;
-                softfloat up = softfloat(9)*(u + L*un);
-                LuToUp_b[LL*256+uu] = cvRound(up*softfloat(BASE/1024));//1024 is OK, 2048 gave maxerr 3
-            }
-            for(int vv = 0; vv < 256; vv++)
-            {
-                softfloat v = softfloat(vv)*vRange/f255 + vLow;
-                softfloat vp = oneof4/(v + L*vn);
-                if(vp >  oneof4) vp =  oneof4;
-                if(vp < -oneof4) vp = -oneof4;
-                int ivp = cvRound(vp*softfloat(BASE*1024));
-                LvToVp_b[LL*256+vv] = ivp;
-                int vpl = ivp*LL;
-                LvToVpl_b[LL*256+vv] = (12*13*100*(BASE/1024))*(long long)vpl;
-            }
-        }
+        LUVLUT = initLUTforLUV(BASE, un, vn);

        //try to suppress warning
        static const bool calcLUT = enableRGB2LabInterpolation || enableRGB2LuvInterpolation;
        if(calcLUT)
        {
-            softfloat scaledCoeffs[9], coeffs[9];

-            //RGB2Lab coeffs
-            softdouble scaleWhite[] = { softdouble::one()/D65[0],
-                                        softdouble::one(),
-                                        softdouble::one()/D65[2] };
-
-            for(i = 0; i < 3; i++ )
-            {
-                coeffs[i*3+2] = sRGB2XYZ_D65[i*3+0];
-                coeffs[i*3+1] = sRGB2XYZ_D65[i*3+1];
-                coeffs[i*3+0] = sRGB2XYZ_D65[i*3+2];
-                scaledCoeffs[i*3+0] = sRGB2XYZ_D65[i*3+2] * scaleWhite[i];
-                scaledCoeffs[i*3+1] = sRGB2XYZ_D65[i*3+1] * scaleWhite[i];
-                scaledCoeffs[i*3+2] = sRGB2XYZ_D65[i*3+0] * scaleWhite[i];
-            }
-
-            softfloat S0 = scaledCoeffs[0], S1 = scaledCoeffs[1], S2 = scaledCoeffs[2],
-                      S3 = scaledCoeffs[3], S4 = scaledCoeffs[4], S5 = scaledCoeffs[5],
-                      S6 = scaledCoeffs[6], S7 = scaledCoeffs[7], S8 = scaledCoeffs[8];
-            softfloat C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
-                      C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
-                      C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
-
-            //u, v: [-134.0, 220.0], [-140.0, 122.0]
-            static const softfloat lld(LAB_LUT_DIM - 1), f116(116), f16(16), f500(500), f200(200);
-            static const softfloat f100(100), f128(128), f256(256), lbase((int)LAB_BASE);
-            //903.3f = (29/3)^3
-            static const softfloat f9033 = softfloat(29*29*29)/softfloat(27);
-            static const softfloat f9of4 = softfloat(9)/softfloat(4);
-            static const softfloat f15(15), f3(3);
-            AutoBuffer<int16_t> RGB2Labprev(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3);
-            AutoBuffer<int16_t> RGB2Luvprev(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3);
-            for(int p = 0; p < LAB_LUT_DIM; p++)
-            {
-                for(int q = 0; q < LAB_LUT_DIM; q++)
-                {
-                    for(int r = 0; r < LAB_LUT_DIM; r++)
-                    {
-                        int idx = p*3 + q*LAB_LUT_DIM*3 + r*LAB_LUT_DIM*LAB_LUT_DIM*3;
-                        softfloat R = softfloat(p)/lld;
-                        softfloat G = softfloat(q)/lld;
-                        softfloat B = softfloat(r)/lld;
-
-                        R = applyGamma(R);
-                        G = applyGamma(G);
-                        B = applyGamma(B);
-
-                        //RGB 2 Lab LUT building
-                        {
-                            softfloat X = R*S0 + G*S1 + B*S2;
-                            softfloat Y = R*S3 + G*S4 + B*S5;
-                            softfloat Z = R*S6 + G*S7 + B*S8;
-
-                            softfloat FX = X > lthresh ? cbrt(X) : mulAdd(X, lscale, lbias);
-                            softfloat FY = Y > lthresh ? cbrt(Y) : mulAdd(Y, lscale, lbias);
-                            softfloat FZ = Z > lthresh ? cbrt(Z) : mulAdd(Z, lscale, lbias);
-
-                            softfloat L = Y > lthresh ? (f116*FY - f16) : (f9033*Y);
-                            softfloat a = f500 * (FX - FY);
-                            softfloat b = f200 * (FY - FZ);
-
-                            RGB2Labprev[idx]   = (int16_t)(cvRound(lbase*L/f100));
-                            RGB2Labprev[idx+1] = (int16_t)(cvRound(lbase*(a + f128)/f256));
-                            RGB2Labprev[idx+2] = (int16_t)(cvRound(lbase*(b + f128)/f256));
-                        }
-
-                        //RGB 2 Luv LUT building
-                        {
-                            softfloat X = R*C0 + G*C1 + B*C2;
-                            softfloat Y = R*C3 + G*C4 + B*C5;
-                            softfloat Z = R*C6 + G*C7 + B*C8;
-
-                            softfloat L = Y < lthresh ? mulAdd(Y, lscale, lbias) : cbrt(Y);
-                            L = L*f116 - f16;
-
-                            softfloat d = softfloat(4*13)/max(X + f15 * Y + f3 * Z, softfloat(FLT_EPSILON));
-                            softfloat u = L*(X*d - un);
-                            softfloat v = L*(f9of4*Y*d - vn);
-
-                            RGB2Luvprev[idx  ] = (int16_t)cvRound(lbase*L/f100);
-                            RGB2Luvprev[idx+1] = (int16_t)cvRound(lbase*(u-uLow)/uRange);
-                            RGB2Luvprev[idx+2] = (int16_t)cvRound(lbase*(v-vLow)/vRange);
-                        }
-                    }
-                }
-            }
-            for(int p = 0; p < LAB_LUT_DIM; p++)
-            {
-                for(int q = 0; q < LAB_LUT_DIM; q++)
-                {
-                    for(int r = 0; r < LAB_LUT_DIM; r++)
-                    {
-                        #define FILL(_p, _q, _r) \
-                            do {\
-                                int idxold = 0;\
-                                idxold += min(p+(_p), (int)(LAB_LUT_DIM-1))*3;\
-                                idxold += min(q+(_q), (int)(LAB_LUT_DIM-1))*LAB_LUT_DIM*3;\
-                                idxold += min(r+(_r), (int)(LAB_LUT_DIM-1))*LAB_LUT_DIM*LAB_LUT_DIM*3;\
-                                int idxnew = p*3*8 + q*LAB_LUT_DIM*3*8 + r*LAB_LUT_DIM*LAB_LUT_DIM*3*8+4*(_p)+2*(_q)+(_r);\
-                                RGB2LabLUT_s16[idxnew]    = RGB2Labprev[idxold];\
-                                RGB2LabLUT_s16[idxnew+8]  = RGB2Labprev[idxold+1];\
-                                RGB2LabLUT_s16[idxnew+16] = RGB2Labprev[idxold+2];\
-                                RGB2LuvLUT_s16[idxnew]    = RGB2Luvprev[idxold];\
-                                RGB2LuvLUT_s16[idxnew+8]  = RGB2Luvprev[idxold+1];\
-                                RGB2LuvLUT_s16[idxnew+16] = RGB2Luvprev[idxold+2];\
-                            } while(0)
-
-                        FILL(0, 0, 0); FILL(0, 0, 1);
-                        FILL(0, 1, 0); FILL(0, 1, 1);
-                        FILL(1, 0, 0); FILL(1, 0, 1);
-                        FILL(1, 1, 0); FILL(1, 1, 1);
-
-                        #undef FILL
-                    }
-                }
-            }
+            LABLUVLUTs16 = initLUTforLABLUVs16(un, vn);

            for(int16_t p = 0; p < TRILINEAR_BASE; p++)
            {
@ -6199,7 +6215,7 @@ static void initLabTabs()


 // cx, cy, cz are in [0; LAB_BASE]
-static inline void trilinearInterpolate(int cx, int cy, int cz, int16_t* LUT,
+static inline void trilinearInterpolate(int cx, int cy, int cz, const int16_t* LUT,
                                        int& a, int& b, int& c)
 {
    //LUT idx of origin pt of cube
@ -6207,7 +6223,7 @@ static inline void trilinearInterpolate(int cx, int cy, int cz, int16_t* LUT,
    int ty = cy >> (lab_base_shift - lab_lut_shift);
    int tz = cz >> (lab_base_shift - lab_lut_shift);

-    int16_t* baseLUT = &LUT[3*8*tx + (3*8*LAB_LUT_DIM)*ty + (3*8*LAB_LUT_DIM*LAB_LUT_DIM)*tz];
+    const int16_t* baseLUT = &LUT[3*8*tx + (3*8*LAB_LUT_DIM)*ty + (3*8*LAB_LUT_DIM*LAB_LUT_DIM)*tz];
    int aa[8], bb[8], cc[8];
    for(int i = 0; i < 8; i++)
    {
@ -6500,7 +6516,7 @@ struct RGB2Lab_f
                    v_uint16x8 uibvec = v_reinterpret_as_u16(ibvec);

                    v_uint16x8 ui_lvec, ui_avec, ui_bvec;
-                    trilinearPackedInterpolate(uirvec, uigvec, uibvec, RGB2LabLUT_s16, ui_lvec, ui_avec, ui_bvec);
+                    trilinearPackedInterpolate(uirvec, uigvec, uibvec, LABLUVLUTs16.RGB2LabLUT_s16, ui_lvec, ui_avec, ui_bvec);
                    v_int16x8 i_lvec = v_reinterpret_as_s16(ui_lvec);
                    v_int16x8 i_avec = v_reinterpret_as_s16(ui_avec);
                    v_int16x8 i_bvec = v_reinterpret_as_s16(ui_bvec);
@ -6541,7 +6557,7 @@ struct RGB2Lab_f

                int iR = cvRound(R*LAB_BASE), iG = cvRound(G*LAB_BASE), iB = cvRound(B*LAB_BASE);
                int iL, ia, ib;
-                trilinearInterpolate(iR, iG, iB, RGB2LabLUT_s16, iL, ia, ib);
+                trilinearInterpolate(iR, iG, iB, LABLUVLUTs16.RGB2LabLUT_s16, iL, ia, ib);
                float L = iL*1.0f/LAB_BASE, a = ia*1.0f/LAB_BASE, b = ib*1.0f/LAB_BASE;

                dst[i] = L*100.0f;
@ -8121,8 +8137,8 @@ struct RGB2Luvinterpolate
                    trilinearInterpolate(R, G, B, RGB2LuvLUT_s16, L, u, v);
                    */
                v_uint16x8 l80, u80, v80, l81, u81, v81;
-                trilinearPackedInterpolate(r80, g80, b80, RGB2LuvLUT_s16, l80, u80, v80);
-                trilinearPackedInterpolate(r81, g81, b81, RGB2LuvLUT_s16, l81, u81, v81);
+                trilinearPackedInterpolate(r80, g80, b80, LABLUVLUTs16.RGB2LuvLUT_s16, l80, u80, v80);
+                trilinearPackedInterpolate(r81, g81, b81, LABLUVLUTs16.RGB2LuvLUT_s16, l81, u81, v81);

                /*
                    dst[i] = saturate_cast<uchar>(L/baseDiv);
@ -8148,7 +8164,7 @@ struct RGB2Luvinterpolate
            R = R*baseDiv, G = G*baseDiv, B = B*baseDiv;

            int L, u, v;
-            trilinearInterpolate(R, G, B, RGB2LuvLUT_s16, L, u, v);
+            trilinearInterpolate(R, G, B, LABLUVLUTs16.RGB2LuvLUT_s16, L, u, v);

            dst[i] = saturate_cast<uchar>(L/baseDiv);
            dst[i+1] = saturate_cast<uchar>(u/baseDiv);
@ -8176,7 +8192,6 @@ struct RGB2Luv_b
                            && enableBitExactness
                            && enableRGB2LuvInterpolation);

-        static const softfloat f255(255);
        #if CV_NEON
        v_scale_inv = vdupq_n_f32(softfloat::one()/f255);
        v_scale = vdupq_n_f32(f255/softfloat(100));
@ -8229,7 +8244,6 @@ struct RGB2Luv_b
        int i, j, scn = srccn;
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

-        static const softfloat f255(255);
        #if CV_SSE2
        __m128 v_coeffs = _mm_set_ps(f255/softfloat(100), f255/vRange, f255/uRange, f255/softfloat(100));
        __m128 v_res = _mm_set_ps(0.f, -vLow*f255/vRange, -uLow*f255/uRange, 0.f);
@ -8425,8 +8439,8 @@ struct Luv2RGBinteger
        // y : [0, BASE]
        // up: [-402, 1431.57]*(BASE/1024)
        // vp: +/- 0.25*BASE*1024
-        int up = LuToUp_b[LL*256+uu];
-        int vp = LvToVp_b[LL*256+vv];
+        int up = LUVLUT.LuToUp_b[LL*256+uu];
+        int vp = LUVLUT.LvToVp_b[LL*256+vv];
        //X = y*3.f* up/((float)BASE/1024) *vp/((float)BASE*1024);
        //Z = y*(((12.f*13.f)*((float)LL)*100.f/255.f - up/((float)BASE))*vp/((float)BASE*1024) - 5.f);

@ -8434,7 +8448,7 @@ struct Luv2RGBinteger
        int x = (int)(xv/BASE);
        x = y*x/BASE;

-        long long int vpl = LvToVpl_b[LL*256+vv];
+        long long int vpl = LUVLUT.LvToVpl_b[LL*256+vv];
        long long int zp = vpl - xv*(255/3);
        zp /= BASE;
        long long int zq = zp - (long long)(5*255*BASE);
@ -8474,11 +8488,11 @@ struct Luv2RGBinteger
            int v = vvstore[i];
            int y = LabToYF_b[LL*2];

-            int up = LuToUp_b[LL*256+u];
-            int vp = LvToVp_b[LL*256+v];
+            int up = LUVLUT.LuToUp_b[LL*256+u];
+            int vp = LUVLUT.LvToVp_b[LL*256+v];

            long long int xv = up*(long long int)vp;
-            long long int vpl = LvToVpl_b[LL*256+v];
+            long long int vpl = LUVLUT.LvToVpl_b[LL*256+v];
            long long int zp = vpl - xv*(255/3);
            zp = zp >> base_shift;
            long long int zq = zp - (5*255*BASE);
@ -8619,7 +8633,6 @@ struct Luv2RGB_b
        uchar alpha = ColorChannel<uchar>::max();
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

-        static const softfloat f255(255);
        static const softfloat fl = softfloat(100)/f255;
        static const softfloat fu = uRange/f255;
        static const softfloat fv = vRange/f255;
@ -9842,9 +9855,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
            static UMat usRGBGammaTab, ucoeffs, uLabCbrtTab;

            if (srgb && usRGBGammaTab.empty())
-                Mat(1, GAMMA_TAB_SIZE * 4, CV_32FC1, sRGBGammaTab).copyTo(usRGBGammaTab);
+                Mat(1, GAMMA_TAB_SIZE * 4, CV_32FC1, const_cast<float*>(sRGBGammaTab)).copyTo(usRGBGammaTab);
            if (!lab && uLabCbrtTab.empty())
-                Mat(1, LAB_CBRT_TAB_SIZE * 4, CV_32FC1, LabCbrtTab).copyTo(uLabCbrtTab);
+                Mat(1, LAB_CBRT_TAB_SIZE * 4, CV_32FC1, const_cast<float*>(LabCbrtTab)).copyTo(uLabCbrtTab);

            {
                float coeffs[9];
@ -9930,7 +9943,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
        static UMat ucoeffs, usRGBInvGammaTab;

        if (srgb && usRGBInvGammaTab.empty())
-            Mat(1, GAMMA_TAB_SIZE*4, CV_32FC1, sRGBInvGammaTab).copyTo(usRGBInvGammaTab);
+            Mat(1, GAMMA_TAB_SIZE*4, CV_32FC1, const_cast<float*>(sRGBInvGammaTab)).copyTo(usRGBInvGammaTab);

        {
            float coeffs[9];