From 33a3fba2d1d448166e03a4f83864adc33a657af9 Mon Sep 17 00:00:00 2001
From: Olexa Bilaniuk
Date: Mon, 12 Jan 2015 04:58:07 -0500
Subject: [PATCH] Work on LevMarq code. Refactoring of Cholesky decomposition.
 Fix for memory corruption bug. LevMarq as a whole still non-functional.

---
 modules/calib3d/src/rhorefc.cpp | 349 ++++++++++++++++++++------------
 modules/calib3d/src/rhorefc.h   |   1 -
 2 files changed, 218 insertions(+), 132 deletions(-)

diff --git a/modules/calib3d/src/rhorefc.cpp b/modules/calib3d/src/rhorefc.cpp
index cda70f4162..1da916d9b1 100644
--- a/modules/calib3d/src/rhorefc.cpp
+++ b/modules/calib3d/src/rhorefc.cpp
@@ -132,24 +132,25 @@ static inline unsigned sacCalcIterBound(double confidence,
 static inline void   hFuncRefC(float* packedPoints, float* H);
 static inline int    sacCanRefine(RHO_HEST_REFC* p);
 static inline void   sacRefine(RHO_HEST_REFC* p);
-static inline void   sacCalcJtMats(float (* restrict JtJ)[8],
-                                   float*  restrict Jte,
-                                   float*  restrict Sp,
-                                   const float* restrict H,
-                                   const float* restrict src,
-                                   const float* restrict dst,
-                                   const char*  restrict inl,
-                                   unsigned N);
-static inline void   sacChol8x8  (const float (*A)[8],
-                                  float (*L)[8]);
+static inline void   sacCalcJacobianErrors(const float* restrict H,
+                                           const float* restrict src,
+                                           const float* restrict dst,
+                                           const char*  restrict inl,
+                                           unsigned N,
+                                           float (* restrict JtJ)[8],
+                                           float*  restrict Jte,
+                                           float*  restrict Sp);
+static inline float  sacDs(const float (*JtJ)[8],
+                           const float* dH,
+                           const float* Jte);
+static inline int    sacChol8x8Damped (const float (*A)[8],
+                                       float lambda,
+                                       float (*L)[8]);
 static inline void   sacTRInv8x8(const float (*L)[8],
                                  float (*M)[8]);
 static inline void   sacTRISolve8x8(const float (*L)[8],
                                     const float* Jte,
                                     float* dH);
-static inline void   sacScaleDiag8x8(const float (*A)[8],
-                                     float lambda,
-                                     float (*B)[8]);
 static inline void   sacSub8x1(float* Hout,
                                const float* H,
                                const float* dH);
@@ -189,6 +190,11 @@ int rhoRefCInit(RHO_HEST_REFC* p){
     p->nr.size    = 0;
     p->nr.beta    = 0.0;
 
+    p->lm.ws      = NULL;
+    p->lm.JtJ     = NULL;
+    p->lm.tmp1    = NULL;
+    p->lm.Jte     = NULL;
+
     int areAllAllocsSuccessful = p->ctrl.smpl &&
                                  p->curr.H    &&
@@ -502,10 +508,30 @@ static inline int sacInitRun(RHO_HEST_REFC* p){
         return 0;
     }
 
+    /**
+     * LevMarq workspace alloc.
+     *
+     * Runs third, consists only in a few conditional mallocs. If malloc fails
+     * we wish to quit before doing any serious work.
+     */
+
+    if(sacIsRefineEnabled(p) || sacIsFinalRefineEnabled(p)){
+        p->lm.ws  = (float*)almalloc(2*8*8*sizeof(float) + 1*8*sizeof(float));
+        if(!p->lm.ws){
+            return 0;
+        }
+
+        p->lm.JtJ  = (float(*)[8])(p->lm.ws + 0*8*8);
+        p->lm.tmp1 = (float(*)[8])(p->lm.ws + 1*8*8);
+        p->lm.Jte  = (float*)    (p->lm.ws + 2*8*8);
+    }else{
+        p->lm.ws = NULL;
+    }
+
     /**
      * Reset scalar per-run state.
      *
-     * Runs third because there's no point in resetting/calculating a large
+     * Runs fourth because there's no point in resetting/calculating a large
      * number of fields if something in the above junk failed.
      */
 
@@ -570,6 +596,13 @@ static inline void sacFiniRun(RHO_HEST_REFC* p){
 
     p->best.inl = NULL;
     p->curr.inl = NULL;
+
+    /**
+     * Free the Levenberg-Marquardt workspace.
+     */
+
+    alfree(p->lm.ws);
+    p->lm.ws = NULL;
 }
 
 /**
@@ -912,6 +945,11 @@ static inline void sacEvaluateModelSPRT(RHO_HEST_REFC* p){
  *
  * If a "good" model that is also the best was encountered, update epsilon,
  * since
+ *
+ * Reads:    eval.good, eval.delta, eval.epsilon, eval.t_M, eval.m_S,
+ *           curr.numInl, best.numInl
+ * Writes:   eval.epsilon, eval.delta, eval.A, eval.lambdaAccept,
+ *           eval.lambdaReject
  */
 
 static inline void sacUpdateSPRT(RHO_HEST_REFC* p){
@@ -1032,9 +1070,11 @@ static inline void sacSaveBestModel(RHO_HEST_REFC* p){
     float*   H      = p->curr.H;
     char*    inl    = p->curr.inl;
     unsigned numInl = p->curr.numInl;
+
     p->curr.H       = p->best.H;
     p->curr.inl     = p->best.inl;
     p->curr.numInl  = p->best.numInl;
+
     p->best.H       = H;
     p->best.inl     = inl;
     p->best.numInl  = numInl;
@@ -1538,18 +1578,47 @@ static inline int sacCanRefine(RHO_HEST_REFC* p){
  */
 
 static inline void sacRefine(RHO_HEST_REFC* p){
-    int   i, j;
-    float S;    /* Sum of squared errors */
-    float L = 1;/* Lambda of LevMarq */
+    int   i = 0;
+    float S = 0, newS = 0, dS = 0;/* Sum of squared errors */
+    float R = 0;                  /* R - Parameter */
+    float L = 1.0f;               /* Lambda of LevMarq */
+    float dH[8], newH[8];
 
+    /**
+     * Iteratively refine the homography.
+     */
+
+    /* Find initial conditions */
+    sacCalcJacobianErrors(p->best.H, p->arg.src, p->arg.dst, p->arg.inl, p->arg.N,
+                          p->lm.JtJ, p->lm.Jte, &S);
+
+    /* Levenberg-Marquardt Loop */
     for(i=0;i<MAXLEVMARQITERS;i++){
-        sacCalcJtMats(p->lm.JtJ, p->lm.Jte, &S,
-                      p->best.H, p->arg.src, p->arg.dst, p->arg.inl, p->arg.N);
-        sacScaleDiag8x8(p->lm.JtJ, L, p->lm.JtJ);
-        sacChol8x8(p->lm.JtJ, p->lm.JtJ);
-        sacTRInv8x8(p->lm.JtJ, p->lm.JtJ);
-        sacTRISolve8x8(p->lm.JtJ, p->lm.Jte, dH);
-        sacSub8x1(p->best.H, p->best.H, dH);
+        /* The code below becomes an infinite loop when L reaches infinity.
+        while(sacChol8x8Damped(p->lm.JtJ, L, p->lm.tmp1)){
+            L *= 2.0f;
+        }*/
+        sacChol8x8Damped(p->lm.JtJ, L, p->lm.tmp1);
+        sacTRInv8x8   (p->lm.tmp1, p->lm.tmp1);
+        sacTRISolve8x8(p->lm.tmp1, p->lm.Jte, dH);
+        sacSub8x1     (newH, p->best.H, dH);
+        sacCalcJacobianErrors(newH, p->arg.src, p->arg.dst, p->arg.inl, p->arg.N,
+                              NULL, NULL, &newS);
+        dS = sacDs    (p->lm.JtJ, dH, p->lm.Jte);
+        R  = (S - newS)/(fabs(dS) > DBL_EPSILON ? dS : 1);/* Don't understand */
+
+        if(R > 0.75f){
+            L *= 0.5f;
+        }else if(R < 0.25f){
+            L *= 2.0f;
+        }
+
+        if(newS < S){
+            S = newS;
+            memcpy(p->best.H, newH, sizeof(newH));
+            sacCalcJacobianErrors(p->best.H, p->arg.src, p->arg.dst, p->arg.inl, p->arg.N,
+                                  p->lm.JtJ, p->lm.Jte, &S);
+        }
     }
 }
 
@@ -1564,31 +1633,22 @@ static inline void sacRefine(RHO_HEST_REFC* p){
  *
  * What this allows is a constant-space implementation of Lev-Marq that can
  * nevertheless be vectorized if need be.
- *
- * @param JtJ
- * @param Jte
- * @param Sp
- * @param H
- * @param src
- * @param dst
- * @param inl
- * @param N
  */
 
-static inline void sacCalcJtMats(float (* restrict JtJ)[8],
-                                 float*  restrict Jte,
-                                 float*  restrict Sp,
-                                 const float* restrict H,
-                                 const float* restrict src,
-                                 const float* restrict dst,
-                                 const char*  restrict inl,
-                                 unsigned N){
+static inline void sacCalcJacobianErrors(const float* restrict H,
+                                         const float* restrict src,
+                                         const float* restrict dst,
+                                         const char*  restrict inl,
+                                         unsigned N,
+                                         float (* restrict JtJ)[8],
+                                         float*  restrict Jte,
+                                         float*  restrict Sp){
     unsigned i;
     float    S;
 
     /* Zero out JtJ, Jte and S */
-    memset(JtJ, 0, 8*8*sizeof(*JtJ));
-    memset(Jte, 0, 8*1*sizeof(*Jte));
+    if(JtJ){memset(JtJ, 0, 8*8*sizeof(float));}
+    if(Jte){memset(Jte, 0, 8*1*sizeof(float));}
     S = 0.0f;
 
     /* Additively compute JtJ and Jte */
@@ -1633,81 +1693,119 @@ static inline void sacCalcJtMats(float (* restrict JtJ)[8],
         S += e;
 
         /* Compute Jacobian */
-        float dxh11 = x  * iW;
-        float dxh12 = y  * iW;
-        float dxh13 =      iW;
-        float dxh21 = 0.0f;
-        float dxh22 = 0.0f;
-        float dxh23 = 0.0f;
-        float dxh31 = -reprojX*x * iW;
-        float dxh32 = -reprojX*y * iW;
+        if(JtJ || Jte){
+            float dxh11 = x  * iW;
+            float dxh12 = y  * iW;
+            float dxh13 =      iW;
+            float dxh21 = 0.0f;
+            float dxh22 = 0.0f;
+            float dxh23 = 0.0f;
+            float dxh31 = -reprojX*x * iW;
+            float dxh32 = -reprojX*y * iW;
 
-        float dyh11 = 0.0f;
-        float dyh12 = 0.0f;
-        float dyh13 = 0.0f;
-        float dyh21 = x  * iW;
-        float dyh22 = y  * iW;
-        float dyh23 =      iW;
-        float dyh31 = -reprojY*x * iW;
-        float dyh32 = -reprojY*y * iW;
+            float dyh11 = 0.0f;
+            float dyh12 = 0.0f;
+            float dyh13 = 0.0f;
+            float dyh21 = x  * iW;
+            float dyh22 = y  * iW;
+            float dyh23 =      iW;
+            float dyh31 = -reprojY*x * iW;
+            float dyh32 = -reprojY*y * iW;
 
-        /* Update Jte:   X             Y     */
-        Jte[0]    += eX *dxh11      + eY *dyh11;
-        Jte[1]    += eX *dxh12      + eY *dyh12;
-        Jte[2]    += eX *dxh13      + eY *dyh13;
-        Jte[3]    += eX *dxh21      + eY *dyh21;
-        Jte[4]    += eX *dxh22      + eY *dyh22;
-        Jte[5]    += eX *dxh23      + eY *dyh23;
-        Jte[6]    += eX *dxh31      + eY *dyh31;
-        Jte[7]    += eX *dxh32      + eY *dyh32;
+            /* Update Jte:   X             Y     */
+            if(Jte){
+                Jte[0]    += eX *dxh11      + eY *dyh11;
+                Jte[1]    += eX *dxh12      + eY *dyh12;
+                Jte[2]    += eX *dxh13      + eY *dyh13;
+                Jte[3]    += eX *dxh21      + eY *dyh21;
+                Jte[4]    += eX *dxh22      + eY *dyh22;
+                Jte[5]    += eX *dxh23      + eY *dyh23;
+                Jte[6]    += eX *dxh31      + eY *dyh31;
+                Jte[7]    += eX *dxh32      + eY *dyh32;
+            }
 
-        /* Update JtJ:   X             Y     */
-        JtJ[0][0] += dxh11*dxh11    + dyh11*dyh11;
+            /* Update JtJ:   X             Y     */
+            if(JtJ){
+                JtJ[0][0] += dxh11*dxh11    + dyh11*dyh11;
 
-        JtJ[1][0] += dxh11*dxh12    + dyh11*dyh12;
-        JtJ[1][1] += dxh12*dxh12    + dyh12*dyh12;
+                JtJ[1][0] += dxh11*dxh12    + dyh11*dyh12;
+                JtJ[1][1] += dxh12*dxh12    + dyh12*dyh12;
 
-        JtJ[2][0] += dxh11*dxh13    + dyh11*dyh13;
-        JtJ[2][1] += dxh12*dxh13    + dyh12*dyh13;
-        JtJ[2][2] += dxh13*dxh13    + dyh13*dyh13;
+                JtJ[2][0] += dxh11*dxh13    + dyh11*dyh13;
+                JtJ[2][1] += dxh12*dxh13    + dyh12*dyh13;
+                JtJ[2][2] += dxh13*dxh13    + dyh13*dyh13;
 
-        JtJ[3][0] += dxh11*dxh21    + dyh11*dyh21;
-        JtJ[3][1] += dxh12*dxh21    + dyh12*dyh21;
-        JtJ[3][2] += dxh13*dxh21    + dyh13*dyh21;
-        JtJ[3][3] += dxh21*dxh21    + dyh21*dyh21;
+                JtJ[3][0] += dxh11*dxh21    + dyh11*dyh21;
+                JtJ[3][1] += dxh12*dxh21    + dyh12*dyh21;
+                JtJ[3][2] += dxh13*dxh21    + dyh13*dyh21;
+                JtJ[3][3] += dxh21*dxh21    + dyh21*dyh21;
 
-        JtJ[4][0] += dxh11*dxh22    + dyh11*dyh22;
-        JtJ[4][1] += dxh12*dxh22    + dyh12*dyh22;
-        JtJ[4][2] += dxh13*dxh22    + dyh13*dyh22;
-        JtJ[4][3] += dxh21*dxh22    + dyh21*dyh22;
-        JtJ[4][4] += dxh22*dxh22    + dyh22*dyh22;
+                JtJ[4][0] += dxh11*dxh22    + dyh11*dyh22;
+                JtJ[4][1] += dxh12*dxh22    + dyh12*dyh22;
+                JtJ[4][2] += dxh13*dxh22    + dyh13*dyh22;
+                JtJ[4][3] += dxh21*dxh22    + dyh21*dyh22;
+                JtJ[4][4] += dxh22*dxh22    + dyh22*dyh22;
 
-        JtJ[5][0] += dxh11*dxh23    + dyh11*dyh23;
-        JtJ[5][1] += dxh12*dxh23    + dyh12*dyh23;
-        JtJ[5][2] += dxh13*dxh23    + dyh13*dyh23;
-        JtJ[5][3] += dxh21*dxh23    + dyh21*dyh23;
-        JtJ[5][4] += dxh22*dxh23    + dyh22*dyh23;
-        JtJ[5][5] += dxh23*dxh23    + dyh23*dyh23;
+                JtJ[5][0] += dxh11*dxh23    + dyh11*dyh23;
+                JtJ[5][1] += dxh12*dxh23    + dyh12*dyh23;
+                JtJ[5][2] += dxh13*dxh23    + dyh13*dyh23;
+                JtJ[5][3] += dxh21*dxh23    + dyh21*dyh23;
+                JtJ[5][4] += dxh22*dxh23    + dyh22*dyh23;
+                JtJ[5][5] += dxh23*dxh23    + dyh23*dyh23;
 
-        JtJ[6][0] += dxh11*dxh31    + dyh11*dyh31;
-        JtJ[6][1] += dxh12*dxh31    + dyh12*dyh31;
-        JtJ[6][2] += dxh13*dxh31    + dyh13*dyh31;
-        JtJ[6][3] += dxh21*dxh31    + dyh21*dyh31;
-        JtJ[6][4] += dxh22*dxh31    + dyh22*dyh31;
-        JtJ[6][5] += dxh23*dxh31    + dyh23*dyh31;
-        JtJ[6][6] += dxh31*dxh31    + dyh31*dyh31;
+                JtJ[6][0] += dxh11*dxh31    + dyh11*dyh31;
+                JtJ[6][1] += dxh12*dxh31    + dyh12*dyh31;
+                JtJ[6][2] += dxh13*dxh31    + dyh13*dyh31;
+                JtJ[6][3] += dxh21*dxh31    + dyh21*dyh31;
+                JtJ[6][4] += dxh22*dxh31    + dyh22*dyh31;
+                JtJ[6][5] += dxh23*dxh31    + dyh23*dyh31;
+                JtJ[6][6] += dxh31*dxh31    + dyh31*dyh31;
 
-        JtJ[7][0] += dxh11*dxh32    + dyh11*dyh32;
-        JtJ[7][1] += dxh12*dxh32    + dyh12*dyh32;
-        JtJ[7][2] += dxh13*dxh32    + dyh13*dyh32;
-        JtJ[7][3] += dxh21*dxh32    + dyh21*dyh32;
-        JtJ[7][4] += dxh22*dxh32    + dyh22*dyh32;
-        JtJ[7][5] += dxh23*dxh32    + dyh23*dyh32;
-        JtJ[7][6] += dxh31*dxh32    + dyh31*dyh32;
-        JtJ[7][7] += dxh32*dxh32    + dyh32*dyh32;
+                JtJ[7][0] += dxh11*dxh32    + dyh11*dyh32;
+                JtJ[7][1] += dxh12*dxh32    + dyh12*dyh32;
+                JtJ[7][2] += dxh13*dxh32    + dyh13*dyh32;
+                JtJ[7][3] += dxh21*dxh32    + dyh21*dyh32;
+                JtJ[7][4] += dxh22*dxh32    + dyh22*dyh32;
+                JtJ[7][5] += dxh23*dxh32    + dyh23*dyh32;
+                JtJ[7][6] += dxh31*dxh32    + dyh31*dyh32;
+                JtJ[7][7] += dxh32*dxh32    + dyh32*dyh32;
+            }
+        }
     }
 
-    *Sp = S;
+    if(Sp){*Sp = S;}
+}
+
+/**
+ * Compute the derivative of the rate of change of the SSE.
+ *
+ * Inspired entirely by OpenCV's levmarq.cpp. To be reviewed.
+ */
+
+static inline float sacDs(const float (*JtJ)[8],
+                          const float* dH,
+                          const float* Jte){
+    float tdH[8];
+    int   i, j;
+    float dS = 0;
+
+    /* Perform tdH = -JtJ*dH + 2*Jte. */
+    for(i=0;i<8;i++){
+        tdH[i] = 0;
+
+        for(j=0;j<8;j++){
+            tdH[i] -= JtJ[i][j] * dH[j];
+        }
+
+        tdH[i] += 2*Jte[i];
+    }
+
+    /* Perform dS = dH.dot(tdH). */
+    for(i=0;i<8;i++){
+        dS += dH[i]*tdH[i];
+    }
+
+    return dS;
 }
 
 /**
@@ -1718,14 +1816,20 @@ static inline void sacCalcJtMats(float (* restrict JtJ)[8],
  * A and L can overlap fully (in-place) or not at all, but may not partially
  * overlap.
  *
+ * For damping, the diagonal elements are scaled by 1.0 + lambda.
+ *
+ * Returns 0 if decomposition successful, and non-zero otherwise.
+ *
  * Source: http://en.wikipedia.org/wiki/Cholesky_decomposition#
  *         The_Cholesky.E2.80.93Banachiewicz_and_Cholesky.E2.80.93Crout_algorithms
  */
 
-static inline void sacChol8x8(const float (*A)[8],
-                              float (*L)[8]){
+static inline int  sacChol8x8Damped(const float (*A)[8],
+                                    float lambda,
+                                    float (*L)[8]){
     const register int N = 8;
     int i, j, k;
+    float lambdap1 = lambda + 1.0f;
     double x;
 
     for(i=0;i