opencv/modules/legacy/src/spilltree.cpp

499 lines
14 KiB
C++
Raw Normal View History

/* Original code has been submitted by Liu Liu.
----------------------------------------------------------------------------------
* Spill-Tree for Approximate KNN Search
* Author: Liu Liu
* mailto: liuliu.1987+opencv@gmail.com
* Refer to Paper:
* An Investigation of Practical Approximate Nearest Neighbor Algorithms
* cvMergeSpillTree TBD
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
* The name of Contributor may not be used to endorse or
* promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*/
#include "precomp.hpp"
#include "_featuretree.h"
struct CvSpillTreeNode
{
bool leaf; // is leaf or not (leaf is the point that have no more child)
bool spill; // is not a non-overlapping point (defeatist search)
CvSpillTreeNode* lc; // left child (<)
CvSpillTreeNode* rc; // right child (>)
int cc; // child count
CvMat* u; // projection vector
CvMat* center; // center
int i; // original index
double r; // radius of remaining feature point
double ub; // upper bound
double lb; // lower bound
double mp; // mean point
double p; // projection value
};
struct CvSpillTree
2012-10-17 15:12:04 +08:00
{
CvSpillTreeNode* root;
CvMat** refmat; // leaf ref matrix
int total; // total leaves
int naive; // under this value, we perform naive search
int type; // mat type
double rho; // under this value, it is a spill tree
double tau; // the overlapping buffer ratio
};
struct CvResult
{
2012-10-17 15:12:04 +08:00
int index;
double distance;
};
// find the farthest node in the "list" from "node"
static inline CvSpillTreeNode*
icvFarthestNode( CvSpillTreeNode* node,
2012-10-17 15:12:04 +08:00
CvSpillTreeNode* list,
int total )
{
double farthest = -1.;
CvSpillTreeNode* result = NULL;
for ( int i = 0; i < total; i++ )
{
double norm = cvNorm( node->center, list->center );
if ( norm > farthest )
2012-10-17 15:12:04 +08:00
{
farthest = norm;
result = list;
}
list = list->rc;
}
return result;
}
// clone a new tree node
static inline CvSpillTreeNode*
icvCloneSpillTreeNode( CvSpillTreeNode* node )
{
CvSpillTreeNode* result = (CvSpillTreeNode*)cvAlloc( sizeof(CvSpillTreeNode) );
memcpy( result, node, sizeof(CvSpillTreeNode) );
return result;
}
// append the link-list of a tree node
static inline void
icvAppendSpillTreeNode( CvSpillTreeNode* node,
2012-10-17 15:12:04 +08:00
CvSpillTreeNode* append )
{
if ( node->lc == NULL )
{
node->lc = node->rc = append;
node->lc->lc = node->rc->rc = NULL;
} else {
append->lc = node->rc;
append->rc = NULL;
node->rc->rc = append;
node->rc = append;
}
node->cc++;
}
#define _dispatch_mat_ptr(x, step) (CV_MAT_DEPTH((x)->type) == CV_32F ? (void*)((x)->data.fl+(step)) : (CV_MAT_DEPTH((x)->type) == CV_64F ? (void*)((x)->data.db+(step)) : (void*)(0)))
static void
icvDFSInitSpillTreeNode( const CvSpillTree* tr,
2012-10-17 15:12:04 +08:00
const int d,
CvSpillTreeNode* node )
{
if ( node->cc <= tr->naive )
{
// already get to a leaf, terminate the recursion.
node->leaf = true;
node->spill = false;
return;
}
// random select a node, then find a farthest node from this one, then find a farthest from that one...
// to approximate the farthest node-pair
static CvRNG rng_state = cvRNG(0xdeadbeef);
int rn = cvRandInt( &rng_state ) % node->cc;
CvSpillTreeNode* lnode = NULL;
CvSpillTreeNode* rnode = node->lc;
for ( int i = 0; i < rn; i++ )
rnode = rnode->rc;
lnode = icvFarthestNode( rnode, node->lc, node->cc );
rnode = icvFarthestNode( lnode, node->lc, node->cc );
// u is the projection vector
node->u = cvCreateMat( 1, d, tr->type );
cvSub( lnode->center, rnode->center, node->u );
cvNormalize( node->u, node->u );
2012-10-17 15:12:04 +08:00
// find the center of node in hyperspace
node->center = cvCreateMat( 1, d, tr->type );
cvZero( node->center );
CvSpillTreeNode* it = node->lc;
for ( int i = 0; i < node->cc; i++ )
{
cvAdd( it->center, node->center, node->center );
it = it->rc;
}
cvConvertScale( node->center, node->center, 1./node->cc );
// project every node to "u", and find the mean point "mp"
it = node->lc;
node->r = -1.;
node->mp = 0;
for ( int i = 0; i < node->cc; i++ )
{
node->mp += ( it->p = cvDotProduct( it->center, node->u ) );
double norm = cvNorm( node->center, it->center );
if ( norm > node->r )
2012-10-17 15:12:04 +08:00
node->r = norm;
it = it->rc;
}
node->mp = node->mp / node->cc;
// overlapping buffer and upper bound, lower bound
double ob = (lnode->p-rnode->p)*tr->tau*.5;
node->ub = node->mp+ob;
node->lb = node->mp-ob;
int sl = 0, l = 0;
int sr = 0, r = 0;
it = node->lc;
for ( int i = 0; i < node->cc; i++ )
{
if ( it->p <= node->ub )
2012-10-17 15:12:04 +08:00
sl++;
if ( it->p >= node->lb )
2012-10-17 15:12:04 +08:00
sr++;
if ( it->p < node->mp )
2012-10-17 15:12:04 +08:00
l++;
else
2012-10-17 15:12:04 +08:00
r++;
it = it->rc;
}
// precision problem, return the node as it is.
if (( l == 0 )||( r == 0 ))
{
cvReleaseMat( &(node->u) );
cvReleaseMat( &(node->center) );
node->leaf = true;
node->spill = false;
return;
}
CvSpillTreeNode* lc = (CvSpillTreeNode*)cvAlloc( sizeof(CvSpillTreeNode) );
memset(lc, 0, sizeof(CvSpillTreeNode));
CvSpillTreeNode* rc = (CvSpillTreeNode*)cvAlloc( sizeof(CvSpillTreeNode) );
memset(rc, 0, sizeof(CvSpillTreeNode));
lc->lc = lc->rc = rc->lc = rc->rc = NULL;
lc->cc = rc->cc = 0;
int undo = cvRound(node->cc*tr->rho);
if (( sl >= undo )||( sr >= undo ))
{
// it is not a spill point (defeatist search disabled)
it = node->lc;
for ( int i = 0; i < node->cc; i++ )
2012-10-17 15:12:04 +08:00
{
CvSpillTreeNode* next = it->rc;
if ( it->p < node->mp )
icvAppendSpillTreeNode( lc, it );
else
icvAppendSpillTreeNode( rc, it );
it = next;
}
node->spill = false;
} else {
// a spill point
it = node->lc;
for ( int i = 0; i < node->cc; i++ )
2012-10-17 15:12:04 +08:00
{
CvSpillTreeNode* next = it->rc;
if ( it->p < node->lb )
icvAppendSpillTreeNode( lc, it );
else if ( it->p > node->ub )
icvAppendSpillTreeNode( rc, it );
else {
CvSpillTreeNode* cit = icvCloneSpillTreeNode( it );
icvAppendSpillTreeNode( lc, it );
icvAppendSpillTreeNode( rc, cit );
}
it = next;
}
node->spill = true;
}
node->lc = lc;
node->rc = rc;
// recursion process
icvDFSInitSpillTreeNode( tr, d, node->lc );
icvDFSInitSpillTreeNode( tr, d, node->rc );
}
static CvSpillTree*
icvCreateSpillTree( const CvMat* raw_data,
2012-10-17 15:12:04 +08:00
const int naive,
const double rho,
const double tau )
{
int n = raw_data->rows;
int d = raw_data->cols;
CvSpillTree* tr = (CvSpillTree*)cvAlloc( sizeof(CvSpillTree) );
tr->root = (CvSpillTreeNode*)cvAlloc( sizeof(CvSpillTreeNode) );
memset(tr->root, 0, sizeof(CvSpillTreeNode));
tr->refmat = (CvMat**)cvAlloc( sizeof(CvMat*)*n );
tr->total = n;
tr->naive = naive;
tr->rho = rho;
tr->tau = tau;
tr->type = raw_data->type;
// tie a link-list to the root node
tr->root->lc = (CvSpillTreeNode*)cvAlloc( sizeof(CvSpillTreeNode) );
memset(tr->root->lc, 0, sizeof(CvSpillTreeNode));
tr->root->lc->center = cvCreateMatHeader( 1, d, tr->type );
cvSetData( tr->root->lc->center, _dispatch_mat_ptr(raw_data, 0), raw_data->step );
tr->refmat[0] = tr->root->lc->center;
tr->root->lc->lc = NULL;
tr->root->lc->leaf = true;
tr->root->lc->i = 0;
CvSpillTreeNode* node = tr->root->lc;
for ( int i = 1; i < n; i++ )
{
CvSpillTreeNode* newnode = (CvSpillTreeNode*)cvAlloc( sizeof(CvSpillTreeNode) );
memset(newnode, 0, sizeof(CvSpillTreeNode));
newnode->center = cvCreateMatHeader( 1, d, tr->type );
cvSetData( newnode->center, _dispatch_mat_ptr(raw_data, i*d), raw_data->step );
tr->refmat[i] = newnode->center;
newnode->lc = node;
newnode->i = i;
newnode->leaf = true;
newnode->rc = NULL;
node->rc = newnode;
node = newnode;
}
tr->root->rc = node;
tr->root->cc = n;
icvDFSInitSpillTreeNode( tr, d, tr->root );
return tr;
}
static void
icvSpillTreeNodeHeapify( CvResult * heap,
2012-10-17 15:12:04 +08:00
int i,
const int k )
{
if ( heap[i].index == -1 )
return;
int l, r, largest = i;
CvResult inp;
do {
i = largest;
r = (i+1)<<1;
l = r-1;
if (( l < k )&&( heap[l].index == -1 ))
largest = l;
else if (( r < k )&&( heap[r].index == -1 ))
largest = r;
2012-10-17 15:12:04 +08:00
else {
if (( l < k )&&( heap[l].distance > heap[i].distance ))
largest = l;
if (( r < k )&&( heap[r].distance > heap[largest].distance ))
largest = r;
}
if ( largest != i )
CV_SWAP( heap[largest], heap[i], inp );
} while ( largest != i );
}
static void
icvSpillTreeDFSearch( CvSpillTree* tr,
2012-10-17 15:12:04 +08:00
CvSpillTreeNode* node,
CvResult* heap,
int* es,
const CvMat* desc,
const int k,
const int emax,
bool * cache)
{
if ((emax > 0)&&( *es >= emax ))
return;
double dist, p=0;
double distance;
while ( node->spill )
{
// defeatist search
if ( !node->leaf )
2012-10-17 15:12:04 +08:00
p = cvDotProduct( node->u, desc );
if ( p < node->lb && node->lc->cc >= k ) // check the number of children larger than k otherwise you'll skip over better neighbor
2012-10-17 15:12:04 +08:00
node = node->lc;
else if ( p > node->ub && node->rc->cc >= k )
2012-10-17 15:12:04 +08:00
node = node->rc;
else
2012-10-17 15:12:04 +08:00
break;
if ( NULL == node )
2012-10-17 15:12:04 +08:00
return;
}
if ( node->leaf )
{
// a leaf, naive search
CvSpillTreeNode* it = node->lc;
for ( int i = 0; i < node->cc; i++ )
{
if ( !cache[it->i] )
{
2012-10-17 15:12:04 +08:00
distance = cvNorm( it->center, desc );
cache[it->i] = true;
2012-10-17 15:12:04 +08:00
if (( heap[0].index == -1)||( distance < heap[0].distance ))
{
CvResult current_result;
current_result.index = it->i;
current_result.distance = distance;
heap[0] = current_result;
2012-10-17 15:12:04 +08:00
icvSpillTreeNodeHeapify( heap, 0, k );
(*es)++;
}
}
it = it->rc;
2012-10-17 15:12:04 +08:00
}
return;
}
dist = cvNorm( node->center, desc );
// impossible case, skip
if (( heap[0].index != -1 )&&( dist-node->r > heap[0].distance ))
return;
p = cvDotProduct( node->u, desc );
// guided dfs
if ( p < node->mp )
{
icvSpillTreeDFSearch( tr, node->lc, heap, es, desc, k, emax, cache );
icvSpillTreeDFSearch( tr, node->rc, heap, es, desc, k, emax, cache );
} else {
icvSpillTreeDFSearch( tr, node->rc, heap, es, desc, k, emax, cache );
icvSpillTreeDFSearch( tr, node->lc, heap, es, desc, k, emax, cache );
}
}
static void
icvFindSpillTreeFeatures( CvSpillTree* tr,
2012-10-17 15:12:04 +08:00
const CvMat* desc,
CvMat* results,
CvMat* dist,
const int k,
const int emax )
{
assert( desc->type == tr->type );
CvResult* heap = (CvResult*)cvAlloc( k*sizeof(heap[0]) );
bool* cache = (bool*)cvAlloc( sizeof(bool)*tr->total );
for ( int j = 0; j < desc->rows; j++ )
{
CvMat _desc = cvMat( 1, desc->cols, desc->type, _dispatch_mat_ptr(desc, j*desc->cols) );
for ( int i = 0; i < k; i++ ) {
CvResult current;
current.index=-1;
current.distance=-1;
2012-10-17 15:12:04 +08:00
heap[i] = current;
}
memset( cache, 0, sizeof(bool)*tr->total );
int es = 0;
icvSpillTreeDFSearch( tr, tr->root, heap, &es, &_desc, k, emax, cache );
CvResult inp;
for ( int i = k-1; i > 0; i-- )
2012-10-17 15:12:04 +08:00
{
CV_SWAP( heap[i], heap[0], inp );
icvSpillTreeNodeHeapify( heap, 0, i );
}
int* rs = results->data.i+j*results->cols;
double* dt = dist->data.db+j*dist->cols;
for ( int i = 0; i < k; i++, rs++, dt++ )
2012-10-17 15:12:04 +08:00
if ( heap[i].index != -1 )
{
*rs = heap[i].index;
*dt = heap[i].distance;
} else
*rs = -1;
}
cvFree( &heap );
cvFree( &cache );
}
static void
icvDFSReleaseSpillTreeNode( CvSpillTreeNode* node )
{
if ( node->leaf )
{
CvSpillTreeNode* it = node->lc;
for ( int i = 0; i < node->cc; i++ )
{
CvSpillTreeNode* s = it;
it = it->rc;
cvFree( &s );
}
} else {
cvReleaseMat( &node->u );
cvReleaseMat( &node->center );
icvDFSReleaseSpillTreeNode( node->lc );
icvDFSReleaseSpillTreeNode( node->rc );
}
cvFree( &node );
}
static void
icvReleaseSpillTree( CvSpillTree** tr )
{
for ( int i = 0; i < (*tr)->total; i++ )
cvReleaseMat( &((*tr)->refmat[i]) );
cvFree( &((*tr)->refmat) );
icvDFSReleaseSpillTreeNode( (*tr)->root );
cvFree( tr );
}
class CvSpillTreeWrap : public CvFeatureTree {
CvSpillTree* tr;
public:
CvSpillTreeWrap(const CvMat* raw_data,
2012-10-17 15:12:04 +08:00
const int naive,
const double rho,
const double tau) {
tr = icvCreateSpillTree(raw_data, naive, rho, tau);
}
~CvSpillTreeWrap() {
icvReleaseSpillTree(&tr);
}
void FindFeatures(const CvMat* desc, int k, int emax, CvMat* results, CvMat* dist) {
icvFindSpillTreeFeatures(tr, desc, results, dist, k, emax);
}
};
CvFeatureTree* cvCreateSpillTree( const CvMat* raw_data,
2012-10-17 15:12:04 +08:00
const int naive,
const double rho,
const double tau ) {
return new CvSpillTreeWrap(raw_data, naive, rho, tau);
}