mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 14:13:15 +08:00
Add a new method for initializing KMeans centers that leads to better clusters and thus better retrieval when final centers have to be existing keypoints instead of clusters barycenters.
This commit is contained in:
parent
65b020f0d3
commit
112d63ae96
@ -107,6 +107,7 @@ enum flann_centers_init_t
|
||||
FLANN_CENTERS_RANDOM = 0,
|
||||
FLANN_CENTERS_GONZALES = 1,
|
||||
FLANN_CENTERS_KMEANSPP = 2,
|
||||
FLANN_CENTERS_GROUPWISE = 3,
|
||||
|
||||
// deprecated constants, should use the FLANN_CENTERS_* ones instead
|
||||
CENTERS_RANDOM = 0,
|
||||
|
@ -257,6 +257,84 @@ private:
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Chooses the initial centers in a way inspired by Gonzales (by Pierre-Emmanuel Viel):
|
||||
* select the first point of the list as a candidate, then parse the points list. If another
|
||||
* point is further than current candidate from the other centers, test if it is a good center
|
||||
* of a local aggregation. If it is, replace current candidate by this point. And so on...
|
||||
*
|
||||
* Used with KMeansIndex that computes centers coordinates by averaging positions of clusters points,
|
||||
* this doesn't make a real difference with previous methods. But used with HierarchicalClusteringIndex
|
||||
* class that pick centers among existing points instead of computing the barycenters, there is a real
|
||||
* improvement.
|
||||
*
|
||||
* Params:
|
||||
* k = number of centers
|
||||
* vecs = the dataset of points
|
||||
* indices = indices in the dataset
|
||||
* Returns:
|
||||
*/
|
||||
void GroupWiseCenterChooser(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
|
||||
{
|
||||
const float kSpeedUpFactor = 1.3f;
|
||||
|
||||
int n = indices_length;
|
||||
|
||||
DistanceType* closestDistSq = new DistanceType[n];
|
||||
|
||||
// Choose one random center and set the closestDistSq values
|
||||
int index = rand_int(n);
|
||||
assert(index >=0 && index < n);
|
||||
centers[0] = dsindices[index];
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
|
||||
}
|
||||
|
||||
|
||||
// Choose each center
|
||||
int centerCount;
|
||||
for (centerCount = 1; centerCount < k; centerCount++) {
|
||||
|
||||
// Repeat several trials
|
||||
double bestNewPot = -1;
|
||||
int bestNewIndex = 0;
|
||||
DistanceType furthest = 0;
|
||||
for (index = 0; index < n; index++) {
|
||||
|
||||
// We will test only the potential of the points further than current candidate
|
||||
if( closestDistSq[index] > kSpeedUpFactor * (float)furthest ) {
|
||||
|
||||
// Compute the new potential
|
||||
double newPot = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
newPot += std::min( distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols)
|
||||
, closestDistSq[i] );
|
||||
}
|
||||
|
||||
// Store the best result
|
||||
if ((bestNewPot < 0)||(newPot <= bestNewPot)) {
|
||||
bestNewPot = newPot;
|
||||
bestNewIndex = index;
|
||||
furthest = closestDistSq[index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add the appropriate center
|
||||
centers[centerCount] = dsindices[bestNewIndex];
|
||||
for (int i = 0; i < n; i++) {
|
||||
closestDistSq[i] = std::min( distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols)
|
||||
, closestDistSq[i] );
|
||||
}
|
||||
}
|
||||
|
||||
centers_length = centerCount;
|
||||
|
||||
delete[] closestDistSq;
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
|
||||
|
||||
@ -290,6 +368,9 @@ public:
|
||||
else if (centers_init_==FLANN_CENTERS_KMEANSPP) {
|
||||
chooseCenters = &HierarchicalClusteringIndex::chooseCentersKMeanspp;
|
||||
}
|
||||
else if (centers_init_==FLANN_CENTERS_GROUPWISE) {
|
||||
chooseCenters = &HierarchicalClusteringIndex::GroupWiseCenterChooser;
|
||||
}
|
||||
else {
|
||||
throw FLANNException("Unknown algorithm for choosing initial centers.");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user