diff --git a/modules/flann/include/opencv2/flann/kmeans_index.h b/modules/flann/include/opencv2/flann/kmeans_index.h
index a823986e09..98ec68a87b 100644
--- a/modules/flann/include/opencv2/flann/kmeans_index.h
+++ b/modules/flann/include/opencv2/flann/kmeans_index.h
@@ -463,14 +463,10 @@ public:
             root_[i] = pool_.allocate<KMeansNode>();
             std::memset(root_[i], 0, sizeof(KMeansNode));
-            if(is_kdtree_distance<Distance>::val || is_vector_space_distance<Distance>::val) {
-                computeNodeStatistics(root_[i], indices_[i], (unsigned int)size_);
-                computeClustering(root_[i], indices_[i], (int)size_, branching_,0);
-            }
-            else {
-                computeBitfieldNodeStatistics(root_[i], indices_[i], (unsigned int)size_);
-                computeBitfieldClustering(root_[i], indices_[i], (int)size_, branching_,0);
-            }
+            Distance* dummy = NULL;
+            computeNodeStatistics(root_[i], indices_[i], (unsigned int)size_, dummy);
+
+            computeClustering(root_[i], indices_[i], (int)size_, branching_,0);
         }
     }
 
@@ -829,6 +825,413 @@ private:
     }
 
 
+    template<typename DistType>
+    void computeNodeStatistics(KMeansNodePtr node, int* indices,
+                               unsigned int indices_length,
+                               const DistType* identifier)
+    {
+        (void)identifier;
+        computeNodeStatistics(node, indices, indices_length);
+    }
+
+    void computeNodeStatistics(KMeansNodePtr node, int* indices,
+                               unsigned int indices_length,
+                               const cvflann::HammingLUT* identifier)
+    {
+        (void)identifier;
+        computeBitfieldNodeStatistics(node, indices, indices_length);
+    }
+
+    void computeNodeStatistics(KMeansNodePtr node, int* indices,
+                               unsigned int indices_length,
+                               const cvflann::Hamming<unsigned char>* identifier)
+    {
+        (void)identifier;
+        computeBitfieldNodeStatistics(node, indices, indices_length);
+    }
+
+    void computeNodeStatistics(KMeansNodePtr node, int* indices,
+                               unsigned int indices_length,
+                               const cvflann::Hamming2<unsigned char>* identifier)
+    {
+        (void)identifier;
+        computeBitfieldNodeStatistics(node, indices, indices_length);
+    }
+
+
+    void refineClustering(int* indices, int indices_length, int branching, CentersType** centers,
+                          std::vector<DistanceType>& radiuses, int* belongs_to, int* count)
+    {
+        cv::AutoBuffer<double> dcenters_buf(branching*veclen_);
+        Matrix<double> dcenters(dcenters_buf.data(), branching, veclen_);
+
+        bool converged = false;
+        int iteration = 0;
+        while (!converged && iteration<iterations_) {
+            converged = true;
+            iteration++;
+
+            // compute the new cluster centers
+            for (int i=0; i<branching; ++i) {
+                memset(dcenters[i],0,sizeof(double)*veclen_);
+                radiuses[i] = 0;
+            }
+            for (int i=0; i<indices_length; ++i) {
+                ElementType* vec = dataset_[indices[i]];
+                double* center = dcenters[belongs_to[i]];
+                for (size_t k=0; k<veclen_; ++k) {
+                    center[k] += vec[k];
+                }
+            }
+            for (int i=0; i<branching; ++i) {
+                int cnt = count[i];
+                for (size_t k=0; k<veclen_; ++k) {
+                    dcenters[i][k] /= cnt;
+                }
+            }
+
+            std::vector<int> new_centroids(indices_length);
+            std::vector<DistanceType> sq_dists(indices_length);
+
+            // reassign points to clusters
+            KMeansDistanceComputer<Matrix<double> > invoker(
+                distance_, dataset_, branching, indices, dcenters, veclen_, new_centroids, sq_dists);
+            parallel_for_(cv::Range(0, (int)indices_length), invoker);
+
+            for (int i=0; i < (int)indices_length; ++i) {
+                DistanceType sq_dist(sq_dists[i]);
+                int new_centroid(new_centroids[i]);
+                if (sq_dist > radiuses[new_centroid]) {
+                    radiuses[new_centroid] = sq_dist;
+                }
+                if (new_centroid != belongs_to[i]) {
+                    count[belongs_to[i]]--;
+                    count[new_centroid]++;
+                    belongs_to[i] = new_centroid;
+                    converged = false;
+                }
+            }
+
+            for (int i=0; i<branching; ++i) {
+                // if one cluster converges to an empty cluster,
+                // move an element into that cluster
+                if (count[i]==0) {
+                    int j = (i+1)%branching;
+                    while (count[j]<=1) {
+                        j = (j+1)%branching;
+                    }
+
+                    for (int k=0; k<indices_length; ++k) {
+                        if (belongs_to[k]==j) {
+                            belongs_to[k] = i;
+                            count[j]--;
+                            count[i]++;
+                            break;
+                        }
+                    }
+                    converged = false;
+                }
+            }
+        }
+
+        for (int i=0; i<branching; ++i) {
+            centers[i] = new CentersType[veclen_];
+            memoryCounter_ += (int)(veclen_*sizeof(CentersType));
+            for (size_t k=0; k<veclen_; ++k) {
+                centers[i][k] = (CentersType)dcenters[i][k];
+            }
+        }
+    }
+
+
+    void refineBitfieldClustering(int* indices, int indices_length, int branching, CentersType** centers,
+                                  std::vector<DistanceType>& radiuses, int* belongs_to, int* count)
+    {
+        for (int i=0; i<branching; ++i) {
+            centers[i] = new CentersType[veclen_];
+            memoryCounter_ += (int)(veclen_*sizeof(CentersType));
+        }
+
+        const unsigned int accumulator_veclen = static_cast<unsigned int>(
+            veclen_*sizeof(ElementType)*BITS_PER_CHAR);
+        cv::AutoBuffer<unsigned int> dcenters_buf(branching*accumulator_veclen);
+        Matrix<unsigned int> dcenters(dcenters_buf.data(), branching, accumulator_veclen);
+
+        bool converged = false;
+        int iteration = 0;
+        while (!converged && iteration<iterations_) {
+            converged = true;
+            iteration++;
+
+            // compute the new cluster centers
+            for (int i=0; i<branching; ++i) {
+                memset(dcenters[i],0,sizeof(unsigned int)*accumulator_veclen);
+                radiuses[i] = 0;
+            }
+            for (int i=0; i<indices_length; ++i) {
+                unsigned char* vec = (unsigned char*)dataset_[indices[i]];
+                unsigned int* dcenter = dcenters[belongs_to[i]];
+                for (size_t k=0, l=0; k<accumulator_veclen; k+=BITS_PER_CHAR, ++l) {
+                    dcenter[k]   +=  vec[l]     & 0x01;
+                    dcenter[k+1] += (vec[l]>>1) & 0x01;
+                    dcenter[k+2] += (vec[l]>>2) & 0x01;
+                    dcenter[k+3] += (vec[l]>>3) & 0x01;
+                    dcenter[k+4] += (vec[l]>>4) & 0x01;
+                    dcenter[k+5] += (vec[l]>>5) & 0x01;
+                    dcenter[k+6] += (vec[l]>>6) & 0x01;
+                    dcenter[k+7] += (vec[l]>>7) & 0x01;
+                }
+            }
+            for (int i=0; i<branching; ++i) {
+                double cnt = static_cast<double>(count[i]);
+                unsigned int* dcenter = dcenters[i];
+                unsigned char* charCenter = (unsigned char*)centers[i];
+                for (size_t k=0, l=0; k<accumulator_veclen; k+=BITS_PER_CHAR, ++l) {
+                    charCenter[l] = static_cast<unsigned char>(
+                                      (((int)(0.5 + (double)(dcenter[k])   / cnt)))
+                                    | (((int)(0.5 + (double)(dcenter[k+1]) / cnt))<<1)
+                                    | (((int)(0.5 + (double)(dcenter[k+2]) / cnt))<<2)
+                                    | (((int)(0.5 + (double)(dcenter[k+3]) / cnt))<<3)
+                                    | (((int)(0.5 + (double)(dcenter[k+4]) / cnt))<<4)
+                                    | (((int)(0.5 + (double)(dcenter[k+5]) / cnt))<<5)
+                                    | (((int)(0.5 + (double)(dcenter[k+6]) / cnt))<<6)
+                                    | (((int)(0.5 + (double)(dcenter[k+7]) / cnt))<<7));
+                }
+            }
+
+            std::vector<int> new_centroids(indices_length);
+            std::vector<DistanceType> dists(indices_length);
+
+            // reassign points to clusters
+            KMeansDistanceComputer<ElementType**> invoker(
+                distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists);
+            parallel_for_(cv::Range(0, (int)indices_length), invoker);
+
+            for (int i=0; i < indices_length; ++i) {
+                DistanceType dist(dists[i]);
+                int new_centroid(new_centroids[i]);
+                if (dist > radiuses[new_centroid]) {
+                    radiuses[new_centroid] = dist;
+                }
+                if (new_centroid != belongs_to[i]) {
+                    count[belongs_to[i]]--;
+                    count[new_centroid]++;
+                    belongs_to[i] = new_centroid;
+                    converged = false;
+                }
+            }
+
+            for (int i=0; i<branching; ++i) {
+                // if one cluster converges to an empty cluster,
+                // move an element into that cluster
+                if (count[i]==0) {
+                    int j = (i+1)%branching;
+                    while (count[j]<=1) {
+                        j = (j+1)%branching;
+                    }
+
+                    for (int k=0; k<indices_length; ++k) {
+                        if (belongs_to[k]==j) {
+                            belongs_to[k] = i;
+                            count[j]--;
+                            count[i]++;
+                            break;
+                        }
+                    }
+                    converged = false;
+                }
+            }
+        }
+    }
+
+
+    void computeSubClustering(KMeansNodePtr node, int* indices, int indices_length,
+                              int branching, int level, CentersType** centers,
+                              std::vector<DistanceType>& radiuses, int* belongs_to, int* count)
+    {
+        // compute kmeans clustering for each of the resulting clusters
+        node->childs = pool_.allocate<KMeansNodePtr>(branching);
+        int start = 0;
+        int end = start;
+        for (int c=0; c<branching; ++c) {
+            int s = count[c];
+
+            DistanceType variance = 0;
+            DistanceType mean_radius = 0;
+            for (int i=0; i<indices_length; ++i) {
+                if (belongs_to[i]==c) {
+                    DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
+                    variance += d;
+                    mean_radius += static_cast<DistanceType>( sqrt(d) );
+                    std::swap(indices[i],indices[end]);
+                    std::swap(belongs_to[i],belongs_to[end]);
+                    end++;
+                }
+            }
+            variance /= s;
+            mean_radius /= s;
+            variance -= distance_(centers[c], ZeroIterator<ElementType>(), veclen_);
+
+            node->childs[c] = pool_.allocate<KMeansNode>();
+            std::memset(node->childs[c], 0, sizeof(KMeansNode));
+            node->childs[c]->radius = radiuses[c];
+            node->childs[c]->pivot = centers[c];
+            node->childs[c]->variance = variance;
+            node->childs[c]->mean_radius = mean_radius;
+            computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
+            start=end;
+        }
+    }
+
+
+    void computeAnyBitfieldSubClustering(KMeansNodePtr node, int* indices, int indices_length,
+                                         int branching, int level, CentersType** centers,
+                                         std::vector<DistanceType>& radiuses, int* belongs_to, int* count)
+    {
+        // compute kmeans clustering for each of the resulting clusters
+        node->childs = pool_.allocate<KMeansNodePtr>(branching);
+        int start = 0;
+        int end = start;
+        for (int c=0; c<branching; ++c) {
+            int s = count[c];
+
+            unsigned long long variance = 0ull;
+            DistanceType mean_radius = 0;
+            for (int i=0; i<indices_length; ++i) {
+                if (belongs_to[i]==c) {
+                    DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
+                    variance += static_cast<unsigned long long>( ensureSquareDistance<Distance>(d) );
+                    mean_radius += ensureSimpleDistance<Distance>(d);
+                    std::swap(indices[i],indices[end]);
+                    std::swap(belongs_to[i],belongs_to[end]);
+                    end++;
+                }
+            }
+            mean_radius = static_cast<DistanceType>(
+                        0.5f + static_cast<float>(mean_radius) / static_cast<float>(s));
+            variance = static_cast<unsigned long long>(
+                        0.5 + static_cast<double>(variance) / static_cast<double>(s));
+            variance -= static_cast<unsigned long long>(
+                        ensureSquareDistance<Distance>(
+                            distance_(centers[c], ZeroIterator<ElementType>(), veclen_)));
+
+            node->childs[c] = pool_.allocate<KMeansNode>();
+            std::memset(node->childs[c], 0, sizeof(KMeansNode));
+            node->childs[c]->radius = radiuses[c];
+            node->childs[c]->pivot = centers[c];
+            node->childs[c]->variance = static_cast<DistanceType>(variance);
+            node->childs[c]->mean_radius = mean_radius;
+            computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
+            start=end;
+        }
+    }
+
+
+    template<typename DistType>
+    void refineAndSplitClustering(
+            KMeansNodePtr node, int* indices, int indices_length, int branching,
+            int level, CentersType** centers, std::vector<DistanceType>& radiuses,
+            int* belongs_to, int* count, const DistType* identifier)
+    {
+        (void)identifier;
+        refineClustering(indices, indices_length, branching, centers, radiuses, belongs_to, count);
+
+        computeSubClustering(node, indices, indices_length, branching,
+                             level, centers, radiuses, belongs_to, count);
+    }
+
+
+    /**
+     * The methods responsible for doing the recursive hierarchical clustering on
+     * binary vectors.
+     * As some might have heard that KMeans on binary data doesn't make sense,
+     * it's worth a short explanation of why it actually works fairly well. As
+     * with the Hierarchical Clustering algorithm, we seed several centers for the
+     * current node by picking some of its points. Then, in a first pass, each point
+     * of the node is assigned to its closest center. Now let's have a look at
+     * the 5 central dimensions of the 9 following points:
+     *
+     * xxxxxx11100xxxxx (1)
+     * xxxxxx11010xxxxx (2)
+     * xxxxxx11001xxxxx (3)
+     * xxxxxx10110xxxxx (4)
+     * xxxxxx10101xxxxx (5)
+     * xxxxxx10011xxxxx (6)
+     * xxxxxx01110xxxxx (7)
+     * xxxxxx01101xxxxx (8)
+     * xxxxxx01011xxxxx (9)
+     * sum   _____
+     * of 1: 66555
+     *
+     * Even if the barycenter notion doesn't apply, we can set a center
+     * xxxxxx11111xxxxx that will better fit the five dimensions we are focusing
+     * on for these points.
+     *
+     * Note that convergence isn't ensured anymore. In practice, using Gonzales
+     * as the seeding algorithm should be fine for getting convergence ("iterations"
+     * value can be set to -1). But with KMeans++ seeding you should definitely
+     * set a maximum number of iterations (but make it higher than the "iterations"
+     * default value of 11).
+     *
+     * Params:
+     *     node = the node to cluster
+     *     indices = indices of the points belonging to the current node
+     *     indices_length = number of points in the current node
+     *     branching = the branching factor to use in the clustering
+     *     level = 0 for the root node, it increases with the subdivision levels
+     *     centers = cluster centers to compute
+     *     radiuses = radiuses of the clusters
+     *     belongs_to = lookup table returning, for a given index id, the center id it belongs to
+     *     count = array storing the number of indices for a given center id
+     *     identifier = dummy pointer to an instance of Distance (used to dispatch to the correct overload)
+     */
+    void refineAndSplitClustering(
+            KMeansNodePtr node, int* indices, int indices_length, int branching,
+            int level, CentersType** centers, std::vector<DistanceType>& radiuses,
+            int* belongs_to, int* count, const cvflann::HammingLUT* identifier)
+    {
+        (void)identifier;
+        refineBitfieldClustering(
+            indices, indices_length, branching, centers, radiuses, belongs_to, count);
+
+        computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
+                                        level, centers, radiuses, belongs_to, count);
+    }
+
+
+    void refineAndSplitClustering(
+            KMeansNodePtr node, int* indices, int indices_length, int branching,
+            int level, CentersType** centers, std::vector<DistanceType>& radiuses,
+            int* belongs_to, int* count, const cvflann::Hamming<unsigned char>* identifier)
+    {
+        (void)identifier;
+        refineBitfieldClustering(
+            indices, indices_length, branching, centers, radiuses, belongs_to, count);
+
+        computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
+                                        level, centers, radiuses, belongs_to, count);
+    }
+
+
+    void refineAndSplitClustering(
+            KMeansNodePtr node, int* indices, int indices_length, int branching,
+            int level, CentersType** centers, std::vector<DistanceType>& radiuses,
+            int* belongs_to, int* count, const cvflann::Hamming2<unsigned char>* identifier)
+    {
+        (void)identifier;
+        refineBitfieldClustering(
+            indices, indices_length, branching, centers, radiuses, belongs_to, count);
+
+        computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
+                                        level, centers, radiuses, belongs_to, count);
+    }
+
 
     /**
      * The method responsible with actually doing the recursive hierarchical
@@ -893,372 +1296,16 @@ private:
             count[belongs_to[i]]++;
         }
 
-        cv::AutoBuffer<double> dcenters_buf(branching*veclen_);
-        Matrix<double> dcenters(dcenters_buf.data(), branching, veclen_);
-        for (int i=0; i<centers_length; ++i) {
-            ElementType* vec = dataset_[centers_idx[i]];
-            for (size_t k=0; k<veclen_; ++k) {
-                dcenters[i][k] = double(vec[k]);
-            }
-        }
-
-        bool converged = false;
-        int iteration = 0;
-        while (!converged && iteration<iterations_) {
-            converged = true;
-            iteration++;
-
-            // compute the new cluster centers
-            for (int i=0; i<branching; ++i) {
-                memset(dcenters[i],0,sizeof(double)*veclen_);
-                radiuses[i] = 0;
-            }
-            for (int i=0; i<indices_length; ++i) {
-                ElementType* vec = dataset_[indices[i]];
-                double* center = dcenters[belongs_to[i]];
-                for (size_t k=0; k<veclen_; ++k) {
-                    center[k] += vec[k];
-                }
-            }
-            for (int i=0; i<branching; ++i) {
-                int cnt = count[i];
-                for (size_t k=0; k<veclen_; ++k) {
-                    dcenters[i][k] /= cnt;
-                }
-            }
-
-            std::vector<int> new_centroids(indices_length);
-            std::vector<DistanceType> sq_dists(indices_length);
-
-            // reassign points to clusters
-            KMeansDistanceComputer<Matrix<double> > invoker(distance_, dataset_, branching, indices, dcenters, veclen_, new_centroids, sq_dists);
-            parallel_for_(cv::Range(0, (int)indices_length), invoker);
-
-            for (int i=0; i < (int)indices_length; ++i) {
-                DistanceType sq_dist(sq_dists[i]);
-                int new_centroid(new_centroids[i]);
-                if (sq_dist > radiuses[new_centroid]) {
-                    radiuses[new_centroid] = sq_dist;
-                }
-                if (new_centroid != belongs_to[i]) {
-                    count[belongs_to[i]]--;
-                    count[new_centroid]++;
-                    belongs_to[i] = new_centroid;
-                    converged = false;
-                }
-            }
-
-            for (int i=0; i<branching; ++i) {
-                // if one cluster converges to an empty cluster,
-                // move an element into that cluster
-                if (count[i]==0) {
-                    int j = (i+1)%branching;
-                    while (count[j]<=1) {
-                        j = (j+1)%branching;
-                    }
-
-                    for (int k=0; k<indices_length; ++k) {
-                        if (belongs_to[k]==j) {
-                            belongs_to[k] = i;
-                            count[j]--;
-                            count[i]++;
-                            break;
-                        }
-                    }
-                    converged = false;
-                }
-            }
-        }
-
-        for (int i=0; i<branching; ++i) {
-            centers[i] = new CentersType[veclen_];
-            memoryCounter_ += (int)(veclen_*sizeof(CentersType));
-            for (size_t k=0; k<veclen_; ++k) {
-                centers[i][k] = (CentersType)dcenters[i][k];
-            }
-        }
-
-        // compute kmeans clustering for each of the resulting clusters
-        node->childs = pool_.allocate<KMeansNodePtr>(branching);
-        int start = 0;
-        int end = start;
-        for (int c=0; c<branching; ++c) {
-            int s = count[c];
-
-            DistanceType variance = 0;
-            DistanceType mean_radius = 0;
-            for (int i=0; i<indices_length; ++i) {
-                if (belongs_to[i]==c) {
-                    DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
-                    variance += d;
-                    mean_radius += static_cast<DistanceType>( sqrt(d) );
-                    std::swap(indices[i],indices[end]);
-                    std::swap(belongs_to[i],belongs_to[end]);
-                    end++;
-                }
-            }
-            variance /= s;
-            mean_radius /= s;
-            variance -= distance_(centers[c], ZeroIterator<ElementType>(), veclen_);
-
-            node->childs[c] = pool_.allocate<KMeansNode>();
-            std::memset(node->childs[c], 0, sizeof(KMeansNode));
-            node->childs[c]->radius = radiuses[c];
-            node->childs[c]->pivot = centers[c];
-            node->childs[c]->variance = variance;
-            node->childs[c]->mean_radius = mean_radius;
-            computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
-            start=end;
-        }
+        Distance* dummy = NULL;
+        refineAndSplitClustering(node, indices, indices_length, branching, level,
+                                 centers, radiuses, belongs_to, count, dummy);
 
         delete[] centers;
     }
 
-    /**
-     * The method responsible with doing the recursive hierarchical clustering on
-     * binary vectors.
-     * As some might have heared that KMeans on binary data doesn't make sense,
-     * it's worth a little explanation why it actually fairly works. As
-     * with the Hierarchical Clustering algortihm, we seed several centers for the
-     * current node by picking some of its points. Then in a first pass each point
-     * of the node is then related to its closest center. Now let's have a look at
-     * the 5 central dimensions of the 9 following points:
-     *
-     * xxxxxx11100xxxxx (1)
-     * xxxxxx11010xxxxx (2)
-     * xxxxxx11001xxxxx (3)
-     * xxxxxx10110xxxxx (4)
-     * xxxxxx10101xxxxx (5)
-     * xxxxxx10011xxxxx (6)
-     * xxxxxx01110xxxxx (7)
-     * xxxxxx01101xxxxx (8)
-     * xxxxxx01011xxxxx (9)
-     * sum   _____
-     * of 1: 66555
-     *
-     * Even if the barycenter notion doesn't apply, we can set a center
-     * xxxxxx11111xxxxx that will better fit the five dimensions we are focusing
-     * on for these points.
-     *
-     * Note that convergence isn't ensured anymore. In practice, using Gonzales
-     * as seeding algorithm should be fine for getting convergence ("iterations"
-     * value can be set to -1). But with KMeans++ seeding you should definitely
-     * set a maximum number of iterations (but make it higher than the "iterations"
-     * default value of 11).
-     *
-     * Params:
-     *     node = the node to cluster
-     *     indices = indices of the points belonging to the current node
-     *     indices_length = number of points in the current node
-     *     branching = the branching factor to use in the clustering
-     *     level = 0 for the root node, it increases with the subdivision levels
-     */
-    void computeBitfieldClustering(KMeansNodePtr node, int* indices,
-                                   int indices_length, int branching, int level)
-    {
-        node->size = indices_length;
-        node->level = level;
-
-        if (indices_length < branching) {
-            node->indices = indices;
-            std::sort(node->indices,node->indices+indices_length);
-            node->childs = NULL;
-            return;
-        }
-
-        cv::AutoBuffer<int> centers_idx_buf(branching);
-        int* centers_idx = centers_idx_buf.data();
-        int centers_length;
-        (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length);
-
-        if (centers_length<branching) {
-            node->indices = indices;
-            std::sort(node->indices,node->indices+indices_length);
-            node->childs = NULL;
-            return;
-        }
-
-        const unsigned int accumulator_veclen = static_cast<unsigned int>(
-            veclen_*sizeof(ElementType)*BITS_PER_CHAR);
-        cv::AutoBuffer<unsigned int> dcenters_buf(branching*accumulator_veclen);
-        Matrix<unsigned int> dcenters(dcenters_buf.data(), branching, accumulator_veclen);
-
-        CentersType** centers = new CentersType*[branching];
-
-        for (int i=0; i<branching; ++i) {
-            centers[i] = new CentersType[veclen_];
-            memoryCounter_ += (int)(veclen_*sizeof(CentersType));
-        }
-
-        std::vector<DistanceType> radiuses(branching);
-        cv::AutoBuffer<int> count_buf(branching);
-        int* count = count_buf.data();
-        for (int i=0; i<branching; ++i) {
-            radiuses[i] = 0;
-            count[i] = 0;
-        }
-
-        // assign points to clusters
-        cv::AutoBuffer<int> belongs_to_buf(indices_length);
-        int* belongs_to = belongs_to_buf.data();
-        for (int i=0; i<indices_length; ++i) {
-            DistanceType dist = distance_(dataset_[indices[i]], dataset_[centers_idx[0]], veclen_);
-            belongs_to[i] = 0;
-            for (int j=1; j<centers_length; ++j) {
-                DistanceType new_dist = distance_(dataset_[indices[i]], dataset_[centers_idx[j]], veclen_);
-                if (dist>new_dist) {
-                    belongs_to[i] = j;
-                    dist = new_dist;
-                }
-            }
-            if (dist>radiuses[belongs_to[i]]) {
-                radiuses[belongs_to[i]] = dist;
-            }
-            count[belongs_to[i]]++;
-        }
-
-        bool converged = false;
-        int iteration = 0;
-        while (!converged && iteration<iterations_) {
-            converged = true;
-            iteration++;
-
-            // compute the new cluster centers
-            for (int i=0; i<branching; ++i) {
-                memset(dcenters[i],0,sizeof(unsigned int)*accumulator_veclen);
-                radiuses[i] = 0;
-            }
-            for (int i=0; i<indices_length; ++i) {
-                unsigned char* vec = (unsigned char*)dataset_[indices[i]];
-                unsigned int* dcenter = dcenters[belongs_to[i]];
-                for (size_t k=0, l=0; k<accumulator_veclen; k+=BITS_PER_CHAR, ++l) {
-                    dcenter[k]   +=  vec[l]     & 0x01;
-                    dcenter[k+1] += (vec[l]>>1) & 0x01;
-                    dcenter[k+2] += (vec[l]>>2) & 0x01;
-                    dcenter[k+3] += (vec[l]>>3) & 0x01;
-                    dcenter[k+4] += (vec[l]>>4) & 0x01;
-                    dcenter[k+5] += (vec[l]>>5) & 0x01;
-                    dcenter[k+6] += (vec[l]>>6) & 0x01;
-                    dcenter[k+7] += (vec[l]>>7) & 0x01;
-                }
-            }
-            for (int i=0; i<branching; ++i) {
-                double cnt = static_cast<double>(count[i]);
-                unsigned int* dcenter = dcenters[i];
-                unsigned char* charCenter = (unsigned char*)centers[i];
-                for (size_t k=0, l=0; k<accumulator_veclen; k+=BITS_PER_CHAR, ++l) {
-                    charCenter[l] = static_cast<unsigned char>(
-                                      (((int)(0.5 + (double)(dcenter[k])   / cnt)))
-                                    | (((int)(0.5 + (double)(dcenter[k+1]) / cnt))<<1)
-                                    | (((int)(0.5 + (double)(dcenter[k+2]) / cnt))<<2)
-                                    | (((int)(0.5 + (double)(dcenter[k+3]) / cnt))<<3)
-                                    | (((int)(0.5 + (double)(dcenter[k+4]) / cnt))<<4)
-                                    | (((int)(0.5 + (double)(dcenter[k+5]) / cnt))<<5)
-                                    | (((int)(0.5 + (double)(dcenter[k+6]) / cnt))<<6)
-                                    | (((int)(0.5 + (double)(dcenter[k+7]) / cnt))<<7));
-                }
-            }
-
-            std::vector<int> new_centroids(indices_length);
-            std::vector<DistanceType> dists(indices_length);
-
-            // reassign points to clusters
-            KMeansDistanceComputer<ElementType**> invoker(distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists);
-            parallel_for_(cv::Range(0, (int)indices_length), invoker);
-
-            for (int i=0; i < indices_length; ++i) {
-                DistanceType dist(dists[i]);
-                int new_centroid(new_centroids[i]);
-                if (dist > radiuses[new_centroid]) {
-                    radiuses[new_centroid] = dist;
-                }
-                if (new_centroid != belongs_to[i]) {
-                    count[belongs_to[i]]--;
-                    count[new_centroid]++;
-                    belongs_to[i] = new_centroid;
-                    converged = false;
-                }
-            }
-
-            for (int i=0; i<branching; ++i) {
-                // if one cluster converges to an empty cluster,
-                // move an element into that cluster
-                if (count[i]==0) {
-                    int j = (i+1)%branching;
-                    while (count[j]<=1) {
-                        j = (j+1)%branching;
-                    }
-
-                    for (int k=0; k<indices_length; ++k) {
-                        if (belongs_to[k]==j) {
-                            belongs_to[k] = i;
-                            count[j]--;
-                            count[i]++;
-                            break;
-                        }
-                    }
-                    converged = false;
-                }
-            }
-        }
-
-        // compute kmeans clustering for each of the resulting clusters
-        node->childs = pool_.allocate<KMeansNodePtr>(branching);
-        int start = 0;
-        int end = start;
-        for (int c=0; c<branching; ++c) {
-            int s = count[c];
-
-            unsigned long long variance = 0ull;
-            DistanceType mean_radius = 0;
-            for (int i=0; i<indices_length; ++i) {
-                if (belongs_to[i]==c) {
-                    DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
-                    variance += static_cast<unsigned long long>( ensureSquareDistance<Distance>(d) );
-                    mean_radius += ensureSimpleDistance<Distance>(d);
-                    std::swap(indices[i],indices[end]);
-                    std::swap(belongs_to[i],belongs_to[end]);
-                    end++;
-                }
-            }
-            mean_radius = static_cast<DistanceType>(
-                        0.5f + static_cast<float>(mean_radius) / static_cast<float>(s));
-            variance = static_cast<unsigned long long>(
-                        0.5 + static_cast<double>(variance) / static_cast<double>(s));
-            variance -= static_cast<unsigned long long>(
-                        ensureSquareDistance<Distance>(
-                            distance_(centers[c], ZeroIterator<ElementType>(), veclen_)));
-
-            node->childs[c] = pool_.allocate<KMeansNode>();
-            std::memset(node->childs[c], 0, sizeof(KMeansNode));
-            node->childs[c]->radius = radiuses[c];
-            node->childs[c]->pivot = centers[c];
-            node->childs[c]->variance = static_cast<DistanceType>(variance);
-            node->childs[c]->mean_radius = mean_radius;
-            computeBitfieldClustering(node->childs[c],indices+start, end-start, branching, level+1);
-            start=end;
-        }
-
-        delete[] centers;
-    }
-
-
-
 
     /**
      * Performs one descent in the hierarchical k-means tree. The branches not
      * visited are stored in a priority queue.
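
Note on the dispatch pattern used above: the patch replaces the runtime `is_kdtree_distance`/`is_vector_space_distance` branch with overload resolution on a never-dereferenced `Distance* dummy = NULL` argument, so the Hamming distances get the bitfield code path at compile time. The following standalone sketch shows the idiom in isolation; the type and function names are illustrative, not taken from the patch.

    #include <cstddef>
    #include <iostream>

    struct L2Distance {};        // stand-ins for cvflann::L2, cvflann::Hamming, ...
    struct HammingDistance {};

    // Generic (vector-space) path: matches any distance type not handled below.
    template <typename DistType>
    void computeStats(const DistType* /*identifier*/)
    {
        std::cout << "vector-space statistics\n";
    }

    // Bitfield path: selected whenever the distance type is HammingDistance.
    void computeStats(const HammingDistance* /*identifier*/)
    {
        std::cout << "bitfield statistics\n";
    }

    template <typename Distance>
    void buildNode()
    {
        Distance* dummy = NULL;  // never dereferenced, only drives overload resolution
        computeStats(dummy);
    }

    int main()
    {
        buildNode<L2Distance>();       // prints "vector-space statistics"
        buildNode<HammingDistance>();  // prints "bitfield statistics"
    }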
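Note on the binary cluster centers: the comment block about the nine `xxxxxx11100xxxxx`-style points describes a per-bit majority vote, which is what the `dcenter`/`charCenter` accumulation and rounding code implements. The sketch below is a simplified, sequential version of that idea under illustrative names (it is not the patch code): each output bit is set when at least half of the cluster members have it set.

    #include <cstddef>
    #include <vector>

    // Compute a bit-packed center for a cluster of bit-packed descriptors:
    // bit b of the result is 1 iff at least half of the members have bit b set.
    std::vector<unsigned char> majorityCenter(const std::vector<std::vector<unsigned char> >& members,
                                              size_t veclen)
    {
        std::vector<unsigned int> ones(veclen * 8, 0);          // per-bit counters
        for (size_t m = 0; m < members.size(); ++m) {
            for (size_t byte = 0; byte < veclen; ++byte) {
                for (int bit = 0; bit < 8; ++bit) {
                    ones[byte*8 + bit] += (members[m][byte] >> bit) & 0x01;
                }
            }
        }

        std::vector<unsigned char> center(veclen, 0);
        const double cnt = (double)members.size();
        for (size_t byte = 0; byte < veclen; ++byte) {
            for (int bit = 0; bit < 8; ++bit) {
                // round(ones/cnt): 1 when the bit is set in at least half of the members
                if ((int)(0.5 + ones[byte*8 + bit] / cnt) != 0) {
                    center[byte] |= (unsigned char)(1u << bit);
                }
            }
        }
        return center;
    }

Applied to the nine example points from the comment, the five central bits are set in 6, 6, 5, 5 and 5 of the 9 members, so every one of them rounds to 1 and the center becomes `...11111...`, exactly as the comment argues.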
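Note on the reassignment step: both `refineClustering` and `refineBitfieldClustering` repeat the same Lloyd-style loop, where `KMeansDistanceComputer` (run through `parallel_for_`) finds each point's nearest center, and the serial follow-up updates cluster radii and membership counts and clears the `converged` flag whenever a point moves. The snippet below is a sequential, self-contained illustration of that step under assumed names; it is not the parallel implementation from the patch.

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // One reassignment pass; returns true when no point changed cluster (converged).
    template <typename Point, typename DistanceFn>
    bool reassignPoints(const std::vector<Point>& points,
                        const std::vector<Point>& centers,
                        DistanceFn dist,
                        std::vector<int>& belongs_to,
                        std::vector<int>& count,
                        std::vector<double>& radiuses)
    {
        bool converged = true;
        std::fill(radiuses.begin(), radiuses.end(), 0.0);
        for (size_t i = 0; i < points.size(); ++i) {
            int best = 0;
            double best_d = dist(points[i], centers[0]);
            for (size_t j = 1; j < centers.size(); ++j) {
                double d = dist(points[i], centers[j]);
                if (d < best_d) { best_d = d; best = (int)j; }
            }
            if (best_d > radiuses[best]) radiuses[best] = best_d;  // track cluster radius
            if (best != belongs_to[i]) {                           // point moved cluster
                count[belongs_to[i]]--;
                count[best]++;
                belongs_to[i] = best;
                converged = false;
            }
        }
        return converged;
    }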