diff --git a/modules/core/perf/perf_math.cpp b/modules/core/perf/perf_math.cpp index eb3fbb0b24..944db96a47 100644 --- a/modules/core/perf/perf_math.cpp +++ b/modules/core/perf/perf_math.cpp @@ -3,14 +3,11 @@ using namespace std; using namespace cv; using namespace perf; -using std::tr1::make_tuple; -using std::tr1::get; + +namespace { typedef perf::TestBaseWithParam VectorLength; -typedef std::tr1::tuple MaxDim_MaxPoints_t; -typedef perf::TestBaseWithParam MaxDim_MaxPoints; - PERF_TEST_P(VectorLength, phase32f, testing::Values(128, 1000, 128*1024, 512*1024, 1024*1024)) { size_t length = GetParam(); @@ -39,47 +36,125 @@ PERF_TEST_P(VectorLength, phase64f, testing::Values(128, 1000, 128*1024, 512*102 SANITY_CHECK(angle, 5e-5); } -PERF_TEST_P( MaxDim_MaxPoints, kmeans, - testing::Combine( testing::Values( 16, 32, 64 ), - testing::Values( 300, 400, 500) ) ) +typedef perf::TestBaseWithParam< testing::tuple > KMeans; + +PERF_TEST_P_(KMeans, single_iter) { RNG& rng = theRNG(); - const int MAX_DIM = get<0>(GetParam()); - const int MAX_POINTS = get<1>(GetParam()); + const int K = testing::get<0>(GetParam()); + const int dims = testing::get<1>(GetParam()); + const int N = testing::get<2>(GetParam()); const int attempts = 5; - Mat labels, centers; - int i, N = 0, N0 = 0, K = 0, dims = 0; - dims = rng.uniform(1, MAX_DIM+1); - N = rng.uniform(1, MAX_POINTS+1); - N0 = rng.uniform(1, MAX(N/10, 2)); - K = rng.uniform(1, N+1); + Mat data(N, dims, CV_32F); + rng.fill(data, RNG::UNIFORM, -0.1, 0.1); + const int N0 = K; Mat data0(N0, dims, CV_32F); rng.fill(data0, RNG::UNIFORM, -1, 1); - Mat data(N, dims, CV_32F); - for( i = 0; i < N; i++ ) - data0.row(rng.uniform(0, N0)).copyTo(data.row(i)); + for (int i = 0; i < N; i++) + { + int base = rng.uniform(0, N0); + cv::add(data0.row(base), data.row(i), data.row(i)); + } declare.in(data); + Mat labels, centers; + + TEST_CYCLE() + { + kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 1, 0), + attempts, KMEANS_PP_CENTERS, centers); + } + + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P_(KMeans, good) +{ + RNG& rng = theRNG(); + const int K = testing::get<0>(GetParam()); + const int dims = testing::get<1>(GetParam()); + const int N = testing::get<2>(GetParam()); + const int attempts = 5; + + Mat data(N, dims, CV_32F); + rng.fill(data, RNG::UNIFORM, -0.1, 0.1); + + const int N0 = K; + Mat data0(N0, dims, CV_32F); + rng.fill(data0, RNG::UNIFORM, -1, 1); + + for (int i = 0; i < N; i++) + { + int base = rng.uniform(0, N0); + cv::add(data0.row(base), data.row(i), data.row(i)); + } + + declare.in(data); + + Mat labels, centers; + TEST_CYCLE() { kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0), attempts, KMEANS_PP_CENTERS, centers); } - Mat clusterPointsNumber = Mat::zeros(1, K, CV_32S); + SANITY_CHECK_NOTHING(); +} - for( i = 0; i < labels.rows; i++ ) +PERF_TEST_P_(KMeans, with_duplicates) +{ + RNG& rng = theRNG(); + const int K = testing::get<0>(GetParam()); + const int dims = testing::get<1>(GetParam()); + const int N = testing::get<2>(GetParam()); + const int attempts = 5; + + Mat data(N, dims, CV_32F, Scalar::all(0)); + + const int N0 = std::max(2, K * 2 / 3); + Mat data0(N0, dims, CV_32F); + rng.fill(data0, RNG::UNIFORM, -1, 1); + + for (int i = 0; i < N; i++) { - int clusterIdx = labels.at(i); - clusterPointsNumber.at(clusterIdx)++; + int base = rng.uniform(0, N0); + data0.row(base).copyTo(data.row(i)); } - Mat sortedClusterPointsNumber; - cv::sort(clusterPointsNumber, sortedClusterPointsNumber, cv::SORT_EVERY_ROW + cv::SORT_ASCENDING); + declare.in(data); + + Mat labels, centers; + + TEST_CYCLE() + { + kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0), + attempts, KMEANS_PP_CENTERS, centers); + } + + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(/*nothing*/ , KMeans, + testing::Values( + // K clusters, dims, N points + testing::make_tuple(2, 3, 100000), + testing::make_tuple(4, 3, 500), + testing::make_tuple(4, 3, 1000), + testing::make_tuple(4, 3, 10000), + testing::make_tuple(8, 3, 1000), + testing::make_tuple(8, 16, 1000), + testing::make_tuple(8, 64, 1000), + testing::make_tuple(16, 16, 1000), + testing::make_tuple(16, 32, 1000), + testing::make_tuple(32, 16, 1000), + testing::make_tuple(32, 32, 1000), + testing::make_tuple(100, 2, 1000) + ) +); - SANITY_CHECK(sortedClusterPointsNumber); } diff --git a/modules/core/src/kmeans.cpp b/modules/core/src/kmeans.cpp index 495ff5e3c1..cbf9e47b87 100644 --- a/modules/core/src/kmeans.cpp +++ b/modules/core/src/kmeans.cpp @@ -10,7 +10,7 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2000-2018, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. @@ -42,105 +42,100 @@ //M*/ #include "precomp.hpp" +#include ////////////////////////////////////////// kmeans //////////////////////////////////////////// namespace cv { -static void generateRandomCenter(const std::vector& box, float* center, RNG& rng) +static int CV_KMEANS_PARALLEL_GRANULARITY = (int)utils::getConfigurationParameterSizeT("OPENCV_KMEANS_PARALLEL_GRANULARITY", 1000); + +static void generateRandomCenter(int dims, const Vec2f* box, float* center, RNG& rng) { - size_t j, dims = box.size(); float margin = 1.f/dims; - for( j = 0; j < dims; j++ ) + for (int j = 0; j < dims; j++) center[j] = ((float)rng*(1.f+margin*2.f)-margin)*(box[j][1] - box[j][0]) + box[j][0]; } class KMeansPPDistanceComputer : public ParallelLoopBody { public: - KMeansPPDistanceComputer( float *_tdist2, - const float *_data, - const float *_dist, - int _dims, - size_t _step, - size_t _stepci ) - : tdist2(_tdist2), - data(_data), - dist(_dist), - dims(_dims), - step(_step), - stepci(_stepci) { } + KMeansPPDistanceComputer(float *tdist2_, const Mat& data_, const float *dist_, int ci_) : + tdist2(tdist2_), data(data_), dist(dist_), ci(ci_) + { } void operator()( const cv::Range& range ) const { CV_TRACE_FUNCTION(); const int begin = range.start; const int end = range.end; + const int dims = data.cols; - for ( int i = begin; i(i), data.ptr(ci), dims), dist[i]); } } private: - KMeansPPDistanceComputer& operator=(const KMeansPPDistanceComputer&); // to quiet MSVC + KMeansPPDistanceComputer& operator=(const KMeansPPDistanceComputer&); // = delete float *tdist2; - const float *data; + const Mat& data; const float *dist; - const int dims; - const size_t step; - const size_t stepci; + const int ci; }; /* k-means center initialization using the following algorithm: Arthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding */ -static void generateCentersPP(const Mat& _data, Mat& _out_centers, +static void generateCentersPP(const Mat& data, Mat& _out_centers, int K, RNG& rng, int trials) { CV_TRACE_FUNCTION(); - int i, j, k, dims = _data.cols, N = _data.rows; - const float* data = _data.ptr(0); - size_t step = _data.step/sizeof(data[0]); - std::vector _centers(K); + const int dims = data.cols, N = data.rows; + cv::AutoBuffer _centers(K); int* centers = &_centers[0]; - std::vector _dist(N*3); + cv::AutoBuffer _dist(N*3); float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N; double sum0 = 0; centers[0] = (unsigned)rng % N; - for( i = 0; i < N; i++ ) + for (int i = 0; i < N; i++) { - dist[i] = normL2Sqr(data + step*i, data + step*centers[0], dims); + dist[i] = normL2Sqr(data.ptr(i), data.ptr(centers[0]), dims); sum0 += dist[i]; } - for( k = 1; k < K; k++ ) + for (int k = 1; k < K; k++) { double bestSum = DBL_MAX; int bestCenter = -1; - for( j = 0; j < trials; j++ ) + for (int j = 0; j < trials; j++) { - double p = (double)rng*sum0, s = 0; - for( i = 0; i < N-1; i++ ) - if( (p -= dist[i]) <= 0 ) + double p = (double)rng*sum0; + int ci = 0; + for (; ci < N - 1; ci++) + { + p -= dist[ci]; + if (p <= 0) break; - int ci = i; + } parallel_for_(Range(0, N), - KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci)); - for( i = 0; i < N; i++ ) + KMeansPPDistanceComputer(tdist2, data, dist, ci), + divUp(dims * N, CV_KMEANS_PARALLEL_GRANULARITY)); + double s = 0; + for (int i = 0; i < N; i++) { s += tdist2[i]; } - if( s < bestSum ) + if (s < bestSum) { bestSum = s; bestCenter = ci; @@ -152,39 +147,39 @@ static void generateCentersPP(const Mat& _data, Mat& _out_centers, std::swap(dist, tdist); } - for( k = 0; k < K; k++ ) + for (int k = 0; k < K; k++) { - const float* src = data + step*centers[k]; + const float* src = data.ptr(centers[k]); float* dst = _out_centers.ptr(k); - for( j = 0; j < dims; j++ ) + for (int j = 0; j < dims; j++) dst[j] = src[j]; } } +template class KMeansDistanceComputer : public ParallelLoopBody { public: - KMeansDistanceComputer( double *_distances, - int *_labels, - const Mat& _data, - const Mat& _centers, - bool _onlyDistance = false ) - : distances(_distances), - labels(_labels), - data(_data), - centers(_centers), - onlyDistance(_onlyDistance) + KMeansDistanceComputer( double *distances_, + int *labels_, + const Mat& data_, + const Mat& centers_) + : distances(distances_), + labels(labels_), + data(data_), + centers(centers_) { } void operator()( const Range& range ) const { + CV_TRACE_FUNCTION(); const int begin = range.start; const int end = range.end; const int K = centers.rows; const int dims = centers.cols; - for( int i = begin; i(i); if (onlyDistance) @@ -193,34 +188,36 @@ public: distances[i] = normL2Sqr(sample, center, dims); continue; } - int k_best = 0; - double min_dist = DBL_MAX; - - for( int k = 0; k < K; k++ ) + else { - const float* center = centers.ptr(k); - const double dist = normL2Sqr(sample, center, dims); + int k_best = 0; + double min_dist = DBL_MAX; - if( min_dist > dist ) + for (int k = 0; k < K; k++) { - min_dist = dist; - k_best = k; - } - } + const float* center = centers.ptr(k); + const double dist = normL2Sqr(sample, center, dims); - distances[i] = min_dist; - labels[i] = k_best; + if (min_dist > dist) + { + min_dist = dist; + k_best = k; + } + } + + distances[i] = min_dist; + labels[i] = k_best; + } } } private: - KMeansDistanceComputer& operator=(const KMeansDistanceComputer&); // to quiet MSVC + KMeansDistanceComputer& operator=(const KMeansDistanceComputer&); // = delete double *distances; int *labels; const Mat& data; const Mat& centers; - bool onlyDistance; }; } @@ -231,13 +228,12 @@ double cv::kmeans( InputArray _data, int K, int flags, OutputArray _centers ) { CV_INSTRUMENT_REGION() - const int SPP_TRIALS = 3; Mat data0 = _data.getMat(); - bool isrow = data0.rows == 1; - int N = isrow ? data0.cols : data0.rows; - int dims = (isrow ? 1 : data0.cols)*data0.channels(); - int type = data0.depth(); + const bool isrow = data0.rows == 1; + const int N = isrow ? data0.cols : data0.rows; + const int dims = (isrow ? 1 : data0.cols)*data0.channels(); + const int type = data0.depth(); attempts = std::max(attempts, 1); CV_Assert( data0.dims <= 2 && type == CV_32F && K > 0 ); @@ -248,129 +244,115 @@ double cv::kmeans( InputArray _data, int K, _bestLabels.create(N, 1, CV_32S, -1, true); Mat _labels, best_labels = _bestLabels.getMat(); - if( flags & CV_KMEANS_USE_INITIAL_LABELS ) + if (flags & CV_KMEANS_USE_INITIAL_LABELS) { CV_Assert( (best_labels.cols == 1 || best_labels.rows == 1) && best_labels.cols*best_labels.rows == N && best_labels.type() == CV_32S && best_labels.isContinuous()); - best_labels.copyTo(_labels); + best_labels.reshape(1, N).copyTo(_labels); + for (int i = 0; i < N; i++) + { + CV_Assert((unsigned)_labels.at(i) < (unsigned)K); + } } else { - if( !((best_labels.cols == 1 || best_labels.rows == 1) && + if (!((best_labels.cols == 1 || best_labels.rows == 1) && best_labels.cols*best_labels.rows == N && - best_labels.type() == CV_32S && - best_labels.isContinuous())) - best_labels.create(N, 1, CV_32S); + best_labels.type() == CV_32S && + best_labels.isContinuous())) + { + _bestLabels.create(N, 1, CV_32S); + best_labels = _bestLabels.getMat(); + } _labels.create(best_labels.size(), best_labels.type()); } int* labels = _labels.ptr(); Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type); - std::vector counters(K); - std::vector _box(dims); - Mat dists(1, N, CV_64F); - Vec2f* box = &_box[0]; - double best_compactness = DBL_MAX, compactness = 0; + cv::AutoBuffer counters(K); + cv::AutoBuffer dists(N); RNG& rng = theRNG(); - int a, iter, i, j, k; - if( criteria.type & TermCriteria::EPS ) + if (criteria.type & TermCriteria::EPS) criteria.epsilon = std::max(criteria.epsilon, 0.); else criteria.epsilon = FLT_EPSILON; criteria.epsilon *= criteria.epsilon; - if( criteria.type & TermCriteria::COUNT ) + if (criteria.type & TermCriteria::COUNT) criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100); else criteria.maxCount = 100; - if( K == 1 ) + if (K == 1) { attempts = 1; criteria.maxCount = 2; } - const float* sample = data.ptr(0); - for( j = 0; j < dims; j++ ) - box[j] = Vec2f(sample[j], sample[j]); - - for( i = 1; i < N; i++ ) + cv::AutoBuffer box(dims); + if (!(flags & KMEANS_PP_CENTERS)) { - sample = data.ptr(i); - for( j = 0; j < dims; j++ ) { - float v = sample[j]; - box[j][0] = std::min(box[j][0], v); - box[j][1] = std::max(box[j][1], v); + const float* sample = data.ptr(0); + for (int j = 0; j < dims; j++) + box[j] = Vec2f(sample[j], sample[j]); + } + for (int i = 1; i < N; i++) + { + const float* sample = data.ptr(i); + for (int j = 0; j < dims; j++) + { + float v = sample[j]; + box[j][0] = std::min(box[j][0], v); + box[j][1] = std::max(box[j][1], v); + } } } - for( a = 0; a < attempts; a++ ) + double best_compactness = DBL_MAX; + for (int a = 0; a < attempts; a++) { - double max_center_shift = DBL_MAX; - for( iter = 0;; ) + double compactness = 0; + + for (int iter = 0; ;) { + double max_center_shift = iter == 0 ? DBL_MAX : 0.0; + swap(centers, old_centers); - if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) ) + if (iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS))) { - if( flags & KMEANS_PP_CENTERS ) + if (flags & KMEANS_PP_CENTERS) generateCentersPP(data, centers, K, rng, SPP_TRIALS); else { - for( k = 0; k < K; k++ ) - generateRandomCenter(_box, centers.ptr(k), rng); + for (int k = 0; k < K; k++) + generateRandomCenter(dims, box, centers.ptr(k), rng); } } else { - if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) ) - { - for( i = 0; i < N; i++ ) - CV_Assert( (unsigned)labels[i] < (unsigned)K ); - } - // compute centers centers = Scalar(0); - for( k = 0; k < K; k++ ) + for (int k = 0; k < K; k++) counters[k] = 0; - for( i = 0; i < N; i++ ) + for (int i = 0; i < N; i++) { - sample = data.ptr(i); - k = labels[i]; + const float* sample = data.ptr(i); + int k = labels[i]; float* center = centers.ptr(k); - j=0; - #if CV_ENABLE_UNROLLED - for(; j <= dims - 4; j += 4 ) - { - float t0 = center[j] + sample[j]; - float t1 = center[j+1] + sample[j+1]; - - center[j] = t0; - center[j+1] = t1; - - t0 = center[j+2] + sample[j+2]; - t1 = center[j+3] + sample[j+3]; - - center[j+2] = t0; - center[j+3] = t1; - } - #endif - for( ; j < dims; j++ ) + for (int j = 0; j < dims; j++) center[j] += sample[j]; counters[k]++; } - if( iter > 0 ) - max_center_shift = 0; - - for( k = 0; k < K; k++ ) + for (int k = 0; k < K; k++) { - if( counters[k] != 0 ) + if (counters[k] != 0) continue; // if some cluster appeared to be empty then: @@ -378,29 +360,28 @@ double cv::kmeans( InputArray _data, int K, // 2. find the farthest from the center point in the biggest cluster // 3. exclude the farthest point from the biggest cluster and form a new 1-point cluster. int max_k = 0; - for( int k1 = 1; k1 < K; k1++ ) + for (int k1 = 1; k1 < K; k1++) { - if( counters[max_k] < counters[k1] ) + if (counters[max_k] < counters[k1]) max_k = k1; } double max_dist = 0; int farthest_i = -1; - float* new_center = centers.ptr(k); - float* old_center = centers.ptr(max_k); - float* _old_center = temp.ptr(); // normalized + float* base_center = centers.ptr(max_k); + float* _base_center = temp.ptr(); // normalized float scale = 1.f/counters[max_k]; - for( j = 0; j < dims; j++ ) - _old_center[j] = old_center[j]*scale; + for (int j = 0; j < dims; j++) + _base_center[j] = base_center[j]*scale; - for( i = 0; i < N; i++ ) + for (int i = 0; i < N; i++) { - if( labels[i] != max_k ) + if (labels[i] != max_k) continue; - sample = data.ptr(i); - double dist = normL2Sqr(sample, _old_center, dims); + const float* sample = data.ptr(i); + double dist = normL2Sqr(sample, _base_center, dims); - if( max_dist <= dist ) + if (max_dist <= dist) { max_dist = dist; farthest_i = i; @@ -410,29 +391,30 @@ double cv::kmeans( InputArray _data, int K, counters[max_k]--; counters[k]++; labels[farthest_i] = k; - sample = data.ptr(farthest_i); - for( j = 0; j < dims; j++ ) + const float* sample = data.ptr(farthest_i); + float* cur_center = centers.ptr(k); + for (int j = 0; j < dims; j++) { - old_center[j] -= sample[j]; - new_center[j] += sample[j]; + base_center[j] -= sample[j]; + cur_center[j] += sample[j]; } } - for( k = 0; k < K; k++ ) + for (int k = 0; k < K; k++) { float* center = centers.ptr(k); CV_Assert( counters[k] != 0 ); float scale = 1.f/counters[k]; - for( j = 0; j < dims; j++ ) + for (int j = 0; j < dims; j++) center[j] *= scale; - if( iter > 0 ) + if (iter > 0) { double dist = 0; const float* old_center = old_centers.ptr(k); - for( j = 0; j < dims; j++ ) + for (int j = 0; j < dims; j++) { double t = center[j] - old_center[j]; dist += t*t; @@ -444,25 +426,29 @@ double cv::kmeans( InputArray _data, int K, bool isLastIter = (++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon); - // assign labels - dists = 0; - double* dist = dists.ptr(0); - parallel_for_(Range(0, N), KMeansDistanceComputer(dist, labels, data, centers, isLastIter)); - compactness = sum(dists)[0]; - if (isLastIter) + { + // don't re-assign labels to avoid creation of empty clusters + parallel_for_(Range(0, N), KMeansDistanceComputer(dists, labels, data, centers), divUp(dims * N, CV_KMEANS_PARALLEL_GRANULARITY)); + compactness = sum(Mat(Size(N, 1), CV_64F, &dists[0]))[0]; break; + } + else + { + // assign labels + parallel_for_(Range(0, N), KMeansDistanceComputer(dists, labels, data, centers), divUp(dims * N * K, CV_KMEANS_PARALLEL_GRANULARITY)); + } } - if( compactness < best_compactness ) + if (compactness < best_compactness) { best_compactness = compactness; - if( _centers.needed() ) + if (_centers.needed()) { - Mat reshaped = centers; - if(_centers.fixedType() && _centers.channels() == dims) - reshaped = centers.reshape(dims); - reshaped.copyTo(_centers); + if (_centers.fixedType() && _centers.channels() == dims) + centers.reshape(dims).copyTo(_centers); + else + centers.copyTo(_centers); } _labels.copyTo(best_labels); }