From b417b4dbee83926d627d5cbbf9472731966232dd Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 31 Jan 2017 12:05:08 +0300 Subject: [PATCH] KMeans improvement - fixed returned compactness value - added centers drawing to the example app - added compactness test --- modules/core/src/kmeans.cpp | 31 +++++++++++------- modules/core/test/test_math.cpp | 57 ++++++++++++++++++++++++--------- samples/cpp/kmeans.cpp | 8 ++++- 3 files changed, 68 insertions(+), 28 deletions(-) diff --git a/modules/core/src/kmeans.cpp b/modules/core/src/kmeans.cpp index df017adca3..584efcf334 100644 --- a/modules/core/src/kmeans.cpp +++ b/modules/core/src/kmeans.cpp @@ -165,11 +165,13 @@ public: KMeansDistanceComputer( double *_distances, int *_labels, const Mat& _data, - const Mat& _centers ) + const Mat& _centers, + bool _onlyDistance = false ) : distances(_distances), labels(_labels), data(_data), - centers(_centers) + centers(_centers), + onlyDistance(_onlyDistance) { } @@ -183,6 +185,12 @@ public: for( int i = begin; i(i); + if (onlyDistance) + { + const float* center = centers.ptr(labels[i]); + distances[i] = normL2Sqr(sample, center, dims); + continue; + } int k_best = 0; double min_dist = DBL_MAX; @@ -210,6 +218,7 @@ private: int *labels; const Mat& data; const Mat& centers; + bool onlyDistance; }; } @@ -259,6 +268,7 @@ double cv::kmeans( InputArray _data, int K, Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type); std::vector counters(K); std::vector _box(dims); + Mat dists(1, N, CV_64F); Vec2f* box = &_box[0]; double best_compactness = DBL_MAX, compactness = 0; RNG& rng = theRNG(); @@ -430,19 +440,16 @@ double cv::kmeans( InputArray _data, int K, } } - if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon ) - break; + bool isLastIter = (++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon); // assign labels - Mat dists(1, N, CV_64F); + dists = 0; double* dist = dists.ptr(0); - parallel_for_(Range(0, N), - KMeansDistanceComputer(dist, labels, data, centers)); - compactness = 0; - for( i = 0; i < N; i++ ) - { - compactness += dist[i]; - } + parallel_for_(Range(0, N), KMeansDistanceComputer(dist, labels, data, centers, isLastIter)); + compactness = sum(dists)[0]; + + if (isLastIter) + break; } if( compactness < best_compactness ) diff --git a/modules/core/test/test_math.cpp b/modules/core/test/test_math.cpp index 3870d31cd5..e9fc57ed0b 100644 --- a/modules/core/test/test_math.cpp +++ b/modules/core/test/test_math.cpp @@ -2748,21 +2748,23 @@ public: protected: void run(int inVariant) { + RNG& rng = ts->get_rng(); int i, iter = 0, N = 0, N0 = 0, K = 0, dims = 0; Mat labels; - try + { - RNG& rng = theRNG(); const int MAX_DIM=5; int MAX_POINTS = 100, maxIter = 100; for( iter = 0; iter < maxIter; iter++ ) { ts->update_context(this, iter, true); dims = rng.uniform(inVariant == MAT_1_N_CDIM ? 2 : 1, MAX_DIM+1); - N = rng.uniform(1, MAX_POINTS+1); + N = rng.uniform(2, MAX_POINTS+1); N0 = rng.uniform(1, MAX(N/10, 2)); K = rng.uniform(1, N+1); + Mat centers; + if (inVariant == VECTOR) { dims = 2; @@ -2775,7 +2777,7 @@ protected: data[i] = data0[rng.uniform(0, N0)]; kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0), - 5, KMEANS_PP_CENTERS); + 5, KMEANS_PP_CENTERS, centers); } else { @@ -2820,28 +2822,24 @@ protected: } kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0), - 5, KMEANS_PP_CENTERS); + 5, KMEANS_PP_CENTERS, centers); } + ASSERT_EQ(centers.rows, K); + ASSERT_EQ(labels.rows, N); + Mat hist(K, 1, CV_32S, Scalar(0)); for( i = 0; i < N; i++ ) { int l = labels.at(i); - CV_Assert(0 <= l && l < K); + ASSERT_GE(l, 0); + ASSERT_LT(l, K); hist.at(l)++; } for( i = 0; i < K; i++ ) - CV_Assert( hist.at(i) != 0 ); + ASSERT_GT(hist.at(i), 0); } } - catch(...) - { - ts->printf(cvtest::TS::LOG, - "context: iteration=%d, N=%d, N0=%d, K=%d\n", - iter, N, N0, K); - std::cout << labels << std::endl; - ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH); - } } }; @@ -2859,6 +2857,35 @@ TEST_P(Core_KMeans_InputVariants, singular) INSTANTIATE_TEST_CASE_P(AllVariants, Core_KMeans_InputVariants, KMeansInputVariant::all()); +TEST(Core_KMeans, compactness) +{ + const int N = 1024; + const int attempts = 4; + const TermCriteria crit = TermCriteria(TermCriteria::COUNT, 5, 0); // low number of iterations + cvtest::TS& ts = *cvtest::TS::ptr(); + for (int K = 1; K <= N; K *= 2) + { + Mat data(N, 1, CV_32FC2); + cvtest::randUni(ts.get_rng(), data, Scalar(-200, -200), Scalar(200, 200)); + Mat labels, centers; + double compactness = kmeans(data, K, labels, crit, attempts, KMEANS_PP_CENTERS, centers); + centers = centers.reshape(2); + EXPECT_EQ(labels.rows, N); + EXPECT_EQ(centers.rows, K); + EXPECT_GE(compactness, 0.0); + double expected = 0.0; + for (int i = 0; i < N; ++i) + { + int l = labels.at(i); + Point2f d = data.at(i) - centers.at(l); + expected += d.x * d.x + d.y * d.y; + } + EXPECT_NEAR(expected, compactness, expected * 1e-8); + if (K == N) + EXPECT_DOUBLE_EQ(compactness, 0.0); + } +} + TEST(CovariationMatrixVectorOfMat, accuracy) { unsigned int col_problem_size = 8, row_problem_size = 8, vector_size = 16; diff --git a/samples/cpp/kmeans.cpp b/samples/cpp/kmeans.cpp index e90ec2c108..eeba97a3ff 100644 --- a/samples/cpp/kmeans.cpp +++ b/samples/cpp/kmeans.cpp @@ -53,7 +53,7 @@ int main( int /*argc*/, char** /*argv*/ ) randShuffle(points, 1, &rng); - kmeans(points, clusterCount, labels, + double compactness = kmeans(points, clusterCount, labels, TermCriteria( TermCriteria::EPS+TermCriteria::COUNT, 10, 1.0), 3, KMEANS_PP_CENTERS, centers); @@ -65,6 +65,12 @@ int main( int /*argc*/, char** /*argv*/ ) Point ipt = points.at(i); circle( img, ipt, 2, colorTab[clusterIdx], FILLED, LINE_AA ); } + for (i = 0; i < centers.rows; ++i) + { + Point2f c = centers.at(i); + circle( img, c, 40, colorTab[i], 1, LINE_AA ); + } + cout << "Compactness: " << compactness << endl; imshow("clusters", img);