Merge pull request #8081 from mshabunin:fix-kmeans-compactness

This commit is contained in:
Vadim Pisarevsky 2017-01-31 12:19:46 +00:00
commit dfb348ef0b
3 changed files with 68 additions and 28 deletions

View File

@ -165,11 +165,13 @@ public:
KMeansDistanceComputer( double *_distances,
int *_labels,
const Mat& _data,
const Mat& _centers )
const Mat& _centers,
bool _onlyDistance = false )
: distances(_distances),
labels(_labels),
data(_data),
centers(_centers)
centers(_centers),
onlyDistance(_onlyDistance)
{
}
@ -183,6 +185,12 @@ public:
for( int i = begin; i<end; ++i)
{
const float *sample = data.ptr<float>(i);
if (onlyDistance)
{
const float* center = centers.ptr<float>(labels[i]);
distances[i] = normL2Sqr(sample, center, dims);
continue;
}
int k_best = 0;
double min_dist = DBL_MAX;
@ -210,6 +218,7 @@ private:
int *labels;
const Mat& data;
const Mat& centers;
bool onlyDistance;
};
}
@ -259,6 +268,7 @@ double cv::kmeans( InputArray _data, int K,
Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type);
std::vector<int> counters(K);
std::vector<Vec2f> _box(dims);
Mat dists(1, N, CV_64F);
Vec2f* box = &_box[0];
double best_compactness = DBL_MAX, compactness = 0;
RNG& rng = theRNG();
@ -430,19 +440,16 @@ double cv::kmeans( InputArray _data, int K,
}
}
if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon )
break;
bool isLastIter = (++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon);
// assign labels
Mat dists(1, N, CV_64F);
dists = 0;
double* dist = dists.ptr<double>(0);
parallel_for_(Range(0, N),
KMeansDistanceComputer(dist, labels, data, centers));
compactness = 0;
for( i = 0; i < N; i++ )
{
compactness += dist[i];
}
parallel_for_(Range(0, N), KMeansDistanceComputer(dist, labels, data, centers, isLastIter));
compactness = sum(dists)[0];
if (isLastIter)
break;
}
if( compactness < best_compactness )

View File

@ -2748,21 +2748,23 @@ public:
protected:
void run(int inVariant)
{
RNG& rng = ts->get_rng();
int i, iter = 0, N = 0, N0 = 0, K = 0, dims = 0;
Mat labels;
try
{
RNG& rng = theRNG();
const int MAX_DIM=5;
int MAX_POINTS = 100, maxIter = 100;
for( iter = 0; iter < maxIter; iter++ )
{
ts->update_context(this, iter, true);
dims = rng.uniform(inVariant == MAT_1_N_CDIM ? 2 : 1, MAX_DIM+1);
N = rng.uniform(1, MAX_POINTS+1);
N = rng.uniform(2, MAX_POINTS+1);
N0 = rng.uniform(1, MAX(N/10, 2));
K = rng.uniform(1, N+1);
Mat centers;
if (inVariant == VECTOR)
{
dims = 2;
@ -2775,7 +2777,7 @@ protected:
data[i] = data0[rng.uniform(0, N0)];
kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0),
5, KMEANS_PP_CENTERS);
5, KMEANS_PP_CENTERS, centers);
}
else
{
@ -2820,28 +2822,24 @@ protected:
}
kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0),
5, KMEANS_PP_CENTERS);
5, KMEANS_PP_CENTERS, centers);
}
ASSERT_EQ(centers.rows, K);
ASSERT_EQ(labels.rows, N);
Mat hist(K, 1, CV_32S, Scalar(0));
for( i = 0; i < N; i++ )
{
int l = labels.at<int>(i);
CV_Assert(0 <= l && l < K);
ASSERT_GE(l, 0);
ASSERT_LT(l, K);
hist.at<int>(l)++;
}
for( i = 0; i < K; i++ )
CV_Assert( hist.at<int>(i) != 0 );
ASSERT_GT(hist.at<int>(i), 0);
}
}
catch(...)
{
ts->printf(cvtest::TS::LOG,
"context: iteration=%d, N=%d, N0=%d, K=%d\n",
iter, N, N0, K);
std::cout << labels << std::endl;
ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
}
}
};
@ -2859,6 +2857,35 @@ TEST_P(Core_KMeans_InputVariants, singular)
INSTANTIATE_TEST_CASE_P(AllVariants, Core_KMeans_InputVariants, KMeansInputVariant::all());
// Checks that the compactness value returned by kmeans() matches the sum of
// squared distances between every sample and the center selected by its
// returned label, for cluster counts K = 1, 2, 4, ..., N.
TEST(Core_KMeans, compactness)
{
    const int N = 1024;
    const int attempts = 4;
    const TermCriteria crit = TermCriteria(TermCriteria::COUNT, 5, 0); // low number of iterations
    cvtest::TS& ts = *cvtest::TS::ptr();
    for (int K = 1; K <= N; K *= 2)
    {
        // Fresh random 2-channel samples are generated for every K.
        Mat data(N, 1, CV_32FC2);
        cvtest::randUni(ts.get_rng(), data, Scalar(-200, -200), Scalar(200, 200));

        Mat labels, centers;
        double compactness = kmeans(data, K, labels, crit, attempts, KMEANS_PP_CENTERS, centers);
        // View the centers as 2-channel elements so each one can be read as a Point2f below.
        centers = centers.reshape(2);

        // The size checks are fatal: the loop below indexes both matrices, so
        // continuing after a mismatch would read out of range.
        ASSERT_EQ(labels.rows, N);
        ASSERT_EQ(centers.rows, K);
        EXPECT_GE(compactness, 0.0);

        // Recompute the compactness from the returned labels and centers.
        double expected = 0.0;
        for (int i = 0; i < N; ++i)
        {
            int l = labels.at<int>(i);
            // A label outside [0, K) must not be used to index the centers.
            ASSERT_GE(l, 0);
            ASSERT_LT(l, K);
            Point2f d = data.at<Point2f>(i) - centers.at<Point2f>(l);
            expected += d.x * d.x + d.y * d.y;
        }
        EXPECT_NEAR(expected, compactness, expected * 1e-8);

        // With as many clusters as samples the test expects zero compactness.
        if (K == N)
            EXPECT_DOUBLE_EQ(compactness, 0.0);
    }
}
TEST(CovariationMatrixVectorOfMat, accuracy)
{
unsigned int col_problem_size = 8, row_problem_size = 8, vector_size = 16;

View File

@ -53,7 +53,7 @@ int main( int /*argc*/, char** /*argv*/ )
randShuffle(points, 1, &rng);
kmeans(points, clusterCount, labels,
double compactness = kmeans(points, clusterCount, labels,
TermCriteria( TermCriteria::EPS+TermCriteria::COUNT, 10, 1.0),
3, KMEANS_PP_CENTERS, centers);
@ -65,6 +65,12 @@ int main( int /*argc*/, char** /*argv*/ )
Point ipt = points.at<Point2f>(i);
circle( img, ipt, 2, colorTab[clusterIdx], FILLED, LINE_AA );
}
for (i = 0; i < centers.rows; ++i)
{
Point2f c = centers.at<Point2f>(i);
circle( img, c, 40, colorTab[i], 1, LINE_AA );
}
cout << "Compactness: " << compactness << endl;
imshow("clusters", img);