Merge pull request #18084 from pemmanuelviel:pev--add-DNA-distances

This commit is contained in:
Alexander Alekhin 2020-08-20 13:26:02 +00:00
commit fc0f9da7a7
3 changed files with 162 additions and 0 deletions

View File

@ -95,6 +95,8 @@ using ::cvflann::MaxDistance;
using ::cvflann::HammingLUT;
using ::cvflann::Hamming;
using ::cvflann::Hamming2;
using ::cvflann::DNAmmingLUT;
using ::cvflann::DNAmming2;
using ::cvflann::HistIntersectionDistance;
using ::cvflann::HellingerDistance;
using ::cvflann::ChiSquareDistance;
@ -131,6 +133,14 @@ performed using library calls, if available. Lookup table implementation is used
cv::flann::Hamming2 - %Hamming distance functor. Population count is
implemented in 12 arithmetic operations (one of which is multiplication).
cv::flann::DNAmmingLUT - %Adaptation of the Hamming distance functor to DNA comparison.
As the four bases A, C, G, T of DNA (or A, G, C, U for RNA) can be coded on 2 bits,
it counts the differing bit pairs between two sequences using a lookup table implementation.
cv::flann::DNAmming2 - %Adaptation of the Hamming distance functor to DNA comparison.
The count of differing bases is vectorised using arithmetic operations on standard
registers (AVX2 and AVX-512 support should come in the near future).
cv::flann::HistIntersectionDistance - The histogram
intersection distance functor.

View File

@ -128,6 +128,7 @@ enum flann_distance_t
FLANN_DIST_KULLBACK_LEIBLER = 8,
FLANN_DIST_KL = 8,
FLANN_DIST_HAMMING = 9,
FLANN_DIST_DNAMMING = 10,
// deprecated constants, should use the FLANN_DIST_* ones instead
EUCLIDEAN = 1,

View File

@ -748,6 +748,157 @@ private:
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Distance functor for sequences in which every 2-bit group of a byte encodes one
 * symbol (e.g. a DNA/RNA base), implemented with a 256-entry lookup table.
 *
 * The distance is the number of 2-bit groups that differ between the two byte
 * sequences. A group counts once whether one or both of its bits differ, so this
 * is NOT the plain bit-wise Hamming distance.
 */
struct DNAmmingLUT
{
    typedef False is_kdtree_distance;
    typedef False is_vector_space_distance;

    typedef unsigned char ElementType;
    typedef int ResultType;
    typedef ElementType CentersType;

    /**
     * Counts the 2-bit groups that differ between a and b.
     *
     * @param a    first sequence, `size` bytes long
     * @param b    second sequence; it is reinterpreted as a pointer to `size` bytes
     * @param size number of bytes to compare
     * @return number of non-zero 2-bit groups in a[i] ^ b[i], summed over all bytes
     */
    template<typename Iterator2>
    ResultType operator()(const unsigned char* a, const Iterator2 b, size_t size) const
    {
        const unsigned char* const lut = pairCountTable();
        const unsigned char* b2 = reinterpret_cast<const unsigned char*> (b);

        ResultType result = 0;
        for (size_t i = 0; i < size; i++) {
            result += lut[a[i] ^ b2[i]];
        }
        return result;
    }

    /**
     * Overload selected when the second operand is a ZeroIterator.
     *
     * b is never read: the result is the number of non-zero 2-bit groups in a,
     * which is what the generic overload returns against an all-zero sequence.
     *
     * @param a    sequence, `size` bytes long
     * @param b    unused
     * @param size number of bytes to inspect
     */
    ResultType operator()(const unsigned char* a, const ZeroIterator<unsigned char> b, size_t size) const
    {
        (void)b;
        const unsigned char* const lut = pairCountTable();

        ResultType result = 0;
        for (size_t i = 0; i < size; i++) {
            result += lut[a[i]];
        }
        return result;
    }

private:
    /**
     * Returns the shared lookup table: entry v holds the number of non-zero
     * 2-bit groups in the byte value v (0..4).
     *
     * Kept in a single place so both operator() overloads use the same data.
     */
    static const unsigned char* pairCountTable()
    {
        static const unsigned char table[] =
        {
            0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
            1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
            1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
            2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
            1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
            2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
            1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
            2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
        };
        return table;
    }
};
/**
 * Distance functor for sequences in which every 2-bit group encodes one symbol
 * (e.g. a DNA/RNA base), computed without a lookup table: the inputs are XOR-ed
 * one machine word at a time and the non-zero 2-bit groups of each word are
 * counted with a handful of arithmetic operations.
 *
 * The word is uint64_t when FLANN_PLATFORM_64_BIT is defined and uint32_t
 * otherwise. The `size` given to operator() is a byte count and must be a
 * multiple of the word size; this is checked through CV_DbgAssert only, and
 * because the word count is obtained by integer division, bytes past the last
 * full word are not compared.
 */
template<typename T>
struct DNAmming2
{
    typedef False is_kdtree_distance;
    typedef False is_vector_space_distance;

    typedef T ElementType;
    typedef int ResultType;
    typedef ElementType CentersType;

    /**
     * Counts the non-zero 2-bit groups of a 32-bit word.
     *
     * Adapted from popcount_3() in http://en.wikipedia.org/wiki/Hamming_weight :
     * the first step is replaced by OR-ing the two bits of every pair into the
     * pair's low bit, so each 2-bit group contributes at most 1 to the sum.
     */
    unsigned int popcnt32(uint32_t n) const
    {
        n = ((n >> 1) | n) & 0x55555555;
        n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
        return (((n + (n >> 4))& 0x0F0F0F0F)* 0x01010101) >> 24;
    }

#ifdef FLANN_PLATFORM_64_BIT
    /** 64-bit counterpart of popcnt32(): counts the non-zero 2-bit groups of n. */
    unsigned int popcnt64(uint64_t n) const
    {
        n = ((n >> 1) | n) & 0x5555555555555555;
        n = (n & 0x3333333333333333) + ((n >> 2) & 0x3333333333333333);
        return (((n + (n >> 4))& 0x0f0f0f0f0f0f0f0f)* 0x0101010101010101) >> 56;
    }
#endif

    /**
     * Counts the 2-bit groups that differ between a and b.
     *
     * @param a    first sequence; reinterpreted as an array of machine words
     * @param b    second sequence; reinterpreted as an array of machine words
     * @param size length of both sequences in bytes, a multiple of the word size
     * @return number of non-zero 2-bit groups in a ^ b over all full words
     */
    template <typename Iterator1, typename Iterator2>
    ResultType operator()(const Iterator1 a, const Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
    {
        CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (8 bytes on 64-bit platforms, 4 bytes otherwise)");

        const LongWord* pa = reinterpret_cast<const LongWord*>(a);
        const LongWord* pb = reinterpret_cast<const LongWord*>(b);
        const size_t words = size / long_word_size_;

        ResultType result = 0;
        for (size_t i = 0; i < words; ++i) {
            result += countPairs(pa[i] ^ pb[i]);
        }
        return result;
    }

    /**
     * Overload selected when the second operand is a ZeroIterator.
     *
     * b is never read: the result is the number of non-zero 2-bit groups in a.
     *
     * @param a    sequence; reinterpreted as an array of machine words
     * @param b    unused
     * @param size length of the sequence in bytes, a multiple of the word size
     */
    template <typename Iterator1>
    ResultType operator()(const Iterator1 a, ZeroIterator<unsigned char> b, size_t size, ResultType /*worst_dist*/ = -1) const
    {
        CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (8 bytes on 64-bit platforms, 4 bytes otherwise)");
        (void)b;

        const LongWord* pa = reinterpret_cast<const LongWord*>(a);
        const size_t words = size / long_word_size_;

        ResultType result = 0;
        for (size_t i = 0; i < words; ++i) {
            result += countPairs(pa[i]);
        }
        return result;
    }

private:
    // The platform switch lives here only, so both operator() overloads share
    // a single loop body instead of one copy per word width.
#ifdef FLANN_PLATFORM_64_BIT
    typedef uint64_t LongWord;
    unsigned int countPairs(LongWord w) const { return popcnt64(w); }
#else
    typedef uint32_t LongWord;
    unsigned int countPairs(LongWord w) const { return popcnt32(w); }
#endif

    /** Number of bytes in one LongWord. */
    static const size_t long_word_size_= sizeof(LongWord)/sizeof(unsigned char);
};
template<class T>
struct HistIntersectionDistance
{