mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 06:03:15 +08:00
Merge pull request #18084 from pemmanuelviel:pev--add-DNA-distances
This commit is contained in:
commit
fc0f9da7a7
@ -95,6 +95,8 @@ using ::cvflann::MaxDistance;
|
||||
using ::cvflann::HammingLUT;
|
||||
using ::cvflann::Hamming;
|
||||
using ::cvflann::Hamming2;
|
||||
using ::cvflann::DNAmmingLUT;
|
||||
using ::cvflann::DNAmming2;
|
||||
using ::cvflann::HistIntersectionDistance;
|
||||
using ::cvflann::HellingerDistance;
|
||||
using ::cvflann::ChiSquareDistance;
|
||||
@ -131,6 +133,14 @@ performed using library calls, if available. Lookup table implementation is used
|
||||
cv::flann::Hamming2 - %Hamming distance functor. Population count is
|
||||
implemented in 12 arithmetic operations (one of which is multiplication).
|
||||
|
||||
cv::flann::DNAmmingLUT - %Adaptation of the Hamming distance functor to DNA comparison.
|
||||
As the four bases A, C, G, T of the DNA (or A, G, C, U for RNA) can be coded on 2 bits,
|
||||
it counts the differing bit pairs between two sequences using a lookup table implementation.
|
||||
|
||||
cv::flann::DNAmming2 - %Adaptation of the Hamming distance functor to DNA comparison.
|
||||
Base differences are counted in parallel using arithmetic operations on standard
|
||||
registers (AVX2 and AVX-512 versions may come in the near future).
|
||||
|
||||
cv::flann::HistIntersectionDistance - The histogram
|
||||
intersection distance functor.
|
||||
|
||||
|
@ -128,6 +128,7 @@ enum flann_distance_t
|
||||
FLANN_DIST_KULLBACK_LEIBLER = 8,
|
||||
FLANN_DIST_KL = 8,
|
||||
FLANN_DIST_HAMMING = 9,
|
||||
FLANN_DIST_DNAMMING = 10,
|
||||
|
||||
// deprecated constants, should use the FLANN_DIST_* ones instead
|
||||
EUCLIDEAN = 1,
|
||||
|
@ -748,6 +748,157 @@ private:
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct DNAmmingLUT
|
||||
{
|
||||
typedef False is_kdtree_distance;
|
||||
typedef False is_vector_space_distance;
|
||||
|
||||
typedef unsigned char ElementType;
|
||||
typedef int ResultType;
|
||||
typedef ElementType CentersType;
|
||||
|
||||
/** this will count the bits in a ^ b
|
||||
*/
|
||||
template<typename Iterator2>
|
||||
ResultType operator()(const unsigned char* a, const Iterator2 b, size_t size) const
|
||||
{
|
||||
static const uchar popCountTable[] =
|
||||
{
|
||||
0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
|
||||
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
|
||||
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
|
||||
};
|
||||
ResultType result = 0;
|
||||
const unsigned char* b2 = reinterpret_cast<const unsigned char*> (b);
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
result += popCountTable[a[i] ^ b2[i]];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
ResultType operator()(const unsigned char* a, const ZeroIterator<unsigned char> b, size_t size) const
|
||||
{
|
||||
(void)b;
|
||||
static const uchar popCountTable[] =
|
||||
{
|
||||
0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
|
||||
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
|
||||
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
|
||||
};
|
||||
ResultType result = 0;
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
result += popCountTable[a[i]];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<typename T>
|
||||
struct DNAmming2
|
||||
{
|
||||
typedef False is_kdtree_distance;
|
||||
typedef False is_vector_space_distance;
|
||||
|
||||
typedef T ElementType;
|
||||
typedef int ResultType;
|
||||
typedef ElementType CentersType;
|
||||
|
||||
/** This is popcount_3() from:
|
||||
* http://en.wikipedia.org/wiki/Hamming_weight */
|
||||
unsigned int popcnt32(uint32_t n) const
|
||||
{
|
||||
n = ((n >> 1) | n) & 0x55555555;
|
||||
n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
|
||||
return (((n + (n >> 4))& 0x0F0F0F0F)* 0x01010101) >> 24;
|
||||
}
|
||||
|
||||
#ifdef FLANN_PLATFORM_64_BIT
|
||||
unsigned int popcnt64(uint64_t n) const
|
||||
{
|
||||
n = ((n >> 1) | n) & 0x5555555555555555;
|
||||
n = (n & 0x3333333333333333) + ((n >> 2) & 0x3333333333333333);
|
||||
return (((n + (n >> 4))& 0x0f0f0f0f0f0f0f0f)* 0x0101010101010101) >> 56;
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename Iterator1, typename Iterator2>
|
||||
ResultType operator()(const Iterator1 a, const Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
|
||||
{
|
||||
CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (i.e. 8)");
|
||||
|
||||
#ifdef FLANN_PLATFORM_64_BIT
|
||||
const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
|
||||
const uint64_t* pb = reinterpret_cast<const uint64_t*>(b);
|
||||
ResultType result = 0;
|
||||
size /= long_word_size_;
|
||||
for(size_t i = 0; i < size; ++i ) {
|
||||
result += popcnt64(*pa ^ *pb);
|
||||
++pa;
|
||||
++pb;
|
||||
}
|
||||
#else
|
||||
const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
|
||||
const uint32_t* pb = reinterpret_cast<const uint32_t*>(b);
|
||||
ResultType result = 0;
|
||||
size /= long_word_size_;
|
||||
for(size_t i = 0; i < size; ++i ) {
|
||||
result += popcnt32(*pa ^ *pb);
|
||||
++pa;
|
||||
++pb;
|
||||
}
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
template <typename Iterator1>
|
||||
ResultType operator()(const Iterator1 a, ZeroIterator<unsigned char> b, size_t size, ResultType /*worst_dist*/ = -1) const
|
||||
{
|
||||
CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (i.e. 8)");
|
||||
|
||||
(void)b;
|
||||
#ifdef FLANN_PLATFORM_64_BIT
|
||||
const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
|
||||
ResultType result = 0;
|
||||
size /= long_word_size_;
|
||||
for(size_t i = 0; i < size; ++i ) {
|
||||
result += popcnt64(*pa);
|
||||
++pa;
|
||||
}
|
||||
#else
|
||||
const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
|
||||
ResultType result = 0;
|
||||
size /= long_word_size_;
|
||||
for(size_t i = 0; i < size; ++i ) {
|
||||
result += popcnt32(*pa);
|
||||
++pa;
|
||||
}
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
#ifdef FLANN_PLATFORM_64_BIT
|
||||
static const size_t long_word_size_= sizeof(uint64_t)/sizeof(unsigned char);
|
||||
#else
|
||||
static const size_t long_word_size_= sizeof(uint32_t)/sizeof(unsigned char);
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
||||
template<class T>
|
||||
struct HistIntersectionDistance
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user