BFMatcher

match

radiusMatch
Konstantin Matskevich 2013-12-27 13:04:02 +04:00
parent ee331001f5
commit ca5689e0db
8 changed files with 1947 additions and 63 deletions

View File

@ -113,6 +113,7 @@ public:
virtual Mat getMat(int idx=-1) const;
virtual UMat getUMat(int idx=-1) const;
virtual void getMatVector(std::vector<Mat>& mv) const;
virtual void getUMatVector(std::vector<UMat>& umv) const;
virtual cuda::GpuMat getGpuMat() const;
virtual ogl::Buffer getOGlBuffer() const;
void* getObj() const;
@ -134,7 +135,7 @@ public:
virtual size_t step(int i=-1) const;
bool isMat() const;
bool isUMat() const;
bool isMatVectot() const;
bool isMatVector() const;
bool isUMatVector() const;
bool isMatx();

View File

@ -110,7 +110,7 @@ inline _InputArray::~_InputArray() {}
inline bool _InputArray::isMat() const { return kind() == _InputArray::MAT; }
inline bool _InputArray::isUMat() const { return kind() == _InputArray::UMAT; }
inline bool _InputArray::isMatVectot() const { return kind() == _InputArray::STD_VECTOR_MAT; }
inline bool _InputArray::isMatVector() const { return kind() == _InputArray::STD_VECTOR_MAT; }
inline bool _InputArray::isUMatVector() const { return kind() == _InputArray::STD_VECTOR_UMAT; }
inline bool _InputArray::isMatx() { return kind() == _InputArray::MATX; }

View File

@ -1324,6 +1324,42 @@ void _InputArray::getMatVector(std::vector<Mat>& mv) const
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
}
void _InputArray::getUMatVector(std::vector<UMat>& umv) const
{
int k = kind();
int accessFlags = flags & ACCESS_MASK;
if( k == NONE )
{
umv.clear();
return;
}
if( k == STD_VECTOR_MAT )
{
const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
size_t i, n = v.size();
umv.resize(n);
for( i = 0; i < n; i++ )
umv[i] = v[i].getUMat(accessFlags);
return;
}
if( k == STD_VECTOR_UMAT )
{
const std::vector<UMat>& v = *(const std::vector<UMat>*)obj;
size_t i, n = v.size();
umv.resize(n);
for( i = 0; i < n; i++ )
umv[i] = v[i];
return;
}
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
}
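The new getUMatVector lets OpenCL-aware code pull an InputArrayOfArrays apart as UMats, so data that is already on the device stays there. A minimal usage sketch, not part of the commit (the helper name totalRows is made up):

#include <opencv2/core.hpp>
#include <vector>

// Sum the rows of every descriptor matrix, regardless of whether the caller
// passed std::vector<cv::Mat> or std::vector<cv::UMat>.
static size_t totalRows(cv::InputArrayOfArrays descriptors)
{
    std::vector<cv::UMat> umats;
    descriptors.getUMatVector(umats);
    size_t rows = 0;
    for (size_t i = 0; i < umats.size(); i++)
        rows += umats[i].rows;
    return rows;
}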
cuda::GpuMat _InputArray::getGpuMat() const
{
int k = kind();

View File

@ -998,7 +998,7 @@ public:
* Add descriptors to the train descriptor collection.
* descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from one image.
*/
CV_WRAP virtual void add( const std::vector<Mat>& descriptors );
CV_WRAP virtual void add( InputArray descriptors );
/*
* Get train descriptors collection.
*/
@ -1034,29 +1034,29 @@ public:
* The method train() is run inside these methods.
*/
// Find one best match for each query descriptor (if mask is empty).
CV_WRAP void match( const Mat& queryDescriptors, const Mat& trainDescriptors,
CV_OUT std::vector<DMatch>& matches, const Mat& mask=Mat() ) const;
CV_WRAP void match( InputArray queryDescriptors, InputArray trainDescriptors,
CV_OUT std::vector<DMatch>& matches, InputArray mask=Mat() ) const;
// Find k best matches for each query descriptor (in increasing order of distances).
// compactResult is used when the mask is not empty. If compactResult is false, the matches
// vector will have the same size as the number of queryDescriptors rows. If compactResult is true,
// the matches vector will not contain matches for fully masked-out query descriptors.
CV_WRAP void knnMatch( const Mat& queryDescriptors, const Mat& trainDescriptors,
CV_WRAP void knnMatch( InputArray queryDescriptors, InputArray trainDescriptors,
CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
const Mat& mask=Mat(), bool compactResult=false ) const;
InputArray mask=Mat(), bool compactResult=false ) const;
// Find best matches for each query descriptor which have distance less than
// maxDistance (in increasing order of distances).
void radiusMatch( const Mat& queryDescriptors, const Mat& trainDescriptors,
void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors,
std::vector<std::vector<DMatch> >& matches, float maxDistance,
const Mat& mask=Mat(), bool compactResult=false ) const;
InputArray mask=Mat(), bool compactResult=false ) const;
/*
* Group of methods to match descriptors from one image to image set.
* See description of similar methods for matching image pair above.
*/
CV_WRAP void match( const Mat& queryDescriptors, CV_OUT std::vector<DMatch>& matches,
CV_WRAP void match( InputArray queryDescriptors, CV_OUT std::vector<DMatch>& matches,
const std::vector<Mat>& masks=std::vector<Mat>() );
CV_WRAP void knnMatch( const Mat& queryDescriptors, CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
CV_WRAP void knnMatch( InputArray queryDescriptors, CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
void radiusMatch( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
void radiusMatch( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
// Reads matcher object from a file node
@ -1101,10 +1101,10 @@ protected:
// In fact the matching is implemented only by the following two methods. These methods suppose
// that the class object has been trained already. Public match methods call these methods
// after calling train().
virtual void knnMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false ) = 0;
virtual void radiusMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false ) = 0;
virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
InputArrayOfArrays masks=std::vector<Mat>(), bool compactResult=false ) = 0;
virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
InputArrayOfArrays masks=std::vector<Mat>(), bool compactResult=false ) = 0;
static bool isPossibleMatch( const Mat& mask, int queryIdx, int trainIdx );
static bool isMaskedOut( const std::vector<Mat>& masks, int queryIdx );
@ -1114,6 +1114,7 @@ protected:
// Collection of descriptors from train images.
std::vector<Mat> trainDescCollection;
std::vector<UMat> utrainDescCollection;
};
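Because the pair-matching methods now take InputArray, UMat descriptors can be passed straight through; this is what routes BFMatcher onto the OpenCL kernels added later in this commit when OpenCL is available. A minimal usage sketch, not part of the diff:

#include <opencv2/core.hpp>
#include <opencv2/features2d.hpp>
#include <vector>

// Upload the descriptors once as UMat and match on the device when possible.
void matchOnDevice(const cv::Mat& queryCpu, const cv::Mat& trainCpu,
                   std::vector<cv::DMatch>& matches)
{
    cv::UMat query, train;
    queryCpu.copyTo(query);
    trainCpu.copyTo(train);

    cv::BFMatcher matcher(cv::NORM_L2);
    matcher.match(query, train, matches);   // InputArray overload accepts UMat directly
}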
/*
@ -1137,10 +1138,16 @@ public:
AlgorithmInfo* info() const;
protected:
virtual void knnMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
virtual void radiusMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
InputArrayOfArrays masks=std::vector<Mat>(), bool compactResult=false );
virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
InputArrayOfArrays masks=std::vector<Mat>(), bool compactResult=false );
bool ocl_knnMatch(InputArray query, InputArray train, std::vector< std::vector<DMatch> > &matches,
int k, int dstType, bool compactResult=false);
bool ocl_radiusMatch(InputArray query, InputArray train, std::vector< std::vector<DMatch> > &matches,
float maxDistance, int dstType, bool compactResult=false);
bool ocl_match(InputArray query, InputArray train, std::vector< std::vector<DMatch> > &matches, int dstType);
int normType;
bool crossCheck;
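The new ocl_match/ocl_knnMatch/ocl_radiusMatch helpers imply a device fast path inside the *Impl methods; the actual dispatch lives in the suppressed matchers.cpp diff above, so the following is only a plausible sketch of the guard such a path would use, with an invented function name:

#include <opencv2/core.hpp>
#include <opencv2/core/ocl.hpp>

// Illustrative only: try the OpenCL helpers when OpenCL is usable and the
// descriptors are already UMats; otherwise fall back to the CPU path.
static bool preferOpenCLPath(cv::InputArray query, cv::InputArray train)
{
    return cv::ocl::useOpenCL() && query.isUMat() && train.isUMat();
}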
@ -1175,10 +1182,10 @@ protected:
const Mat& indices, const Mat& distances,
std::vector<std::vector<DMatch> >& matches );
virtual void knnMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
virtual void radiusMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
InputArrayOfArrays masks=std::vector<Mat>(), bool compactResult=false );
virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
InputArrayOfArrays masks=std::vector<Mat>(), bool compactResult=false );
Ptr<flann::IndexParams> indexParams;
Ptr<flann::SearchParams> searchParams;

File diff suppressed because it is too large

View File

@ -0,0 +1,789 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Nathan, liujun@multicorewareinc.com
// Peng Xiao, pengxiao@outlook.com
// Baichuan Su, baichuan@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
#define MAX_FLOAT 3.40282e+038f
#ifndef T
#define T float
#endif
#ifndef BLOCK_SIZE
#define BLOCK_SIZE 16
#endif
#ifndef MAX_DESC_LEN
#define MAX_DESC_LEN 64
#endif
#ifndef DIST_TYPE
#define DIST_TYPE 2
#endif
// dirty fix for non-template support
#if (DIST_TYPE == 2) // L1Dist
# ifdef T_FLOAT
# define DIST(x, y) fabs((x) - (y))
typedef float value_type;
typedef float result_type;
# else
# define DIST(x, y) abs((x) - (y))
typedef int value_type;
typedef int result_type;
# endif
#define DIST_RES(x) (x)
#elif (DIST_TYPE == 4) // L2Dist
#define DIST(x, y) (((x) - (y)) * ((x) - (y)))
typedef float value_type;
typedef float result_type;
#define DIST_RES(x) sqrt(x)
#elif (DIST_TYPE == 6) // Hamming
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
inline int bit1Count(int v)
{
v = v - ((v >> 1) & 0x55555555); // reuse input as temporary
v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp
return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
}
#define DIST(x, y) bit1Count( (x) ^ (y) )
typedef int value_type;
typedef int result_type;
#define DIST_RES(x) (x)
#endif
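The kernel is specialized at build time through T, T_FLOAT, BLOCK_SIZE, MAX_DESC_LEN and DIST_TYPE. The host side that assembles these options is in the suppressed matchers.cpp diff, so this is only a hedged sketch of what such a build-options string typically looks like (the helper name is made up):

#include <sstream>
#include <string>

// For float descriptors (L1/L2) T is float and T_FLOAT is defined; for Hamming
// (DIST_TYPE == 6) the descriptors are packed ints, so T would be int instead.
static std::string bruteForceMatchBuildOptions(int distType, int blockSize, int maxDescLen)
{
    std::ostringstream opts;
    opts << "-D T=float -D T_FLOAT"
         << " -D BLOCK_SIZE=" << blockSize
         << " -D MAX_DESC_LEN=" << maxDescLen
         << " -D DIST_TYPE=" << distType;   // 2 = L1, 4 = L2, 6 = Hamming (per the #if chain above)
    return opts.str();
}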
inline result_type reduce_block(
__local value_type *s_query,
__local value_type *s_train,
int lidx,
int lidy
)
{
result_type result = 0;
#pragma unroll
for (int j = 0 ; j < BLOCK_SIZE ; j++)
{
result += DIST(
s_query[lidy * BLOCK_SIZE + j],
s_train[j * BLOCK_SIZE + lidx]);
}
return DIST_RES(result);
}
inline result_type reduce_block_match(
__local value_type *s_query,
__local value_type *s_train,
int lidx,
int lidy
)
{
result_type result = 0;
#pragma unroll
for (int j = 0 ; j < BLOCK_SIZE ; j++)
{
result += DIST(
s_query[lidy * BLOCK_SIZE + j],
s_train[j * BLOCK_SIZE + lidx]);
}
return (result);
}
inline result_type reduce_multi_block(
__local value_type *s_query,
__local value_type *s_train,
int block_index,
int lidx,
int lidy
)
{
result_type result = 0;
#pragma unroll
for (int j = 0 ; j < BLOCK_SIZE ; j++)
{
result += DIST(
s_query[lidy * MAX_DESC_LEN + block_index * BLOCK_SIZE + j],
s_train[j * BLOCK_SIZE + lidx]);
}
return result;
}
/* 2dim launch, global size: dim0 is (query rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, dim1 is BLOCK_SIZE
local size: dim0 is BLOCK_SIZE, dim1 is BLOCK_SIZE.
*/
__kernel void BruteForceMatch_UnrollMatch(
__global T *query,
__global T *train,
//__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__local float *sharebuffer,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
int step
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
const int groupidx = get_group_id(0);
__local value_type *s_query = (__local value_type *)sharebuffer;
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;
int queryIdx = groupidx * BLOCK_SIZE + lidy;
// load the query into local memory.
#pragma unroll
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)
{
int loadx = lidx + i * BLOCK_SIZE;
s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
}
float myBestDistance = MAX_FLOAT;
int myBestTrainIdx = -1;
// loopUnrolledCached to find the best trainIdx and best distance.
for (int t = 0, endt = (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; t++)
{
result_type result = 0;
#pragma unroll
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
{
//load a BLOCK_SIZE * BLOCK_SIZE block into local train.
const int loadx = lidx + i * BLOCK_SIZE;
s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
//synchronize to make sure every element for the reduce iteration has been written to shared memory.
barrier(CLK_LOCAL_MEM_FENCE);
result += reduce_multi_block(s_query, s_train, i, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
result = DIST_RES(result);
int trainIdx = t * BLOCK_SIZE + lidx;
if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance/* && mask(queryIdx, trainIdx)*/)
{
myBestDistance = result;
myBestTrainIdx = trainIdx;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
__local float *s_distance = (__local float*)(sharebuffer);
__local int* s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
//find BestMatch
s_distance += lidy * BLOCK_SIZE;
s_trainIdx += lidy * BLOCK_SIZE;
s_distance[lidx] = myBestDistance;
s_trainIdx[lidx] = myBestTrainIdx;
barrier(CLK_LOCAL_MEM_FENCE);
//reduce -- for now, every thread performs the full reduction itself.
#pragma unroll
for (int k = 0 ; k < BLOCK_SIZE; k++)
{
if (myBestDistance > s_distance[k])
{
myBestDistance = s_distance[k];
myBestTrainIdx = s_trainIdx[k];
}
}
if (queryIdx < query_rows && lidx == 0)
{
bestTrainIdx[queryIdx] = myBestTrainIdx;
bestDistance[queryIdx] = myBestDistance;
}
}
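Per the launch comment above BruteForceMatch_UnrollMatch, each BLOCK_SIZE x BLOCK_SIZE work-group handles BLOCK_SIZE query rows. A small sketch of that geometry with illustrative names (the real enqueue is in the suppressed host-side diff):

#include <cstddef>

// Global size: dim0 rounds the query rows up to a multiple of BLOCK_SIZE,
// dim1 is BLOCK_SIZE; local size is BLOCK_SIZE x BLOCK_SIZE.
static void unrollMatchLaunchSizes(int queryRows, int blockSize,
                                   size_t global[2], size_t local[2])
{
    global[0] = (size_t)((queryRows + blockSize - 1) / blockSize) * blockSize;
    global[1] = (size_t)blockSize;
    local[0]  = (size_t)blockSize;
    local[1]  = (size_t)blockSize;
    // sharebuffer for the unrolled kernel needs
    // (BLOCK_SIZE * MAX_DESC_LEN + BLOCK_SIZE * BLOCK_SIZE) * sizeof(float) bytes of local memory.
}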
__kernel void BruteForceMatch_Match(
__global T *query,
__global T *train,
//__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__local float *sharebuffer,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
int step
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
const int groupidx = get_group_id(0);
const int queryIdx = groupidx * BLOCK_SIZE + lidy;
float myBestDistance = MAX_FLOAT;
int myBestTrainIdx = -1;
__local value_type *s_query = (__local value_type *)sharebuffer;
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
// loop
for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
{
result_type result = 0;
for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; i++)
{
const int loadx = lidx + i * BLOCK_SIZE;
//load query and train into local memory
s_query[lidy * BLOCK_SIZE + lidx] = 0;
s_train[lidx * BLOCK_SIZE + lidy] = 0;
if (loadx < query_cols)
{
s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
}
barrier(CLK_LOCAL_MEM_FENCE);
result += reduce_block_match(s_query, s_train, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
result = DIST_RES(result);
const int trainIdx = t * BLOCK_SIZE + lidx;
if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance /*&& mask(queryIdx, trainIdx)*/)
{
myBestDistance = result;
myBestTrainIdx = trainIdx;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
__local float *s_distance = (__local float *)sharebuffer;
__local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
//findBestMatch
s_distance += lidy * BLOCK_SIZE;
s_trainIdx += lidy * BLOCK_SIZE;
s_distance[lidx] = myBestDistance;
s_trainIdx[lidx] = myBestTrainIdx;
barrier(CLK_LOCAL_MEM_FENCE);
//reduce -- for now, every thread performs the full reduction itself.
for (int k = 0 ; k < BLOCK_SIZE; k++)
{
if (myBestDistance > s_distance[k])
{
myBestDistance = s_distance[k];
myBestTrainIdx = s_trainIdx[k];
}
}
if (queryIdx < query_rows && lidx == 0)
{
bestTrainIdx[queryIdx] = myBestTrainIdx;
bestDistance[queryIdx] = myBestDistance;
}
}
//radius_unrollmatch
__kernel void BruteForceMatch_RadiusUnrollMatch(
__global T *query,
__global T *train,
float maxDistance,
//__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__global int *nMatches,
__local float *sharebuffer,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
int bestTrainIdx_cols,
int step,
int ostep
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
const int groupidx = get_group_id(0);
const int groupidy = get_group_id(1);
const int queryIdx = groupidy * BLOCK_SIZE + lidy;
const int trainIdx = groupidx * BLOCK_SIZE + lidx;
__local value_type *s_query = (__local value_type *)sharebuffer;
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
result_type result = 0;
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; ++i)
{
//load a BLOCK_SIZE * BLOCK_SIZE block into local train.
const int loadx = lidx + i * BLOCK_SIZE;
s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
//synchronize to make sure every element for the reduce iteration has been written to shared memory.
barrier(CLK_LOCAL_MEM_FENCE);
result += reduce_block(s_query, s_train, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
if (queryIdx < query_rows && trainIdx < train_rows &&
convert_float(result) < maxDistance/* && mask(queryIdx, trainIdx)*/)
{
int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
if(ind < bestTrainIdx_cols)
{
bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
}
}
}
//radius_match
__kernel void BruteForceMatch_RadiusMatch(
__global T *query,
__global T *train,
float maxDistance,
//__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__global int *nMatches,
__local float *sharebuffer,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
int bestTrainIdx_cols,
int step,
int ostep
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
const int groupidx = get_group_id(0);
const int groupidy = get_group_id(1);
const int queryIdx = groupidy * BLOCK_SIZE + lidy;
const int trainIdx = groupidx * BLOCK_SIZE + lidx;
__local value_type *s_query = (__local value_type *)sharebuffer;
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
result_type result = 0;
for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; ++i)
{
//load a BLOCK_SIZE * BLOCK_SIZE block into local train.
const int loadx = lidx + i * BLOCK_SIZE;
s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
//synchronize to make sure every element for the reduce iteration has been written to shared memory.
barrier(CLK_LOCAL_MEM_FENCE);
result += reduce_block(s_query, s_train, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
if (queryIdx < query_rows && trainIdx < train_rows &&
convert_float(result) < maxDistance/* && mask(queryIdx, trainIdx)*/)
{
int ind = atom_inc(nMatches + queryIdx);
if(ind < bestTrainIdx_cols)
{
bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
}
}
}
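The radius kernels count every hit per query with an uncapped atom_inc but only store the first bestTrainIdx_cols of them, so the host has to clamp the counter when it reads the results back. A hedged sketch of that read-back (the actual code is in the suppressed matchers.cpp diff; names and buffer layout are illustrative):

#include <opencv2/core.hpp>
#include <algorithm>
#include <vector>

// trainIdx/distance: queryRows x bestTrainIdx_cols; nMatches: 1 x queryRows (assumed layout).
static void collectRadiusMatches(const cv::Mat& trainIdx,   // CV_32S
                                 const cv::Mat& distance,   // CV_32F
                                 const cv::Mat& nMatches,   // CV_32S
                                 std::vector<std::vector<cv::DMatch> >& matches)
{
    matches.resize(trainIdx.rows);
    for (int q = 0; q < trainIdx.rows; q++)
    {
        int n = std::min(nMatches.at<int>(0, q), trainIdx.cols); // clamp to the stored columns
        matches[q].clear();
        for (int j = 0; j < n; j++)
            matches[q].push_back(cv::DMatch(q, trainIdx.at<int>(q, j), 0,
                                            distance.at<float>(q, j)));
    }
    // callers typically sort each row by distance afterwards
}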
__kernel void BruteForceMatch_knnUnrollMatch(
__global T *query,
__global T *train,
//__global float *mask,
__global int2 *bestTrainIdx,
__global float2 *bestDistance,
__local float *sharebuffer,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
int step
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
const int groupidx = get_group_id(0);
const int queryIdx = groupidx * BLOCK_SIZE + lidy;
__local value_type *s_query = (__local value_type *)sharebuffer;
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;
// load the query into local memory.
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)
{
int loadx = lidx + i * BLOCK_SIZE;
s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
}
float myBestDistance1 = MAX_FLOAT;
float myBestDistance2 = MAX_FLOAT;
int myBestTrainIdx1 = -1;
int myBestTrainIdx2 = -1;
//loopUnrolledCached
for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
{
result_type result = 0;
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
{
//load a BLOCK_SIZE * BLOCK_SIZE block into local train.
const int loadx = lidx + i * BLOCK_SIZE;
s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
//synchronize to make sure every element for the reduce iteration has been written to shared memory.
barrier(CLK_LOCAL_MEM_FENCE);
result += reduce_multi_block(s_query, s_train, i, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
result = DIST_RES(result);
const int trainIdx = t * BLOCK_SIZE + lidx;
if (queryIdx < query_rows && trainIdx < train_rows)
{
if (result < myBestDistance1)
{
myBestDistance2 = myBestDistance1;
myBestTrainIdx2 = myBestTrainIdx1;
myBestDistance1 = result;
myBestTrainIdx1 = trainIdx;
}
else if (result < myBestDistance2)
{
myBestDistance2 = result;
myBestTrainIdx2 = trainIdx;
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
__local float *s_distance = (local float *)sharebuffer;
__local int *s_trainIdx = (local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
// find BestMatch
s_distance += lidy * BLOCK_SIZE;
s_trainIdx += lidy * BLOCK_SIZE;
s_distance[lidx] = myBestDistance1;
s_trainIdx[lidx] = myBestTrainIdx1;
float bestDistance1 = MAX_FLOAT;
float bestDistance2 = MAX_FLOAT;
int bestTrainIdx1 = -1;
int bestTrainIdx2 = -1;
barrier(CLK_LOCAL_MEM_FENCE);
if (lidx == 0)
{
for (int i = 0 ; i < BLOCK_SIZE ; i++)
{
float val = s_distance[i];
if (val < bestDistance1)
{
bestDistance2 = bestDistance1;
bestTrainIdx2 = bestTrainIdx1;
bestDistance1 = val;
bestTrainIdx1 = s_trainIdx[i];
}
else if (val < bestDistance2)
{
bestDistance2 = val;
bestTrainIdx2 = s_trainIdx[i];
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
s_distance[lidx] = myBestDistance2;
s_trainIdx[lidx] = myBestTrainIdx2;
barrier(CLK_LOCAL_MEM_FENCE);
if (lidx == 0)
{
for (int i = 0 ; i < BLOCK_SIZE ; i++)
{
float val = s_distance[i];
if (val < bestDistance2)
{
bestDistance2 = val;
bestTrainIdx2 = s_trainIdx[i];
}
}
}
myBestDistance1 = bestDistance1;
myBestDistance2 = bestDistance2;
myBestTrainIdx1 = bestTrainIdx1;
myBestTrainIdx2 = bestTrainIdx2;
if (queryIdx < query_rows && lidx == 0)
{
bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
}
}
__kernel void BruteForceMatch_knnMatch(
__global T *query,
__global T *train,
//__global float *mask,
__global int2 *bestTrainIdx,
__global float2 *bestDistance,
__local float *sharebuffer,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
int step
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
const int groupidx = get_group_id(0);
const int queryIdx = groupidx * BLOCK_SIZE + lidy;
__local value_type *s_query = (__local value_type *)sharebuffer;
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
float myBestDistance1 = MAX_FLOAT;
float myBestDistance2 = MAX_FLOAT;
int myBestTrainIdx1 = -1;
int myBestTrainIdx2 = -1;
//loop
for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
{
result_type result = 0.0f;
for (int i = 0 ; i < (query_cols + BLOCK_SIZE -1) / BLOCK_SIZE ; i++)
{
const int loadx = lidx + i * BLOCK_SIZE;
//load query and train into local memory
s_query[lidy * BLOCK_SIZE + lidx] = 0;
s_train[lidx * BLOCK_SIZE + lidy] = 0;
if (loadx < query_cols)
{
s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
}
barrier(CLK_LOCAL_MEM_FENCE);
result += reduce_block_match(s_query, s_train, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
result = DIST_RES(result);
const int trainIdx = t * BLOCK_SIZE + lidx;
if (queryIdx < query_rows && trainIdx < train_rows /*&& mask(queryIdx, trainIdx)*/)
{
if (result < myBestDistance1)
{
myBestDistance2 = myBestDistance1;
myBestTrainIdx2 = myBestTrainIdx1;
myBestDistance1 = result;
myBestTrainIdx1 = trainIdx;
}
else if (result < myBestDistance2)
{
myBestDistance2 = result;
myBestTrainIdx2 = trainIdx;
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
__local float *s_distance = (__local float *)sharebuffer;
__local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
//findBestMatch
s_distance += lidy * BLOCK_SIZE;
s_trainIdx += lidy * BLOCK_SIZE;
s_distance[lidx] = myBestDistance1;
s_trainIdx[lidx] = myBestTrainIdx1;
float bestDistance1 = MAX_FLOAT;
float bestDistance2 = MAX_FLOAT;
int bestTrainIdx1 = -1;
int bestTrainIdx2 = -1;
barrier(CLK_LOCAL_MEM_FENCE);
if (lidx == 0)
{
for (int i = 0 ; i < BLOCK_SIZE ; i++)
{
float val = s_distance[i];
if (val < bestDistance1)
{
bestDistance2 = bestDistance1;
bestTrainIdx2 = bestTrainIdx1;
bestDistance1 = val;
bestTrainIdx1 = s_trainIdx[i];
}
else if (val < bestDistance2)
{
bestDistance2 = val;
bestTrainIdx2 = s_trainIdx[i];
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
s_distance[lidx] = myBestDistance2;
s_trainIdx[lidx] = myBestTrainIdx2;
barrier(CLK_LOCAL_MEM_FENCE);
if (lidx == 0)
{
for (int i = 0 ; i < BLOCK_SIZE ; i++)
{
float val = s_distance[i];
if (val < bestDistance2)
{
bestDistance2 = val;
bestTrainIdx2 = s_trainIdx[i];
}
}
}
myBestDistance1 = bestDistance1;
myBestDistance2 = bestDistance2;
myBestTrainIdx1 = bestTrainIdx1;
myBestTrainIdx2 = bestTrainIdx2;
if (queryIdx < query_rows && lidx == 0)
{
bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
}
}
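The knn kernels keep the two best candidates per query row, which is exactly what a Lowe-style ratio test consumes. A minimal sketch, not part of the commit:

#include <opencv2/core.hpp>
#include <opencv2/features2d.hpp>
#include <vector>

// Keep a match only when the best distance is clearly smaller than the second best.
std::vector<cv::DMatch> ratioTestMatches(const cv::UMat& query, const cv::UMat& train,
                                         float ratio = 0.8f)
{
    cv::BFMatcher matcher(cv::NORM_L2);
    std::vector<std::vector<cv::DMatch> > knn;
    matcher.knnMatch(query, train, knn, 2);          // k = 2: best and second best

    std::vector<cv::DMatch> good;
    for (size_t i = 0; i < knn.size(); i++)
        if (knn[i].size() == 2 && knn[i][0].distance < ratio * knn[i][1].distance)
            good.push_back(knn[i][0]);
    return good;
}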
kernel void BruteForceMatch_calcDistanceUnrolled(
__global T *query,
__global T *train,
//__global float *mask,
__global float *allDist,
__local float *sharebuffer,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
int step)
{
/* Todo */
}
kernel void BruteForceMatch_calcDistance(
__global T *query,
__global T *train,
//__global float *mask,
__global float *allDist,
__local float *sharebuffer,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
int step)
{
/* Todo */
}
kernel void BruteForceMatch_findBestMatch(
__global float *allDist,
__global int *bestTrainIdx,
__global float *bestDistance,
int k
)
{
/* Todo */
}

View File

@ -48,6 +48,7 @@
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/ocl.hpp"
#include <algorithm>

View File

@ -0,0 +1,213 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Zero Lin, Zero.Lin@amd.com
// Zhang Ying, zhangying913@gmail.com
// Yao Wang, bitwangyaoyao@gmail.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include "cvconfig.h"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
PARAM_TEST_CASE(BruteForceMatcher, int, int)
{
int distType;
int dim;
int queryDescCount;
int countFactor;
Mat query, train;
UMat uquery, utrain;
virtual void SetUp()
{
distType = GET_PARAM(0);
dim = GET_PARAM(1);
queryDescCount = 300; // must be an even number because in some cases we split the train data in two
countFactor = 4; // do not change it
cv::Mat queryBuf, trainBuf;
// Generate query descriptors randomly.
// Descriptor vector elements are integer values.
queryBuf.create(queryDescCount, dim, CV_32SC1);
rng.fill(queryBuf, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
queryBuf.convertTo(queryBuf, CV_32FC1);
// Generate train descriptors as follows:
// copy each query descriptor to the train set countFactor times
// and perturb one element of each copied descriptor, in
// ascending order. The general boundaries of the perturbation
// are (0.f, 1.f).
trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1);
float step = 1.f / countFactor;
for (int qIdx = 0; qIdx < queryDescCount; qIdx++)
{
cv::Mat queryDescriptor = queryBuf.row(qIdx);
for (int c = 0; c < countFactor; c++)
{
int tIdx = qIdx * countFactor + c;
cv::Mat trainDescriptor = trainBuf.row(tIdx);
queryDescriptor.copyTo(trainDescriptor);
int elem = rng(dim);
float diff = rng.uniform(step * c, step * (c + 1));
trainDescriptor.at<float>(0, elem) += diff;
}
}
queryBuf.convertTo(query, CV_32F);
trainBuf.convertTo(train, CV_32F);
query.copyTo(uquery);
train.copyTo(utrain);
}
};
#ifdef ANDROID
OCL_TEST_P(BruteForceMatcher, DISABLED_Match_Single)
#else
OCL_TEST_P(BruteForceMatcher, Match_Single)
#endif
{
BFMatcher matcher(distType);
std::vector<cv::DMatch> matches;
matcher.match(uquery, utrain, matches);
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
int badCount = 0;
for (size_t i = 0; i < matches.size(); i++)
{
cv::DMatch match = matches[i];
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
badCount++;
}
ASSERT_EQ(0, badCount);
}
#ifdef ANDROID
OCL_TEST_P(BruteForceMatcher, DISABLED_KnnMatch_2_Single)
#else
OCL_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
#endif
{
const int knn = 2;
BFMatcher matcher(distType);
std::vector< std::vector<cv::DMatch> > matches;
matcher.knnMatch(uquery, utrain, matches, knn);
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
int badCount = 0;
for (size_t i = 0; i < matches.size(); i++)
{
if ((int)matches[i].size() != knn)
badCount++;
else
{
int localBadCount = 0;
for (int k = 0; k < knn; k++)
{
cv::DMatch match = matches[i][k];
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0))
localBadCount++;
}
badCount += localBadCount > 0 ? 1 : 0;
}
}
ASSERT_EQ(0, badCount);
}
#ifdef ANDROID
OCL_TEST_P(BruteForceMatcher, DISABLED_RadiusMatch_Single)
#else
OCL_TEST_P(BruteForceMatcher, RadiusMatch_Single)
#endif
{
float radius = 1.f / countFactor;
BFMatcher matcher(distType);
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(uquery, utrain, matches, radius);
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
int badCount = 0;
for (size_t i = 0; i < matches.size(); i++)
{
if ((int)matches[i].size() != 1)
{
badCount++;
}
else
{
cv::DMatch match = matches[i][0];
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
badCount++;
}
}
ASSERT_EQ(0, badCount);
}
OCL_INSTANTIATE_TEST_CASE_P(Matcher, BruteForceMatcher, Combine( Values((int)NORM_L1, (int)NORM_L2),
Values(57, 64, 83, 128, 179, 256, 304) ) );
}//ocl
}//cvtest
#endif //HAVE_OPENCL