mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 22:44:02 +08:00
Merge pull request #2142 from KonstantinMatskevich:ocl_tapi_bfmatcher
This commit is contained in:
commit
5424c55565
@ -113,6 +113,7 @@ public:
|
||||
virtual Mat getMat(int idx=-1) const;
|
||||
virtual UMat getUMat(int idx=-1) const;
|
||||
virtual void getMatVector(std::vector<Mat>& mv) const;
|
||||
virtual void getUMatVector(std::vector<UMat>& umv) const;
|
||||
virtual cuda::GpuMat getGpuMat() const;
|
||||
virtual ogl::Buffer getOGlBuffer() const;
|
||||
void* getObj() const;
|
||||
@ -134,7 +135,7 @@ public:
|
||||
virtual size_t step(int i=-1) const;
|
||||
bool isMat() const;
|
||||
bool isUMat() const;
|
||||
bool isMatVectot() const;
|
||||
bool isMatVector() const;
|
||||
bool isUMatVector() const;
|
||||
bool isMatx();
|
||||
|
||||
|
@ -110,7 +110,7 @@ inline _InputArray::~_InputArray() {}
|
||||
|
||||
inline bool _InputArray::isMat() const { return kind() == _InputArray::MAT; }
|
||||
inline bool _InputArray::isUMat() const { return kind() == _InputArray::UMAT; }
|
||||
inline bool _InputArray::isMatVectot() const { return kind() == _InputArray::STD_VECTOR_MAT; }
|
||||
inline bool _InputArray::isMatVector() const { return kind() == _InputArray::STD_VECTOR_MAT; }
|
||||
inline bool _InputArray::isUMatVector() const { return kind() == _InputArray::STD_VECTOR_UMAT; }
|
||||
inline bool _InputArray::isMatx() { return kind() == _InputArray::MATX; }
|
||||
|
||||
|
@ -1324,6 +1324,42 @@ void _InputArray::getMatVector(std::vector<Mat>& mv) const
|
||||
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
|
||||
}
|
||||
|
||||
void _InputArray::getUMatVector(std::vector<UMat>& umv) const
|
||||
{
|
||||
int k = kind();
|
||||
int accessFlags = flags & ACCESS_MASK;
|
||||
|
||||
if( k == NONE )
|
||||
{
|
||||
umv.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
if( k == STD_VECTOR_MAT )
|
||||
{
|
||||
const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
|
||||
size_t i, n = v.size();
|
||||
umv.resize(n);
|
||||
|
||||
for( i = 0; i < n; i++ )
|
||||
umv[i] = v[i].getUMat(accessFlags);
|
||||
return;
|
||||
}
|
||||
|
||||
if( k == STD_VECTOR_UMAT )
|
||||
{
|
||||
const std::vector<UMat>& v = *(const std::vector<UMat>*)obj;
|
||||
size_t i, n = v.size();
|
||||
umv.resize(n);
|
||||
|
||||
for( i = 0; i < n; i++ )
|
||||
umv[i] = v[i];
|
||||
return;
|
||||
}
|
||||
|
||||
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
|
||||
}
|
||||
|
||||
cuda::GpuMat _InputArray::getGpuMat() const
|
||||
{
|
||||
int k = kind();
|
||||
|
@ -28,7 +28,7 @@ with an image set. ::
|
||||
public:
|
||||
virtual ~DescriptorMatcher();
|
||||
|
||||
virtual void add( const vector<Mat>& descriptors );
|
||||
virtual void add( InputArray descriptors );
|
||||
|
||||
const vector<Mat>& getTrainDescriptors() const;
|
||||
virtual void clear();
|
||||
@ -40,23 +40,23 @@ with an image set. ::
|
||||
/*
|
||||
* Group of methods to match descriptors from an image pair.
|
||||
*/
|
||||
void match( const Mat& queryDescriptors, const Mat& trainDescriptors,
|
||||
vector<DMatch>& matches, const Mat& mask=Mat() ) const;
|
||||
void knnMatch( const Mat& queryDescriptors, const Mat& trainDescriptors,
|
||||
void match( InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
vector<DMatch>& matches, InputArray mask=Mat() ) const;
|
||||
void knnMatch( InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
vector<vector<DMatch> >& matches, int k,
|
||||
const Mat& mask=Mat(), bool compactResult=false ) const;
|
||||
void radiusMatch( const Mat& queryDescriptors, const Mat& trainDescriptors,
|
||||
InputArray mask=Mat(), bool compactResult=false ) const;
|
||||
void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
vector<vector<DMatch> >& matches, float maxDistance,
|
||||
const Mat& mask=Mat(), bool compactResult=false ) const;
|
||||
InputArray mask=Mat(), bool compactResult=false ) const;
|
||||
/*
|
||||
* Group of methods to match descriptors from one image to an image set.
|
||||
*/
|
||||
void match( const Mat& queryDescriptors, vector<DMatch>& matches,
|
||||
void match( InputArray queryDescriptors, vector<DMatch>& matches,
|
||||
const vector<Mat>& masks=vector<Mat>() );
|
||||
void knnMatch( const Mat& queryDescriptors, vector<vector<DMatch> >& matches,
|
||||
void knnMatch( InputArray queryDescriptors, vector<vector<DMatch> >& matches,
|
||||
int k, const vector<Mat>& masks=vector<Mat>(),
|
||||
bool compactResult=false );
|
||||
void radiusMatch( const Mat& queryDescriptors, vector<vector<DMatch> >& matches,
|
||||
void radiusMatch( InputArray queryDescriptors, vector<vector<DMatch> >& matches,
|
||||
float maxDistance, const vector<Mat>& masks=vector<Mat>(),
|
||||
bool compactResult=false );
|
||||
|
||||
@ -69,15 +69,16 @@ with an image set. ::
|
||||
|
||||
protected:
|
||||
vector<Mat> trainDescCollection;
|
||||
vector<UMat> utrainDescCollection;
|
||||
...
|
||||
};
|
||||
|
||||
|
||||
DescriptorMatcher::add
|
||||
--------------------------
|
||||
Adds descriptors to train a descriptor collection. If the collection ``trainDescCollectionis`` is not empty, the new descriptors are added to existing train descriptors.
|
||||
Adds descriptors to train a CPU(``trainDescCollectionis``) or GPU(``utrainDescCollectionis``) descriptor collection. If the collection is not empty, the new descriptors are added to existing train descriptors.
|
||||
|
||||
.. ocv:function:: void DescriptorMatcher::add( const vector<Mat>& descriptors )
|
||||
.. ocv:function:: void DescriptorMatcher::add( InputArrayOfArrays descriptors )
|
||||
|
||||
:param descriptors: Descriptors to add. Each ``descriptors[i]`` is a set of descriptors from the same train image.
|
||||
|
||||
@ -94,7 +95,7 @@ Returns a constant link to the train descriptor collection ``trainDescCollection
|
||||
|
||||
DescriptorMatcher::clear
|
||||
----------------------------
|
||||
Clears the train descriptor collection.
|
||||
Clears the train descriptor collections.
|
||||
|
||||
.. ocv:function:: void DescriptorMatcher::clear()
|
||||
|
||||
@ -102,7 +103,7 @@ Clears the train descriptor collection.
|
||||
|
||||
DescriptorMatcher::empty
|
||||
----------------------------
|
||||
Returns true if there are no train descriptors in the collection.
|
||||
Returns true if there are no train descriptors in the both collections.
|
||||
|
||||
.. ocv:function:: bool DescriptorMatcher::empty() const
|
||||
|
||||
@ -130,9 +131,9 @@ DescriptorMatcher::match
|
||||
----------------------------
|
||||
Finds the best match for each descriptor from a query set.
|
||||
|
||||
.. ocv:function:: void DescriptorMatcher::match( const Mat& queryDescriptors, const Mat& trainDescriptors, vector<DMatch>& matches, const Mat& mask=Mat() ) const
|
||||
.. ocv:function:: void DescriptorMatcher::match( InputArray queryDescriptors, InputArray trainDescriptors, vector<DMatch>& matches, InputArray mask=Mat() ) const
|
||||
|
||||
.. ocv:function:: void DescriptorMatcher::match( const Mat& queryDescriptors, vector<DMatch>& matches, const vector<Mat>& masks=vector<Mat>() )
|
||||
.. ocv:function:: void DescriptorMatcher::match(InputArray queryDescriptors, vector<DMatch>& matches, const vector<Mat>& masks=vector<Mat>() )
|
||||
|
||||
:param queryDescriptors: Query set of descriptors.
|
||||
|
||||
@ -152,9 +153,9 @@ DescriptorMatcher::knnMatch
|
||||
-------------------------------
|
||||
Finds the k best matches for each descriptor from a query set.
|
||||
|
||||
.. ocv:function:: void DescriptorMatcher::knnMatch( const Mat& queryDescriptors, const Mat& trainDescriptors, vector<vector<DMatch> >& matches, int k, const Mat& mask=Mat(), bool compactResult=false ) const
|
||||
.. ocv:function:: void DescriptorMatcher::knnMatch(InputArray queryDescriptors, InputArray trainDescriptors, vector<vector<DMatch> >& matches, int k, InputArray mask=Mat(), bool compactResult=false ) const
|
||||
|
||||
.. ocv:function:: void DescriptorMatcher::knnMatch( const Mat& queryDescriptors, vector<vector<DMatch> >& matches, int k, const vector<Mat>& masks=vector<Mat>(), bool compactResult=false )
|
||||
.. ocv:function:: void DescriptorMatcher::knnMatch( InputArray queryDescriptors, vector<vector<DMatch> >& matches, int k, const vector<Mat>& masks=vector<Mat>(), bool compactResult=false )
|
||||
|
||||
:param queryDescriptors: Query set of descriptors.
|
||||
|
||||
@ -178,9 +179,9 @@ DescriptorMatcher::radiusMatch
|
||||
----------------------------------
|
||||
For each query descriptor, finds the training descriptors not farther than the specified distance.
|
||||
|
||||
.. ocv:function:: void DescriptorMatcher::radiusMatch( const Mat& queryDescriptors, const Mat& trainDescriptors, vector<vector<DMatch> >& matches, float maxDistance, const Mat& mask=Mat(), bool compactResult=false ) const
|
||||
.. ocv:function:: void DescriptorMatcher::radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors, vector<vector<DMatch> >& matches, float maxDistance, InputArray mask=Mat(), bool compactResult=false ) const
|
||||
|
||||
.. ocv:function:: void DescriptorMatcher::radiusMatch( const Mat& queryDescriptors, vector<vector<DMatch> >& matches, float maxDistance, const vector<Mat>& masks=vector<Mat>(), bool compactResult=false )
|
||||
.. ocv:function:: void DescriptorMatcher::radiusMatch( InputArray queryDescriptors, vector<vector<DMatch> >& matches, float maxDistance, const vector<Mat>& masks=vector<Mat>(), bool compactResult=false )
|
||||
|
||||
:param queryDescriptors: Query set of descriptors.
|
||||
|
||||
|
@ -998,7 +998,7 @@ public:
|
||||
* Add descriptors to train descriptor collection.
|
||||
* descriptors Descriptors to add. Each descriptors[i] is a descriptors set from one image.
|
||||
*/
|
||||
CV_WRAP virtual void add( const std::vector<Mat>& descriptors );
|
||||
CV_WRAP virtual void add( InputArrayOfArrays descriptors );
|
||||
/*
|
||||
* Get train descriptors collection.
|
||||
*/
|
||||
@ -1034,29 +1034,29 @@ public:
|
||||
* Method train() is run in this methods.
|
||||
*/
|
||||
// Find one best match for each query descriptor (if mask is empty).
|
||||
CV_WRAP void match( const Mat& queryDescriptors, const Mat& trainDescriptors,
|
||||
CV_OUT std::vector<DMatch>& matches, const Mat& mask=Mat() ) const;
|
||||
CV_WRAP void match( InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
CV_OUT std::vector<DMatch>& matches, InputArray mask=Mat() ) const;
|
||||
// Find k best matches for each query descriptor (in increasing order of distances).
|
||||
// compactResult is used when mask is not empty. If compactResult is false matches
|
||||
// vector will have the same size as queryDescriptors rows. If compactResult is true
|
||||
// matches vector will not contain matches for fully masked out query descriptors.
|
||||
CV_WRAP void knnMatch( const Mat& queryDescriptors, const Mat& trainDescriptors,
|
||||
CV_WRAP void knnMatch( InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
|
||||
const Mat& mask=Mat(), bool compactResult=false ) const;
|
||||
InputArray mask=Mat(), bool compactResult=false ) const;
|
||||
// Find best matches for each query descriptor which have distance less than
|
||||
// maxDistance (in increasing order of distances).
|
||||
void radiusMatch( const Mat& queryDescriptors, const Mat& trainDescriptors,
|
||||
void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
const Mat& mask=Mat(), bool compactResult=false ) const;
|
||||
InputArray mask=Mat(), bool compactResult=false ) const;
|
||||
/*
|
||||
* Group of methods to match descriptors from one image to image set.
|
||||
* See description of similar methods for matching image pair above.
|
||||
*/
|
||||
CV_WRAP void match( const Mat& queryDescriptors, CV_OUT std::vector<DMatch>& matches,
|
||||
CV_WRAP void match( InputArray queryDescriptors, CV_OUT std::vector<DMatch>& matches,
|
||||
const std::vector<Mat>& masks=std::vector<Mat>() );
|
||||
CV_WRAP void knnMatch( const Mat& queryDescriptors, CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
|
||||
CV_WRAP void knnMatch( InputArray queryDescriptors, CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
|
||||
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
|
||||
void radiusMatch( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
void radiusMatch( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
|
||||
|
||||
// Reads matcher object from a file node
|
||||
@ -1101,10 +1101,10 @@ protected:
|
||||
// In fact the matching is implemented only by the following two methods. These methods suppose
|
||||
// that the class object has been trained already. Public match methods call these methods
|
||||
// after calling train().
|
||||
virtual void knnMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
|
||||
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false ) = 0;
|
||||
virtual void radiusMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false ) = 0;
|
||||
virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
|
||||
InputArrayOfArrays masks=std::vector<Mat>(), bool compactResult=false ) = 0;
|
||||
virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
InputArrayOfArrays masks=std::vector<Mat>(), bool compactResult=false ) = 0;
|
||||
|
||||
static bool isPossibleMatch( const Mat& mask, int queryIdx, int trainIdx );
|
||||
static bool isMaskedOut( const std::vector<Mat>& masks, int queryIdx );
|
||||
@ -1114,6 +1114,7 @@ protected:
|
||||
|
||||
// Collection of descriptors from train images.
|
||||
std::vector<Mat> trainDescCollection;
|
||||
std::vector<UMat> utrainDescCollection;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -1137,10 +1138,16 @@ public:
|
||||
|
||||
AlgorithmInfo* info() const;
|
||||
protected:
|
||||
virtual void knnMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
|
||||
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
|
||||
virtual void radiusMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
|
||||
virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
|
||||
InputArrayOfArrays masks=std::vector<Mat>(), bool compactResult=false );
|
||||
virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
InputArrayOfArrays masks=std::vector<Mat>(), bool compactResult=false );
|
||||
|
||||
bool ocl_knnMatch(InputArray query, InputArray train, std::vector< std::vector<DMatch> > &matches,
|
||||
int k, int dstType, bool compactResult=false);
|
||||
bool ocl_radiusMatch(InputArray query, InputArray train, std::vector< std::vector<DMatch> > &matches,
|
||||
float maxDistance, int dstType, bool compactResult=false);
|
||||
bool ocl_match(InputArray query, InputArray train, std::vector< std::vector<DMatch> > &matches, int dstType);
|
||||
|
||||
int normType;
|
||||
bool crossCheck;
|
||||
@ -1175,10 +1182,10 @@ protected:
|
||||
const Mat& indices, const Mat& distances,
|
||||
std::vector<std::vector<DMatch> >& matches );
|
||||
|
||||
virtual void knnMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
|
||||
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
|
||||
virtual void radiusMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
const std::vector<Mat>& masks=std::vector<Mat>(), bool compactResult=false );
|
||||
virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
|
||||
InputArrayOfArrays masks=std::vector<Mat>(), bool compactResult=false );
|
||||
virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
InputArrayOfArrays masks=std::vector<Mat>(), bool compactResult=false );
|
||||
|
||||
Ptr<flann::IndexParams> indexParams;
|
||||
Ptr<flann::SearchParams> searchParams;
|
||||
|
129
modules/features2d/perf/opencl/perf_brute_force_matcher.cpp
Normal file
129
modules/features2d/perf/opencl/perf_brute_force_matcher.cpp
Normal file
@ -0,0 +1,129 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Fangfang Bai, fangfang@multicorewareinc.com
|
||||
// Jin Ma, jin@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#include "perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
//////////////////// BruteForceMatch /////////////////
|
||||
|
||||
typedef Size_MatType BruteForceMatcherFixture;
|
||||
|
||||
OCL_PERF_TEST_P(BruteForceMatcherFixture, Match, ::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_PERF_ENUM((MatType)CV_32FC1) ) )
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
vector<DMatch> matches;
|
||||
UMat uquery(srcSize, type), utrain(srcSize, type);
|
||||
|
||||
declare.in(uquery, utrain, WARMUP_RNG);
|
||||
|
||||
BFMatcher matcher(NORM_L2);
|
||||
|
||||
OCL_TEST_CYCLE()
|
||||
matcher.match(uquery, utrain, matches);
|
||||
|
||||
SANITY_CHECK_MATCHES(matches, 1e-3);
|
||||
}
|
||||
|
||||
OCL_PERF_TEST_P(BruteForceMatcherFixture, KnnMatch, ::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_PERF_ENUM((MatType)CV_32FC1) ) )
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
vector< vector<DMatch> > matches;
|
||||
UMat uquery(srcSize, type), utrain(srcSize, type);
|
||||
|
||||
declare.in(uquery, utrain, WARMUP_RNG);
|
||||
|
||||
BFMatcher matcher(NORM_L2);
|
||||
|
||||
OCL_TEST_CYCLE()
|
||||
matcher.knnMatch(uquery, utrain, matches, 2);
|
||||
|
||||
vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
|
||||
SANITY_CHECK_MATCHES(matches0, 1e-3);
|
||||
SANITY_CHECK_MATCHES(matches1, 1e-3);
|
||||
|
||||
}
|
||||
|
||||
OCL_PERF_TEST_P(BruteForceMatcherFixture, RadiusMatch, ::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_PERF_ENUM((MatType)CV_32FC1) ) )
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
vector< vector<DMatch> > matches;
|
||||
UMat uquery(srcSize, type), utrain(srcSize, type);
|
||||
|
||||
declare.in(uquery, utrain, WARMUP_RNG);
|
||||
|
||||
BFMatcher matcher(NORM_L2);
|
||||
|
||||
OCL_TEST_CYCLE()
|
||||
matcher.radiusMatch(uquery, utrain, matches, 2.0f);
|
||||
|
||||
vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
|
||||
SANITY_CHECK_MATCHES(matches0, 1e-3);
|
||||
SANITY_CHECK_MATCHES(matches1, 1e-3);
|
||||
}
|
||||
|
||||
}//ocl
|
||||
}//cvtest
|
||||
|
||||
#endif //HAVE_OPENCL
|
@ -41,6 +41,7 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include <limits>
|
||||
#include "opencl_kernels.hpp"
|
||||
|
||||
#if defined(HAVE_EIGEN) && EIGEN_WORLD_VERSION == 2
|
||||
#include <Eigen/Array>
|
||||
@ -68,6 +69,533 @@ Mat windowedMatchingMask( const std::vector<KeyPoint>& keypoints1, const std::ve
|
||||
return mask;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////ocl functions for BFMatcher ///////////////////////////////////////////////////////////////
|
||||
|
||||
static void ensureSizeIsEnough(int rows, int cols, int type, UMat &m)
|
||||
{
|
||||
if (m.type() == type && m.rows >= rows && m.cols >= cols)
|
||||
m = m(Rect(0, 0, cols, rows));
|
||||
else
|
||||
m.create(rows, cols, type);
|
||||
}
|
||||
|
||||
|
||||
template < int BLOCK_SIZE, int MAX_DESC_LEN >
|
||||
static bool ocl_matchUnrolledCached(InputArray _query, InputArray _train,
|
||||
const UMat &trainIdx, const UMat &distance, int distType)
|
||||
{
|
||||
int depth = _query.depth();
|
||||
cv::String opts;
|
||||
opts = cv::format("-D T=%s %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
|
||||
ocl::typeToStr(depth), depth == CV_32F ? "-D T_FLOAT" : "", distType, (int)BLOCK_SIZE, (int)MAX_DESC_LEN );
|
||||
ocl::Kernel k("BruteForceMatch_UnrollMatch", ocl::features2d::brute_force_match_oclsrc, opts);
|
||||
if(k.empty())
|
||||
return false;
|
||||
|
||||
size_t globalSize[] = {(_query.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
|
||||
size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
|
||||
const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
{
|
||||
UMat query = _query.getUMat(), train = _train.getUMat();
|
||||
|
||||
int idx = 0;
|
||||
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(query));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(train));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
|
||||
idx = k.set(idx, (void *)NULL, smemSize);
|
||||
idx = k.set(idx, query.rows);
|
||||
idx = k.set(idx, query.cols);
|
||||
idx = k.set(idx, train.rows);
|
||||
idx = k.set(idx, train.cols);
|
||||
idx = k.set(idx, (int)query.step);
|
||||
|
||||
return k.run(2, globalSize, localSize, false);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template < int BLOCK_SIZE >
|
||||
static bool ocl_match(InputArray _query, InputArray _train,
|
||||
const UMat &trainIdx, const UMat &distance, int distType)
|
||||
{
|
||||
int depth = _query.depth();
|
||||
cv::String opts;
|
||||
opts = cv::format("-D T=%s %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
|
||||
ocl::typeToStr(depth), depth == CV_32F ? "-D T_FLOAT" : "", distType, (int)BLOCK_SIZE);
|
||||
ocl::Kernel k("BruteForceMatch_Match", ocl::features2d::brute_force_match_oclsrc, opts);
|
||||
if(k.empty())
|
||||
return false;
|
||||
|
||||
size_t globalSize[] = {(_query.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
|
||||
size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
|
||||
const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
{
|
||||
UMat query = _query.getUMat(), train = _train.getUMat();
|
||||
|
||||
int idx = 0;
|
||||
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(query));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(train));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
|
||||
idx = k.set(idx, (void *)NULL, smemSize);
|
||||
idx = k.set(idx, query.rows);
|
||||
idx = k.set(idx, query.cols);
|
||||
idx = k.set(idx, train.rows);
|
||||
idx = k.set(idx, train.cols);
|
||||
idx = k.set(idx, (int)query.step);
|
||||
|
||||
return k.run(2, globalSize, localSize, false);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ocl_matchDispatcher(InputArray query, InputArray train,
|
||||
const UMat &trainIdx, const UMat &distance, int distType)
|
||||
{
|
||||
int query_cols = query.size().width;
|
||||
bool is_cpu = ocl::Device::getDefault().type() == ocl::Device::TYPE_CPU;
|
||||
if (query_cols <= 64)
|
||||
{
|
||||
if(!ocl_matchUnrolledCached<16, 64>(query, train, trainIdx, distance, distType)) return false;
|
||||
}
|
||||
else if (query_cols <= 128 && !is_cpu)
|
||||
{
|
||||
if(!ocl_matchUnrolledCached<16, 128>(query, train, trainIdx, distance, distType)) return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!ocl_match<16>(query, train, trainIdx, distance, distType)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ocl_matchSingle(InputArray query, InputArray train,
|
||||
UMat &trainIdx, UMat &distance, int dstType)
|
||||
{
|
||||
if (query.empty() || train.empty())
|
||||
return false;
|
||||
|
||||
int query_rows = query.size().height;
|
||||
|
||||
ensureSizeIsEnough(1, query_rows, CV_32S, trainIdx);
|
||||
ensureSizeIsEnough(1, query_rows, CV_32F, distance);
|
||||
|
||||
return ocl_matchDispatcher(query, train, trainIdx, distance, dstType);
|
||||
}
|
||||
|
||||
static bool ocl_matchConvert(const Mat &trainIdx, const Mat &distance, std::vector< std::vector<DMatch> > &matches)
|
||||
{
|
||||
if (trainIdx.empty() || distance.empty())
|
||||
return false;
|
||||
|
||||
if( (trainIdx.type() != CV_32SC1) || (distance.type() != CV_32FC1 || distance.cols != trainIdx.cols) )
|
||||
return false;
|
||||
|
||||
const int nQuery = trainIdx.cols;
|
||||
|
||||
matches.clear();
|
||||
matches.reserve(nQuery);
|
||||
|
||||
const int *trainIdx_ptr = trainIdx.ptr<int>();
|
||||
const float *distance_ptr = distance.ptr<float>();
|
||||
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++distance_ptr)
|
||||
{
|
||||
int trainIndex = *trainIdx_ptr;
|
||||
|
||||
if (trainIndex == -1)
|
||||
continue;
|
||||
|
||||
float dst = *distance_ptr;
|
||||
|
||||
DMatch m(queryIdx, trainIndex, 0, dst);
|
||||
|
||||
std::vector<DMatch> temp;
|
||||
temp.push_back(m);
|
||||
matches.push_back(temp);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ocl_matchDownload(const UMat &trainIdx, const UMat &distance, std::vector< std::vector<DMatch> > &matches)
|
||||
{
|
||||
if (trainIdx.empty() || distance.empty())
|
||||
return false;
|
||||
|
||||
Mat trainIdxCPU = trainIdx.getMat(ACCESS_READ);
|
||||
Mat distanceCPU = distance.getMat(ACCESS_READ);
|
||||
|
||||
return ocl_matchConvert(trainIdxCPU, distanceCPU, matches);
|
||||
}
|
||||
|
||||
template < int BLOCK_SIZE, int MAX_DESC_LEN >
|
||||
static bool ocl_knn_matchUnrolledCached(InputArray _query, InputArray _train,
|
||||
const UMat &trainIdx, const UMat &distance, int distType)
|
||||
{
|
||||
int depth = _query.depth();
|
||||
cv::String opts;
|
||||
opts = cv::format("-D T=%s %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
|
||||
ocl::typeToStr(depth), depth == CV_32F ? "-D T_FLOAT" : "", distType, (int)BLOCK_SIZE, (int)MAX_DESC_LEN );
|
||||
ocl::Kernel k("BruteForceMatch_knnUnrollMatch", ocl::features2d::brute_force_match_oclsrc, opts);
|
||||
if(k.empty())
|
||||
return false;
|
||||
|
||||
size_t globalSize[] = {(_query.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
|
||||
size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
|
||||
const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
{
|
||||
UMat query = _query.getUMat(), train = _train.getUMat();
|
||||
|
||||
int idx = 0;
|
||||
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(query));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(train));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
|
||||
idx = k.set(idx, (void *)NULL, smemSize);
|
||||
idx = k.set(idx, query.rows);
|
||||
idx = k.set(idx, query.cols);
|
||||
idx = k.set(idx, train.rows);
|
||||
idx = k.set(idx, train.cols);
|
||||
idx = k.set(idx, (int)query.step);
|
||||
|
||||
return k.run(2, globalSize, localSize, false);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template < int BLOCK_SIZE >
|
||||
static bool ocl_knn_match(InputArray _query, InputArray _train,
|
||||
const UMat &trainIdx, const UMat &distance, int distType)
|
||||
{
|
||||
int depth = _query.depth();
|
||||
cv::String opts;
|
||||
opts = format("-D T=%s %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
|
||||
ocl::typeToStr(depth), depth == CV_32F ? "-D T_FLOAT" : "", distType, (int)BLOCK_SIZE);
|
||||
ocl::Kernel k("BruteForceMatch_knnMatch", ocl::features2d::brute_force_match_oclsrc, opts);
|
||||
if(k.empty())
|
||||
return false;
|
||||
|
||||
size_t globalSize[] = {(_query.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
|
||||
size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
|
||||
const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
{
|
||||
UMat query = _query.getUMat(), train = _train.getUMat();
|
||||
|
||||
int idx = 0;
|
||||
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(query));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(train));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
|
||||
idx = k.set(idx, (void*)NULL, smemSize);
|
||||
idx = k.set(idx, query.rows);
|
||||
idx = k.set(idx, query.cols);
|
||||
idx = k.set(idx, train.rows);
|
||||
idx = k.set(idx, train.cols);
|
||||
idx = k.set(idx, (int)query.step);
|
||||
|
||||
return k.run(2, globalSize, localSize, false);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ocl_match2Dispatcher(InputArray query, InputArray train, const UMat &trainIdx, const UMat &distance, int distType)
|
||||
{
|
||||
bool is_cpu = ocl::Device::getDefault().type() == ocl::Device::TYPE_CPU;
|
||||
if (query.size().width <= 64)
|
||||
{
|
||||
if(!ocl_knn_matchUnrolledCached<16, 64>(query, train, trainIdx, distance, distType))
|
||||
return false;
|
||||
}
|
||||
else if (query.size().width <= 128 && !is_cpu)
|
||||
{
|
||||
if(!ocl_knn_matchUnrolledCached<16, 128>(query, train, trainIdx, distance, distType))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!ocl_knn_match<16>(query, train, trainIdx, distance, distType))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ocl_kmatchDispatcher(InputArray query, InputArray train, const UMat &trainIdx,
|
||||
const UMat &distance, int distType)
|
||||
{
|
||||
return ocl_match2Dispatcher(query, train, trainIdx, distance, distType);
|
||||
}
|
||||
|
||||
static bool ocl_knnMatchSingle(InputArray query, InputArray train, UMat &trainIdx,
|
||||
UMat &distance, int dstType)
|
||||
{
|
||||
if (query.empty() || train.empty())
|
||||
return false;
|
||||
|
||||
const int nQuery = query.size().height;
|
||||
|
||||
ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
|
||||
ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
|
||||
|
||||
trainIdx.setTo(Scalar::all(-1));
|
||||
|
||||
return ocl_kmatchDispatcher(query, train, trainIdx, distance, dstType);
|
||||
}
|
||||
|
||||
static bool ocl_knnMatchConvert(const Mat &trainIdx, const Mat &distance, std::vector< std::vector<DMatch> > &matches, bool compactResult)
|
||||
{
|
||||
if (trainIdx.empty() || distance.empty())
|
||||
return false;
|
||||
|
||||
if(trainIdx.type() != CV_32SC2 && trainIdx.type() != CV_32SC1) return false;
|
||||
if(distance.type() != CV_32FC2 && distance.type() != CV_32FC1)return false;
|
||||
if(distance.size() != trainIdx.size()) return false;
|
||||
if(!trainIdx.isContinuous() || !distance.isContinuous()) return false;
|
||||
|
||||
const int nQuery = trainIdx.type() == CV_32SC2 ? trainIdx.cols : trainIdx.rows;
|
||||
const int k = trainIdx.type() == CV_32SC2 ? 2 : trainIdx.cols;
|
||||
|
||||
matches.clear();
|
||||
matches.reserve(nQuery);
|
||||
|
||||
const int *trainIdx_ptr = trainIdx.ptr<int>();
|
||||
const float *distance_ptr = distance.ptr<float>();
|
||||
|
||||
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
|
||||
{
|
||||
matches.push_back(std::vector<DMatch>());
|
||||
std::vector<DMatch> &curMatches = matches.back();
|
||||
curMatches.reserve(k);
|
||||
|
||||
for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr)
|
||||
{
|
||||
int trainIndex = *trainIdx_ptr;
|
||||
|
||||
if (trainIndex != -1)
|
||||
{
|
||||
float dst = *distance_ptr;
|
||||
|
||||
DMatch m(queryIdx, trainIndex, 0, dst);
|
||||
|
||||
curMatches.push_back(m);
|
||||
}
|
||||
}
|
||||
|
||||
if (compactResult && curMatches.empty())
|
||||
matches.pop_back();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ocl_knnMatchDownload(const UMat &trainIdx, const UMat &distance, std::vector< std::vector<DMatch> > &matches, bool compactResult)
|
||||
{
|
||||
if (trainIdx.empty() || distance.empty())
|
||||
return false;
|
||||
|
||||
Mat trainIdxCPU = trainIdx.getMat(ACCESS_READ);
|
||||
Mat distanceCPU = distance.getMat(ACCESS_READ);
|
||||
|
||||
if (ocl_knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult) )
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
template < int BLOCK_SIZE, int MAX_DESC_LEN >
|
||||
static bool ocl_matchUnrolledCached(InputArray _query, InputArray _train, float maxDistance,
|
||||
const UMat &trainIdx, const UMat &distance, const UMat &nMatches, int distType)
|
||||
{
|
||||
int depth = _query.depth();
|
||||
cv::String opts;
|
||||
opts = format("-D T=%s %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
|
||||
ocl::typeToStr(depth), depth == CV_32F ? "-D T_FLOAT" : "", distType, (int)BLOCK_SIZE, (int)MAX_DESC_LEN);
|
||||
ocl::Kernel k("BruteForceMatch_RadiusUnrollMatch", ocl::features2d::brute_force_match_oclsrc, opts);
|
||||
if(k.empty())
|
||||
return false;
|
||||
|
||||
size_t globalSize[] = {(_train.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (_query.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
|
||||
size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
|
||||
const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
{
|
||||
UMat query = _query.getUMat(), train = _train.getUMat();
|
||||
|
||||
int idx = 0;
|
||||
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(query));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(train));
|
||||
idx = k.set(idx, maxDistance);
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(nMatches));
|
||||
idx = k.set(idx, (void*)NULL, smemSize);
|
||||
idx = k.set(idx, query.rows);
|
||||
idx = k.set(idx, query.cols);
|
||||
idx = k.set(idx, train.rows);
|
||||
idx = k.set(idx, train.cols);
|
||||
idx = k.set(idx, trainIdx.cols);
|
||||
idx = k.set(idx, (int)query.step);
|
||||
idx = k.set(idx, (int)trainIdx.step);
|
||||
|
||||
return k.run(2, globalSize, localSize, false);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
//radius_match
|
||||
template < int BLOCK_SIZE >
|
||||
static bool ocl_radius_match(InputArray _query, InputArray _train, float maxDistance,
|
||||
const UMat &trainIdx, const UMat &distance, const UMat &nMatches, int distType)
|
||||
{
|
||||
int depth = _query.depth();
|
||||
cv::String opts;
|
||||
opts = format("-D T=%s %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d", ocl::typeToStr(depth), depth == CV_32F ? "-D T_FLOAT" : "", distType, (int)BLOCK_SIZE);
|
||||
ocl::Kernel k("BruteForceMatch_RadiusMatch", ocl::features2d::brute_force_match_oclsrc, opts);
|
||||
if(k.empty())
|
||||
return false;
|
||||
|
||||
size_t globalSize[] = {(_train.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (_query.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
|
||||
size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
|
||||
const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
|
||||
|
||||
if(globalSize[0] != 0)
|
||||
{
|
||||
UMat query = _query.getUMat(), train = _train.getUMat();
|
||||
|
||||
int idx = 0;
|
||||
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(query));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(train));
|
||||
idx = k.set(idx, maxDistance);
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
|
||||
idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(nMatches));
|
||||
idx = k.set(idx, (void*)NULL, smemSize);
|
||||
idx = k.set(idx, query.rows);
|
||||
idx = k.set(idx, query.cols);
|
||||
idx = k.set(idx, train.rows);
|
||||
idx = k.set(idx, train.cols);
|
||||
idx = k.set(idx, trainIdx.cols);
|
||||
idx = k.set(idx, (int)query.step);
|
||||
idx = k.set(idx, (int)trainIdx.step);
|
||||
|
||||
return k.run(2, globalSize, localSize, false);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ocl_rmatchDispatcher(InputArray query, InputArray train,
|
||||
UMat &trainIdx, UMat &distance, UMat &nMatches, float maxDistance, int distType)
|
||||
{
|
||||
bool is_cpu = ocl::Device::getDefault().type() == ocl::Device::TYPE_CPU;
|
||||
int query_cols = query.size().width;
|
||||
if (query_cols <= 64)
|
||||
{
|
||||
if(!ocl_matchUnrolledCached<16, 64>(query, train, maxDistance, trainIdx, distance, nMatches, distType)) return false;
|
||||
}
|
||||
else if (query_cols <= 128 && !is_cpu)
|
||||
{
|
||||
if(!ocl_matchUnrolledCached<16, 128>(query, train, maxDistance, trainIdx, distance, nMatches, distType)) return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!ocl_radius_match<16>(query, train, maxDistance, trainIdx, distance, nMatches, distType)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static bool ocl_radiusMatchSingle(InputArray query, InputArray train,
|
||||
UMat &trainIdx, UMat &distance, UMat &nMatches, float maxDistance, int distType)
|
||||
{
|
||||
if (query.empty() || train.empty())
|
||||
return false;
|
||||
|
||||
const int nQuery = query.size().height;
|
||||
const int nTrain = train.size().height;
|
||||
|
||||
ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);
|
||||
|
||||
if (trainIdx.empty())
|
||||
{
|
||||
ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32SC1, trainIdx);
|
||||
ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance);
|
||||
}
|
||||
|
||||
nMatches.setTo(Scalar::all(0));
|
||||
|
||||
return ocl_rmatchDispatcher(query, train, trainIdx, distance, nMatches, maxDistance, distType);
|
||||
}
|
||||
|
||||
static bool ocl_radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &_nMatches,
|
||||
std::vector< std::vector<DMatch> > &matches, bool compactResult)
|
||||
{
|
||||
if (trainIdx.empty() || distance.empty() || _nMatches.empty())
|
||||
return false;
|
||||
|
||||
if( (trainIdx.type() != CV_32SC1) ||
|
||||
(distance.type() != CV_32FC1 || distance.size() != trainIdx.size()) ||
|
||||
(_nMatches.type() != CV_32SC1 || _nMatches.cols != trainIdx.rows) )
|
||||
return false;
|
||||
|
||||
const int nQuery = trainIdx.rows;
|
||||
|
||||
matches.clear();
|
||||
matches.reserve(nQuery);
|
||||
|
||||
const int *nMatches_ptr = _nMatches.ptr<int>();
|
||||
|
||||
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
|
||||
{
|
||||
const int *trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
|
||||
const float *distance_ptr = distance.ptr<float>(queryIdx);
|
||||
|
||||
const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
|
||||
|
||||
if (nMatches == 0)
|
||||
{
|
||||
if (!compactResult)
|
||||
matches.push_back(std::vector<DMatch>());
|
||||
continue;
|
||||
}
|
||||
|
||||
matches.push_back(std::vector<DMatch>(nMatches));
|
||||
std::vector<DMatch> &curMatches = matches.back();
|
||||
|
||||
for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++distance_ptr)
|
||||
{
|
||||
int trainIndex = *trainIdx_ptr;
|
||||
|
||||
float dst = *distance_ptr;
|
||||
|
||||
DMatch m(queryIdx, trainIndex, 0, dst);
|
||||
|
||||
curMatches[i] = m;
|
||||
}
|
||||
|
||||
std::sort(curMatches.begin(), curMatches.end());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ocl_radiusMatchDownload(const UMat &trainIdx, const UMat &distance, const UMat &nMatches,
|
||||
std::vector< std::vector<DMatch> > &matches, bool compactResult)
|
||||
{
|
||||
if (trainIdx.empty() || distance.empty() || nMatches.empty())
|
||||
return false;
|
||||
|
||||
Mat trainIdxCPU = trainIdx.getMat(ACCESS_READ);
|
||||
Mat distanceCPU = distance.getMat(ACCESS_READ);
|
||||
Mat nMatchesCPU = nMatches.getMat(ACCESS_READ);
|
||||
|
||||
return ocl_radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
|
||||
}
|
||||
|
||||
/****************************************************************************************\
|
||||
* DescriptorMatcher *
|
||||
\****************************************************************************************/
|
||||
@ -190,9 +718,32 @@ static void convertMatches( const std::vector<std::vector<DMatch> >& knnMatches,
|
||||
DescriptorMatcher::~DescriptorMatcher()
|
||||
{}
|
||||
|
||||
void DescriptorMatcher::add( const std::vector<Mat>& descriptors )
|
||||
void DescriptorMatcher::add( InputArrayOfArrays _descriptors )
|
||||
{
|
||||
trainDescCollection.insert( trainDescCollection.end(), descriptors.begin(), descriptors.end() );
|
||||
if(_descriptors.isUMatVector())
|
||||
{
|
||||
std::vector<UMat> descriptors;
|
||||
_descriptors.getUMatVector(descriptors);
|
||||
utrainDescCollection.insert( utrainDescCollection.end(), descriptors.begin(), descriptors.end() );
|
||||
}
|
||||
else if(_descriptors.isUMat())
|
||||
{
|
||||
std::vector<UMat> descriptors = std::vector<UMat>(1, _descriptors.getUMat());
|
||||
utrainDescCollection.insert( utrainDescCollection.end(), descriptors.begin(), descriptors.end() );
|
||||
}
|
||||
else if(_descriptors.isMatVector())
|
||||
{
|
||||
std::vector<Mat> descriptors;
|
||||
_descriptors.getMatVector(descriptors);
|
||||
trainDescCollection.insert( trainDescCollection.end(), descriptors.begin(), descriptors.end() );
|
||||
}
|
||||
else if(_descriptors.isMat())
|
||||
{
|
||||
std::vector<Mat> descriptors = std::vector<Mat>(1, _descriptors.getMat());
|
||||
trainDescCollection.insert( trainDescCollection.end(), descriptors.begin(), descriptors.end() );
|
||||
}
|
||||
else
|
||||
CV_Assert( _descriptors.isUMat() || _descriptors.isUMatVector() || _descriptors.isMat() || _descriptors.isMatVector() );
|
||||
}
|
||||
|
||||
const std::vector<Mat>& DescriptorMatcher::getTrainDescriptors() const
|
||||
@ -202,41 +753,45 @@ const std::vector<Mat>& DescriptorMatcher::getTrainDescriptors() const
|
||||
|
||||
void DescriptorMatcher::clear()
|
||||
{
|
||||
utrainDescCollection.clear();
|
||||
trainDescCollection.clear();
|
||||
}
|
||||
|
||||
bool DescriptorMatcher::empty() const
|
||||
{
|
||||
return trainDescCollection.empty();
|
||||
return trainDescCollection.empty() && utrainDescCollection.empty();
|
||||
}
|
||||
|
||||
void DescriptorMatcher::train()
|
||||
{}
|
||||
|
||||
void DescriptorMatcher::match( const Mat& queryDescriptors, const Mat& trainDescriptors, std::vector<DMatch>& matches, const Mat& mask ) const
|
||||
void DescriptorMatcher::match( InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
std::vector<DMatch>& matches, InputArray mask ) const
|
||||
{
|
||||
Ptr<DescriptorMatcher> tempMatcher = clone(true);
|
||||
tempMatcher->add( std::vector<Mat>(1, trainDescriptors) );
|
||||
tempMatcher->match( queryDescriptors, matches, std::vector<Mat>(1, mask) );
|
||||
tempMatcher->add(trainDescriptors);
|
||||
tempMatcher->match( queryDescriptors, matches, std::vector<Mat>(1, mask.getMat()) );
|
||||
}
|
||||
|
||||
void DescriptorMatcher::knnMatch( const Mat& queryDescriptors, const Mat& trainDescriptors, std::vector<std::vector<DMatch> >& matches, int knn,
|
||||
const Mat& mask, bool compactResult ) const
|
||||
void DescriptorMatcher::knnMatch( InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
std::vector<std::vector<DMatch> >& matches, int knn,
|
||||
InputArray mask, bool compactResult ) const
|
||||
{
|
||||
Ptr<DescriptorMatcher> tempMatcher = clone(true);
|
||||
tempMatcher->add( std::vector<Mat>(1, trainDescriptors) );
|
||||
tempMatcher->knnMatch( queryDescriptors, matches, knn, std::vector<Mat>(1, mask), compactResult );
|
||||
tempMatcher->add(trainDescriptors);
|
||||
tempMatcher->knnMatch( queryDescriptors, matches, knn, std::vector<Mat>(1, mask.getMat()), compactResult );
|
||||
}
|
||||
|
||||
void DescriptorMatcher::radiusMatch( const Mat& queryDescriptors, const Mat& trainDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
const Mat& mask, bool compactResult ) const
|
||||
void DescriptorMatcher::radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors,
|
||||
std::vector<std::vector<DMatch> >& matches, float maxDistance, InputArray mask,
|
||||
bool compactResult ) const
|
||||
{
|
||||
Ptr<DescriptorMatcher> tempMatcher = clone(true);
|
||||
tempMatcher->add( std::vector<Mat>(1, trainDescriptors) );
|
||||
tempMatcher->radiusMatch( queryDescriptors, matches, maxDistance, std::vector<Mat>(1, mask), compactResult );
|
||||
tempMatcher->add(trainDescriptors);
|
||||
tempMatcher->radiusMatch( queryDescriptors, matches, maxDistance, std::vector<Mat>(1, mask.getMat()), compactResult );
|
||||
}
|
||||
|
||||
void DescriptorMatcher::match( const Mat& queryDescriptors, std::vector<DMatch>& matches, const std::vector<Mat>& masks )
|
||||
void DescriptorMatcher::match( InputArray queryDescriptors, std::vector<DMatch>& matches, const std::vector<Mat>& masks )
|
||||
{
|
||||
std::vector<std::vector<DMatch> > knnMatches;
|
||||
knnMatch( queryDescriptors, knnMatches, 1, masks, true /*compactResult*/ );
|
||||
@ -248,36 +803,36 @@ void DescriptorMatcher::checkMasks( const std::vector<Mat>& masks, int queryDesc
|
||||
if( isMaskSupported() && !masks.empty() )
|
||||
{
|
||||
// Check masks
|
||||
size_t imageCount = trainDescCollection.size();
|
||||
size_t imageCount = std::max(trainDescCollection.size(), utrainDescCollection.size() );
|
||||
CV_Assert( masks.size() == imageCount );
|
||||
for( size_t i = 0; i < imageCount; i++ )
|
||||
{
|
||||
if( !masks[i].empty() && !trainDescCollection[i].empty() )
|
||||
if( !masks[i].empty() && (!trainDescCollection[i].empty() || !utrainDescCollection[i].empty() ) )
|
||||
{
|
||||
int rows = trainDescCollection[i].empty() ? utrainDescCollection[i].rows : trainDescCollection[i].rows;
|
||||
CV_Assert( masks[i].rows == queryDescriptorsCount &&
|
||||
masks[i].cols == trainDescCollection[i].rows &&
|
||||
masks[i].type() == CV_8UC1 );
|
||||
(masks[i].cols == rows || masks[i].cols == rows) &&
|
||||
masks[i].type() == CV_8UC1 );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DescriptorMatcher::knnMatch( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, int knn,
|
||||
void DescriptorMatcher::knnMatch( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int knn,
|
||||
const std::vector<Mat>& masks, bool compactResult )
|
||||
{
|
||||
matches.clear();
|
||||
if( empty() || queryDescriptors.empty() )
|
||||
return;
|
||||
|
||||
CV_Assert( knn > 0 );
|
||||
|
||||
checkMasks( masks, queryDescriptors.rows );
|
||||
checkMasks( masks, queryDescriptors.size().height );
|
||||
|
||||
train();
|
||||
knnMatchImpl( queryDescriptors, matches, knn, masks, compactResult );
|
||||
}
|
||||
|
||||
void DescriptorMatcher::radiusMatch( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
void DescriptorMatcher::radiusMatch( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
const std::vector<Mat>& masks, bool compactResult )
|
||||
{
|
||||
matches.clear();
|
||||
@ -286,7 +841,7 @@ void DescriptorMatcher::radiusMatch( const Mat& queryDescriptors, std::vector<st
|
||||
|
||||
CV_Assert( maxDistance > std::numeric_limits<float>::epsilon() );
|
||||
|
||||
checkMasks( masks, queryDescriptors.rows );
|
||||
checkMasks( masks, queryDescriptors.size().height );
|
||||
|
||||
train();
|
||||
radiusMatchImpl( queryDescriptors, matches, maxDistance, masks, compactResult );
|
||||
@ -316,7 +871,7 @@ bool DescriptorMatcher::isMaskedOut( const std::vector<Mat>& masks, int queryIdx
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////// BruteForceMatcher /////////////////////////////////////////////////
|
||||
|
||||
BFMatcher::BFMatcher( int _normType, bool _crossCheck )
|
||||
{
|
||||
@ -336,19 +891,100 @@ Ptr<DescriptorMatcher> BFMatcher::clone( bool emptyTrainData ) const
|
||||
return matcher;
|
||||
}
|
||||
|
||||
|
||||
void BFMatcher::knnMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, int knn,
|
||||
const std::vector<Mat>& masks, bool compactResult )
|
||||
bool BFMatcher::ocl_match(InputArray query, InputArray _train, std::vector< std::vector<DMatch> > &matches, int dstType)
|
||||
{
|
||||
UMat trainIdx, distance;
|
||||
if(!ocl_matchSingle(query, _train, trainIdx, distance, dstType)) return false;
|
||||
if(!ocl_matchDownload(trainIdx, distance, matches)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BFMatcher::ocl_knnMatch(InputArray query, InputArray _train, std::vector< std::vector<DMatch> > &matches, int k, int dstType, bool compactResult)
|
||||
{
|
||||
UMat trainIdx, distance;
|
||||
if (k != 2)
|
||||
return false;
|
||||
if (!ocl_knnMatchSingle(query, _train, trainIdx, distance, dstType)) return false;
|
||||
if( !ocl_knnMatchDownload(trainIdx, distance, matches, compactResult) ) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void BFMatcher::knnMatchImpl( InputArray _queryDescriptors, std::vector<std::vector<DMatch> >& matches, int knn,
|
||||
InputArrayOfArrays _masks, bool compactResult )
|
||||
{
|
||||
int trainDescType = trainDescCollection.empty() ? utrainDescCollection[0].type() : trainDescCollection[0].type();
|
||||
CV_Assert( _queryDescriptors.type() == trainDescType );
|
||||
|
||||
const int IMGIDX_SHIFT = 18;
|
||||
const int IMGIDX_ONE = (1 << IMGIDX_SHIFT);
|
||||
|
||||
if( queryDescriptors.empty() || trainDescCollection.empty() )
|
||||
if( _queryDescriptors.empty() || (trainDescCollection.empty() && utrainDescCollection.empty()))
|
||||
{
|
||||
matches.clear();
|
||||
return;
|
||||
}
|
||||
CV_Assert( queryDescriptors.type() == trainDescCollection[0].type() );
|
||||
|
||||
std::vector<Mat> masks;
|
||||
_masks.getMatVector(masks);
|
||||
|
||||
if(!trainDescCollection.empty() && !utrainDescCollection.empty())
|
||||
{
|
||||
for(int i = 0; i < (int)utrainDescCollection.size(); i++)
|
||||
{
|
||||
Mat tempMat;
|
||||
utrainDescCollection[i].copyTo(tempMat);
|
||||
trainDescCollection.push_back(tempMat);
|
||||
}
|
||||
utrainDescCollection.clear();
|
||||
}
|
||||
|
||||
int trainDescVectorSize = trainDescCollection.empty() ? (int)utrainDescCollection.size() : (int)trainDescCollection.size();
|
||||
Size trainDescSize = trainDescCollection.empty() ? utrainDescCollection[0].size() : trainDescCollection[0].size();
|
||||
int trainDescOffset = trainDescCollection.empty() ? (int)utrainDescCollection[0].offset : 0;
|
||||
|
||||
if ( ocl::useOpenCL() && _queryDescriptors.isUMat() && _queryDescriptors.dims()<=2 && trainDescVectorSize == 1 &&
|
||||
_queryDescriptors.type() == CV_32FC1 && _queryDescriptors.offset() == 0 && trainDescOffset == 0 &&
|
||||
trainDescSize.width == _queryDescriptors.size().width && masks.size() == 1 && masks[0].total() == 0 )
|
||||
{
|
||||
if(knn == 1)
|
||||
{
|
||||
if(trainDescCollection.empty())
|
||||
{
|
||||
if(ocl_match(_queryDescriptors, utrainDescCollection[0], matches, normType))
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(ocl_match(_queryDescriptors, trainDescCollection[0], matches, normType))
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(trainDescCollection.empty())
|
||||
{
|
||||
if(ocl_knnMatch(_queryDescriptors, utrainDescCollection[0], matches, knn, normType, compactResult) )
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(ocl_knnMatch(_queryDescriptors, trainDescCollection[0], matches, knn, normType, compactResult) )
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Mat queryDescriptors = _queryDescriptors.getMat();
|
||||
if(trainDescCollection.empty() && !utrainDescCollection.empty())
|
||||
{
|
||||
for(int i = 0; i < (int)utrainDescCollection.size(); i++)
|
||||
{
|
||||
Mat tempMat;
|
||||
utrainDescCollection[i].copyTo(tempMat);
|
||||
trainDescCollection.push_back(tempMat);
|
||||
}
|
||||
utrainDescCollection.clear();
|
||||
}
|
||||
|
||||
matches.reserve(queryDescriptors.rows);
|
||||
|
||||
@ -397,16 +1033,72 @@ void BFMatcher::knnMatchImpl( const Mat& queryDescriptors, std::vector<std::vect
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void BFMatcher::radiusMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches,
|
||||
float maxDistance, const std::vector<Mat>& masks, bool compactResult )
|
||||
bool BFMatcher::ocl_radiusMatch(InputArray query, InputArray _train, std::vector< std::vector<DMatch> > &matches,
|
||||
float maxDistance, int dstType, bool compactResult)
|
||||
{
|
||||
if( queryDescriptors.empty() || trainDescCollection.empty() )
|
||||
UMat trainIdx, distance, nMatches;
|
||||
if(!ocl_radiusMatchSingle(query, _train, trainIdx, distance, nMatches, maxDistance, dstType)) return false;
|
||||
if(!ocl_radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void BFMatcher::radiusMatchImpl( InputArray _queryDescriptors, std::vector<std::vector<DMatch> >& matches,
|
||||
float maxDistance, InputArrayOfArrays _masks, bool compactResult )
|
||||
{
|
||||
int trainDescType = trainDescCollection.empty() ? utrainDescCollection[0].type() : trainDescCollection[0].type();
|
||||
CV_Assert( _queryDescriptors.type() == trainDescType );
|
||||
|
||||
if( _queryDescriptors.empty() || (trainDescCollection.empty() && utrainDescCollection.empty()))
|
||||
{
|
||||
matches.clear();
|
||||
return;
|
||||
}
|
||||
CV_Assert( queryDescriptors.type() == trainDescCollection[0].type() );
|
||||
|
||||
std::vector<Mat> masks;
|
||||
_masks.getMatVector(masks);
|
||||
|
||||
if(!trainDescCollection.empty() && !utrainDescCollection.empty())
|
||||
{
|
||||
for(int i = 0; i < (int)utrainDescCollection.size(); i++)
|
||||
{
|
||||
Mat tempMat;
|
||||
utrainDescCollection[i].copyTo(tempMat);
|
||||
trainDescCollection.push_back(tempMat);
|
||||
}
|
||||
utrainDescCollection.clear();
|
||||
}
|
||||
|
||||
int trainDescVectorSize = trainDescCollection.empty() ? (int)utrainDescCollection.size() : (int)trainDescCollection.size();
|
||||
Size trainDescSize = trainDescCollection.empty() ? utrainDescCollection[0].size() : trainDescCollection[0].size();
|
||||
int trainDescOffset = trainDescCollection.empty() ? (int)utrainDescCollection[0].offset : 0;
|
||||
|
||||
if ( ocl::useOpenCL() && _queryDescriptors.isUMat() && _queryDescriptors.dims()<=2 && trainDescVectorSize == 1 &&
|
||||
_queryDescriptors.type() == CV_32FC1 && _queryDescriptors.offset() == 0 && trainDescOffset == 0 &&
|
||||
trainDescSize.width == _queryDescriptors.size().width && masks.size() == 1 && masks[0].total() == 0 )
|
||||
{
|
||||
if(trainDescCollection.empty())
|
||||
{
|
||||
if(ocl_radiusMatch(_queryDescriptors, utrainDescCollection[0], matches, maxDistance, normType, compactResult) )
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(ocl_radiusMatch(_queryDescriptors, trainDescCollection[0], matches, maxDistance, normType, compactResult) )
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
Mat queryDescriptors = _queryDescriptors.getMat();
|
||||
if(trainDescCollection.empty() && !utrainDescCollection.empty())
|
||||
{
|
||||
for(int i = 0; i < (int)utrainDescCollection.size(); i++)
|
||||
{
|
||||
Mat tempMat;
|
||||
utrainDescCollection[i].copyTo(tempMat);
|
||||
trainDescCollection.push_back(tempMat);
|
||||
}
|
||||
utrainDescCollection.clear();
|
||||
}
|
||||
|
||||
matches.resize(queryDescriptors.rows);
|
||||
Mat dist, distf;
|
||||
@ -763,9 +1455,10 @@ void FlannBasedMatcher::convertToDMatches( const DescriptorCollection& collectio
|
||||
}
|
||||
}
|
||||
|
||||
void FlannBasedMatcher::knnMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, int knn,
|
||||
const std::vector<Mat>& /*masks*/, bool /*compactResult*/ )
|
||||
void FlannBasedMatcher::knnMatchImpl( InputArray _queryDescriptors, std::vector<std::vector<DMatch> >& matches, int knn,
|
||||
InputArrayOfArrays /*masks*/, bool /*compactResult*/ )
|
||||
{
|
||||
Mat queryDescriptors = _queryDescriptors.getMat();
|
||||
Mat indices( queryDescriptors.rows, knn, CV_32SC1 );
|
||||
Mat dists( queryDescriptors.rows, knn, CV_32FC1);
|
||||
flannIndex->knnSearch( queryDescriptors, indices, dists, knn, *searchParams );
|
||||
@ -773,9 +1466,10 @@ void FlannBasedMatcher::knnMatchImpl( const Mat& queryDescriptors, std::vector<s
|
||||
convertToDMatches( mergedDescriptors, indices, dists, matches );
|
||||
}
|
||||
|
||||
void FlannBasedMatcher::radiusMatchImpl( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
const std::vector<Mat>& /*masks*/, bool /*compactResult*/ )
|
||||
void FlannBasedMatcher::radiusMatchImpl( InputArray _queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
|
||||
InputArrayOfArrays /*masks*/, bool /*compactResult*/ )
|
||||
{
|
||||
Mat queryDescriptors = _queryDescriptors.getMat();
|
||||
const int count = mergedDescriptors.size(); // TODO do count as param?
|
||||
Mat indices( queryDescriptors.rows, count, CV_32SC1, Scalar::all(-1) );
|
||||
Mat dists( queryDescriptors.rows, count, CV_32FC1, Scalar::all(-1) );
|
||||
|
789
modules/features2d/src/opencl/brute_force_match.cl
Normal file
789
modules/features2d/src/opencl/brute_force_match.cl
Normal file
@ -0,0 +1,789 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Nathan, liujun@multicorewareinc.com
|
||||
// Peng Xiao, pengxiao@outlook.com
|
||||
// Baichuan Su, baichuan@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
|
||||
#define MAX_FLOAT 3.40282e+038f
|
||||
|
||||
#ifndef T
|
||||
#define T float
|
||||
#endif
|
||||
|
||||
#ifndef BLOCK_SIZE
|
||||
#define BLOCK_SIZE 16
|
||||
#endif
|
||||
#ifndef MAX_DESC_LEN
|
||||
#define MAX_DESC_LEN 64
|
||||
#endif
|
||||
|
||||
#ifndef DIST_TYPE
|
||||
#define DIST_TYPE 2
|
||||
#endif
|
||||
|
||||
// dirty fix for non-template support
|
||||
#if (DIST_TYPE == 2) // L1Dist
|
||||
# ifdef T_FLOAT
|
||||
# define DIST(x, y) fabs((x) - (y))
|
||||
typedef float value_type;
|
||||
typedef float result_type;
|
||||
# else
|
||||
# define DIST(x, y) abs((x) - (y))
|
||||
typedef int value_type;
|
||||
typedef int result_type;
|
||||
# endif
|
||||
#define DIST_RES(x) (x)
|
||||
#elif (DIST_TYPE == 4) // L2Dist
|
||||
#define DIST(x, y) (((x) - (y)) * ((x) - (y)))
|
||||
typedef float value_type;
|
||||
typedef float result_type;
|
||||
#define DIST_RES(x) sqrt(x)
|
||||
#elif (DIST_TYPE == 6) // Hamming
|
||||
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||
inline int bit1Count(int v)
|
||||
{
|
||||
v = v - ((v >> 1) & 0x55555555); // reuse input as temporary
|
||||
v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp
|
||||
return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
|
||||
}
|
||||
#define DIST(x, y) bit1Count( (x) ^ (y) )
|
||||
typedef int value_type;
|
||||
typedef int result_type;
|
||||
#define DIST_RES(x) (x)
|
||||
#endif
|
||||
|
||||
inline result_type reduce_block(
|
||||
__local value_type *s_query,
|
||||
__local value_type *s_train,
|
||||
int lidx,
|
||||
int lidy
|
||||
)
|
||||
{
|
||||
result_type result = 0;
|
||||
#pragma unroll
|
||||
for (int j = 0 ; j < BLOCK_SIZE ; j++)
|
||||
{
|
||||
result += DIST(
|
||||
s_query[lidy * BLOCK_SIZE + j],
|
||||
s_train[j * BLOCK_SIZE + lidx]);
|
||||
}
|
||||
return DIST_RES(result);
|
||||
}
|
||||
|
||||
inline result_type reduce_block_match(
|
||||
__local value_type *s_query,
|
||||
__local value_type *s_train,
|
||||
int lidx,
|
||||
int lidy
|
||||
)
|
||||
{
|
||||
result_type result = 0;
|
||||
#pragma unroll
|
||||
for (int j = 0 ; j < BLOCK_SIZE ; j++)
|
||||
{
|
||||
result += DIST(
|
||||
s_query[lidy * BLOCK_SIZE + j],
|
||||
s_train[j * BLOCK_SIZE + lidx]);
|
||||
}
|
||||
return (result);
|
||||
}
|
||||
|
||||
inline result_type reduce_multi_block(
|
||||
__local value_type *s_query,
|
||||
__local value_type *s_train,
|
||||
int block_index,
|
||||
int lidx,
|
||||
int lidy
|
||||
)
|
||||
{
|
||||
result_type result = 0;
|
||||
#pragma unroll
|
||||
for (int j = 0 ; j < BLOCK_SIZE ; j++)
|
||||
{
|
||||
result += DIST(
|
||||
s_query[lidy * MAX_DESC_LEN + block_index * BLOCK_SIZE + j],
|
||||
s_train[j * BLOCK_SIZE + lidx]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* 2dim launch, global size: dim0 is (query rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, dim1 is BLOCK_SIZE
|
||||
local size: dim0 is BLOCK_SIZE, dim1 is BLOCK_SIZE.
|
||||
*/
|
||||
__kernel void BruteForceMatch_UnrollMatch(
|
||||
__global T *query,
|
||||
__global T *train,
|
||||
//__global float *mask,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
__local float *sharebuffer,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int step
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
const int groupidx = get_group_id(0);
|
||||
|
||||
__local value_type *s_query = (__local value_type *)sharebuffer;
|
||||
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;
|
||||
|
||||
int queryIdx = groupidx * BLOCK_SIZE + lidy;
|
||||
// load the query into local memory.
|
||||
#pragma unroll
|
||||
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)
|
||||
{
|
||||
int loadx = lidx + i * BLOCK_SIZE;
|
||||
s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
}
|
||||
|
||||
float myBestDistance = MAX_FLOAT;
|
||||
int myBestTrainIdx = -1;
|
||||
|
||||
// loopUnrolledCached to find the best trainIdx and best distance.
|
||||
for (int t = 0, endt = (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; t++)
|
||||
{
|
||||
result_type result = 0;
|
||||
#pragma unroll
|
||||
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
|
||||
{
|
||||
//load a BLOCK_SIZE * BLOCK_SIZE block into local train.
|
||||
const int loadx = lidx + i * BLOCK_SIZE;
|
||||
s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
|
||||
//synchronize to make sure each elem for reduceIteration in share memory is written already.
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
result += reduce_multi_block(s_query, s_train, i, lidx, lidy);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
result = DIST_RES(result);
|
||||
|
||||
int trainIdx = t * BLOCK_SIZE + lidx;
|
||||
|
||||
if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance/* && mask(queryIdx, trainIdx)*/)
|
||||
{
|
||||
myBestDistance = result;
|
||||
myBestTrainIdx = trainIdx;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
__local float *s_distance = (__local float*)(sharebuffer);
|
||||
__local int* s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
|
||||
|
||||
//find BestMatch
|
||||
s_distance += lidy * BLOCK_SIZE;
|
||||
s_trainIdx += lidy * BLOCK_SIZE;
|
||||
s_distance[lidx] = myBestDistance;
|
||||
s_trainIdx[lidx] = myBestTrainIdx;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
//reduce -- now all reduce implement in each threads.
|
||||
#pragma unroll
|
||||
for (int k = 0 ; k < BLOCK_SIZE; k++)
|
||||
{
|
||||
if (myBestDistance > s_distance[k])
|
||||
{
|
||||
myBestDistance = s_distance[k];
|
||||
myBestTrainIdx = s_trainIdx[k];
|
||||
}
|
||||
}
|
||||
|
||||
if (queryIdx < query_rows && lidx == 0)
|
||||
{
|
||||
bestTrainIdx[queryIdx] = myBestTrainIdx;
|
||||
bestDistance[queryIdx] = myBestDistance;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BruteForceMatch_Match(
|
||||
__global T *query,
|
||||
__global T *train,
|
||||
//__global float *mask,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
__local float *sharebuffer,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int step
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
const int groupidx = get_group_id(0);
|
||||
|
||||
const int queryIdx = groupidx * BLOCK_SIZE + lidy;
|
||||
|
||||
float myBestDistance = MAX_FLOAT;
|
||||
int myBestTrainIdx = -1;
|
||||
|
||||
__local value_type *s_query = (__local value_type *)sharebuffer;
|
||||
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
|
||||
|
||||
// loop
|
||||
for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
|
||||
{
|
||||
result_type result = 0;
|
||||
for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; i++)
|
||||
{
|
||||
const int loadx = lidx + i * BLOCK_SIZE;
|
||||
//load query and train into local memory
|
||||
s_query[lidy * BLOCK_SIZE + lidx] = 0;
|
||||
s_train[lidx * BLOCK_SIZE + lidy] = 0;
|
||||
|
||||
if (loadx < query_cols)
|
||||
{
|
||||
s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
|
||||
s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
result += reduce_block_match(s_query, s_train, lidx, lidy);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
result = DIST_RES(result);
|
||||
|
||||
const int trainIdx = t * BLOCK_SIZE + lidx;
|
||||
|
||||
if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance /*&& mask(queryIdx, trainIdx)*/)
|
||||
{
|
||||
myBestDistance = result;
|
||||
myBestTrainIdx = trainIdx;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
__local float *s_distance = (__local float *)sharebuffer;
|
||||
__local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
|
||||
|
||||
//findBestMatch
|
||||
s_distance += lidy * BLOCK_SIZE;
|
||||
s_trainIdx += lidy * BLOCK_SIZE;
|
||||
s_distance[lidx] = myBestDistance;
|
||||
s_trainIdx[lidx] = myBestTrainIdx;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
//reduce -- now all reduce implement in each threads.
|
||||
for (int k = 0 ; k < BLOCK_SIZE; k++)
|
||||
{
|
||||
if (myBestDistance > s_distance[k])
|
||||
{
|
||||
myBestDistance = s_distance[k];
|
||||
myBestTrainIdx = s_trainIdx[k];
|
||||
}
|
||||
}
|
||||
|
||||
if (queryIdx < query_rows && lidx == 0)
|
||||
{
|
||||
bestTrainIdx[queryIdx] = myBestTrainIdx;
|
||||
bestDistance[queryIdx] = myBestDistance;
|
||||
}
|
||||
}
|
||||
|
||||
//radius_unrollmatch
|
||||
__kernel void BruteForceMatch_RadiusUnrollMatch(
|
||||
__global T *query,
|
||||
__global T *train,
|
||||
float maxDistance,
|
||||
//__global float *mask,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
__global int *nMatches,
|
||||
__local float *sharebuffer,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int bestTrainIdx_cols,
|
||||
int step,
|
||||
int ostep
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
const int groupidx = get_group_id(0);
|
||||
const int groupidy = get_group_id(1);
|
||||
|
||||
const int queryIdx = groupidy * BLOCK_SIZE + lidy;
|
||||
const int trainIdx = groupidx * BLOCK_SIZE + lidx;
|
||||
|
||||
__local value_type *s_query = (__local value_type *)sharebuffer;
|
||||
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
|
||||
|
||||
result_type result = 0;
|
||||
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; ++i)
|
||||
{
|
||||
//load a BLOCK_SIZE * BLOCK_SIZE block into local train.
|
||||
const int loadx = lidx + i * BLOCK_SIZE;
|
||||
|
||||
s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
|
||||
//synchronize to make sure each elem for reduceIteration in share memory is written already.
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
result += reduce_block(s_query, s_train, lidx, lidy);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (queryIdx < query_rows && trainIdx < train_rows &&
|
||||
convert_float(result) < maxDistance/* && mask(queryIdx, trainIdx)*/)
|
||||
{
|
||||
int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
|
||||
|
||||
if(ind < bestTrainIdx_cols)
|
||||
{
|
||||
bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
|
||||
bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//radius_match
|
||||
__kernel void BruteForceMatch_RadiusMatch(
|
||||
__global T *query,
|
||||
__global T *train,
|
||||
float maxDistance,
|
||||
//__global float *mask,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
__global int *nMatches,
|
||||
__local float *sharebuffer,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int bestTrainIdx_cols,
|
||||
int step,
|
||||
int ostep
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
const int groupidx = get_group_id(0);
|
||||
const int groupidy = get_group_id(1);
|
||||
|
||||
const int queryIdx = groupidy * BLOCK_SIZE + lidy;
|
||||
const int trainIdx = groupidx * BLOCK_SIZE + lidx;
|
||||
|
||||
__local value_type *s_query = (__local value_type *)sharebuffer;
|
||||
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
|
||||
|
||||
result_type result = 0;
|
||||
for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; ++i)
|
||||
{
|
||||
//load a BLOCK_SIZE * BLOCK_SIZE block into local train.
|
||||
const int loadx = lidx + i * BLOCK_SIZE;
|
||||
|
||||
s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
|
||||
//synchronize to make sure each elem for reduceIteration in share memory is written already.
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
result += reduce_block(s_query, s_train, lidx, lidy);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (queryIdx < query_rows && trainIdx < train_rows &&
|
||||
convert_float(result) < maxDistance/* && mask(queryIdx, trainIdx)*/)
|
||||
{
|
||||
int ind = atom_inc(nMatches + queryIdx);
|
||||
|
||||
if(ind < bestTrainIdx_cols)
|
||||
{
|
||||
bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
|
||||
bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__kernel void BruteForceMatch_knnUnrollMatch(
|
||||
__global T *query,
|
||||
__global T *train,
|
||||
//__global float *mask,
|
||||
__global int2 *bestTrainIdx,
|
||||
__global float2 *bestDistance,
|
||||
__local float *sharebuffer,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int step
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
const int groupidx = get_group_id(0);
|
||||
|
||||
const int queryIdx = groupidx * BLOCK_SIZE + lidy;
|
||||
__local value_type *s_query = (__local value_type *)sharebuffer;
|
||||
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;
|
||||
|
||||
// load the query into local memory.
|
||||
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)
|
||||
{
|
||||
int loadx = lidx + i * BLOCK_SIZE;
|
||||
s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
}
|
||||
|
||||
float myBestDistance1 = MAX_FLOAT;
|
||||
float myBestDistance2 = MAX_FLOAT;
|
||||
int myBestTrainIdx1 = -1;
|
||||
int myBestTrainIdx2 = -1;
|
||||
|
||||
//loopUnrolledCached
|
||||
for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
|
||||
{
|
||||
result_type result = 0;
|
||||
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
|
||||
{
|
||||
//load a BLOCK_SIZE * BLOCK_SIZE block into local train.
|
||||
const int loadx = lidx + i * BLOCK_SIZE;
|
||||
s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
|
||||
|
||||
//synchronize to make sure each elem for reduceIteration in share memory is written already.
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
result += reduce_multi_block(s_query, s_train, i, lidx, lidy);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
result = DIST_RES(result);
|
||||
|
||||
const int trainIdx = t * BLOCK_SIZE + lidx;
|
||||
|
||||
if (queryIdx < query_rows && trainIdx < train_rows)
|
||||
{
|
||||
if (result < myBestDistance1)
|
||||
{
|
||||
myBestDistance2 = myBestDistance1;
|
||||
myBestTrainIdx2 = myBestTrainIdx1;
|
||||
myBestDistance1 = result;
|
||||
myBestTrainIdx1 = trainIdx;
|
||||
}
|
||||
else if (result < myBestDistance2)
|
||||
{
|
||||
myBestDistance2 = result;
|
||||
myBestTrainIdx2 = trainIdx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
__local float *s_distance = (local float *)sharebuffer;
|
||||
__local int *s_trainIdx = (local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
|
||||
|
||||
// find BestMatch
|
||||
s_distance += lidy * BLOCK_SIZE;
|
||||
s_trainIdx += lidy * BLOCK_SIZE;
|
||||
|
||||
s_distance[lidx] = myBestDistance1;
|
||||
s_trainIdx[lidx] = myBestTrainIdx1;
|
||||
|
||||
float bestDistance1 = MAX_FLOAT;
|
||||
float bestDistance2 = MAX_FLOAT;
|
||||
int bestTrainIdx1 = -1;
|
||||
int bestTrainIdx2 = -1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (lidx == 0)
|
||||
{
|
||||
for (int i = 0 ; i < BLOCK_SIZE ; i++)
|
||||
{
|
||||
float val = s_distance[i];
|
||||
if (val < bestDistance1)
|
||||
{
|
||||
bestDistance2 = bestDistance1;
|
||||
bestTrainIdx2 = bestTrainIdx1;
|
||||
|
||||
bestDistance1 = val;
|
||||
bestTrainIdx1 = s_trainIdx[i];
|
||||
}
|
||||
else if (val < bestDistance2)
|
||||
{
|
||||
bestDistance2 = val;
|
||||
bestTrainIdx2 = s_trainIdx[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
s_distance[lidx] = myBestDistance2;
|
||||
s_trainIdx[lidx] = myBestTrainIdx2;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (lidx == 0)
|
||||
{
|
||||
for (int i = 0 ; i < BLOCK_SIZE ; i++)
|
||||
{
|
||||
float val = s_distance[i];
|
||||
|
||||
if (val < bestDistance2)
|
||||
{
|
||||
bestDistance2 = val;
|
||||
bestTrainIdx2 = s_trainIdx[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
myBestDistance1 = bestDistance1;
|
||||
myBestDistance2 = bestDistance2;
|
||||
|
||||
myBestTrainIdx1 = bestTrainIdx1;
|
||||
myBestTrainIdx2 = bestTrainIdx2;
|
||||
|
||||
if (queryIdx < query_rows && lidx == 0)
|
||||
{
|
||||
bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
|
||||
bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void BruteForceMatch_knnMatch(
|
||||
__global T *query,
|
||||
__global T *train,
|
||||
//__global float *mask,
|
||||
__global int2 *bestTrainIdx,
|
||||
__global float2 *bestDistance,
|
||||
__local float *sharebuffer,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int step
|
||||
)
|
||||
{
|
||||
const int lidx = get_local_id(0);
|
||||
const int lidy = get_local_id(1);
|
||||
const int groupidx = get_group_id(0);
|
||||
|
||||
const int queryIdx = groupidx * BLOCK_SIZE + lidy;
|
||||
__local value_type *s_query = (__local value_type *)sharebuffer;
|
||||
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
|
||||
|
||||
float myBestDistance1 = MAX_FLOAT;
|
||||
float myBestDistance2 = MAX_FLOAT;
|
||||
int myBestTrainIdx1 = -1;
|
||||
int myBestTrainIdx2 = -1;
|
||||
|
||||
//loop
|
||||
for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
|
||||
{
|
||||
result_type result = 0.0f;
|
||||
for (int i = 0 ; i < (query_cols + BLOCK_SIZE -1) / BLOCK_SIZE ; i++)
|
||||
{
|
||||
const int loadx = lidx + i * BLOCK_SIZE;
|
||||
//load query and train into local memory
|
||||
s_query[lidy * BLOCK_SIZE + lidx] = 0;
|
||||
s_train[lidx * BLOCK_SIZE + lidy] = 0;
|
||||
|
||||
if (loadx < query_cols)
|
||||
{
|
||||
s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
|
||||
s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
result += reduce_block_match(s_query, s_train, lidx, lidy);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
result = DIST_RES(result);
|
||||
|
||||
const int trainIdx = t * BLOCK_SIZE + lidx;
|
||||
|
||||
if (queryIdx < query_rows && trainIdx < train_rows /*&& mask(queryIdx, trainIdx)*/)
|
||||
{
|
||||
if (result < myBestDistance1)
|
||||
{
|
||||
myBestDistance2 = myBestDistance1;
|
||||
myBestTrainIdx2 = myBestTrainIdx1;
|
||||
myBestDistance1 = result;
|
||||
myBestTrainIdx1 = trainIdx;
|
||||
}
|
||||
else if (result < myBestDistance2)
|
||||
{
|
||||
myBestDistance2 = result;
|
||||
myBestTrainIdx2 = trainIdx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
__local float *s_distance = (__local float *)sharebuffer;
|
||||
__local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
|
||||
|
||||
//findBestMatch
|
||||
s_distance += lidy * BLOCK_SIZE;
|
||||
s_trainIdx += lidy * BLOCK_SIZE;
|
||||
|
||||
s_distance[lidx] = myBestDistance1;
|
||||
s_trainIdx[lidx] = myBestTrainIdx1;
|
||||
|
||||
float bestDistance1 = MAX_FLOAT;
|
||||
float bestDistance2 = MAX_FLOAT;
|
||||
int bestTrainIdx1 = -1;
|
||||
int bestTrainIdx2 = -1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (lidx == 0)
|
||||
{
|
||||
for (int i = 0 ; i < BLOCK_SIZE ; i++)
|
||||
{
|
||||
float val = s_distance[i];
|
||||
if (val < bestDistance1)
|
||||
{
|
||||
bestDistance2 = bestDistance1;
|
||||
bestTrainIdx2 = bestTrainIdx1;
|
||||
|
||||
bestDistance1 = val;
|
||||
bestTrainIdx1 = s_trainIdx[i];
|
||||
}
|
||||
else if (val < bestDistance2)
|
||||
{
|
||||
bestDistance2 = val;
|
||||
bestTrainIdx2 = s_trainIdx[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
s_distance[lidx] = myBestDistance2;
|
||||
s_trainIdx[lidx] = myBestTrainIdx2;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (lidx == 0)
|
||||
{
|
||||
for (int i = 0 ; i < BLOCK_SIZE ; i++)
|
||||
{
|
||||
float val = s_distance[i];
|
||||
|
||||
if (val < bestDistance2)
|
||||
{
|
||||
bestDistance2 = val;
|
||||
bestTrainIdx2 = s_trainIdx[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
myBestDistance1 = bestDistance1;
|
||||
myBestDistance2 = bestDistance2;
|
||||
|
||||
myBestTrainIdx1 = bestTrainIdx1;
|
||||
myBestTrainIdx2 = bestTrainIdx2;
|
||||
|
||||
if (queryIdx < query_rows && lidx == 0)
|
||||
{
|
||||
bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
|
||||
bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
|
||||
}
|
||||
}
|
||||
|
||||
kernel void BruteForceMatch_calcDistanceUnrolled(
|
||||
__global T *query,
|
||||
__global T *train,
|
||||
//__global float *mask,
|
||||
__global float *allDist,
|
||||
__local float *sharebuffer,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int step)
|
||||
{
|
||||
/* Todo */
|
||||
}
|
||||
|
||||
kernel void BruteForceMatch_calcDistance(
|
||||
__global T *query,
|
||||
__global T *train,
|
||||
//__global float *mask,
|
||||
__global float *allDist,
|
||||
__local float *sharebuffer,
|
||||
int query_rows,
|
||||
int query_cols,
|
||||
int train_rows,
|
||||
int train_cols,
|
||||
int step)
|
||||
{
|
||||
/* Todo */
|
||||
}
|
||||
|
||||
kernel void BruteForceMatch_findBestMatch(
|
||||
__global float *allDist,
|
||||
__global int *bestTrainIdx,
|
||||
__global float *bestDistance,
|
||||
int k
|
||||
)
|
||||
{
|
||||
/* Todo */
|
||||
}
|
@ -48,6 +48,7 @@
|
||||
|
||||
#include "opencv2/core/utility.hpp"
|
||||
#include "opencv2/core/private.hpp"
|
||||
#include "opencv2/core/ocl.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
213
modules/features2d/test/ocl/test_brute_force_matcher.cpp
Normal file
213
modules/features2d/test/ocl/test_brute_force_matcher.cpp
Normal file
@ -0,0 +1,213 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Niko Li, newlife20080214@gmail.com
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Zero Lin, Zero.Lin@amd.com
|
||||
// Zhang Ying, zhangying913@gmail.com
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "cvconfig.h"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
PARAM_TEST_CASE(BruteForceMatcher, int, int)
|
||||
{
|
||||
int distType;
|
||||
int dim;
|
||||
|
||||
int queryDescCount;
|
||||
int countFactor;
|
||||
|
||||
Mat query, train;
|
||||
UMat uquery, utrain;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
distType = GET_PARAM(0);
|
||||
dim = GET_PARAM(1);
|
||||
|
||||
queryDescCount = 300; // must be even number because we split train data in some cases in two
|
||||
countFactor = 4; // do not change it
|
||||
|
||||
cv::Mat queryBuf, trainBuf;
|
||||
|
||||
// Generate query descriptors randomly.
|
||||
// Descriptor vector elements are integer values.
|
||||
queryBuf.create(queryDescCount, dim, CV_32SC1);
|
||||
rng.fill(queryBuf, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
|
||||
queryBuf.convertTo(queryBuf, CV_32FC1);
|
||||
|
||||
// Generate train decriptors as follows:
|
||||
// copy each query descriptor to train set countFactor times
|
||||
// and perturb some one element of the copied descriptors in
|
||||
// in ascending order. General boundaries of the perturbation
|
||||
// are (0.f, 1.f).
|
||||
trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1);
|
||||
float step = 1.f / countFactor;
|
||||
for (int qIdx = 0; qIdx < queryDescCount; qIdx++)
|
||||
{
|
||||
cv::Mat queryDescriptor = queryBuf.row(qIdx);
|
||||
for (int c = 0; c < countFactor; c++)
|
||||
{
|
||||
int tIdx = qIdx * countFactor + c;
|
||||
cv::Mat trainDescriptor = trainBuf.row(tIdx);
|
||||
queryDescriptor.copyTo(trainDescriptor);
|
||||
int elem = rng(dim);
|
||||
float diff = rng.uniform(step * c, step * (c + 1));
|
||||
trainDescriptor.at<float>(0, elem) += diff;
|
||||
}
|
||||
}
|
||||
|
||||
queryBuf.convertTo(query, CV_32F);
|
||||
trainBuf.convertTo(train, CV_32F);
|
||||
query.copyTo(uquery);
|
||||
train.copyTo(utrain);
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef ANDROID
|
||||
OCL_TEST_P(BruteForceMatcher, DISABLED_Match_Single)
|
||||
#else
|
||||
OCL_TEST_P(BruteForceMatcher, Match_Single)
|
||||
#endif
|
||||
{
|
||||
BFMatcher matcher(distType);
|
||||
|
||||
std::vector<cv::DMatch> matches;
|
||||
matcher.match(uquery, utrain, matches);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
int badCount = 0;
|
||||
for (size_t i = 0; i < matches.size(); i++)
|
||||
{
|
||||
cv::DMatch match = matches[i];
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
|
||||
badCount++;
|
||||
}
|
||||
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
|
||||
#ifdef ANDROID
|
||||
OCL_TEST_P(BruteForceMatcher, DISABLED_KnnMatch_2_Single)
|
||||
#else
|
||||
OCL_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
|
||||
#endif
|
||||
{
|
||||
const int knn = 2;
|
||||
|
||||
BFMatcher matcher(distType);
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.knnMatch(uquery, utrain, matches, knn);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
int badCount = 0;
|
||||
for (size_t i = 0; i < matches.size(); i++)
|
||||
{
|
||||
if ((int)matches[i].size() != knn)
|
||||
badCount++;
|
||||
else
|
||||
{
|
||||
int localBadCount = 0;
|
||||
for (int k = 0; k < knn; k++)
|
||||
{
|
||||
cv::DMatch match = matches[i][k];
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0))
|
||||
localBadCount++;
|
||||
}
|
||||
badCount += localBadCount > 0 ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
|
||||
#ifdef ANDROID
|
||||
OCL_TEST_P(BruteForceMatcher, DISABLED_RadiusMatch_Single)
|
||||
#else
|
||||
OCL_TEST_P(BruteForceMatcher, RadiusMatch_Single)
|
||||
#endif
|
||||
{
|
||||
float radius = 1.f / countFactor;
|
||||
|
||||
BFMatcher matcher(distType);
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.radiusMatch(uquery, utrain, matches, radius);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
int badCount = 0;
|
||||
for (size_t i = 0; i < matches.size(); i++)
|
||||
{
|
||||
if ((int)matches[i].size() != 1)
|
||||
{
|
||||
badCount++;
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::DMatch match = matches[i][0];
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
|
||||
badCount++;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Matcher, BruteForceMatcher, Combine( Values((int)NORM_L1, (int)NORM_L2),
|
||||
Values(57, 64, 83, 128, 179, 256, 304) ) );
|
||||
|
||||
}//ocl
|
||||
}//cvtest
|
||||
|
||||
#endif //HAVE_OPENCL
|
Loading…
Reference in New Issue
Block a user