mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 22:44:02 +08:00
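Optimized memory usage in BruteForceMatcher_GPU_base::knnMatch when k == 2: trainIdx and distance are now allocated as 1 x nQuery two-channel matrices (CV_32SC2 / CV_32FC2) instead of nQuery x k single-channel matrices.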
This commit is contained in:
parent
837f65784e
commit
0f5f57561e
@ -446,10 +446,17 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
|
||||
const int nQuery = queryDescs.rows;
|
||||
const int nTrain = trainDescs.rows;
|
||||
|
||||
ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);
|
||||
ensureSizeIsEnough(nQuery, k, CV_32F, distance);
|
||||
if (k != 2)
|
||||
if (k == 2)
|
||||
{
|
||||
ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
|
||||
ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
|
||||
}
|
||||
else
|
||||
{
|
||||
ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);
|
||||
ensureSizeIsEnough(nQuery, k, CV_32F, distance);
|
||||
ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
|
||||
}
|
||||
|
||||
if (stream)
|
||||
{
|
||||
@ -491,14 +498,19 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, c
|
||||
if (trainIdx.empty() || distance.empty())
|
||||
return;
|
||||
|
||||
CV_Assert(trainIdx.type() == CV_32SC1);
|
||||
CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());
|
||||
CV_Assert(trainIdx.type() == CV_32SC2 || trainIdx.type() == CV_32SC1);
|
||||
CV_Assert(distance.type() == CV_32FC2 || distance.type() == CV_32FC1);
|
||||
CV_Assert(distance.size() == trainIdx.size());
|
||||
CV_Assert(trainIdx.isContinuous() && distance.isContinuous());
|
||||
|
||||
const int nQuery = distance.rows;
|
||||
const int k = trainIdx.cols;
|
||||
const int nQuery = trainIdx.type() == CV_32SC2 ? trainIdx.cols : trainIdx.rows;
|
||||
const int k = trainIdx.type() == CV_32SC2 ? 2 :trainIdx.cols;
|
||||
|
||||
matches.clear();
|
||||
matches.reserve(nQuery);
|
||||
|
||||
const int* trainIdx_ptr = trainIdx.ptr<int>();
|
||||
const float* distance_ptr = distance.ptr<float>();
|
||||
|
||||
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
|
||||
{
|
||||
@ -506,8 +518,6 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, c
|
||||
vector<DMatch>& curMatches = matches.back();
|
||||
curMatches.reserve(k);
|
||||
|
||||
const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
|
||||
const float* distance_ptr = distance.ptr<float>(queryIdx);
|
||||
for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr)
|
||||
{
|
||||
int trainIdx = *trainIdx_ptr;
|
||||
|
@ -452,7 +452,7 @@ namespace cv { namespace gpu { namespace bfmatcher
|
||||
}
|
||||
|
||||
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename VecDiff, typename Dist, typename T, typename Mask>
|
||||
__global__ void knnMatch2(const PtrStep_<T> query, const DevMem2D_<T> train, const Mask m, PtrStep_<int2> trainIdx, PtrStep_<float2> distance)
|
||||
__global__ void knnMatch2(const PtrStep_<T> query, const DevMem2D_<T> train, const Mask m, int2* trainIdx, float2* distance)
|
||||
{
|
||||
typedef typename Dist::result_type result_type;
|
||||
typedef typename Dist::value_type value_type;
|
||||
@ -520,8 +520,8 @@ namespace cv { namespace gpu { namespace bfmatcher
|
||||
}
|
||||
}
|
||||
|
||||
trainIdx.ptr(queryIdx)[0] = make_int2(bestTrainIdx1, bestTrainIdx2);
|
||||
distance.ptr(queryIdx)[0] = make_float2(distMin1, distMin2);
|
||||
trainIdx[queryIdx] = make_int2(bestTrainIdx1, bestTrainIdx2);
|
||||
distance[queryIdx] = make_float2(distMin1, distMin2);
|
||||
}
|
||||
}
|
||||
|
||||
@ -556,7 +556,7 @@ namespace cv { namespace gpu { namespace bfmatcher
|
||||
const dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
|
||||
|
||||
knnMatch2<BLOCK_DIM_X, BLOCK_DIM_Y, VecDiffCachedRegister<BLOCK_DIM_X, MAX_LEN, LEN_EQ_MAX_LEN, typename Dist::value_type>, Dist, T>
|
||||
<<<grid, threads, 0, stream>>>(query, train, mask, trainIdx, distance);
|
||||
<<<grid, threads, 0, stream>>>(query, train, mask, trainIdx.data, distance.data);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
|
Loading…
Reference in New Issue
Block a user