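Optimized memory usage in BruteForceMatcher_GPU_base::knnMatch when k == 2: trainIdx and distance are now allocated as single-row, two-channel matrices (1 x nQuery, CV_32SC2 / CV_32FC2), the knnMatch2 kernel writes its results through plain int2* / float2* pointers, and knnMatchConvert accepts both the two-channel and the single-channel layouts.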

This commit is contained in:
Vladislav Vinogradov 2011-08-30 08:49:11 +00:00
parent 837f65784e
commit 0f5f57561e
2 changed files with 23 additions and 13 deletions

View File

@ -446,10 +446,17 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, con
const int nQuery = queryDescs.rows;
const int nTrain = trainDescs.rows;
ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);
ensureSizeIsEnough(nQuery, k, CV_32F, distance);
if (k != 2)
if (k == 2)
{
ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
}
else
{
ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);
ensureSizeIsEnough(nQuery, k, CV_32F, distance);
ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
}
if (stream)
{
@ -491,14 +498,19 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, c
if (trainIdx.empty() || distance.empty())
return;
CV_Assert(trainIdx.type() == CV_32SC1);
CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());
CV_Assert(trainIdx.type() == CV_32SC2 || trainIdx.type() == CV_32SC1);
CV_Assert(distance.type() == CV_32FC2 || distance.type() == CV_32FC1);
CV_Assert(distance.size() == trainIdx.size());
CV_Assert(trainIdx.isContinuous() && distance.isContinuous());
const int nQuery = distance.rows;
const int k = trainIdx.cols;
const int nQuery = trainIdx.type() == CV_32SC2 ? trainIdx.cols : trainIdx.rows;
const int k = trainIdx.type() == CV_32SC2 ? 2 :trainIdx.cols;
matches.clear();
matches.reserve(nQuery);
const int* trainIdx_ptr = trainIdx.ptr<int>();
const float* distance_ptr = distance.ptr<float>();
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
{
@ -506,8 +518,6 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, c
vector<DMatch>& curMatches = matches.back();
curMatches.reserve(k);
const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
const float* distance_ptr = distance.ptr<float>(queryIdx);
for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr)
{
int trainIdx = *trainIdx_ptr;

View File

@ -452,7 +452,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename VecDiff, typename Dist, typename T, typename Mask>
__global__ void knnMatch2(const PtrStep_<T> query, const DevMem2D_<T> train, const Mask m, PtrStep_<int2> trainIdx, PtrStep_<float2> distance)
__global__ void knnMatch2(const PtrStep_<T> query, const DevMem2D_<T> train, const Mask m, int2* trainIdx, float2* distance)
{
typedef typename Dist::result_type result_type;
typedef typename Dist::value_type value_type;
@ -520,8 +520,8 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}
trainIdx.ptr(queryIdx)[0] = make_int2(bestTrainIdx1, bestTrainIdx2);
distance.ptr(queryIdx)[0] = make_float2(distMin1, distMin2);
trainIdx[queryIdx] = make_int2(bestTrainIdx1, bestTrainIdx2);
distance[queryIdx] = make_float2(distMin1, distMin2);
}
}
@ -556,7 +556,7 @@ namespace cv { namespace gpu { namespace bfmatcher
const dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
knnMatch2<BLOCK_DIM_X, BLOCK_DIM_Y, VecDiffCachedRegister<BLOCK_DIM_X, MAX_LEN, LEN_EQ_MAX_LEN, typename Dist::value_type>, Dist, T>
<<<grid, threads, 0, stream>>>(query, train, mask, trainIdx, distance);
<<<grid, threads, 0, stream>>>(query, train, mask, trainIdx.data, distance.data);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)