From eccfc90b7724da1e1daf28e963721e08e5636ea9 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 28 Dec 2011 07:56:19 +0000 Subject: [PATCH] bug fix --- modules/gpu/src/cuda/fast.cu | 8 ++++++++ modules/gpu/src/fast.cpp | 3 +++ modules/gpu/src/orb.cpp | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/gpu/src/cuda/fast.cu b/modules/gpu/src/cuda/fast.cu index 23d36d52dc..1383db150f 100644 --- a/modules/gpu/src/cuda/fast.cu +++ b/modules/gpu/src/cuda/fast.cu @@ -223,6 +223,8 @@ namespace cv { namespace gpu { namespace device template __global__ void calcKeypoints(const DevMem2Db img, const Mask mask, short2* kpLoc, const unsigned int maxKeypoints, PtrStepi score, const int threshold) { + #if __CUDA_ARCH__ >= 110 + const int j = threadIdx.x + blockIdx.x * blockDim.x + 3; const int i = threadIdx.y + blockIdx.y * blockDim.y + 3; @@ -276,6 +278,8 @@ namespace cv { namespace gpu { namespace device kpLoc[ind] = make_short2(j, i); } } + + #endif } int calcKeypoints_gpu(DevMem2Db img, DevMem2Db mask, short2* kpLoc, int maxKeypoints, DevMem2Di score, int threshold) @@ -321,6 +325,8 @@ namespace cv { namespace gpu { namespace device __global__ void nonmaxSupression(const short2* kpLoc, int count, const DevMem2Di scoreMat, short2* locFinal, float* responseFinal) { + #if __CUDA_ARCH__ >= 110 + const int kpIdx = threadIdx.x + blockIdx.x * blockDim.x; if (kpIdx < count) @@ -349,6 +355,8 @@ namespace cv { namespace gpu { namespace device responseFinal[ind] = static_cast(score); } } + + #endif } int nonmaxSupression_gpu(const short2* kpLoc, int count, DevMem2Di score, short2* loc, float* response) diff --git a/modules/gpu/src/fast.cpp b/modules/gpu/src/fast.cpp index 7e7a0372c8..5a7c73b214 100644 --- a/modules/gpu/src/fast.cpp +++ b/modules/gpu/src/fast.cpp @@ -124,6 +124,7 @@ int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& ma CV_Assert(img.type() == CV_8UC1); CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size())); + CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS)); int maxKeypoints = static_cast(keypointsRatio * img.size().area()); @@ -145,6 +146,8 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints) { using namespace cv::gpu::device::fast; + CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS)); + if (count_ == 0) return 0; diff --git a/modules/gpu/src/orb.cpp b/modules/gpu/src/orb.cpp index d038efabc5..2854404b4e 100644 --- a/modules/gpu/src/orb.cpp +++ b/modules/gpu/src/orb.cpp @@ -666,7 +666,7 @@ void cv::gpu::ORB_GPU::mergeKeyPoints(GpuMat& keypoints) mergeLocation_gpu(keyPointsPyr_[level].ptr(0), keyPointsRange.ptr(0), keyPointsRange.ptr(1), keyPointsCount_[level], locScale, 0); GpuMat range = keyPointsRange.rowRange(2, 4); - keyPointsPyr_[level].rowRange(1, 3).copyTo(range); + keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range); keyPointsRange.row(4).setTo(Scalar::all(level)); keyPointsRange.row(5).setTo(Scalar::all(params_.patch_size_ * sf));