From e7cf541f5faf6df3a7e79374f13f79f95017823b Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Tue, 14 Dec 2010 09:53:17 +0000 Subject: [PATCH] fixed bug in matchTemplate when template size is (1,1), refactored --- modules/gpu/src/cuda/match_template.cu | 142 ++++++++++++------------- tests/gpu/src/gputest_main.cpp | 1 - 2 files changed, 71 insertions(+), 72 deletions(-) diff --git a/modules/gpu/src/cuda/match_template.cu b/modules/gpu/src/cuda/match_template.cu index 913d3399f8..2a6d85ad62 100644 --- a/modules/gpu/src/cuda/match_template.cu +++ b/modules/gpu/src/cuda/match_template.cu @@ -47,50 +47,6 @@ using namespace cv::gpu; namespace cv { namespace gpu { namespace imgproc { -texture imageTex_8U_CCORR; -texture templTex_8U_CCORR; - - -__global__ void matchTemplateNaiveKernel_8U_CCORR(int w, int h, - DevMem2Df result) -{ - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - float sum = 0.f; - - for (int i = 0; i < h; ++i) - for (int j = 0; j < w; ++j) - sum += (float)tex2D(imageTex_8U_CCORR, x + j, y + i) * - (float)tex2D(templTex_8U_CCORR, j, i); - - result.ptr(y)[x] = sum; - } -} - - -void matchTemplateNaive_8U_CCORR(const DevMem2D image, const DevMem2D templ, - DevMem2Df result) -{ - dim3 threads(32, 8); - dim3 grid(divUp(image.cols - templ.cols + 1, threads.x), - divUp(image.rows - templ.rows + 1, threads.y)); - - cudaChannelFormatDesc desc = cudaCreateChannelDesc(); - cudaBindTexture2D(0, imageTex_8U_CCORR, image.data, desc, image.cols, image.rows, image.step); - cudaBindTexture2D(0, templTex_8U_CCORR, templ.data, desc, templ.cols, templ.rows, templ.step); - imageTex_8U_CCORR.filterMode = cudaFilterModePoint; - templTex_8U_CCORR.filterMode = cudaFilterModePoint; - - matchTemplateNaiveKernel_8U_CCORR<<>>(templ.cols, templ.rows, result); - cudaSafeCall(cudaThreadSynchronize()); - cudaSafeCall(cudaUnbindTexture(imageTex_8U_CCORR)); - cudaSafeCall(cudaUnbindTexture(templTex_8U_CCORR)); -} - - texture imageTex_32F_CCORR; texture templTex_32F_CCORR; @@ -135,6 +91,56 @@ void matchTemplateNaive_32F_CCORR(const DevMem2D image, const DevMem2D templ, } +texture imageTex_32F_SQDIFF; +texture templTex_32F_SQDIFF; + + +__global__ void matchTemplateNaiveKernel_32F_SQDIFF(int w, int h, + DevMem2Df result) +{ + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x < result.cols && y < result.rows) + { + float sum = 0.f; + float delta; + + for (int i = 0; i < h; ++i) + { + for (int j = 0; j < w; ++j) + { + delta = tex2D(imageTex_32F_SQDIFF, x + j, y + i) - + tex2D(templTex_32F_SQDIFF, j, i); + sum += delta * delta; + } + } + + result.ptr(y)[x] = sum; + } +} + + +void matchTemplateNaive_32F_SQDIFF(const DevMem2D image, const DevMem2D templ, + DevMem2Df result) +{ + dim3 threads(32, 8); + dim3 grid(divUp(image.cols - templ.cols + 1, threads.x), + divUp(image.rows - templ.rows + 1, threads.y)); + + cudaChannelFormatDesc desc = cudaCreateChannelDesc(); + cudaBindTexture2D(0, imageTex_32F_SQDIFF, image.data, desc, image.cols, image.rows, image.step); + cudaBindTexture2D(0, templTex_32F_SQDIFF, templ.data, desc, templ.cols, templ.rows, templ.step); + imageTex_32F_SQDIFF.filterMode = cudaFilterModePoint; + templTex_32F_SQDIFF.filterMode = cudaFilterModePoint; + + matchTemplateNaiveKernel_32F_SQDIFF<<>>(templ.cols, templ.rows, result); + cudaSafeCall(cudaThreadSynchronize()); + cudaSafeCall(cudaUnbindTexture(imageTex_32F_SQDIFF)); + cudaSafeCall(cudaUnbindTexture(templTex_32F_SQDIFF)); +} + + texture imageTex_8U_SQDIFF; texture templTex_8U_SQDIFF; @@ -185,12 +191,12 @@ void matchTemplateNaive_8U_SQDIFF(const DevMem2D image, const DevMem2D templ, } -texture imageTex_32F_SQDIFF; -texture templTex_32F_SQDIFF; +texture imageTex_8U_CCORR; +texture templTex_8U_CCORR; -__global__ void matchTemplateNaiveKernel_32F_SQDIFF(int w, int h, - DevMem2Df result) +__global__ void matchTemplateNaiveKernel_8U_CCORR(int w, int h, + DevMem2Df result) { int x = blockDim.x * blockIdx.x + threadIdx.x; int y = blockDim.y * blockIdx.y + threadIdx.y; @@ -198,40 +204,34 @@ __global__ void matchTemplateNaiveKernel_32F_SQDIFF(int w, int h, if (x < result.cols && y < result.rows) { float sum = 0.f; - float delta; for (int i = 0; i < h; ++i) - { for (int j = 0; j < w; ++j) - { - delta = tex2D(imageTex_32F_SQDIFF, x + j, y + i) - - tex2D(templTex_32F_SQDIFF, j, i); - sum += delta * delta; - } - } + sum += (float)tex2D(imageTex_8U_CCORR, x + j, y + i) * + (float)tex2D(templTex_8U_CCORR, j, i); result.ptr(y)[x] = sum; } } -void matchTemplateNaive_32F_SQDIFF(const DevMem2D image, const DevMem2D templ, - DevMem2Df result) +void matchTemplateNaive_8U_CCORR(const DevMem2D image, const DevMem2D templ, + DevMem2Df result) { dim3 threads(32, 8); dim3 grid(divUp(image.cols - templ.cols + 1, threads.x), divUp(image.rows - templ.rows + 1, threads.y)); - cudaChannelFormatDesc desc = cudaCreateChannelDesc(); - cudaBindTexture2D(0, imageTex_32F_SQDIFF, image.data, desc, image.cols, image.rows, image.step); - cudaBindTexture2D(0, templTex_32F_SQDIFF, templ.data, desc, templ.cols, templ.rows, templ.step); - imageTex_8U_SQDIFF.filterMode = cudaFilterModePoint; - templTex_8U_SQDIFF.filterMode = cudaFilterModePoint; + cudaChannelFormatDesc desc = cudaCreateChannelDesc(); + cudaBindTexture2D(0, imageTex_8U_CCORR, image.data, desc, image.cols, image.rows, image.step); + cudaBindTexture2D(0, templTex_8U_CCORR, templ.data, desc, templ.cols, templ.rows, templ.step); + imageTex_8U_CCORR.filterMode = cudaFilterModePoint; + templTex_8U_CCORR.filterMode = cudaFilterModePoint; - matchTemplateNaiveKernel_32F_SQDIFF<<>>(templ.cols, templ.rows, result); + matchTemplateNaiveKernel_8U_CCORR<<>>(templ.cols, templ.rows, result); cudaSafeCall(cudaThreadSynchronize()); - cudaSafeCall(cudaUnbindTexture(imageTex_32F_SQDIFF)); - cudaSafeCall(cudaUnbindTexture(templTex_32F_SQDIFF)); + cudaSafeCall(cudaUnbindTexture(imageTex_8U_CCORR)); + cudaSafeCall(cudaUnbindTexture(templTex_8U_CCORR)); } @@ -301,8 +301,8 @@ __global__ void matchTemplatePreparedKernel_8U_SQDIFF_NORMED( (image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) - (image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x])); float ccorr = result.ptr(y)[x]; - result.ptr(y)[x] = (image_sqsum_ - 2.f * ccorr + templ_sqsum) * - rsqrtf(image_sqsum_ * templ_sqsum); + result.ptr(y)[x] = min(1.f, (image_sqsum_ - 2.f * ccorr + templ_sqsum) * + rsqrtf(image_sqsum_ * templ_sqsum)); } } @@ -368,8 +368,8 @@ __global__ void matchTemplatePreparedKernel_8U_CCOEFF_NORMED( float image_sqsum_ = (float)( (image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) - (image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x])); - result.ptr(y)[x] = (ccorr - image_sum_ * templ_sum_scale) * - rsqrtf(templ_sqsum_scale * (image_sqsum_ - weight * image_sum_ * image_sum_)); + result.ptr(y)[x] = min(1.f, (ccorr - image_sum_ * templ_sum_scale) * + rsqrtf(templ_sqsum_scale * (image_sqsum_ - weight * image_sum_ * image_sum_))); } } @@ -405,7 +405,7 @@ __global__ void normalizeKernel_8U( float image_sqsum_ = (float)( (image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) - (image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x])); - result.ptr(y)[x] *= rsqrtf(image_sqsum_ * templ_sqsum); + result.ptr(y)[x] = min(1.f, result.ptr(y)[x] * rsqrtf(image_sqsum_ * templ_sqsum)); } } diff --git a/tests/gpu/src/gputest_main.cpp b/tests/gpu/src/gputest_main.cpp index a388fa7da9..cbeb0d0dcf 100644 --- a/tests/gpu/src/gputest_main.cpp +++ b/tests/gpu/src/gputest_main.cpp @@ -51,7 +51,6 @@ const char* blacklist[] = }; int main( int argc, char** argv ) - { return test_system.run( argc, argv, blacklist ); }