added gpu::Canny function

This commit is contained in:
Vladislav Vinogradov 2011-08-08 08:53:55 +00:00
parent 0a2c7803b6
commit 767ac9aa10
5 changed files with 754 additions and 3 deletions

View File

@ -976,7 +976,32 @@ namespace cv
//! performs linear blending of two images
//! to avoid accuracy errors sum of weigths shouldn't be very close to zero
CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
GpuMat& result, Stream& stream = Stream::Null());
GpuMat& result, Stream& stream = Stream::Null());
struct CV_EXPORTS CannyBuf;
CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
CV_EXPORTS void Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
struct CV_EXPORTS CannyBuf
{
CannyBuf() {}
explicit CannyBuf(const Size& image_size, int apperture_size = 3) {create(image_size, apperture_size);}
CannyBuf(const GpuMat& dx_, const GpuMat& dy_);
void create(const Size& image_size, int apperture_size = 3);
void release();
GpuMat dx, dy;
GpuMat dx_buf, dy_buf;
GpuMat edgeBuf;
GpuMat trackBuf1, trackBuf2;
Ptr<FilterEngine_GPU> filterDX, filterDY;
};
////////////////////////////// Matrix reductions //////////////////////////////

View File

@ -0,0 +1,489 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "internal_shared.hpp"
#include "opencv2/gpu/device/utility.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace cv { namespace gpu { namespace canny
{
__global__ void calcSobelRowPass(PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
{
__shared__ int smem[16][18];
const int j = blockIdx.x * blockDim.x + threadIdx.x;
const int i = blockIdx.y * blockDim.y + threadIdx.y;
if (i < rows)
{
smem[threadIdx.y][threadIdx.x + 1] = src.ptr(i)[j];
if (threadIdx.x == 0)
{
smem[threadIdx.y][0] = src.ptr(i)[max(j - 1, 0)];
smem[threadIdx.y][17] = src.ptr(i)[min(j + 16, cols - 1)];
}
__syncthreads();
if (j < cols)
{
dx_buf.ptr(i)[j] = -smem[threadIdx.y][threadIdx.x] + smem[threadIdx.y][threadIdx.x + 2];
dy_buf.ptr(i)[j] = smem[threadIdx.y][threadIdx.x] + 2 * smem[threadIdx.y][threadIdx.x + 1] + smem[threadIdx.y][threadIdx.x + 2];
}
}
}
void calcSobelRowPass_gpu(PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
calcSobelRowPass<<<grid, block>>>(src, dx_buf, dy_buf, rows, cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
struct L1
{
static __device__ __forceinline__ float calc(int x, int y)
{
return abs(x) + abs(y);
}
};
struct L2
{
static __device__ __forceinline__ float calc(int x, int y)
{
return sqrtf(x * x + y * y);
}
};
template <typename Norm> __global__ void calcMagnitude(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
{
__shared__ int sdx[18][16];
__shared__ int sdy[18][16];
const int j = blockIdx.x * blockDim.x + threadIdx.x;
const int i = blockIdx.y * blockDim.y + threadIdx.y;
if (j < cols)
{
sdx[threadIdx.y + 1][threadIdx.x] = dx_buf.ptr(i)[j];
sdy[threadIdx.y + 1][threadIdx.x] = dy_buf.ptr(i)[j];
if (threadIdx.y == 0)
{
sdx[0][threadIdx.x] = dx_buf.ptr(max(i - 1, 0))[j];
sdx[17][threadIdx.x] = dx_buf.ptr(min(i + 16, rows - 1))[j];
sdy[0][threadIdx.x] = dy_buf.ptr(max(i - 1, 0))[j];
sdy[17][threadIdx.x] = dy_buf.ptr(min(i + 16, rows - 1))[j];
}
__syncthreads();
if (i < rows)
{
int x = sdx[threadIdx.y][threadIdx.x] + 2 * sdx[threadIdx.y + 1][threadIdx.x] + sdx[threadIdx.y + 2][threadIdx.x];
int y = -sdy[threadIdx.y][threadIdx.x] + sdy[threadIdx.y + 2][threadIdx.x];
dx.ptr(i)[j] = x;
dy.ptr(i)[j] = y;
mag.ptr(i + 1)[j + 1] = Norm::calc(x, y);
}
}
}
void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
if (L2Grad)
calcMagnitude<L2><<<grid, block>>>(dx_buf, dy_buf, dx, dy, mag, rows, cols);
else
calcMagnitude<L1><<<grid, block>>>(dx_buf, dy_buf, dx, dy, mag, rows, cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
template <typename Norm> __global__ void calcMagnitude(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
{
const int j = blockIdx.x * blockDim.x + threadIdx.x;
const int i = blockIdx.y * blockDim.y + threadIdx.y;
if (i < rows && j < cols)
mag.ptr(i + 1)[j + 1] = Norm::calc(dx.ptr(i)[j], dy.ptr(i)[j]);
}
void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
if (L2Grad)
calcMagnitude<L2><<<grid, block>>>(dx, dy, mag, rows, cols);
else
calcMagnitude<L1><<<grid, block>>>(dx, dy, mag, rows, cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
//////////////////////////////////////////////////////////////////////////////////////////
#define CANNY_SHIFT 15
#define TG22 (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)
__global__ void calcMap(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
{
__shared__ float smem[18][18];
const int j = blockIdx.x * 16 + threadIdx.x;
const int i = blockIdx.y * 16 + threadIdx.y;
const int tid = threadIdx.y * 16 + threadIdx.x;
const int lx = tid % 18;
const int ly = tid / 18;
if (ly < 14)
smem[ly][lx] = mag.ptr(blockIdx.y * 16 + ly)[blockIdx.x * 16 + lx];
if (ly < 4 && blockIdx.y * 16 + ly + 14 <= rows && blockIdx.x * 16 + lx <= cols)
smem[ly + 14][lx] = mag.ptr(blockIdx.y * 16 + ly + 14)[blockIdx.x * 16 + lx];
__syncthreads();
if (i < rows && j < cols)
{
int x = dx.ptr(i)[j];
int y = dy.ptr(i)[j];
const int s = (x ^ y) < 0 ? -1 : 1;
const float m = smem[threadIdx.y + 1][threadIdx.x + 1];
x = abs(x);
y = abs(y);
// 0 - the pixel can not belong to an edge
// 1 - the pixel might belong to an edge
// 2 - the pixel does belong to an edge
int edge_type = 0;
if (m > low_thresh)
{
const int tg22x = x * TG22;
const int tg67x = tg22x + ((x + x) << CANNY_SHIFT);
y <<= CANNY_SHIFT;
if (y < tg22x)
{
if (m > smem[threadIdx.y + 1][threadIdx.x] && m >= smem[threadIdx.y + 1][threadIdx.x + 2])
edge_type = 1 + (int)(m > high_thresh);
}
else if( y > tg67x )
{
if (m > smem[threadIdx.y][threadIdx.x + 1] && m >= smem[threadIdx.y + 2][threadIdx.x + 1])
edge_type = 1 + (int)(m > high_thresh);
}
else
{
if (m > smem[threadIdx.y][threadIdx.x + 1 - s] && m > smem[threadIdx.y + 2][threadIdx.x + 1 + s])
edge_type = 1 + (int)(m > high_thresh);
}
}
map.ptr(i + 1)[j + 1] = edge_type;
}
}
#undef CANNY_SHIFT
#undef TG22
void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
calcMap<<<grid, block>>>(dx, dy, mag, map, rows, cols, low_thresh, high_thresh);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
//////////////////////////////////////////////////////////////////////////////////////////
__device__ unsigned int counter = 0;
__global__ void edgesHysteresisLocal(PtrStepi map, ushort2* st, int rows, int cols)
{
#if __CUDA_ARCH__ >= 120
__shared__ int smem[18][18];
const int j = blockIdx.x * 16 + threadIdx.x;
const int i = blockIdx.y * 16 + threadIdx.y;
const int tid = threadIdx.y * 16 + threadIdx.x;
const int lx = tid % 18;
const int ly = tid / 18;
if (ly < 14)
smem[ly][lx] = map.ptr(blockIdx.y * 16 + ly)[blockIdx.x * 16 + lx];
if (ly < 4 && blockIdx.y * 16 + ly + 14 <= rows && blockIdx.x * 16 + lx <= cols)
smem[ly + 14][lx] = map.ptr(blockIdx.y * 16 + ly + 14)[blockIdx.x * 16 + lx];
__syncthreads();
if (i < rows && j < cols)
{
int n;
#pragma unroll
for (int k = 0; k < 16; ++k)
{
n = 0;
if (smem[threadIdx.y + 1][threadIdx.x + 1] == 1)
{
n += smem[threadIdx.y ][threadIdx.x ] == 2;
n += smem[threadIdx.y ][threadIdx.x + 1] == 2;
n += smem[threadIdx.y ][threadIdx.x + 2] == 2;
n += smem[threadIdx.y + 1][threadIdx.x ] == 2;
n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2;
n += smem[threadIdx.y + 2][threadIdx.x ] == 2;
n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2;
n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2;
}
if (n > 0)
smem[threadIdx.y + 1][threadIdx.x + 1] = 2;
}
const int e = smem[threadIdx.y + 1][threadIdx.x + 1];
map.ptr(i + 1)[j + 1] = e;
n = 0;
if (e == 2)
{
n += smem[threadIdx.y ][threadIdx.x ] == 1;
n += smem[threadIdx.y ][threadIdx.x + 1] == 1;
n += smem[threadIdx.y ][threadIdx.x + 2] == 1;
n += smem[threadIdx.y + 1][threadIdx.x ] == 1;
n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1;
n += smem[threadIdx.y + 2][threadIdx.x ] == 1;
n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1;
n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1;
}
if (n > 0)
{
const unsigned int ind = atomicInc(&counter, (unsigned int)(-1));
st[ind] = make_ushort2(j + 1, i + 1);
}
}
#endif
}
void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
edgesHysteresisLocal<<<grid, block>>>(map, st1, rows, cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
__constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
__constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
__global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols, int count)
{
#if __CUDA_ARCH__ >= 120
const int stack_size = 512;
__shared__ unsigned int s_counter;
__shared__ unsigned int s_ind;
__shared__ ushort2 s_st[stack_size];
if (threadIdx.x == 0)
s_counter = 0;
__syncthreads();
int ind = blockIdx.y * gridDim.x + blockIdx.x;
if (ind < count)
{
ushort2 pos = st1[ind];
if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
{
if (threadIdx.x < 8)
{
pos.x += c_dx[threadIdx.x];
pos.y += c_dy[threadIdx.x];
if (map.ptr(pos.y)[pos.x] == 1)
{
map.ptr(pos.y)[pos.x] = 2;
ind = atomicInc(&s_counter, (unsigned int)(-1));
s_st[ind] = pos;
}
}
__syncthreads();
while (s_counter > 0 && s_counter <= stack_size - blockDim.x)
{
const int subTaskIdx = threadIdx.x >> 3;
const int portion = min(s_counter, blockDim.x >> 3);
pos.x = pos.y = 0;
if (subTaskIdx < portion)
pos = s_st[s_counter - 1 - subTaskIdx];
__syncthreads();
if (threadIdx.x == 0)
s_counter -= portion;
__syncthreads();
if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
{
pos.x += c_dx[threadIdx.x & 7];
pos.y += c_dy[threadIdx.x & 7];
if (map.ptr(pos.y)[pos.x] == 1)
{
map.ptr(pos.y)[pos.x] = 2;
ind = atomicInc(&s_counter, (unsigned int)(-1));
s_st[ind] = pos;
}
}
__syncthreads();
}
if (s_counter > 0)
{
if (threadIdx.x == 0)
{
ind = atomicAdd(&counter, s_counter);
s_ind = ind - s_counter;
}
__syncthreads();
ind = s_ind;
for (int i = threadIdx.x; i < s_counter; i += blockDim.x)
{
st2[ind + i] = s_st[i];
}
}
}
}
#endif
}
void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, "cv::gpu::canny::counter") );
unsigned int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
while (count > 0)
{
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
dim3 block(128, 1, 1);
dim3 grid(min(count, 65535u), divUp(count, 65535), 1);
edgesHysteresisGlobal<<<grid, block>>>(map, st1, st2, rows, cols, count);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
swap(st1, st2);
}
}
__global__ void getEdges(PtrStepi map, PtrStep dst, int rows, int cols)
{
const int j = blockIdx.x * 16 + threadIdx.x;
const int i = blockIdx.y * 16 + threadIdx.y;
if (i < rows && j < cols)
dst.ptr(i)[j] = (uchar)(-(map.ptr(i + 1)[j + 1] >> 1));
}
void getEdges_gpu(PtrStepi map, PtrStep dst, int rows, int cols)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
getEdges<<<grid, block>>>(map, dst, rows, cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
}}}

View File

@ -92,8 +92,13 @@ void cv::gpu::pyrDown(const GpuMat&, GpuMat&, PyrDownBuf&, Stream&) { throw_nogp
void cv::gpu::pyrUp(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::PyrUpBuf::create(Size, int) { throw_nogpu(); }
void cv::gpu::pyrUp(const GpuMat&, GpuMat&, PyrUpBuf&, Stream&) { throw_nogpu(); }
void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); }
void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_nogpu(); }
void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_nogpu(); }
void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, double, bool) { throw_nogpu(); }
cv::gpu::CannyBuf::CannyBuf(const GpuMat&, const GpuMat&) { throw_nogpu(); }
void cv::gpu::CannyBuf::create(const Size&, int) { throw_nogpu(); }
void cv::gpu::CannyBuf::release() { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */
@ -1627,6 +1632,149 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, PyrUpBuf& buf, Stream& strea
buf.filter->apply(buf.buf, dst, Rect(0, 0, buf.buf.cols, buf.buf.rows), stream);
}
//////////////////////////////////////////////////////////////////////////////
// Canny
cv::gpu::CannyBuf::CannyBuf(const GpuMat& dx_, const GpuMat& dy_) : dx(dx_), dy(dy_)
{
CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
create(dx_.size(), -1);
}
void cv::gpu::CannyBuf::create(const Size& image_size, int apperture_size)
{
ensureSizeIsEnough(image_size, CV_32SC1, dx);
ensureSizeIsEnough(image_size, CV_32SC1, dy);
if (apperture_size == 3)
{
ensureSizeIsEnough(image_size, CV_32SC1, dx_buf);
ensureSizeIsEnough(image_size, CV_32SC1, dy_buf);
}
else if(apperture_size > 0)
{
if (!filterDX)
filterDX = createDerivFilter_GPU(CV_8UC1, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);
if (!filterDY)
filterDY = createDerivFilter_GPU(CV_8UC1, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
}
ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
}
void cv::gpu::CannyBuf::release()
{
dx.release();
dy.release();
dx_buf.release();
dy_buf.release();
edgeBuf.release();
trackBuf1.release();
trackBuf2.release();
}
namespace cv { namespace gpu { namespace canny
{
void calcSobelRowPass_gpu(PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols);
void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);
void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);
void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh);
void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols);
void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols);
void getEdges_gpu(PtrStepi map, PtrStep dst, int rows, int cols);
}}}
namespace
{
void CannyCaller(CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)
{
using namespace cv::gpu::canny;
calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), dst.rows, dst.cols);
edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), buf.trackBuf2.ptr<ushort2>(), dst.rows, dst.cols);
getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);
}
}
void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
{
CannyBuf buf(src.size(), apperture_size);
Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);
}
void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
{
using namespace cv::gpu::canny;
CV_Assert(src.type() == CV_8UC1);
if( low_thresh > high_thresh )
std::swap( low_thresh, high_thresh);
dst.create(src.size(), CV_8U);
dst.setTo(Scalar::all(0));
buf.create(src.size(), apperture_size);
buf.edgeBuf.setTo(Scalar::all(0));
if (apperture_size == 3)
{
calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);
calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
}
else
{
buf.filterDX->apply(src, buf.dx, Rect(0, 0, src.cols, src.rows));
buf.filterDY->apply(src, buf.dy, Rect(0, 0, src.cols, src.rows));
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
}
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
{
CannyBuf buf(dx, dy);
Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);
}
void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
{
using namespace cv::gpu::canny;
CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());
if( low_thresh > high_thresh )
std::swap( low_thresh, high_thresh);
dst.create(dx.size(), CV_8U);
dst.setTo(Scalar::all(0));
buf.dx = dx; buf.dy = dy;
buf.create(dx.size(), -1);
buf.edgeBuf.setTo(Scalar::all(0));
calcMagnitude_gpu(dx, dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
#endif /* !defined (HAVE_CUDA) */

View File

@ -2217,4 +2217,71 @@ TEST_P(PyrUp, Accuracy)
INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::ValuesIn(devices()));
////////////////////////////////////////////////////////
// Canny
struct Canny : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, bool> >
{
static cv::Mat img;
static void SetUpTestCase()
{
img = readImage("stereobm/aloe-L.png", CV_LOAD_IMAGE_GRAYSCALE);
}
static void TearDownTestCase()
{
img.release();
}
cv::gpu::DeviceInfo devInfo;
int apperture_size;
bool L2gradient;
double low_thresh;
double high_thresh;
cv::Mat edges_gold;
virtual void SetUp()
{
devInfo = std::tr1::get<0>(GetParam());
apperture_size = std::tr1::get<1>(GetParam());
L2gradient = std::tr1::get<2>(GetParam());
cv::gpu::setDevice(devInfo.deviceID());
low_thresh = 50.0;
high_thresh = 100.0;
cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, L2gradient);
}
};
cv::Mat Canny::img;
TEST_P(Canny, Accuracy)
{
PRINT_PARAM(devInfo);
PRINT_PARAM(apperture_size);
PRINT_PARAM(L2gradient);
cv::Mat edges;
ASSERT_NO_THROW(
cv::gpu::GpuMat d_edges;
cv::gpu::Canny(cv::gpu::GpuMat(img), d_edges, low_thresh, high_thresh, apperture_size, L2gradient);
d_edges.download(edges);
);
EXPECT_MAT_SIMILAR(edges_gold, edges, 1.0);
}
INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine(
testing::ValuesIn(devices()),
testing::Values(3, 5),
testing::Values(false, true)));
#endif // HAVE_CUDA

View File

@ -937,3 +937,25 @@ TEST(equalizeHist)
GPU_OFF;
}
}
TEST(Canny)
{
Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE);
if (img.empty()) throw runtime_error("can't open aloeL.jpg");
Mat edges(img.size(), CV_8UC1);
CPU_ON;
Canny(img, edges, 50.0, 100.0);
CPU_OFF;
gpu::GpuMat d_img(img);
gpu::GpuMat d_edges(img.size(), CV_8UC1);
gpu::CannyBuf d_buf(img.size());
GPU_ON;
gpu::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
GPU_OFF;
}