diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index a2506f1ee2..83fbd60c5f 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -917,8 +917,11 @@ CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTra GpuMat& labels, GpuMat& buf, Stream& stream = Stream::Null()); +//! compute mask for Generalized Flood fill componetns labeling. +CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null()); + //! performs connected componnents labeling. -CV_EXPORTS void labelComponents(const GpuMat& image, GpuMat& mask, GpuMat& components, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null()); +CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, Stream& stream = Stream::Null()); ////////////////////////////////// Histograms ////////////////////////////////// diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu index fbc68cbcf3..9b8d4c9925 100644 --- a/modules/gpu/src/cuda/ccomponetns.cu +++ b/modules/gpu/src/cuda/ccomponetns.cu @@ -65,32 +65,108 @@ namespace cv { namespace gpu { namespace device TILE_ROWS = CTA_SIZE_Y * TPB_Y }; + template struct IntervalsTraits + { + typedef T elem_type; + }; + + template<> struct IntervalsTraits + { + typedef int dist_type; + enum {ch = 1}; + }; + + template<> struct IntervalsTraits + { + typedef int3 dist_type; + enum {ch = 3}; + }; + + template<> struct IntervalsTraits + { + typedef int3 dist_type; + enum {ch = 4}; + }; + + template<> struct IntervalsTraits + { + typedef int dist_type; + enum {ch = 1}; + }; + + template<> struct IntervalsTraits + { + typedef int3 dist_type; + enum {ch = 3}; + }; + + template<> struct IntervalsTraits + { + typedef int4 dist_type; + enum {ch = 4}; + }; + + template<> struct IntervalsTraits + { + typedef float dist_type; + enum {ch = 1}; + }; + + template<> struct IntervalsTraits + { + typedef int dist_type; + enum {ch = 1}; + }; + typedef unsigned char component; enum Edges { UP = 1, DOWN = 2, LEFT = 4, RIGHT = 8, EMPTY = 0xF0 }; - template - struct InInterval + template struct InInterval {}; + + template struct InInterval { - __host__ __device__ __forceinline__ InInterval(const T& _lo, const T& _hi) : lo(-_lo), hi(_hi) {}; + __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi) : lo(-_lo.x), hi(_hi.x) {}; T lo, hi; - __device__ __forceinline__ bool operator() (const T& a, const T& b) const + template __device__ __forceinline__ bool operator() (const I& a, const I& b) const { T d = a - b; return lo <= d && d <= hi; } - }; - template - __global__ void computeConnectivity(const DevMem2D image, DevMem2D components, F connected) + template struct InInterval + { + __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi){}; + T lo, hi; + + template __device__ __forceinline__ bool operator() (const I& a, const I& b) const + { + return true; + } + }; + + template struct InInterval + { + __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi){}; + T lo, hi; + + template __device__ __forceinline__ bool operator() (const I& a, const I& b) const + { + return true; + } + }; + + + template + __global__ void computeConnectivity(const DevMem2D_ image, DevMem2D components, F connected) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x >= image.cols || y >= image.rows) return; - int intensity = image(y, x); + T intensity = image(y, x); component c = 0; if ( x > 0 && connected(intensity, image(y, x - 1))) @@ -108,18 +184,31 @@ namespace cv { namespace gpu { namespace device components(y, x) = c; } - void computeEdges(const DevMem2D& image, DevMem2D edges, const int lo, const int hi, cudaStream_t stream) + template< typename T> + void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream) { dim3 block(CTA_SIZE_X, CTA_SIZE_Y); dim3 grid(divUp(image.cols, block.x), divUp(image.rows, block.y)); - InInterval inInt(lo, hi); - computeConnectivity ><<>>(image, edges, inInt); + + typedef InInterval::dist_type, IntervalsTraits::ch> Int_t; + + Int_t inInt(lo, hi); + computeConnectivity<<>>(static_cast >(image), edges, inInt); cudaSafeCall( cudaGetLastError() ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } + template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + __global__ void lableTiles(const DevMem2D edges, DevMem2Di comps) { int x = threadIdx.x + blockIdx.x * TILE_COLS; diff --git a/modules/gpu/src/graphcuts.cpp b/modules/gpu/src/graphcuts.cpp index 3ea9d3bc2b..dd8cf8e59b 100644 --- a/modules/gpu/src/graphcuts.cpp +++ b/modules/gpu/src/graphcuts.cpp @@ -47,7 +47,8 @@ void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } -void cv::gpu::labelComponents(const GpuMat&, GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_nogpu(); } +void cv::gpu::connectivityMask(const GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_nogpu(); } +void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, Stream& stream) { throw_nogpu(); } #else /* !defined (HAVE_CUDA) */ @@ -56,29 +57,65 @@ namespace cv { namespace gpu { namespace device namespace ccl { void labelComponents(const DevMem2D& edges, DevMem2Di comps, cudaStream_t stream); - void computeEdges(const DevMem2D& image, DevMem2D edges, const int lo, const int hi, cudaStream_t stream); + + template + void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); } }}} -void cv::gpu::labelComponents(const GpuMat& image, GpuMat& mask, GpuMat& components, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s) + +float4 scalarToCudaType(const cv::Scalar& in) +{ + float4 res; + res.x = in[0]; res.y = in[1]; res.z = in[2]; res.w = in[3]; + return res; +} + + +void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s) { CV_Assert(!image.empty()); - int type = image.type(); - CV_Assert(type == CV_8UC1); + int ch = image.channels(); + CV_Assert(ch <= 4); + + int depth = image.depth(); + + typedef void (*func_t)(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + + static const func_t suppotLookup[8][4] = + { // 1, 2, 3, 4 + { device::ccl::computeEdges, 0, device::ccl::computeEdges, device::ccl::computeEdges },// CV_8U + { 0, 0, 0, 0 },// CV_16U + { device::ccl::computeEdges, 0, device::ccl::computeEdges, device::ccl::computeEdges },// CV_8S + { 0, 0, 0, 0 },// CV_16S + { device::ccl::computeEdges, 0, 0, 0 },// CV_32S + { device::ccl::computeEdges, 0, 0, 0 },// CV_32F + { 0, 0, 0, 0 },// CV_64F + { 0, 0, 0, 0 } // CV_USRTYPE1 + }; + + func_t f = suppotLookup[depth][ch - 1]; + CV_Assert(f); if (image.size() != mask.size() || mask.type() != CV_8UC1) mask.create(image.size(), CV_8UC1); - if (image.size() != components.size() || components.type() != CV_32SC1) - components.create(image.size(), CV_32SC1); - cudaStream_t stream = StreamAccessor::getStream(s); - - device::ccl::computeEdges(image, mask, lo[0], hi[0], stream); - device::ccl::labelComponents(mask, components, stream); + float4 culo = scalarToCudaType(lo), cuhi = scalarToCudaType(hi); + f(image, mask, culo, cuhi, stream); } +void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, Stream& s) +{ + CV_Assert(!mask.empty() && mask.type() == CV_8U); + + if (mask.size() != components.size() || components.type() != CV_32SC1) + components.create(mask.size(), CV_32SC1); + + cudaStream_t stream = StreamAccessor::getStream(s); + device::ccl::labelComponents(mask, components, stream); +} namespace { diff --git a/modules/gpu/test/test_labeling.cpp b/modules/gpu/test/test_labeling.cpp index 2ff010de2a..6ba0ef5483 100644 --- a/modules/gpu/test/test_labeling.cpp +++ b/modules/gpu/test/test_labeling.cpp @@ -70,7 +70,9 @@ TEST_P(Labeling, ConnectedComponents) cv::gpu::GpuMat components; components.create(image.rows, image.cols, CV_32SC1); - cv::gpu::labelComponents(cv::gpu::GpuMat(image), mask, components, cv::Scalar::all(0), cv::Scalar::all(2)); + cv::gpu::connectivityMask(cv::gpu::GpuMat(image), mask, cv::Scalar::all(0), cv::Scalar::all(2)); + + cv::gpu::labelComponents(mask, components); // std::cout << cv::Mat(components) << std::endl; // cv::imshow("test", image);