From 4ddf634c3049125ae323027e4b8b10351603c7ea Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 11 Mar 2013 15:41:50 +0400 Subject: [PATCH] gpu : implement Bayer* -> Gray color conversion --- modules/gpu/perf/perf_imgproc.cpp | 7 +- modules/gpu/src/color.cpp | 45 +++++- modules/gpu/src/cuda/debayer.cu | 247 ++++++++++++++++++------------ modules/gpu/test/test_color.cpp | 64 ++++++++ 4 files changed, 261 insertions(+), 102 deletions(-) diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index be6eb4877c..ab445dc87c 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -1341,7 +1341,12 @@ PERF_TEST_P(Sz_Depth_Code, ImgProc_CvtColorBayer, Values(CvtColorInfo(1, 3, cv::COLOR_BayerBG2BGR), CvtColorInfo(1, 3, cv::COLOR_BayerGB2BGR), CvtColorInfo(1, 3, cv::COLOR_BayerRG2BGR), - CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR)))) + CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR), + + CvtColorInfo(1, 1, cv::COLOR_BayerBG2GRAY), + CvtColorInfo(1, 1, cv::COLOR_BayerGB2GRAY), + CvtColorInfo(1, 1, cv::COLOR_BayerRG2GRAY), + CvtColorInfo(1, 1, cv::COLOR_BayerGR2GRAY)))) { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); diff --git a/modules/gpu/src/color.cpp b/modules/gpu/src/color.cpp index 05d4609001..09986e8c33 100644 --- a/modules/gpu/src/color.cpp +++ b/modules/gpu/src/color.cpp @@ -1640,6 +1640,43 @@ namespace { bayer_to_bgr(src, dst, dcn, true, true, stream); } + void bayer_to_gray(const GpuMat& src, GpuMat& dst, bool blue_last, bool start_with_green, Stream& stream) + { + typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); + static const func_t funcs[3] = + { + Bayer2BGR_8u_gpu<1>, + 0, + Bayer2BGR_16u_gpu<1>, + }; + + CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1); + CV_Assert(src.rows > 2 && src.cols > 2); + + dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); + + funcs[src.depth()](src, dst, blue_last, start_with_green, StreamAccessor::getStream(stream)); + } + + void bayerBG_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream) + { + bayer_to_gray(src, dst, false, false, stream); + } + + void bayerGB_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream) + { + bayer_to_gray(src, dst, false, true, stream); + } + + void bayerRG_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream) + { + bayer_to_gray(src, dst, true, false, stream); + } + + void bayerGR_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream) + { + bayer_to_gray(src, dst, true, true, stream); + } } void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream) @@ -1756,10 +1793,10 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream yuv_to_bgr, // CV_YUV2BGR = 84 yuv_to_rgb, // CV_YUV2RGB = 85 - 0, // CV_BayerBG2GRAY = 86 - 0, // CV_BayerGB2GRAY = 87 - 0, // CV_BayerRG2GRAY = 88 - 0, // CV_BayerGR2GRAY = 89 + bayerBG_to_gray, // CV_BayerBG2GRAY = 86 + bayerGB_to_gray, // CV_BayerGB2GRAY = 87 + bayerRG_to_gray, // CV_BayerRG2GRAY = 88 + bayerGR_to_gray, // CV_BayerGR2GRAY = 89 //YUV 4:2:0 formats family 0, // CV_YUV2RGB_NV12 = 90, diff --git a/modules/gpu/src/cuda/debayer.cu b/modules/gpu/src/cuda/debayer.cu index 57322ed81f..fc43726291 100644 --- a/modules/gpu/src/cuda/debayer.cu +++ b/modules/gpu/src/cuda/debayer.cu @@ -42,42 +42,37 @@ #if !defined CUDA_DISABLER -#include -#include -#include -#include +#include "opencv2/gpu/device/common.hpp" +#include "opencv2/gpu/device/vec_traits.hpp" +#include "opencv2/gpu/device/vec_math.hpp" +#include "opencv2/gpu/device/limits.hpp" +#include "opencv2/gpu/device/color.hpp" -namespace cv { namespace gpu { - namespace device +namespace cv { namespace gpu { namespace device +{ + template struct Bayer2BGR; + + template <> struct Bayer2BGR { - template - __global__ void Bayer2BGR_8u(const PtrStepb src, PtrStepSz dst, const bool blue_last, const bool start_with_green) + uchar3 res0; + uchar3 res1; + uchar3 res2; + uchar3 res3; + + __device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green) { - const int s_x = blockIdx.x * blockDim.x + threadIdx.x; - int s_y = blockIdx.y * blockDim.y + threadIdx.y; - - if (s_y >= dst.rows || (s_x << 2) >= dst.cols) - return; - - s_y = ::min(::max(s_y, 1), dst.rows - 2); - uchar4 patch[3][3]; patch[0][1] = ((const uchar4*) src.ptr(s_y - 1))[s_x]; patch[0][0] = ((const uchar4*) src.ptr(s_y - 1))[::max(s_x - 1, 0)]; - patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)]; + patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)]; patch[1][1] = ((const uchar4*) src.ptr(s_y))[s_x]; patch[1][0] = ((const uchar4*) src.ptr(s_y))[::max(s_x - 1, 0)]; - patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)]; + patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)]; patch[2][1] = ((const uchar4*) src.ptr(s_y + 1))[s_x]; patch[2][0] = ((const uchar4*) src.ptr(s_y + 1))[::max(s_x - 1, 0)]; - patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)]; - - D res0 = VecTraits::all(numeric_limits::max()); - D res1 = VecTraits::all(numeric_limits::max()); - D res2 = VecTraits::all(numeric_limits::max()); - D res3 = VecTraits::all(numeric_limits::max()); + patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)]; if ((s_y & 1) ^ start_with_green) { @@ -181,45 +176,69 @@ namespace cv { namespace gpu { res3.z = t7; } } - - const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2; - const int d_y = blockIdx.y * blockDim.y + threadIdx.y; - - dst(d_y, d_x) = res0; - if (d_x + 1 < dst.cols) - dst(d_y, d_x + 1) = res1; - if (d_x + 2 < dst.cols) - dst(d_y, d_x + 2) = res2; - if (d_x + 3 < dst.cols) - dst(d_y, d_x + 3) = res3; } + }; - template - __global__ void Bayer2BGR_16u(const PtrStepb src, PtrStepSz dst, const bool blue_last, const bool start_with_green) + template __device__ __forceinline__ D toDst(const uchar3& pix); + template <> __device__ __forceinline__ uchar toDst(const uchar3& pix) + { + typename bgr_to_gray_traits::functor_type f = bgr_to_gray_traits::create_functor(); + return f(pix); + } + template <> __device__ __forceinline__ uchar3 toDst(const uchar3& pix) + { + return pix; + } + template <> __device__ __forceinline__ uchar4 toDst(const uchar3& pix) + { + return make_uchar4(pix.x, pix.y, pix.z, 255); + } + + template + __global__ void Bayer2BGR_8u(const PtrStepSzb src, PtrStep dst, const bool blue_last, const bool start_with_green) + { + const int s_x = blockIdx.x * blockDim.x + threadIdx.x; + int s_y = blockIdx.y * blockDim.y + threadIdx.y; + + if (s_y >= src.rows || (s_x << 2) >= src.cols) + return; + + s_y = ::min(::max(s_y, 1), src.rows - 2); + + Bayer2BGR bayer; + bayer.apply(src, s_x, s_y, blue_last, start_with_green); + + const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2; + const int d_y = blockIdx.y * blockDim.y + threadIdx.y; + + dst(d_y, d_x) = toDst(bayer.res0); + if (d_x + 1 < src.cols) + dst(d_y, d_x + 1) = toDst(bayer.res1); + if (d_x + 2 < src.cols) + dst(d_y, d_x + 2) = toDst(bayer.res2); + if (d_x + 3 < src.cols) + dst(d_y, d_x + 3) = toDst(bayer.res3); + } + + template <> struct Bayer2BGR + { + ushort3 res0; + ushort3 res1; + + __device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green) { - const int s_x = blockIdx.x * blockDim.x + threadIdx.x; - int s_y = blockIdx.y * blockDim.y + threadIdx.y; - - if (s_y >= dst.rows || (s_x << 1) >= dst.cols) - return; - - s_y = ::min(::max(s_y, 1), dst.rows - 2); - ushort2 patch[3][3]; patch[0][1] = ((const ushort2*) src.ptr(s_y - 1))[s_x]; patch[0][0] = ((const ushort2*) src.ptr(s_y - 1))[::max(s_x - 1, 0)]; - patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)]; + patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)]; patch[1][1] = ((const ushort2*) src.ptr(s_y))[s_x]; patch[1][0] = ((const ushort2*) src.ptr(s_y))[::max(s_x - 1, 0)]; - patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)]; + patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)]; patch[2][1] = ((const ushort2*) src.ptr(s_y + 1))[s_x]; patch[2][0] = ((const ushort2*) src.ptr(s_y + 1))[::max(s_x - 1, 0)]; - patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)]; - - D res0 = VecTraits::all(numeric_limits::max()); - D res1 = VecTraits::all(numeric_limits::max()); + patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)]; if ((s_y & 1) ^ start_with_green) { @@ -279,53 +298,87 @@ namespace cv { namespace gpu { res1.z = t3; } } - - const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1; - const int d_y = blockIdx.y * blockDim.y + threadIdx.y; - - dst(d_y, d_x) = res0; - if (d_x + 1 < dst.cols) - dst(d_y, d_x + 1) = res1; } + }; - template - void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream) - { - typedef typename TypeVec::vec_type dst_t; - - const dim3 block(32, 8); - const dim3 grid(divUp(dst.cols, 4 * block.x), divUp(dst.rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u, cudaFuncCachePreferL1) ); - - Bayer2BGR_8u<<>>(src, (PtrStepSz)dst, blue_last, start_with_green); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - template - void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream) - { - typedef typename TypeVec::vec_type dst_t; - - const dim3 block(32, 8); - const dim3 grid(divUp(dst.cols, 2 * block.x), divUp(dst.rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u, cudaFuncCachePreferL1) ); - - Bayer2BGR_16u<<>>(src, (PtrStepSz)dst, blue_last, start_with_green); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); + template __device__ __forceinline__ D toDst(const ushort3& pix); + template <> __device__ __forceinline__ ushort toDst(const ushort3& pix) + { + typename bgr_to_gray_traits::functor_type f = bgr_to_gray_traits::create_functor(); + return f(pix); + } + template <> __device__ __forceinline__ ushort3 toDst(const ushort3& pix) + { + return pix; + } + template <> __device__ __forceinline__ ushort4 toDst(const ushort3& pix) + { + return make_ushort4(pix.x, pix.y, pix.z, numeric_limits::max()); } -}} -#endif /* CUDA_DISABLER */ \ No newline at end of file + template + __global__ void Bayer2BGR_16u(const PtrStepSzb src, PtrStep dst, const bool blue_last, const bool start_with_green) + { + const int s_x = blockIdx.x * blockDim.x + threadIdx.x; + int s_y = blockIdx.y * blockDim.y + threadIdx.y; + + if (s_y >= src.rows || (s_x << 1) >= src.cols) + return; + + s_y = ::min(::max(s_y, 1), src.rows - 2); + + Bayer2BGR bayer; + bayer.apply(src, s_x, s_y, blue_last, start_with_green); + + const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1; + const int d_y = blockIdx.y * blockDim.y + threadIdx.y; + + dst(d_y, d_x) = toDst(bayer.res0); + if (d_x + 1 < src.cols) + dst(d_y, d_x + 1) = toDst(bayer.res1); + } + + template + void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream) + { + typedef typename TypeVec::vec_type dst_t; + + const dim3 block(32, 8); + const dim3 grid(divUp(src.cols, 4 * block.x), divUp(src.rows, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u, cudaFuncCachePreferL1) ); + + Bayer2BGR_8u<<>>(src, (PtrStepSz)dst, blue_last, start_with_green); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + template + void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream) + { + typedef typename TypeVec::vec_type dst_t; + + const dim3 block(32, 8); + const dim3 grid(divUp(src.cols, 2 * block.x), divUp(src.rows, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u, cudaFuncCachePreferL1) ); + + Bayer2BGR_16u<<>>(src, (PtrStepSz)dst, blue_last, start_with_green); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + template void Bayer2BGR_8u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); + template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); + template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); + + template void Bayer2BGR_16u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); + template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); + template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); +}}} + +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/test/test_color.cpp b/modules/gpu/test/test_color.cpp index 5aee14d495..81831af8c5 100644 --- a/modules/gpu/test/test_color.cpp +++ b/modules/gpu/test/test_color.cpp @@ -2218,6 +2218,70 @@ GPU_TEST_P(CvtColor, BayerGR2BGR4) EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst3(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 0); } +GPU_TEST_P(CvtColor, BayerBG2Gray) +{ + if ((depth != CV_8U && depth != CV_16U) || useRoi) + return; + + cv::Mat src = randomMat(size, depth); + + cv::gpu::GpuMat dst; + cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerBG2GRAY); + + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, cv::COLOR_BayerBG2GRAY); + + EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2); +} + +GPU_TEST_P(CvtColor, BayerGB2Gray) +{ + if ((depth != CV_8U && depth != CV_16U) || useRoi) + return; + + cv::Mat src = randomMat(size, depth); + + cv::gpu::GpuMat dst; + cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGB2GRAY); + + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, cv::COLOR_BayerGB2GRAY); + + EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2); +} + +GPU_TEST_P(CvtColor, BayerRG2Gray) +{ + if ((depth != CV_8U && depth != CV_16U) || useRoi) + return; + + cv::Mat src = randomMat(size, depth); + + cv::gpu::GpuMat dst; + cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerRG2GRAY); + + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, cv::COLOR_BayerRG2GRAY); + + EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2); +} + +GPU_TEST_P(CvtColor, BayerGR2Gray) +{ + if ((depth != CV_8U && depth != CV_16U) || useRoi) + return; + + cv::Mat src = randomMat(size, depth); + + cv::gpu::GpuMat dst; + cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGR2GRAY); + + cv::Mat dst_gold; + cv::cvtColor(src, dst_gold, cv::COLOR_BayerGR2GRAY); + + EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2); +} + INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CvtColor, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES,