From f9ff9c56183e92cfb54992f7a07dcc18f4f86e65 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 7 May 2014 13:15:19 +0400 Subject: [PATCH 01/20] fix cv::subtract function: call dst.create(...) before using it(cherry picked from commit 4c66614e07319b66537b6327e2dcf871c5aa6829) --- modules/core/src/arithm.cpp | 6 +++++- modules/core/test/test_arithm.cpp | 10 ++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 0517a5fae6..f0ef920554 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -1562,8 +1562,12 @@ void cv::subtract( InputArray src1, InputArray src2, OutputArray dst, if (dtype == -1 && dst.fixedType()) dtype = dst.depth(); - if (!dst.fixedType() || dtype == dst.depth()) + dtype = CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()); + + if (!dst.fixedType() || dtype == dst.type()) { + dst.create(src1.size(), dtype); + if (dtype == CV_16S) { Mat _dst = dst.getMat(); diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index a240941847..1687285a60 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -1579,3 +1579,13 @@ TEST_P(Mul1, One) } INSTANTIATE_TEST_CASE_P(Arithm, Mul1, testing::Values(Size(2, 2), Size(1, 1))); + +TEST(Subtract8u8u16s, EmptyOutputMat) +{ + cv::Mat src1 = cv::Mat::zeros(16, 16, CV_8UC1); + cv::Mat src2 = cv::Mat::zeros(16, 16, CV_8UC1); + cv::Mat dst; + cv::subtract(src1, src2, dst, cv::noArray(), CV_16S); + ASSERT_FALSE(dst.empty()); + ASSERT_EQ(0, cv::countNonZero(dst)); +} From 942401de162838964f79cd4d6e6aed27ddc1a487 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 7 May 2014 19:52:35 +0400 Subject: [PATCH 02/20] fix output matrix allocation in cv::subtract(cherry picked from commit 629461c83652e2416ccb6c8685a0788bb6fb15f5) --- modules/core/src/arithm.cpp | 47 ++++++++++++++++++++----------- modules/core/test/test_arithm.cpp | 19 
+++++++++---- 2 files changed, 45 insertions(+), 21 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index f0ef920554..4058856fff 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -1553,43 +1553,58 @@ void cv::add( InputArray src1, InputArray src2, OutputArray dst, arithm_op(src1, src2, dst, mask, dtype, getAddTab() ); } -void cv::subtract( InputArray src1, InputArray src2, OutputArray dst, +void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst, InputArray mask, int dtype ) { #ifdef HAVE_TEGRA_OPTIMIZATION - if (mask.empty() && src1.depth() == CV_8U && src2.depth() == CV_8U) + int kind1 = _src1.kind(), kind2 = _src2.kind(); + Mat src1 = _src1.getMat(), src2 = _src2.getMat(); + bool src1Scalar = checkScalar(src1, _src2.type(), kind1, kind2); + bool src2Scalar = checkScalar(src2, _src1.type(), kind2, kind1); + + if (!src1Scalar && !src2Scalar && mask.empty() && + src1.depth() == CV_8U && src2.depth() == CV_8U) { - if (dtype == -1 && dst.fixedType()) - dtype = dst.depth(); - - dtype = CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()); - - if (!dst.fixedType() || dtype == dst.type()) + if (dtype == -1) { - dst.create(src1.size(), dtype); + if (_dst.fixedType()) + { + dtype = _dst.depth(); + } + else + { + dtype = src1.depth(); + } + } + + dtype = CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), _src1.channels()); + + if (dtype == _dst.type()) + { + _dst.create(_src1.size(), dtype); if (dtype == CV_16S) { - Mat _dst = dst.getMat(); - if(tegra::subtract_8u8u16s(src1.getMat(), src2.getMat(), _dst)) + Mat dst = _dst.getMat(); + if(tegra::subtract_8u8u16s(src1, src2, dst)) return; } else if (dtype == CV_32F) { - Mat _dst = dst.getMat(); - if(tegra::subtract_8u8u32f(src1.getMat(), src2.getMat(), _dst)) + Mat dst = _dst.getMat(); + if(tegra::subtract_8u8u32f(src1, src2, dst)) return; } else if (dtype == CV_8S) { - Mat _dst = dst.getMat(); - if(tegra::subtract_8u8u8s(src1.getMat(), src2.getMat(), _dst)) + Mat 
dst = _dst.getMat(); + if(tegra::subtract_8u8u8s(src1, src2, dst)) return; } } } #endif - arithm_op(src1, src2, dst, mask, dtype, getSubTab() ); + arithm_op(_src1, _src2, _dst, mask, dtype, getSubTab() ); } void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst ) diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index 1687285a60..68b06267b2 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -1580,12 +1580,21 @@ TEST_P(Mul1, One) INSTANTIATE_TEST_CASE_P(Arithm, Mul1, testing::Values(Size(2, 2), Size(1, 1))); -TEST(Subtract8u8u16s, EmptyOutputMat) +TEST(Subtract, EmptyOutputMat) { cv::Mat src1 = cv::Mat::zeros(16, 16, CV_8UC1); cv::Mat src2 = cv::Mat::zeros(16, 16, CV_8UC1); - cv::Mat dst; - cv::subtract(src1, src2, dst, cv::noArray(), CV_16S); - ASSERT_FALSE(dst.empty()); - ASSERT_EQ(0, cv::countNonZero(dst)); + cv::Mat dst1, dst2, dst3; + + cv::subtract(src1, src2, dst1, cv::noArray(), CV_16S); + cv::subtract(src1, src2, dst2); + cv::subtract(src1, cv::Scalar::all(0), dst3, cv::noArray(), CV_16S); + + ASSERT_FALSE(dst1.empty()); + ASSERT_FALSE(dst2.empty()); + ASSERT_FALSE(dst3.empty()); + + ASSERT_EQ(0, cv::countNonZero(dst1)); + ASSERT_EQ(0, cv::countNonZero(dst2)); + ASSERT_EQ(0, cv::countNonZero(dst3)); } From 3c0b0b0f94deeac7182d2d56b0c1224b70acbeec Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Mon, 14 Jul 2014 14:26:50 +0400 Subject: [PATCH 03/20] Build fixes for CUDA 6.5 (cherry picked from commit 60a5ada4541e777bd2ad3fe0322180706351e58b) --- .../src/nvidia/core/NCVPixelOperations.hpp | 62 +++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp b/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp index c1e06b434e..1c95414734 100644 --- a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp +++ b/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp @@ -48,24 +48,24 @@ #include "NCV.hpp" 
template inline __host__ __device__ TBase _pixMaxVal(); -template<> static inline __host__ __device__ Ncv8u _pixMaxVal() {return UCHAR_MAX;} -template<> static inline __host__ __device__ Ncv16u _pixMaxVal() {return USHRT_MAX;} -template<> static inline __host__ __device__ Ncv32u _pixMaxVal() {return UINT_MAX;} -template<> static inline __host__ __device__ Ncv8s _pixMaxVal() {return SCHAR_MAX;} -template<> static inline __host__ __device__ Ncv16s _pixMaxVal() {return SHRT_MAX;} -template<> static inline __host__ __device__ Ncv32s _pixMaxVal() {return INT_MAX;} -template<> static inline __host__ __device__ Ncv32f _pixMaxVal() {return FLT_MAX;} -template<> static inline __host__ __device__ Ncv64f _pixMaxVal() {return DBL_MAX;} +template<> inline __host__ __device__ Ncv8u _pixMaxVal() {return UCHAR_MAX;} +template<> inline __host__ __device__ Ncv16u _pixMaxVal() {return USHRT_MAX;} +template<> inline __host__ __device__ Ncv32u _pixMaxVal() {return UINT_MAX;} +template<> inline __host__ __device__ Ncv8s _pixMaxVal() {return SCHAR_MAX;} +template<> inline __host__ __device__ Ncv16s _pixMaxVal() {return SHRT_MAX;} +template<> inline __host__ __device__ Ncv32s _pixMaxVal() {return INT_MAX;} +template<> inline __host__ __device__ Ncv32f _pixMaxVal() {return FLT_MAX;} +template<> inline __host__ __device__ Ncv64f _pixMaxVal() {return DBL_MAX;} template inline __host__ __device__ TBase _pixMinVal(); -template<> static inline __host__ __device__ Ncv8u _pixMinVal() {return 0;} -template<> static inline __host__ __device__ Ncv16u _pixMinVal() {return 0;} -template<> static inline __host__ __device__ Ncv32u _pixMinVal() {return 0;} -template<> static inline __host__ __device__ Ncv8s _pixMinVal() {return SCHAR_MIN;} -template<> static inline __host__ __device__ Ncv16s _pixMinVal() {return SHRT_MIN;} -template<> static inline __host__ __device__ Ncv32s _pixMinVal() {return INT_MIN;} -template<> static inline __host__ __device__ Ncv32f _pixMinVal() {return FLT_MIN;} -template<> 
static inline __host__ __device__ Ncv64f _pixMinVal() {return DBL_MIN;} +template<> inline __host__ __device__ Ncv8u _pixMinVal() {return 0;} +template<> inline __host__ __device__ Ncv16u _pixMinVal() {return 0;} +template<> inline __host__ __device__ Ncv32u _pixMinVal() {return 0;} +template<> inline __host__ __device__ Ncv8s _pixMinVal() {return SCHAR_MIN;} +template<> inline __host__ __device__ Ncv16s _pixMinVal() {return SHRT_MIN;} +template<> inline __host__ __device__ Ncv32s _pixMinVal() {return INT_MIN;} +template<> inline __host__ __device__ Ncv32f _pixMinVal() {return FLT_MIN;} +template<> inline __host__ __device__ Ncv64f _pixMinVal() {return DBL_MIN;} template struct TConvVec2Base; template<> struct TConvVec2Base {typedef Ncv8u TBase;}; @@ -116,21 +116,21 @@ template static inline __host__ __device__ void _TDemoteClampNN(Ti template static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;} template inline Tout _pixMakeZero(); -template<> static inline __host__ __device__ uchar1 _pixMakeZero() {return make_uchar1(0);} -template<> static inline __host__ __device__ uchar3 _pixMakeZero() {return make_uchar3(0,0,0);} -template<> static inline __host__ __device__ uchar4 _pixMakeZero() {return make_uchar4(0,0,0,0);} -template<> static inline __host__ __device__ ushort1 _pixMakeZero() {return make_ushort1(0);} -template<> static inline __host__ __device__ ushort3 _pixMakeZero() {return make_ushort3(0,0,0);} -template<> static inline __host__ __device__ ushort4 _pixMakeZero() {return make_ushort4(0,0,0,0);} -template<> static inline __host__ __device__ uint1 _pixMakeZero() {return make_uint1(0);} -template<> static inline __host__ __device__ uint3 _pixMakeZero() {return make_uint3(0,0,0);} -template<> static inline __host__ __device__ uint4 _pixMakeZero() {return make_uint4(0,0,0,0);} -template<> static inline __host__ __device__ float1 _pixMakeZero() {return make_float1(0.f);} -template<> static inline __host__ __device__ 
float3 _pixMakeZero() {return make_float3(0.f,0.f,0.f);} -template<> static inline __host__ __device__ float4 _pixMakeZero() {return make_float4(0.f,0.f,0.f,0.f);} -template<> static inline __host__ __device__ double1 _pixMakeZero() {return make_double1(0.);} -template<> static inline __host__ __device__ double3 _pixMakeZero() {return make_double3(0.,0.,0.);} -template<> static inline __host__ __device__ double4 _pixMakeZero() {return make_double4(0.,0.,0.,0.);} +template<> inline __host__ __device__ uchar1 _pixMakeZero() {return make_uchar1(0);} +template<> inline __host__ __device__ uchar3 _pixMakeZero() {return make_uchar3(0,0,0);} +template<> inline __host__ __device__ uchar4 _pixMakeZero() {return make_uchar4(0,0,0,0);} +template<> inline __host__ __device__ ushort1 _pixMakeZero() {return make_ushort1(0);} +template<> inline __host__ __device__ ushort3 _pixMakeZero() {return make_ushort3(0,0,0);} +template<> inline __host__ __device__ ushort4 _pixMakeZero() {return make_ushort4(0,0,0,0);} +template<> inline __host__ __device__ uint1 _pixMakeZero() {return make_uint1(0);} +template<> inline __host__ __device__ uint3 _pixMakeZero() {return make_uint3(0,0,0);} +template<> inline __host__ __device__ uint4 _pixMakeZero() {return make_uint4(0,0,0,0);} +template<> inline __host__ __device__ float1 _pixMakeZero() {return make_float1(0.f);} +template<> inline __host__ __device__ float3 _pixMakeZero() {return make_float3(0.f,0.f,0.f);} +template<> inline __host__ __device__ float4 _pixMakeZero() {return make_float4(0.f,0.f,0.f,0.f);} +template<> inline __host__ __device__ double1 _pixMakeZero() {return make_double1(0.);} +template<> inline __host__ __device__ double3 _pixMakeZero() {return make_double3(0.,0.,0.);} +template<> inline __host__ __device__ double4 _pixMakeZero() {return make_double4(0.,0.,0.,0.);} static inline __host__ __device__ uchar1 _pixMake(Ncv8u x) {return make_uchar1(x);} static inline __host__ __device__ uchar3 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z) 
{return make_uchar3(x,y,z);} From ca9c52ac9778d01eba933a786b4303c79d0162be Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Mon, 14 Jul 2014 21:27:23 +0400 Subject: [PATCH 04/20] Deb package build fix for CUDA 6.5 and newer. (cherry picked from commit e650d87e470b2e6a8f87ad4dd81977748a136aee) --- cmake/OpenCVPackaging.cmake | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/cmake/OpenCVPackaging.cmake b/cmake/OpenCVPackaging.cmake index 91f5940960..65e6c13abc 100644 --- a/cmake/OpenCVPackaging.cmake +++ b/cmake/OpenCVPackaging.cmake @@ -68,9 +68,23 @@ set(CPACK_COMPONENT_tests_DEPENDS libs) if(HAVE_CUDA) string(REPLACE "." "-" cuda_version_suffix ${CUDA_VERSION}) - set(CPACK_DEB_libs_PACKAGE_DEPENDS "cuda-core-libs-${cuda_version_suffix}, cuda-extra-libs-${cuda_version_suffix}") + if(${CUDA_VERSION} VERSION_LESS "6.5") + set(CPACK_DEB_libs_PACKAGE_DEPENDS "cuda-core-libs-${cuda_version_suffix}, cuda-extra-libs-${cuda_version_suffix}") + set(CPACK_DEB_dev_PACKAGE_DEPENDS "cuda-headers-${cuda_version_suffix}") + else() + set(CPACK_DEB_libs_PACKAGE_DEPENDS "cuda-cudart-${cuda_version_suffix}, cuda-npp-${cuda_version_suffix}") + set(CPACK_DEB_dev_PACKAGE_DEPENDS "cuda-cudart-dev-${cuda_version_suffix}, cuda-npp-dev-${cuda_version_suffix}") + if(HAVE_CUFFT) + set(CPACK_DEB_libs_PACKAGE_DEPENDS "${CPACK_DEB_libs_PACKAGE_DEPENDS}, cuda-cufft-${cuda_version_suffix}") + set(CPACK_DEB_dev_PACKAGE_DEPENDS "${CPACK_DEB_dev_PACKAGE_DEPENDS}, cuda-cufft-dev-${cuda_version_suffix}") + endif() + if(HAVE_HAVE_CUBLAS) + set(CPACK_DEB_libs_PACKAGE_DEPENDS "${CPACK_DEB_libs_PACKAGE_DEPENDS}, cuda-cublas-${cuda_version_suffix}") + set(CPACK_DEB_dev_PACKAGE_DEPENDS "${CPACK_DEB_dev_PACKAGE_DEPENDS}, cuda-cublas-dev-${cuda_version_suffix}") + endif() + endif() set(CPACK_COMPONENT_dev_DEPENDS libs) - set(CPACK_DEB_dev_PACKAGE_DEPENDS "cuda-headers-${cuda_version_suffix}") + endif() if(NOT OPENCV_CUSTOM_PACKAGE_INFO) From 
f8758da289c092fcce10baa4723d6388c4e41cb3 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Mon, 14 Jul 2014 23:58:05 +0400 Subject: [PATCH 05/20] More accurate deb package build fix for CUDA 6.5 and newer. (cherry picked from commit b2790973a32eb662c165a921afe03dbfd2c65269) --- cmake/OpenCVPackaging.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/OpenCVPackaging.cmake b/cmake/OpenCVPackaging.cmake index 65e6c13abc..22dbf6b1bc 100644 --- a/cmake/OpenCVPackaging.cmake +++ b/cmake/OpenCVPackaging.cmake @@ -68,7 +68,7 @@ set(CPACK_COMPONENT_tests_DEPENDS libs) if(HAVE_CUDA) string(REPLACE "." "-" cuda_version_suffix ${CUDA_VERSION}) - if(${CUDA_VERSION} VERSION_LESS "6.5") + if(CUDA_VERSION VERSION_LESS "6.5") set(CPACK_DEB_libs_PACKAGE_DEPENDS "cuda-core-libs-${cuda_version_suffix}, cuda-extra-libs-${cuda_version_suffix}") set(CPACK_DEB_dev_PACKAGE_DEPENDS "cuda-headers-${cuda_version_suffix}") else() @@ -83,8 +83,8 @@ if(HAVE_CUDA) set(CPACK_DEB_dev_PACKAGE_DEPENDS "${CPACK_DEB_dev_PACKAGE_DEPENDS}, cuda-cublas-dev-${cuda_version_suffix}") endif() endif() - set(CPACK_COMPONENT_dev_DEPENDS libs) + set(CPACK_COMPONENT_dev_DEPENDS libs) endif() if(NOT OPENCV_CUSTOM_PACKAGE_INFO) From 628b23acc854232f95fbbc3f5158a4f84e53680d Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Wed, 13 Aug 2014 13:46:43 +0400 Subject: [PATCH 06/20] GCC 4.8 warning array subscript is above array bounds fixed. 
(cherry picked from commit e11333dd831ef43f962e513e26f3dfa6dc789155) --- modules/imgproc/src/floodfill.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/imgproc/src/floodfill.cpp b/modules/imgproc/src/floodfill.cpp index db2563ddea..87fca7ce2d 100644 --- a/modules/imgproc/src/floodfill.cpp +++ b/modules/imgproc/src/floodfill.cpp @@ -470,6 +470,12 @@ cvFloodFill( CvArr* arr, CvPoint seed_point, depth = CV_MAT_DEPTH(type); cn = CV_MAT_CN(type); + if ( (cn != 1) && (cn != 3) ) + { + CV_Error( CV_StsBadArg, "Number of channels in input image must be 1 or 3" ); + return; + } + if( connectivity == 0 ) connectivity = 4; else if( connectivity != 4 && connectivity != 8 ) From 975e40f1c0552c776a3b71dcf0358c8dfd8ecbe0 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 15 Aug 2014 13:41:47 +0400 Subject: [PATCH 07/20] increase epsilon for TVL1 sanity test (cherry picked from commit 5dff283b39139008923eeb81d0ef5d351ac04522) --- modules/gpu/perf/perf_video.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp index 6c7a648221..16e0844106 100644 --- a/modules/gpu/perf/perf_video.cpp +++ b/modules/gpu/perf/perf_video.cpp @@ -427,8 +427,8 @@ PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1, TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v); - GPU_SANITY_CHECK(u, 1e-1); - GPU_SANITY_CHECK(v, 1e-1); + GPU_SANITY_CHECK(u, 0.12); + GPU_SANITY_CHECK(v, 0.12); } else { From 2205b2f5bcdd2a48b2f8d76ed3172ee1dcb5f6e9 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 15 Aug 2014 13:42:06 +0400 Subject: [PATCH 08/20] increase epsilon for ResizeSameAsHost test (cherry picked from commit 86e12b607416644ec037ca8b34e4eebc6a585165) --- modules/gpu/test/test_resize.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/gpu/test/test_resize.cpp b/modules/gpu/test/test_resize.cpp index 88e6b1cab7..b59e7b33f4 100644 --- a/modules/gpu/test/test_resize.cpp 
+++ b/modules/gpu/test/test_resize.cpp @@ -195,7 +195,8 @@ GPU_TEST_P(ResizeSameAsHost, Accuracy) cv::Mat dst_gold; cv::resize(src, dst_gold, cv::Size(), coeff, coeff, interpolation); - EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-2 : 1.0); + // CPU test for cv::resize uses 16 as error threshold for CV_8U, we uses 4 as error threshold for CV_8U + EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-2 : src.depth() == CV_8U ? 4.0 : 1.0); } INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine( From 86c1babd03857d968002ad6c69f3905f61a6bf1b Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 15 Aug 2014 13:42:25 +0400 Subject: [PATCH 09/20] use downscaled frames in FGDStatModel test (cherry picked from commit 599f5ef51bfb7a9e71a8c4e50a5f942fb2898cdb) --- modules/gpu/test/test_bgfg.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/modules/gpu/test/test_bgfg.cpp b/modules/gpu/test/test_bgfg.cpp index e08bfb399b..e279bc1417 100644 --- a/modules/gpu/test/test_bgfg.cpp +++ b/modules/gpu/test/test_bgfg.cpp @@ -98,10 +98,13 @@ GPU_TEST_P(FGDStatModel, Update) cap >> frame; ASSERT_FALSE(frame.empty()); - IplImage ipl_frame = frame; + cv::Mat frameSmall; + cv::resize(frame, frameSmall, cv::Size(), 0.5, 0.5); + + IplImage ipl_frame = frameSmall; cv::Ptr model(cvCreateFGDStatModel(&ipl_frame)); - cv::gpu::GpuMat d_frame(frame); + cv::gpu::GpuMat d_frame(frameSmall); cv::gpu::FGDStatModel d_model(out_cn); d_model.create(d_frame); @@ -109,18 +112,17 @@ GPU_TEST_P(FGDStatModel, Update) cv::Mat h_foreground; cv::Mat h_background3; - cv::Mat backgroundDiff; - cv::Mat foregroundDiff; - for (int i = 0; i < 5; ++i) { cap >> frame; ASSERT_FALSE(frame.empty()); - ipl_frame = frame; + cv::resize(frame, frameSmall, cv::Size(), 0.5, 0.5); + + ipl_frame = frameSmall; int gold_count = cvUpdateBGStatModel(&ipl_frame, model); - d_frame.upload(frame); + d_frame.upload(frameSmall); int count = 
d_model.update(d_frame); From c821cb148959a6b7203e485686eaabfc1318155a Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 15 Aug 2014 14:10:15 +0400 Subject: [PATCH 10/20] fix BGR->BGR5x5 color conversion (cherry picked from commit 62f27b28edb6406b6cf8f2c16370187ce8c24e30) --- .../gpu/device/detail/color_detail.hpp | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp b/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp index 5b422849bd..f938b90801 100644 --- a/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp +++ b/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp @@ -160,16 +160,12 @@ namespace cv { namespace gpu { namespace device template struct RGB2RGB5x5Converter; template struct RGB2RGB5x5Converter<6, bidx> { - static __device__ __forceinline__ ushort cvt(const uchar3& src) + template + static __device__ __forceinline__ ushort cvt(const T& src) { - return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~3) << 3) | (((&src.x)[bidx^2] & ~7) << 8)); - } - - static __device__ __forceinline__ ushort cvt(uint src) - { - uint b = 0xffu & (src >> (bidx * 8)); - uint g = 0xffu & (src >> 8); - uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); + uint b = bidx == 0 ? src.x : src.z; + uint g = src.y; + uint r = bidx == 0 ? src.z : src.x; return (ushort)((b >> 3) | ((g & ~3) << 3) | ((r & ~7) << 8)); } }; @@ -178,22 +174,25 @@ namespace cv { namespace gpu { namespace device { static __device__ __forceinline__ ushort cvt(const uchar3& src) { - return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7)); + uint b = bidx == 0 ? src.x : src.z; + uint g = src.y; + uint r = bidx == 0 ? 
src.z : src.x; + return (ushort)((b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7)); } - static __device__ __forceinline__ ushort cvt(uint src) + static __device__ __forceinline__ ushort cvt(const uchar4& src) { - uint b = 0xffu & (src >> (bidx * 8)); - uint g = 0xffu & (src >> 8); - uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); - uint a = 0xffu & (src >> 24); + uint b = bidx == 0 ? src.x : src.z; + uint g = src.y; + uint r = bidx == 0 ? src.z : src.x; + uint a = src.w; return (ushort)((b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7) | (a * 0x8000)); } }; template struct RGB2RGB5x5; - template struct RGB2RGB5x5<3, bidx,green_bits> : unary_function + template struct RGB2RGB5x5<3, bidx, green_bits> : unary_function { __device__ __forceinline__ ushort operator()(const uchar3& src) const { @@ -204,9 +203,9 @@ namespace cv { namespace gpu { namespace device __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {} }; - template struct RGB2RGB5x5<4, bidx,green_bits> : unary_function + template struct RGB2RGB5x5<4, bidx, green_bits> : unary_function { - __device__ __forceinline__ ushort operator()(uint src) const + __device__ __forceinline__ ushort operator()(const uchar4& src) const { return RGB2RGB5x5Converter::cvt(src); } From 77585bf8af195ec86fd6869c2a39533f9ff5ed73 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Fri, 22 Aug 2014 10:51:52 +0400 Subject: [PATCH 11/20] Several fixes for lintian warnings (cherry picked from commit 634ffed488052efb5cab1fb5cdd90d192f0121a9) --- cmake/OpenCVPackaging.cmake | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cmake/OpenCVPackaging.cmake b/cmake/OpenCVPackaging.cmake index 22dbf6b1bc..8cd94ea24b 100644 --- a/cmake/OpenCVPackaging.cmake +++ b/cmake/OpenCVPackaging.cmake @@ -1,3 +1,6 @@ +# Use patched version of CPACK to build accurate set of Debian packages +# https://github.com/asmorkalov/CMake/tree/deb_generator_improvement + if(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake") set(CPACK_set_DESTDIR "on") @@ 
-18,6 +21,8 @@ OpenCV makes it easy for businesses to utilize and modify the code.") set(CPACK_PACKAGE_VERSION "${OPENCV_VCSVERSION}") endif(NOT OPENCV_CUSTOM_PACKAGE_INFO) +set(CPACK_STRIP_FILES 1) + #arch if(X86) set(CPACK_DEBIAN_ARCHITECTURE "i386") @@ -90,24 +95,31 @@ endif() if(NOT OPENCV_CUSTOM_PACKAGE_INFO) set(CPACK_COMPONENT_libs_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}") set(CPACK_COMPONENT_libs_DESCRIPTION "Open Computer Vision Library") + set(CPACK_COMPONENT_libs_SECTION "libs") set(CPACK_COMPONENT_python_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}-python") set(CPACK_COMPONENT_python_DESCRIPTION "Python bindings for Open Source Computer Vision Library") + set(CPACK_COMPONENT_python_SECTION "python") set(CPACK_COMPONENT_java_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}-java") set(CPACK_COMPONENT_java_DESCRIPTION "Java bindings for Open Source Computer Vision Library") + set(CPACK_COMPONENT_java_SECTION "java") set(CPACK_COMPONENT_dev_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}-dev") set(CPACK_COMPONENT_dev_DESCRIPTION "Development files for Open Source Computer Vision Library") + set(CPACK_COMPONENT_dev_SECTION "libdevel") set(CPACK_COMPONENT_docs_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}-docs") set(CPACK_COMPONENT_docs_DESCRIPTION "Documentation for Open Source Computer Vision Library") + set(CPACK_COMPONENT_docs_SECTION "doc") set(CPACK_COMPONENT_samples_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}-samples") set(CPACK_COMPONENT_samples_DESCRIPTION "Samples for Open Source Computer Vision Library") + set(CPACK_COMPONENT_samples_SECTION "devel") set(CPACK_COMPONENT_tests_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}-tests") set(CPACK_COMPONENT_tests_DESCRIPTION "Accuracy and performance tests for Open Source Computer Vision Library") + set(CPACK_COMPONENT_tests_SECTION "misc") endif(NOT OPENCV_CUSTOM_PACKAGE_INFO) if(NOT OPENCV_CUSTOM_PACKAGE_LAYOUT) From 7316676c41033ba62daa6780c8ef1307ab568205 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 28 Aug 2014 14:47:26 +0400 
Subject: [PATCH 12/20] fix CUDA LUT implementation In CUDA 6.0 there was a bug in NPP LUT implementation (invalid results when src == 255). In CUDA 6.5 the bug was fixed. Replaced NPP LUT call with own implementation (ported from master branch) to be independant from CUDA Toolkit version. (cherry picked from commit eaaa2d27d5ab334c74c2d10550a6097f437fb297) --- modules/gpu/src/arithm.cpp | 82 ++++---------------- modules/gpu/src/cuda/lut.cu | 151 ++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 69 deletions(-) create mode 100644 modules/gpu/src/cuda/lut.cu diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp index 53b6aea0c5..df001d037b 100644 --- a/modules/gpu/src/arithm.cpp +++ b/modules/gpu/src/arithm.cpp @@ -317,6 +317,11 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream) //////////////////////////////////////////////////////////////////////// // LUT +namespace arithm +{ + void lut(PtrStepSzb src, uchar* lut, int lut_cn, PtrStepSzb dst, bool cc30, cudaStream_t stream); +} + void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s) { const int cn = src.channels(); @@ -328,82 +333,21 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s) dst.create(src.size(), CV_MAKE_TYPE(lut.depth(), cn)); - NppiSize sz; - sz.height = src.rows; - sz.width = src.cols; - - Mat nppLut; - lut.convertTo(nppLut, CV_32S); - - int nValues3[] = {256, 256, 256}; - - Npp32s pLevels[256]; - for (int i = 0; i < 256; ++i) - pLevels[i] = i; - - const Npp32s* pLevels3[3]; - -#if (CUDA_VERSION <= 4020) - pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels; -#else - GpuMat d_pLevels; - d_pLevels.upload(Mat(1, 256, CV_32S, pLevels)); - pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr(); -#endif + GpuMat d_lut; + d_lut.upload(Mat(1, 256, lut.type(), lut.data)); + int lut_cn = d_lut.channels(); + bool cc30 = deviceSupports(FEATURE_SET_COMPUTE_30); cudaStream_t stream = 
StreamAccessor::getStream(s); - NppStreamHandler h(stream); - if (src.type() == CV_8UC1) + if (lut_cn == 1) { -#if (CUDA_VERSION <= 4020) - nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, nppLut.ptr(), pLevels, 256) ); -#else - GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data)); - nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, d_nppLut.ptr(), d_pLevels.ptr(), 256) ); -#endif + arithm::lut(src.reshape(1), d_lut.data, lut_cn, dst.reshape(1), cc30, stream); } - else + else if (lut_cn == 3) { - const Npp32s* pValues3[3]; - - Mat nppLut3[3]; - if (nppLut.channels() == 1) - { -#if (CUDA_VERSION <= 4020) - pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr(); -#else - GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data)); - pValues3[0] = pValues3[1] = pValues3[2] = d_nppLut.ptr(); -#endif - } - else - { - cv::split(nppLut, nppLut3); - -#if (CUDA_VERSION <= 4020) - pValues3[0] = nppLut3[0].ptr(); - pValues3[1] = nppLut3[1].ptr(); - pValues3[2] = nppLut3[2].ptr(); -#else - GpuMat d_nppLut0(Mat(1, 256, CV_32S, nppLut3[0].data)); - GpuMat d_nppLut1(Mat(1, 256, CV_32S, nppLut3[1].data)); - GpuMat d_nppLut2(Mat(1, 256, CV_32S, nppLut3[2].data)); - - pValues3[0] = d_nppLut0.ptr(); - pValues3[1] = d_nppLut1.ptr(); - pValues3[2] = d_nppLut2.ptr(); -#endif - } - - nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, pValues3, pLevels3, nValues3) ); + arithm::lut(src, d_lut.data, lut_cn, dst, cc30, stream); } - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); } //////////////////////////////////////////////////////////////////////// diff --git a/modules/gpu/src/cuda/lut.cu b/modules/gpu/src/cuda/lut.cu new file mode 100644 index 0000000000..be5efeca2b --- /dev/null +++ b/modules/gpu/src/cuda/lut.cu @@ -0,0 +1,151 @@ 
+/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if !defined CUDA_DISABLER + +#include +#include "opencv2/gpu/device/common.hpp" +#include "opencv2/gpu/device/transform.hpp" +#include "opencv2/gpu/device/functional.hpp" + +using namespace cv::gpu; +using namespace cv::gpu::device; + +namespace +{ + texture texLutTable; + + struct LutC1 : public unary_function + { + typedef uchar value_type; + typedef uchar index_type; + + cudaTextureObject_t texLutTableObj; + + __device__ __forceinline__ uchar operator ()(uchar x) const + { + #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 300) + // Use the texture reference + return tex1Dfetch(texLutTable, x); + #else + // Use the texture object + return tex1Dfetch(texLutTableObj, x); + #endif + } + }; + struct LutC3 : public unary_function + { + typedef uchar3 value_type; + typedef uchar3 index_type; + + cudaTextureObject_t texLutTableObj; + + __device__ __forceinline__ uchar3 operator ()(const uchar3& x) const + { + #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 300) + // Use the texture reference + return make_uchar3(tex1Dfetch(texLutTable, x.x * 3), tex1Dfetch(texLutTable, x.y * 3 + 1), tex1Dfetch(texLutTable, x.z * 3 + 2)); + #else + // Use the texture object + return make_uchar3(tex1Dfetch(texLutTableObj, x.x * 3), tex1Dfetch(texLutTableObj, x.y * 3 + 1), tex1Dfetch(texLutTableObj, x.z * 3 + 2)); + #endif + } + }; +} + +namespace arithm +{ + void lut(PtrStepSzb src, uchar* lut, int lut_cn, PtrStepSzb dst, bool cc30, 
cudaStream_t stream) + { + cudaTextureObject_t texLutTableObj; + + if (cc30) + { + // Use the texture object + cudaResourceDesc texRes; + std::memset(&texRes, 0, sizeof(texRes)); + texRes.resType = cudaResourceTypeLinear; + texRes.res.linear.devPtr = lut; + texRes.res.linear.desc = cudaCreateChannelDesc(); + texRes.res.linear.sizeInBytes = 256 * lut_cn * sizeof(uchar); + + cudaTextureDesc texDescr; + std::memset(&texDescr, 0, sizeof(texDescr)); + + cudaSafeCall( cudaCreateTextureObject(&texLutTableObj, &texRes, &texDescr, 0) ); + } + else + { + // Use the texture reference + cudaChannelFormatDesc desc = cudaCreateChannelDesc(); + cudaSafeCall( cudaBindTexture(0, &texLutTable, lut, &desc) ); + } + + if (lut_cn == 1) + { + LutC1 op; + op.texLutTableObj = texLutTableObj; + + transform((PtrStepSz) src, (PtrStepSz) dst, op, WithOutMask(), stream); + } + else if (lut_cn == 3) + { + LutC3 op; + op.texLutTableObj = texLutTableObj; + + transform((PtrStepSz) src, (PtrStepSz) dst, op, WithOutMask(), stream); + } + + if (cc30) + { + // Use the texture object + cudaSafeCall( cudaDestroyTextureObject(texLutTableObj) ); + } + else + { + // Use the texture reference + cudaSafeCall( cudaUnbindTexture(texLutTable) ); + } + } +} + +#endif From 00575b346d896d095cf65928a6e976bf5c103c5e Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Wed, 20 Aug 2014 11:09:21 +0400 Subject: [PATCH 13/20] Fixed range for 'v' channel for 8U images. 
(cherry picked from commit b027eac173ed9bc610f08d12a05c90fde395a07e) --- modules/imgproc/doc/miscellaneous_transformations.rst | 2 +- modules/imgproc/src/color.cpp | 4 ++-- modules/imgproc/test/test_color.cpp | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/imgproc/doc/miscellaneous_transformations.rst b/modules/imgproc/doc/miscellaneous_transformations.rst index e525f726da..3f6dcfcc1e 100644 --- a/modules/imgproc/doc/miscellaneous_transformations.rst +++ b/modules/imgproc/doc/miscellaneous_transformations.rst @@ -383,7 +383,7 @@ The function can do the following transformations: .. math:: - L \leftarrow 255/100 L, \; u \leftarrow 255/354 (u + 134), \; v \leftarrow 255/256 (v + 140) + L \leftarrow 255/100 L, \; u \leftarrow 255/354 (u + 134), \; v \leftarrow 255/262 (v + 140) * 16-bit images (currently not supported) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 08f27aef97..f27d04d01e 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -2044,7 +2044,7 @@ struct RGB2Luv_b { dst[j] = saturate_cast(buf[j]*2.55f); dst[j+1] = saturate_cast(buf[j+1]*0.72033898305084743f + 96.525423728813564f); - dst[j+2] = saturate_cast(buf[j+2]*0.99609375f + 139.453125f); + dst[j+2] = saturate_cast(buf[j+2]*0.9732824427480916f + 136.259541984732824f); } } } @@ -2076,7 +2076,7 @@ struct Luv2RGB_b { buf[j] = src[j]*(100.f/255.f); buf[j+1] = (float)(src[j+1]*1.388235294117647f - 134.f); - buf[j+2] = (float)(src[j+2]*1.003921568627451f - 140.f); + buf[j+2] = (float)(src[j+2]*1.027450980392157f - 140.f); } cvt(buf, buf, dn); diff --git a/modules/imgproc/test/test_color.cpp b/modules/imgproc/test/test_color.cpp index 55499855c4..47f79f8ec0 100644 --- a/modules/imgproc/test/test_color.cpp +++ b/modules/imgproc/test/test_color.cpp @@ -1168,8 +1168,8 @@ void CV_ColorLuvTest::convert_row_bgr2abc_32f_c3( const float* src_row, float* d { u_scale = 0.720338983f; u_bias = 96.5254237f; - v_scale = 
0.99609375f; - v_bias = 139.453125f; + v_scale = 0.973282442f; + v_bias = 136.2595419f; } for( j = 0; j < n*3; j += 3 ) @@ -1221,8 +1221,8 @@ void CV_ColorLuvTest::convert_row_abc2bgr_32f_c3( const float* src_row, float* d { u_scale = 1.f/0.720338983f; u_bias = 96.5254237f; - v_scale = 1.f/0.99609375f; - v_bias = 139.453125f; + v_scale = 1.f/0.973282442f; + v_bias = 136.2595419f; } for( j = 0; j < n*3; j += 3 ) From bb93c5394876d1e11ca86e5fd5d4e3544a33b1c7 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Tue, 2 Sep 2014 11:54:54 +0400 Subject: [PATCH 14/20] OpenCV version++. --- modules/core/include/opencv2/core/version.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/version.hpp b/modules/core/include/opencv2/core/version.hpp index 63c2935282..2107552b71 100644 --- a/modules/core/include/opencv2/core/version.hpp +++ b/modules/core/include/opencv2/core/version.hpp @@ -50,7 +50,7 @@ #define CV_VERSION_EPOCH 2 #define CV_VERSION_MAJOR 4 #define CV_VERSION_MINOR 9 -#define CV_VERSION_REVISION 0 +#define CV_VERSION_REVISION 1 #define CVAUX_STR_EXP(__A) #__A #define CVAUX_STR(__A) CVAUX_STR_EXP(__A) From 562796e41b80c9d22b7f8f942a432b22de3d05de Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 1 Aug 2014 11:33:29 +0400 Subject: [PATCH 15/20] fix cv::gpu::resize for INTER_LINEAR, now it produces the same result as CPU version (cherry picked from commit da9be8231fc153fd70ac4f4d41091d1653d00fd2) --- modules/gpu/src/cuda/resize.cu | 34 +++++++++++++++++------- modules/gpu/test/test_resize.cpp | 44 +++++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu index 110e62d036..d679c32263 100644 --- a/modules/gpu/src/cuda/resize.cu +++ b/modules/gpu/src/cuda/resize.cu @@ -77,8 +77,8 @@ namespace cv { namespace gpu { namespace device if (dst_x < dst.cols && dst_y < dst.rows) { - const float src_x = dst_x 
* fx; - const float src_y = dst_y * fy; + const float src_x = (dst_x + 0.5f) * fx - 0.5f; + const float src_y = (dst_y + 0.5f) * fy - 0.5f; work_type out = VecTraits::all(0); @@ -86,16 +86,18 @@ namespace cv { namespace gpu { namespace device const int y1 = __float2int_rd(src_y); const int x2 = x1 + 1; const int y2 = y1 + 1; - const int x2_read = ::min(x2, src.cols - 1); - const int y2_read = ::min(y2, src.rows - 1); + const int x1_read = ::max(::min(x1, src.cols - 1), 0); + const int y1_read = ::max(::min(y1, src.rows - 1), 0); + const int x2_read = ::max(::min(x2, src.cols - 1), 0); + const int y2_read = ::max(::min(y2, src.rows - 1), 0); - T src_reg = src(y1, x1); + T src_reg = src(y1_read, x1_read); out = out + src_reg * ((x2 - src_x) * (y2 - src_y)); - src_reg = src(y1, x2_read); + src_reg = src(y1_read, x2_read); out = out + src_reg * ((src_x - x1) * (y2 - src_y)); - src_reg = src(y2_read, x1); + src_reg = src(y2_read, x1_read); out = out + src_reg * ((x2 - src_x) * (src_y - y1)); src_reg = src(y2_read, x2_read); @@ -119,6 +121,20 @@ namespace cv { namespace gpu { namespace device } } + template __global__ void resize_linear(const Ptr2D src, PtrStepSz dst, const float fy, const float fx) + { + const int dst_x = blockDim.x * blockIdx.x + threadIdx.x; + const int dst_y = blockDim.y * blockIdx.y + threadIdx.y; + + if (dst_x < dst.cols && dst_y < dst.rows) + { + const float src_x = (dst_x + 0.5f) * fx - 0.5f; + const float src_y = (dst_y + 0.5f) * fy - 0.5f; + + dst(dst_y, dst_x) = src(src_y, src_x); + } + } + template __global__ void resize_area(const Ptr2D src, PtrStepSz dst) { const int x = blockDim.x * blockIdx.x + threadIdx.x; @@ -231,7 +247,7 @@ namespace cv { namespace gpu { namespace device TextureAccessor texSrc = texAccessor(src, 0, 0); LinearFilter< TextureAccessor > filteredSrc(texSrc); - resize<<>>(filteredSrc, dst, fy, fx); + resize_linear<<>>(filteredSrc, dst, fy, fx); } else { @@ -241,7 +257,7 @@ namespace cv { namespace gpu { namespace device 
BorderReader, BrdReplicate > brdSrc(texSrc, brd); LinearFilter< BorderReader, BrdReplicate > > filteredSrc(brdSrc); - resize<<>>(filteredSrc, dst, fy, fx); + resize_linear<<>>(filteredSrc, dst, fy, fx); } cudaSafeCall( cudaGetLastError() ); diff --git a/modules/gpu/test/test_resize.cpp b/modules/gpu/test/test_resize.cpp index b59e7b33f4..25f0f0e2bb 100644 --- a/modules/gpu/test/test_resize.cpp +++ b/modules/gpu/test/test_resize.cpp @@ -73,6 +73,28 @@ namespace } } + template class Interpolator> + void resizeLinearImpl(const cv::Mat& src, cv::Mat& dst, double fx, double fy) + { + const int cn = src.channels(); + + cv::Size dsize(cv::saturate_cast(src.cols * fx), cv::saturate_cast(src.rows * fy)); + + dst.create(dsize, src.type()); + + float ifx = static_cast(1.0 / fx); + float ify = static_cast(1.0 / fy); + + for (int y = 0; y < dsize.height; ++y) + { + for (int x = 0; x < dsize.width; ++x) + { + for (int c = 0; c < cn; ++c) + dst.at(y, x * cn + c) = Interpolator::getValue(src, (y + 0.5f) * ify - 0.5f, (x + 0.5f) * ifx - 0.5f, c, cv::BORDER_REPLICATE); + } + } + } + void resizeGold(const cv::Mat& src, cv::Mat& dst, double fx, double fy, int interpolation) { typedef void (*func_t)(const cv::Mat& src, cv::Mat& dst, double fx, double fy); @@ -90,12 +112,12 @@ namespace static const func_t linear_funcs[] = { - resizeImpl, - resizeImpl, - resizeImpl, - resizeImpl, - resizeImpl, - resizeImpl + resizeLinearImpl, + resizeLinearImpl, + resizeLinearImpl, + resizeLinearImpl, + resizeLinearImpl, + resizeLinearImpl }; static const func_t cubic_funcs[] = @@ -204,7 +226,15 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine( DIFFERENT_SIZES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), testing::Values(0.3, 0.5), - testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_AREA)), + 
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_AREA)), + WHOLE_SUBMAT)); + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc2, ResizeSameAsHost, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + testing::Values(0.3, 0.5, 1.5, 2.0), + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), WHOLE_SUBMAT)); #endif // HAVE_CUDA From cb1e9adc633772c09c0613b57b2d2b19316e7376 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Mon, 18 Aug 2014 13:01:19 +0400 Subject: [PATCH 16/20] Fixed getConversionInfo() for YUV2RGBA_* conversions (cherry picked from commit 023a42ba55859932861f4a849fbec4cf1bd3ead7) --- modules/imgproc/perf/perf_cvt_color.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/imgproc/perf/perf_cvt_color.cpp b/modules/imgproc/perf/perf_cvt_color.cpp index 966a442f36..86100ee834 100644 --- a/modules/imgproc/perf/perf_cvt_color.cpp +++ b/modules/imgproc/perf/perf_cvt_color.cpp @@ -155,14 +155,14 @@ ChPair getConversionInfo(int cvtMode) case CV_BGR5552BGR: case CV_BGR5552RGB: case CV_BGR5652BGR: case CV_BGR5652RGB: case CV_YUV2RGB_UYVY: case CV_YUV2BGR_UYVY: - case CV_YUV2RGBA_UYVY: case CV_YUV2BGRA_UYVY: case CV_YUV2RGB_YUY2: case CV_YUV2BGR_YUY2: case CV_YUV2RGB_YVYU: case CV_YUV2BGR_YVYU: - case CV_YUV2RGBA_YUY2: case CV_YUV2BGRA_YUY2: - case CV_YUV2RGBA_YVYU: case CV_YUV2BGRA_YVYU: return ChPair(2,3); case CV_BGR5552BGRA: case CV_BGR5552RGBA: case CV_BGR5652BGRA: case CV_BGR5652RGBA: + case CV_YUV2RGBA_UYVY: case CV_YUV2BGRA_UYVY: + case CV_YUV2RGBA_YUY2: case CV_YUV2BGRA_YUY2: + case CV_YUV2RGBA_YVYU: case CV_YUV2BGRA_YVYU: return ChPair(2,4); case CV_BGR2GRAY: case CV_RGB2GRAY: case CV_RGB2YUV_IYUV: case CV_RGB2YUV_YV12: From 
fb81e4df1c6c5881d3a3f09b5762ee9d7c9741f3 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 21 Aug 2014 14:14:06 +0400 Subject: [PATCH 17/20] fix CUDA cvtColor after corresponding change in CPU version see https://github.com/Itseez/opencv/pull/3137 (cherry picked from commit ebe36d6e7c99430b16f61e6b2ee832b91f155ff7) --- .../gpu/include/opencv2/gpu/device/detail/color_detail.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp b/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp index f938b90801..a8952f1e61 100644 --- a/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp +++ b/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp @@ -1821,7 +1821,7 @@ namespace cv { namespace gpu { namespace device dst.x = saturate_cast(dstf.x * 2.55f); dst.y = saturate_cast(dstf.y * 0.72033898305084743f + 96.525423728813564f); - dst.z = saturate_cast(dstf.z * 0.99609375f + 139.453125f); + dst.z = saturate_cast(dstf.z * 0.9732824427480916f + 136.259541984732824f); } template struct RGB2Luv; @@ -1915,7 +1915,7 @@ namespace cv { namespace gpu { namespace device srcf.x = src.x * (100.f / 255.f); srcf.y = src.y * 1.388235294117647f - 134.f; - srcf.z = src.z * 1.003921568627451f - 140.f; + srcf.z = src.z * 1.027450980392157f - 140.f; Luv2RGBConvert_f(srcf, dstf); From 4664a339ae4131589223e9cad1a16f5b0dbeb88e Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 8 May 2014 15:55:30 +0400 Subject: [PATCH 18/20] Fix non-Android cross compilation with OpenCVConfig.cmake (cherry picked from commit e8376c789d675d9d4b536066320e2981b9981b49) --- cmake/templates/OpenCVConfig.cmake.in | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cmake/templates/OpenCVConfig.cmake.in b/cmake/templates/OpenCVConfig.cmake.in index 3b011109aa..6468aea5b3 100644 --- a/cmake/templates/OpenCVConfig.cmake.in +++ b/cmake/templates/OpenCVConfig.cmake.in @@ 
-60,7 +60,11 @@ set(OpenCV_USE_CUFFT @HAVE_CUFFT@) set(OpenCV_USE_NVCUVID @HAVE_NVCUVID@) # Android API level from which OpenCV has been compiled is remembered -set(OpenCV_ANDROID_NATIVE_API_LEVEL @OpenCV_ANDROID_NATIVE_API_LEVEL_CONFIGCMAKE@) +if(ANDROID) + set(OpenCV_ANDROID_NATIVE_API_LEVEL @OpenCV_ANDROID_NATIVE_API_LEVEL_CONFIGCMAKE@) +else + set(OpenCV_ANDROID_NATIVE_API_LEVEL 0) +endif() # Some additional settings are required if OpenCV is built as static libs set(OpenCV_SHARED @BUILD_SHARED_LIBS@) @@ -71,8 +75,8 @@ set(OpenCV_USE_MANGLED_PATHS @OpenCV_USE_MANGLED_PATHS_CONFIGCMAKE@) # Extract the directory where *this* file has been installed (determined at cmake run-time) get_filename_component(OpenCV_CONFIG_PATH "${CMAKE_CURRENT_LIST_FILE}" PATH CACHE) -if(NOT WIN32 OR OpenCV_ANDROID_NATIVE_API_LEVEL GREATER 0) - if(OpenCV_ANDROID_NATIVE_API_LEVEL GREATER 0) +if(NOT WIN32 OR ANDROID) + if(ANDROID) set(OpenCV_INSTALL_PATH "${OpenCV_CONFIG_PATH}/../../..") else() set(OpenCV_INSTALL_PATH "${OpenCV_CONFIG_PATH}/../..") From 10a2c51c52e19678264adfe7920a507e57835248 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 13 May 2014 11:37:21 +0400 Subject: [PATCH 19/20] fix OpenCVConfig.cmake template - missing parentheses (cherry picked from commit 67b562d543154b29e3b5f8f9c79a03790da40712) --- cmake/templates/OpenCVConfig.cmake.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/templates/OpenCVConfig.cmake.in b/cmake/templates/OpenCVConfig.cmake.in index 6468aea5b3..6d1c1a990b 100644 --- a/cmake/templates/OpenCVConfig.cmake.in +++ b/cmake/templates/OpenCVConfig.cmake.in @@ -62,7 +62,7 @@ set(OpenCV_USE_NVCUVID @HAVE_NVCUVID@) # Android API level from which OpenCV has been compiled is remembered if(ANDROID) set(OpenCV_ANDROID_NATIVE_API_LEVEL @OpenCV_ANDROID_NATIVE_API_LEVEL_CONFIGCMAKE@) -else +else() set(OpenCV_ANDROID_NATIVE_API_LEVEL 0) endif() From e929dd4d98cc705a5434125b12322a491d431480 Mon Sep 17 00:00:00 2001 From: 
Alexander Smorkalov Date: Tue, 2 Sep 2014 20:24:49 +0400 Subject: [PATCH 20/20] GPU MatchTemplate32F test epsilon increased. --- modules/gpu/test/test_imgproc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp index 9ce32d12b8..2fde0d9d84 100644 --- a/modules/gpu/test/test_imgproc.cpp +++ b/modules/gpu/test/test_imgproc.cpp @@ -738,7 +738,7 @@ GPU_TEST_P(MatchTemplate32F, Regression) cv::Mat dst_gold; cv::matchTemplate(image, templ, dst_gold, method); - EXPECT_MAT_NEAR(dst_gold, dst, templ_size.area() * 1e-1); + EXPECT_MAT_NEAR(dst_gold, dst, templ_size.area() * 1.1e-1); } INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(