From f9ff9c56183e92cfb54992f7a07dcc18f4f86e65 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 7 May 2014 13:15:19 +0400
Subject: [PATCH 01/20] fix cv::subtract function:

call dst.create(...) before using it (cherry picked from commit 4c66614e07319b66537b6327e2dcf871c5aa6829)
---
 modules/core/src/arithm.cpp       |  6 +++++-
 modules/core/test/test_arithm.cpp | 10 ++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
index 0517a5fae6..f0ef920554 100644
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -1562,8 +1562,12 @@ void cv::subtract( InputArray src1, InputArray src2, OutputArray dst,
         if (dtype == -1 && dst.fixedType())
             dtype = dst.depth();
 
-        if (!dst.fixedType() || dtype == dst.depth())
+        dtype = CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels());
+
+        if (!dst.fixedType() || dtype == dst.type())
         {
+            dst.create(src1.size(), dtype);
+
             if (dtype == CV_16S)
             {
                 Mat _dst = dst.getMat();
diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp
index a240941847..1687285a60 100644
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@@ -1579,3 +1579,13 @@ TEST_P(Mul1, One)
 }
 
 INSTANTIATE_TEST_CASE_P(Arithm, Mul1, testing::Values(Size(2, 2), Size(1, 1)));
+
+TEST(Subtract8u8u16s, EmptyOutputMat)
+{
+    cv::Mat src1 = cv::Mat::zeros(16, 16, CV_8UC1);
+    cv::Mat src2 = cv::Mat::zeros(16, 16, CV_8UC1);
+    cv::Mat dst;
+    cv::subtract(src1, src2, dst, cv::noArray(), CV_16S);
+    ASSERT_FALSE(dst.empty());
+    ASSERT_EQ(0, cv::countNonZero(dst));
+}

From 942401de162838964f79cd4d6e6aed27ddc1a487 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 7 May 2014 19:52:35 +0400
Subject: [PATCH 02/20] fix output matrix allocation in cv::subtract (cherry
 picked from commit 629461c83652e2416ccb6c8685a0788bb6fb15f5)

---
 modules/core/src/arithm.cpp       | 47 ++++++++++++++++++++-----------
 modules/core/test/test_arithm.cpp | 19 +++++++++----
 2 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
index f0ef920554..4058856fff 100644
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -1553,43 +1553,58 @@ void cv::add( InputArray src1, InputArray src2, OutputArray dst,
     arithm_op(src1, src2, dst, mask, dtype, getAddTab() );
 }
 
-void cv::subtract( InputArray src1, InputArray src2, OutputArray dst,
+void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst,
                InputArray mask, int dtype )
 {
 #ifdef HAVE_TEGRA_OPTIMIZATION
-    if (mask.empty() && src1.depth() == CV_8U && src2.depth() == CV_8U)
+    int kind1 = _src1.kind(), kind2 = _src2.kind();
+    Mat src1 = _src1.getMat(), src2 = _src2.getMat();
+    bool src1Scalar = checkScalar(src1, _src2.type(), kind1, kind2);
+    bool src2Scalar = checkScalar(src2, _src1.type(), kind2, kind1);
+
+    if (!src1Scalar && !src2Scalar && mask.empty() &&
+        src1.depth() == CV_8U && src2.depth() == CV_8U)
     {
-        if (dtype == -1 && dst.fixedType())
-            dtype = dst.depth();
-
-        dtype = CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels());
-
-        if (!dst.fixedType() || dtype == dst.type())
+        if (dtype == -1)
         {
-            dst.create(src1.size(), dtype);
+            if (_dst.fixedType())
+            {
+                dtype = _dst.depth();
+            }
+            else
+            {
+                dtype = src1.depth();
+            }
+        }
+
+        dtype = CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), _src1.channels());
+
+        if (dtype == _dst.type())
+        {
+            _dst.create(_src1.size(), dtype);
 
             if (dtype == CV_16S)
             {
-                Mat _dst = dst.getMat();
-                if(tegra::subtract_8u8u16s(src1.getMat(), src2.getMat(), _dst))
+                Mat dst = _dst.getMat();
+                if(tegra::subtract_8u8u16s(src1, src2, dst))
                     return;
             }
             else if (dtype == CV_32F)
             {
-                Mat _dst = dst.getMat();
-                if(tegra::subtract_8u8u32f(src1.getMat(), src2.getMat(), _dst))
+                Mat dst = _dst.getMat();
+                if(tegra::subtract_8u8u32f(src1, src2, dst))
                     return;
             }
             else if (dtype == CV_8S)
             {
-                Mat _dst = dst.getMat();
-                if(tegra::subtract_8u8u8s(src1.getMat(), src2.getMat(), _dst))
+                Mat dst = _dst.getMat();
+                if(tegra::subtract_8u8u8s(src1, src2, dst))
                     return;
             }
         }
     }
 #endif
-    arithm_op(src1, src2, dst, mask, dtype, getSubTab() );
+    arithm_op(_src1, _src2, _dst, mask, dtype, getSubTab() );
 }
 
 void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp
index 1687285a60..68b06267b2 100644
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@@ -1580,12 +1580,21 @@ TEST_P(Mul1, One)
 
 INSTANTIATE_TEST_CASE_P(Arithm, Mul1, testing::Values(Size(2, 2), Size(1, 1)));
 
-TEST(Subtract8u8u16s, EmptyOutputMat)
+TEST(Subtract, EmptyOutputMat)
 {
     cv::Mat src1 = cv::Mat::zeros(16, 16, CV_8UC1);
     cv::Mat src2 = cv::Mat::zeros(16, 16, CV_8UC1);
-    cv::Mat dst;
-    cv::subtract(src1, src2, dst, cv::noArray(), CV_16S);
-    ASSERT_FALSE(dst.empty());
-    ASSERT_EQ(0, cv::countNonZero(dst));
+    cv::Mat dst1, dst2, dst3;
+
+    cv::subtract(src1, src2, dst1, cv::noArray(), CV_16S);
+    cv::subtract(src1, src2, dst2);
+    cv::subtract(src1, cv::Scalar::all(0), dst3, cv::noArray(), CV_16S);
+
+    ASSERT_FALSE(dst1.empty());
+    ASSERT_FALSE(dst2.empty());
+    ASSERT_FALSE(dst3.empty());
+
+    ASSERT_EQ(0, cv::countNonZero(dst1));
+    ASSERT_EQ(0, cv::countNonZero(dst2));
+    ASSERT_EQ(0, cv::countNonZero(dst3));
 }

From 3c0b0b0f94deeac7182d2d56b0c1224b70acbeec Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Mon, 14 Jul 2014 14:26:50 +0400
Subject: [PATCH 03/20] Build fixes for CUDA 6.5 (cherry picked from commit
 60a5ada4541e777bd2ad3fe0322180706351e58b)

---
 .../src/nvidia/core/NCVPixelOperations.hpp    | 62 +++++++++----------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp b/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp
index c1e06b434e..1c95414734 100644
--- a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp
+++ b/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp
@@ -48,24 +48,24 @@
 #include "NCV.hpp"
 
 template<typename TBase> inline __host__ __device__ TBase _pixMaxVal();
-template<> static inline __host__ __device__ Ncv8u  _pixMaxVal<Ncv8u>()  {return UCHAR_MAX;}
-template<> static inline __host__ __device__ Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
-template<> static inline __host__ __device__ Ncv32u _pixMaxVal<Ncv32u>() {return  UINT_MAX;}
-template<> static inline __host__ __device__ Ncv8s  _pixMaxVal<Ncv8s>()  {return  SCHAR_MAX;}
-template<> static inline __host__ __device__ Ncv16s _pixMaxVal<Ncv16s>() {return  SHRT_MAX;}
-template<> static inline __host__ __device__ Ncv32s _pixMaxVal<Ncv32s>() {return   INT_MAX;}
-template<> static inline __host__ __device__ Ncv32f _pixMaxVal<Ncv32f>() {return   FLT_MAX;}
-template<> static inline __host__ __device__ Ncv64f _pixMaxVal<Ncv64f>() {return   DBL_MAX;}
+template<>  inline __host__ __device__ Ncv8u  _pixMaxVal<Ncv8u>()  {return UCHAR_MAX;}
+template<>  inline __host__ __device__ Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
+template<>  inline __host__ __device__ Ncv32u _pixMaxVal<Ncv32u>() {return  UINT_MAX;}
+template<>  inline __host__ __device__ Ncv8s  _pixMaxVal<Ncv8s>()  {return  SCHAR_MAX;}
+template<>  inline __host__ __device__ Ncv16s _pixMaxVal<Ncv16s>() {return  SHRT_MAX;}
+template<>  inline __host__ __device__ Ncv32s _pixMaxVal<Ncv32s>() {return   INT_MAX;}
+template<>  inline __host__ __device__ Ncv32f _pixMaxVal<Ncv32f>() {return   FLT_MAX;}
+template<>  inline __host__ __device__ Ncv64f _pixMaxVal<Ncv64f>() {return   DBL_MAX;}
 
 template<typename TBase> inline __host__ __device__ TBase _pixMinVal();
-template<> static inline __host__ __device__ Ncv8u  _pixMinVal<Ncv8u>()  {return 0;}
-template<> static inline __host__ __device__ Ncv16u _pixMinVal<Ncv16u>() {return 0;}
-template<> static inline __host__ __device__ Ncv32u _pixMinVal<Ncv32u>() {return 0;}
-template<> static inline __host__ __device__ Ncv8s  _pixMinVal<Ncv8s>()  {return SCHAR_MIN;}
-template<> static inline __host__ __device__ Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
-template<> static inline __host__ __device__ Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
-template<> static inline __host__ __device__ Ncv32f _pixMinVal<Ncv32f>() {return FLT_MIN;}
-template<> static inline __host__ __device__ Ncv64f _pixMinVal<Ncv64f>() {return DBL_MIN;}
+template<>  inline __host__ __device__ Ncv8u  _pixMinVal<Ncv8u>()  {return 0;}
+template<>  inline __host__ __device__ Ncv16u _pixMinVal<Ncv16u>() {return 0;}
+template<>  inline __host__ __device__ Ncv32u _pixMinVal<Ncv32u>() {return 0;}
+template<>  inline __host__ __device__ Ncv8s  _pixMinVal<Ncv8s>()  {return SCHAR_MIN;}
+template<>  inline __host__ __device__ Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
+template<>  inline __host__ __device__ Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
+template<>  inline __host__ __device__ Ncv32f _pixMinVal<Ncv32f>() {return FLT_MIN;}
+template<>  inline __host__ __device__ Ncv64f _pixMinVal<Ncv64f>() {return DBL_MIN;}
 
 template<typename Tvec> struct TConvVec2Base;
 template<> struct TConvVec2Base<uchar1>  {typedef Ncv8u TBase;};
@@ -116,21 +116,21 @@ template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Ti
 template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
 
 template<typename Tout> inline Tout _pixMakeZero();
-template<> static inline __host__ __device__ uchar1 _pixMakeZero<uchar1>() {return make_uchar1(0);}
-template<> static inline __host__ __device__ uchar3 _pixMakeZero<uchar3>() {return make_uchar3(0,0,0);}
-template<> static inline __host__ __device__ uchar4 _pixMakeZero<uchar4>() {return make_uchar4(0,0,0,0);}
-template<> static inline __host__ __device__ ushort1 _pixMakeZero<ushort1>() {return make_ushort1(0);}
-template<> static inline __host__ __device__ ushort3 _pixMakeZero<ushort3>() {return make_ushort3(0,0,0);}
-template<> static inline __host__ __device__ ushort4 _pixMakeZero<ushort4>() {return make_ushort4(0,0,0,0);}
-template<> static inline __host__ __device__ uint1 _pixMakeZero<uint1>() {return make_uint1(0);}
-template<> static inline __host__ __device__ uint3 _pixMakeZero<uint3>() {return make_uint3(0,0,0);}
-template<> static inline __host__ __device__ uint4 _pixMakeZero<uint4>() {return make_uint4(0,0,0,0);}
-template<> static inline __host__ __device__ float1 _pixMakeZero<float1>() {return make_float1(0.f);}
-template<> static inline __host__ __device__ float3 _pixMakeZero<float3>() {return make_float3(0.f,0.f,0.f);}
-template<> static inline __host__ __device__ float4 _pixMakeZero<float4>() {return make_float4(0.f,0.f,0.f,0.f);}
-template<> static inline __host__ __device__ double1 _pixMakeZero<double1>() {return make_double1(0.);}
-template<> static inline __host__ __device__ double3 _pixMakeZero<double3>() {return make_double3(0.,0.,0.);}
-template<> static inline __host__ __device__ double4 _pixMakeZero<double4>() {return make_double4(0.,0.,0.,0.);}
+template<>  inline __host__ __device__ uchar1 _pixMakeZero<uchar1>() {return make_uchar1(0);}
+template<>  inline __host__ __device__ uchar3 _pixMakeZero<uchar3>() {return make_uchar3(0,0,0);}
+template<>  inline __host__ __device__ uchar4 _pixMakeZero<uchar4>() {return make_uchar4(0,0,0,0);}
+template<>  inline __host__ __device__ ushort1 _pixMakeZero<ushort1>() {return make_ushort1(0);}
+template<>  inline __host__ __device__ ushort3 _pixMakeZero<ushort3>() {return make_ushort3(0,0,0);}
+template<>  inline __host__ __device__ ushort4 _pixMakeZero<ushort4>() {return make_ushort4(0,0,0,0);}
+template<>  inline __host__ __device__ uint1 _pixMakeZero<uint1>() {return make_uint1(0);}
+template<>  inline __host__ __device__ uint3 _pixMakeZero<uint3>() {return make_uint3(0,0,0);}
+template<>  inline __host__ __device__ uint4 _pixMakeZero<uint4>() {return make_uint4(0,0,0,0);}
+template<>  inline __host__ __device__ float1 _pixMakeZero<float1>() {return make_float1(0.f);}
+template<>  inline __host__ __device__ float3 _pixMakeZero<float3>() {return make_float3(0.f,0.f,0.f);}
+template<>  inline __host__ __device__ float4 _pixMakeZero<float4>() {return make_float4(0.f,0.f,0.f,0.f);}
+template<>  inline __host__ __device__ double1 _pixMakeZero<double1>() {return make_double1(0.);}
+template<>  inline __host__ __device__ double3 _pixMakeZero<double3>() {return make_double3(0.,0.,0.);}
+template<>  inline __host__ __device__ double4 _pixMakeZero<double4>() {return make_double4(0.,0.,0.,0.);}
 
 static inline __host__ __device__ uchar1 _pixMake(Ncv8u x) {return make_uchar1(x);}
 static inline __host__ __device__ uchar3 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z) {return make_uchar3(x,y,z);}

From ca9c52ac9778d01eba933a786b4303c79d0162be Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Mon, 14 Jul 2014 21:27:23 +0400
Subject: [PATCH 04/20] Deb package build fix for CUDA 6.5 and newer.

(cherry picked from commit e650d87e470b2e6a8f87ad4dd81977748a136aee)
---
 cmake/OpenCVPackaging.cmake | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/cmake/OpenCVPackaging.cmake b/cmake/OpenCVPackaging.cmake
index 91f5940960..65e6c13abc 100644
--- a/cmake/OpenCVPackaging.cmake
+++ b/cmake/OpenCVPackaging.cmake
@@ -68,9 +68,23 @@ set(CPACK_COMPONENT_tests_DEPENDS libs)
 
 if(HAVE_CUDA)
   string(REPLACE "." "-" cuda_version_suffix ${CUDA_VERSION})
-  set(CPACK_DEB_libs_PACKAGE_DEPENDS "cuda-core-libs-${cuda_version_suffix}, cuda-extra-libs-${cuda_version_suffix}")
+  if(${CUDA_VERSION} VERSION_LESS "6.5")
+    set(CPACK_DEB_libs_PACKAGE_DEPENDS "cuda-core-libs-${cuda_version_suffix}, cuda-extra-libs-${cuda_version_suffix}")
+    set(CPACK_DEB_dev_PACKAGE_DEPENDS "cuda-headers-${cuda_version_suffix}")
+  else()
+    set(CPACK_DEB_libs_PACKAGE_DEPENDS "cuda-cudart-${cuda_version_suffix}, cuda-npp-${cuda_version_suffix}")
+    set(CPACK_DEB_dev_PACKAGE_DEPENDS "cuda-cudart-dev-${cuda_version_suffix}, cuda-npp-dev-${cuda_version_suffix}")
+    if(HAVE_CUFFT)
+      set(CPACK_DEB_libs_PACKAGE_DEPENDS "${CPACK_DEB_libs_PACKAGE_DEPENDS}, cuda-cufft-${cuda_version_suffix}")
+      set(CPACK_DEB_dev_PACKAGE_DEPENDS "${CPACK_DEB_dev_PACKAGE_DEPENDS}, cuda-cufft-dev-${cuda_version_suffix}")
+    endif()
+    if(HAVE_CUBLAS)
+      set(CPACK_DEB_libs_PACKAGE_DEPENDS "${CPACK_DEB_libs_PACKAGE_DEPENDS}, cuda-cublas-${cuda_version_suffix}")
+      set(CPACK_DEB_dev_PACKAGE_DEPENDS "${CPACK_DEB_dev_PACKAGE_DEPENDS}, cuda-cublas-dev-${cuda_version_suffix}")
+    endif()
+  endif()
   set(CPACK_COMPONENT_dev_DEPENDS libs)
-  set(CPACK_DEB_dev_PACKAGE_DEPENDS "cuda-headers-${cuda_version_suffix}")
+
 endif()
 
 if(NOT OPENCV_CUSTOM_PACKAGE_INFO)

From f8758da289c092fcce10baa4723d6388c4e41cb3 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Mon, 14 Jul 2014 23:58:05 +0400
Subject: [PATCH 05/20] More accurate deb package build fix for CUDA 6.5 and
 newer.

(cherry picked from commit b2790973a32eb662c165a921afe03dbfd2c65269)
---
 cmake/OpenCVPackaging.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmake/OpenCVPackaging.cmake b/cmake/OpenCVPackaging.cmake
index 65e6c13abc..22dbf6b1bc 100644
--- a/cmake/OpenCVPackaging.cmake
+++ b/cmake/OpenCVPackaging.cmake
@@ -68,7 +68,7 @@ set(CPACK_COMPONENT_tests_DEPENDS libs)
 
 if(HAVE_CUDA)
   string(REPLACE "." "-" cuda_version_suffix ${CUDA_VERSION})
-  if(${CUDA_VERSION} VERSION_LESS "6.5")
+  if(CUDA_VERSION VERSION_LESS "6.5")
     set(CPACK_DEB_libs_PACKAGE_DEPENDS "cuda-core-libs-${cuda_version_suffix}, cuda-extra-libs-${cuda_version_suffix}")
     set(CPACK_DEB_dev_PACKAGE_DEPENDS "cuda-headers-${cuda_version_suffix}")
   else()
@@ -83,8 +83,8 @@ if(HAVE_CUDA)
       set(CPACK_DEB_dev_PACKAGE_DEPENDS "${CPACK_DEB_dev_PACKAGE_DEPENDS}, cuda-cublas-dev-${cuda_version_suffix}")
     endif()
   endif()
-  set(CPACK_COMPONENT_dev_DEPENDS libs)
 
+  set(CPACK_COMPONENT_dev_DEPENDS libs)
 endif()
 
 if(NOT OPENCV_CUSTOM_PACKAGE_INFO)

From 628b23acc854232f95fbbc3f5158a4f84e53680d Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Wed, 13 Aug 2014 13:46:43 +0400
Subject: [PATCH 06/20] GCC 4.8 warning array subscript is above array bounds
 fixed.

(cherry picked from commit e11333dd831ef43f962e513e26f3dfa6dc789155)
---
 modules/imgproc/src/floodfill.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/modules/imgproc/src/floodfill.cpp b/modules/imgproc/src/floodfill.cpp
index db2563ddea..87fca7ce2d 100644
--- a/modules/imgproc/src/floodfill.cpp
+++ b/modules/imgproc/src/floodfill.cpp
@@ -470,6 +470,12 @@ cvFloodFill( CvArr* arr, CvPoint seed_point,
     depth = CV_MAT_DEPTH(type);
     cn = CV_MAT_CN(type);
 
+    if ( (cn != 1) && (cn != 3) )
+    {
+        CV_Error( CV_StsBadArg, "Number of channels in input image must be 1 or 3" );
+        return;
+    }
+
     if( connectivity == 0 )
         connectivity = 4;
     else if( connectivity != 4 && connectivity != 8 )

From 975e40f1c0552c776a3b71dcf0358c8dfd8ecbe0 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Fri, 15 Aug 2014 13:41:47 +0400
Subject: [PATCH 07/20] increase epsilon for TVL1 sanity test (cherry picked
 from commit 5dff283b39139008923eeb81d0ef5d351ac04522)

---
 modules/gpu/perf/perf_video.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp
index 6c7a648221..16e0844106 100644
--- a/modules/gpu/perf/perf_video.cpp
+++ b/modules/gpu/perf/perf_video.cpp
@@ -427,8 +427,8 @@ PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1,
 
         TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v);
 
-        GPU_SANITY_CHECK(u, 1e-1);
-        GPU_SANITY_CHECK(v, 1e-1);
+        GPU_SANITY_CHECK(u, 0.12);
+        GPU_SANITY_CHECK(v, 0.12);
     }
     else
     {

From 2205b2f5bcdd2a48b2f8d76ed3172ee1dcb5f6e9 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Fri, 15 Aug 2014 13:42:06 +0400
Subject: [PATCH 08/20] increase epsilon for ResizeSameAsHost test (cherry
 picked from commit 86e12b607416644ec037ca8b34e4eebc6a585165)

---
 modules/gpu/test/test_resize.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/gpu/test/test_resize.cpp b/modules/gpu/test/test_resize.cpp
index 88e6b1cab7..b59e7b33f4 100644
--- a/modules/gpu/test/test_resize.cpp
+++ b/modules/gpu/test/test_resize.cpp
@@ -195,7 +195,8 @@ GPU_TEST_P(ResizeSameAsHost, Accuracy)
     cv::Mat dst_gold;
     cv::resize(src, dst_gold, cv::Size(), coeff, coeff, interpolation);
 
-    EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-2 : 1.0);
+    // CPU test for cv::resize uses 16 as error threshold for CV_8U; we use 4 as error threshold for CV_8U
+    EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-2 : src.depth() == CV_8U ? 4.0 : 1.0);
 }
 
 INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine(

From 86c1babd03857d968002ad6c69f3905f61a6bf1b Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Fri, 15 Aug 2014 13:42:25 +0400
Subject: [PATCH 09/20] use downscaled frames in FGDStatModel test (cherry
 picked from commit 599f5ef51bfb7a9e71a8c4e50a5f942fb2898cdb)

---
 modules/gpu/test/test_bgfg.cpp | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/modules/gpu/test/test_bgfg.cpp b/modules/gpu/test/test_bgfg.cpp
index e08bfb399b..e279bc1417 100644
--- a/modules/gpu/test/test_bgfg.cpp
+++ b/modules/gpu/test/test_bgfg.cpp
@@ -98,10 +98,13 @@ GPU_TEST_P(FGDStatModel, Update)
     cap >> frame;
     ASSERT_FALSE(frame.empty());
 
-    IplImage ipl_frame = frame;
+    cv::Mat frameSmall;
+    cv::resize(frame, frameSmall, cv::Size(), 0.5, 0.5);
+
+    IplImage ipl_frame = frameSmall;
     cv::Ptr<CvBGStatModel> model(cvCreateFGDStatModel(&ipl_frame));
 
-    cv::gpu::GpuMat d_frame(frame);
+    cv::gpu::GpuMat d_frame(frameSmall);
     cv::gpu::FGDStatModel d_model(out_cn);
     d_model.create(d_frame);
 
@@ -109,18 +112,17 @@ GPU_TEST_P(FGDStatModel, Update)
     cv::Mat h_foreground;
     cv::Mat h_background3;
 
-    cv::Mat backgroundDiff;
-    cv::Mat foregroundDiff;
-
     for (int i = 0; i < 5; ++i)
     {
         cap >> frame;
         ASSERT_FALSE(frame.empty());
 
-        ipl_frame = frame;
+        cv::resize(frame, frameSmall, cv::Size(), 0.5, 0.5);
+
+        ipl_frame = frameSmall;
         int gold_count = cvUpdateBGStatModel(&ipl_frame, model);
 
-        d_frame.upload(frame);
+        d_frame.upload(frameSmall);
 
         int count = d_model.update(d_frame);
 

From c821cb148959a6b7203e485686eaabfc1318155a Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Fri, 15 Aug 2014 14:10:15 +0400
Subject: [PATCH 10/20] fix BGR->BGR5x5 color conversion (cherry picked from
 commit 62f27b28edb6406b6cf8f2c16370187ce8c24e30)

---
 .../gpu/device/detail/color_detail.hpp        | 35 +++++++++----------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp b/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp
index 5b422849bd..f938b90801 100644
--- a/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp
+++ b/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp
@@ -160,16 +160,12 @@ namespace cv { namespace gpu { namespace device
         template <int green_bits, int bidx> struct RGB2RGB5x5Converter;
         template<int bidx> struct RGB2RGB5x5Converter<6, bidx>
         {
-            static __device__ __forceinline__ ushort cvt(const uchar3& src)
+            template <typename T>
+            static __device__ __forceinline__ ushort cvt(const T& src)
             {
-                return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~3) << 3) | (((&src.x)[bidx^2] & ~7) << 8));
-            }
-
-            static __device__ __forceinline__ ushort cvt(uint src)
-            {
-                uint b = 0xffu & (src >> (bidx * 8));
-                uint g = 0xffu & (src >> 8);
-                uint r = 0xffu & (src >> ((bidx ^ 2) * 8));
+                uint b = bidx == 0 ? src.x : src.z;
+                uint g = src.y;
+                uint r = bidx == 0 ? src.z : src.x;
                 return (ushort)((b >> 3) | ((g & ~3) << 3) | ((r & ~7) << 8));
             }
         };
@@ -178,22 +174,25 @@ namespace cv { namespace gpu { namespace device
         {
             static __device__ __forceinline__ ushort cvt(const uchar3& src)
             {
-                return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7));
+                uint b = bidx == 0 ? src.x : src.z;
+                uint g = src.y;
+                uint r = bidx == 0 ? src.z : src.x;
+                return (ushort)((b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7));
             }
 
-            static __device__ __forceinline__ ushort cvt(uint src)
+            static __device__ __forceinline__ ushort cvt(const uchar4& src)
             {
-                uint b = 0xffu & (src >> (bidx * 8));
-                uint g = 0xffu & (src >> 8);
-                uint r = 0xffu & (src >> ((bidx ^ 2) * 8));
-                uint a = 0xffu & (src >> 24);
+                uint b = bidx == 0 ? src.x : src.z;
+                uint g = src.y;
+                uint r = bidx == 0 ? src.z : src.x;
+                uint a = src.w;
                 return (ushort)((b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7) | (a * 0x8000));
             }
         };
 
         template<int scn, int bidx, int green_bits> struct RGB2RGB5x5;
 
-        template<int bidx, int green_bits> struct RGB2RGB5x5<3, bidx,green_bits> : unary_function<uchar3, ushort>
+        template<int bidx, int green_bits> struct RGB2RGB5x5<3, bidx, green_bits> : unary_function<uchar3, ushort>
         {
             __device__ __forceinline__ ushort operator()(const uchar3& src) const
             {
@@ -204,9 +203,9 @@ namespace cv { namespace gpu { namespace device
             __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {}
         };
 
-        template<int bidx, int green_bits> struct RGB2RGB5x5<4, bidx,green_bits> : unary_function<uint, ushort>
+        template<int bidx, int green_bits> struct RGB2RGB5x5<4, bidx, green_bits> : unary_function<uchar4, ushort>
         {
-            __device__ __forceinline__ ushort operator()(uint src) const
+            __device__ __forceinline__ ushort operator()(const uchar4& src) const
             {
                 return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
             }

From 77585bf8af195ec86fd6869c2a39533f9ff5ed73 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Fri, 22 Aug 2014 10:51:52 +0400
Subject: [PATCH 11/20] Several fixes for lintian warnings

(cherry picked from commit 634ffed488052efb5cab1fb5cdd90d192f0121a9)
---
 cmake/OpenCVPackaging.cmake | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/cmake/OpenCVPackaging.cmake b/cmake/OpenCVPackaging.cmake
index 22dbf6b1bc..8cd94ea24b 100644
--- a/cmake/OpenCVPackaging.cmake
+++ b/cmake/OpenCVPackaging.cmake
@@ -1,3 +1,6 @@
+# Use patched version of CPACK to build accurate set of Debian packages
+# https://github.com/asmorkalov/CMake/tree/deb_generator_improvement
+
 if(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
 set(CPACK_set_DESTDIR "on")
 
@@ -18,6 +21,8 @@ OpenCV makes it easy for businesses to utilize and modify the code.")
   set(CPACK_PACKAGE_VERSION "${OPENCV_VCSVERSION}")
 endif(NOT OPENCV_CUSTOM_PACKAGE_INFO)
 
+set(CPACK_STRIP_FILES 1)
+
 #arch
 if(X86)
   set(CPACK_DEBIAN_ARCHITECTURE "i386")
@@ -90,24 +95,31 @@ endif()
 if(NOT OPENCV_CUSTOM_PACKAGE_INFO)
   set(CPACK_COMPONENT_libs_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}")
   set(CPACK_COMPONENT_libs_DESCRIPTION "Open Computer Vision Library")
+  set(CPACK_COMPONENT_libs_SECTION "libs")
 
   set(CPACK_COMPONENT_python_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}-python")
   set(CPACK_COMPONENT_python_DESCRIPTION "Python bindings for Open Source Computer Vision Library")
+  set(CPACK_COMPONENT_python_SECTION "python")
 
   set(CPACK_COMPONENT_java_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}-java")
   set(CPACK_COMPONENT_java_DESCRIPTION "Java bindings for Open Source Computer Vision Library")
+  set(CPACK_COMPONENT_java_SECTION "java")
 
   set(CPACK_COMPONENT_dev_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}-dev")
   set(CPACK_COMPONENT_dev_DESCRIPTION "Development files for Open Source Computer Vision Library")
+  set(CPACK_COMPONENT_dev_SECTION "libdevel")
 
   set(CPACK_COMPONENT_docs_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}-docs")
   set(CPACK_COMPONENT_docs_DESCRIPTION "Documentation for Open Source Computer Vision Library")
+  set(CPACK_COMPONENT_docs_SECTION "doc")
 
   set(CPACK_COMPONENT_samples_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}-samples")
   set(CPACK_COMPONENT_samples_DESCRIPTION "Samples for Open Source Computer Vision Library")
+  set(CPACK_COMPONENT_samples_SECTION "devel")
 
   set(CPACK_COMPONENT_tests_DISPLAY_NAME "lib${CMAKE_PROJECT_NAME}-tests")
   set(CPACK_COMPONENT_tests_DESCRIPTION "Accuracy and performance tests for Open Source Computer Vision Library")
+  set(CPACK_COMPONENT_tests_SECTION "misc")
 endif(NOT OPENCV_CUSTOM_PACKAGE_INFO)
 
 if(NOT OPENCV_CUSTOM_PACKAGE_LAYOUT)

From 7316676c41033ba62daa6780c8ef1307ab568205 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 28 Aug 2014 14:47:26 +0400
Subject: [PATCH 12/20] fix CUDA LUT implementation

In CUDA 6.0 there was a bug in NPP LUT implementation (invalid results when
src == 255). In CUDA 6.5 the bug was fixed.

Replaced NPP LUT call with own implementation (ported from master branch)
to be independent from CUDA Toolkit version.
(cherry picked from commit eaaa2d27d5ab334c74c2d10550a6097f437fb297)
---
 modules/gpu/src/arithm.cpp  |  82 ++++----------------
 modules/gpu/src/cuda/lut.cu | 151 ++++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+), 69 deletions(-)
 create mode 100644 modules/gpu/src/cuda/lut.cu

diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp
index 53b6aea0c5..df001d037b 100644
--- a/modules/gpu/src/arithm.cpp
+++ b/modules/gpu/src/arithm.cpp
@@ -317,6 +317,11 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream)
 ////////////////////////////////////////////////////////////////////////
 // LUT
 
+namespace arithm
+{
+    void lut(PtrStepSzb src, uchar* lut, int lut_cn, PtrStepSzb dst, bool cc30, cudaStream_t stream);
+}
+
 void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
 {
     const int cn = src.channels();
@@ -328,82 +333,21 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
 
     dst.create(src.size(), CV_MAKE_TYPE(lut.depth(), cn));
 
-    NppiSize sz;
-    sz.height = src.rows;
-    sz.width = src.cols;
-
-    Mat nppLut;
-    lut.convertTo(nppLut, CV_32S);
-
-    int nValues3[] = {256, 256, 256};
-
-    Npp32s pLevels[256];
-    for (int i = 0; i < 256; ++i)
-        pLevels[i] = i;
-
-    const Npp32s* pLevels3[3];
-
-#if (CUDA_VERSION <= 4020)
-    pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
-#else
-    GpuMat d_pLevels;
-    d_pLevels.upload(Mat(1, 256, CV_32S, pLevels));
-    pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr<Npp32s>();
-#endif
+    GpuMat d_lut;
+    d_lut.upload(Mat(1, 256, lut.type(), lut.data));
 
+    int lut_cn = d_lut.channels();
+    bool cc30 = deviceSupports(FEATURE_SET_COMPUTE_30);
     cudaStream_t stream = StreamAccessor::getStream(s);
-    NppStreamHandler h(stream);
 
-    if (src.type() == CV_8UC1)
+    if (lut_cn == 1)
     {
-#if (CUDA_VERSION <= 4020)
-        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppLut.ptr<Npp32s>(), pLevels, 256) );
-#else
-        GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
-        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, d_nppLut.ptr<Npp32s>(), d_pLevels.ptr<Npp32s>(), 256) );
-#endif
+        arithm::lut(src.reshape(1), d_lut.data, lut_cn, dst.reshape(1), cc30, stream);
     }
-    else
+    else if (lut_cn == 3)
     {
-        const Npp32s* pValues3[3];
-
-        Mat nppLut3[3];
-        if (nppLut.channels() == 1)
-        {
-#if (CUDA_VERSION <= 4020)
-            pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr<Npp32s>();
-#else
-            GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
-            pValues3[0] = pValues3[1] = pValues3[2] = d_nppLut.ptr<Npp32s>();
-#endif
-        }
-        else
-        {
-            cv::split(nppLut, nppLut3);
-
-#if (CUDA_VERSION <= 4020)
-            pValues3[0] = nppLut3[0].ptr<Npp32s>();
-            pValues3[1] = nppLut3[1].ptr<Npp32s>();
-            pValues3[2] = nppLut3[2].ptr<Npp32s>();
-#else
-            GpuMat d_nppLut0(Mat(1, 256, CV_32S, nppLut3[0].data));
-            GpuMat d_nppLut1(Mat(1, 256, CV_32S, nppLut3[1].data));
-            GpuMat d_nppLut2(Mat(1, 256, CV_32S, nppLut3[2].data));
-
-            pValues3[0] = d_nppLut0.ptr<Npp32s>();
-            pValues3[1] = d_nppLut1.ptr<Npp32s>();
-            pValues3[2] = d_nppLut2.ptr<Npp32s>();
-#endif
-        }
-
-        nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step),
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, pValues3, pLevels3, nValues3) );
+        arithm::lut(src, d_lut.data, lut_cn, dst, cc30, stream);
     }
-
-    if (stream == 0)
-        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 ////////////////////////////////////////////////////////////////////////
diff --git a/modules/gpu/src/cuda/lut.cu b/modules/gpu/src/cuda/lut.cu
new file mode 100644
index 0000000000..be5efeca2b
--- /dev/null
+++ b/modules/gpu/src/cuda/lut.cu
@@ -0,0 +1,151 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include <cstring>
+#include "opencv2/gpu/device/common.hpp"
+#include "opencv2/gpu/device/transform.hpp"
+#include "opencv2/gpu/device/functional.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::device;
+
+namespace
+{
+    texture<uchar, cudaTextureType1D, cudaReadModeElementType> texLutTable;
+
+    struct LutC1 : public unary_function<uchar, uchar> // functor: single-channel table lookup, one uchar in, one uchar out
+    {
+        typedef uchar value_type;
+        typedef uchar index_type;
+
+        cudaTextureObject_t texLutTableObj; // read only by the texture-object branch of operator()
+
+        __device__ __forceinline__ uchar operator ()(uchar x) const
+        {
+        #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 300)
+            // Device code built for an architecture below 300: fetch entry x through the file-scope texture reference texLutTable
+            return tex1Dfetch(texLutTable, x);
+        #else
+            // Every other compilation pass: fetch entry x through the texture object stored in this functor
+            return tex1Dfetch<uchar>(texLutTableObj, x);
+        #endif
+        }
+    };
+    struct LutC3 : public unary_function<uchar3, uchar3> // functor: three-channel lookup, each component mapped through its own table channel
+    {
+        typedef uchar3 value_type;
+        typedef uchar3 index_type;
+
+        cudaTextureObject_t texLutTableObj; // read only by the texture-object branch of operator()
+
+        __device__ __forceinline__ uchar3 operator ()(const uchar3& x) const
+        {
+        #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 300)
+            // Device code built for an architecture below 300: texture reference; the table is interleaved, so channel c of value v sits at v * 3 + c
+            return make_uchar3(tex1Dfetch(texLutTable, x.x * 3), tex1Dfetch(texLutTable, x.y * 3 + 1), tex1Dfetch(texLutTable, x.z * 3 + 2));
+        #else
+            // Every other compilation pass: same v * 3 + c indexing, fetched through the texture object stored in this functor
+            return make_uchar3(tex1Dfetch<uchar>(texLutTableObj, x.x * 3), tex1Dfetch<uchar>(texLutTableObj, x.y * 3 + 1), tex1Dfetch<uchar>(texLutTableObj, x.z * 3 + 2));
+        #endif
+        }
+    };
+}
+
+namespace arithm
+{
+    // Maps src through a 256-entry table into dst; lut_cn is 1 or 3 (interleaved), any other value launches nothing.
+    // `lut` backs a 1D uchar texture: a texture object when cc30 is true, the file-scope texture reference otherwise.
+    void lut(PtrStepSzb src, uchar* lut, int lut_cn, PtrStepSzb dst, bool cc30, cudaStream_t stream)
+    {
+        // Zero-initialised: without cc30 no object is created, yet the handle is still copied into the functor below.
+        cudaTextureObject_t texLutTableObj = 0;
+
+        if (cc30)
+        {
+            // Texture-object path: describe the table as linear memory of 256 * lut_cn uchar elements
+            cudaResourceDesc texRes;
+            std::memset(&texRes, 0, sizeof(texRes));
+            texRes.resType = cudaResourceTypeLinear;
+            texRes.res.linear.devPtr = lut;
+            texRes.res.linear.desc = cudaCreateChannelDesc<uchar>();
+            texRes.res.linear.sizeInBytes = 256 * lut_cn * sizeof(uchar);
+
+            cudaTextureDesc texDescr;
+            std::memset(&texDescr, 0, sizeof(texDescr));
+
+            cudaSafeCall( cudaCreateTextureObject(&texLutTableObj, &texRes, &texDescr, 0) );
+        }
+        else
+        {
+            // Texture-reference path: bind the table to the file-scope texLutTable reference
+            cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar>();
+            cudaSafeCall( cudaBindTexture(0, &texLutTable, lut, &desc) );
+        }
+
+        if (lut_cn == 1)
+        {
+            LutC1 op;
+            op.texLutTableObj = texLutTableObj;
+            transform((PtrStepSz<uchar>) src, (PtrStepSz<uchar>) dst, op, WithOutMask(), stream);
+        }
+        else if (lut_cn == 3)
+        {
+            LutC3 op;
+            op.texLutTableObj = texLutTableObj;
+            transform((PtrStepSz<uchar3>) src, (PtrStepSz<uchar3>) dst, op, WithOutMask(), stream);
+        }
+
+        // NOTE(review): with a non-default stream the kernel may still be in flight when the texture is released below - confirm transform() synchronizes.
+        if (cc30)
+        {
+            cudaSafeCall( cudaDestroyTextureObject(texLutTableObj) );
+        }
+        else
+        {
+            cudaSafeCall( cudaUnbindTexture(texLutTable) );
+        }
+    }
+}
+
+#endif

From 00575b346d896d095cf65928a6e976bf5c103c5e Mon Sep 17 00:00:00 2001
From: Alexander Karsakov <alexander.karsakov@itseez.com>
Date: Wed, 20 Aug 2014 11:09:21 +0400
Subject: [PATCH 13/20] Fixed range for 'v' channel for 8U images.

(cherry picked from commit b027eac173ed9bc610f08d12a05c90fde395a07e)
---
 modules/imgproc/doc/miscellaneous_transformations.rst | 2 +-
 modules/imgproc/src/color.cpp                         | 4 ++--
 modules/imgproc/test/test_color.cpp                   | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/modules/imgproc/doc/miscellaneous_transformations.rst b/modules/imgproc/doc/miscellaneous_transformations.rst
index e525f726da..3f6dcfcc1e 100644
--- a/modules/imgproc/doc/miscellaneous_transformations.rst
+++ b/modules/imgproc/doc/miscellaneous_transformations.rst
@@ -383,7 +383,7 @@ The function can do the following transformations:
 
         .. math::
 
-            L  \leftarrow 255/100 L, \; u  \leftarrow 255/354 (u + 134), \; v  \leftarrow 255/256 (v + 140)
+            L  \leftarrow 255/100 L, \; u  \leftarrow 255/354 (u + 134), \; v  \leftarrow 255/262 (v + 140)
 
     * 16-bit images
         (currently not supported)
diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp
index 08f27aef97..f27d04d01e 100644
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@@ -2044,7 +2044,7 @@ struct RGB2Luv_b
             {
                 dst[j] = saturate_cast<uchar>(buf[j]*2.55f);
                 dst[j+1] = saturate_cast<uchar>(buf[j+1]*0.72033898305084743f + 96.525423728813564f);
-                dst[j+2] = saturate_cast<uchar>(buf[j+2]*0.99609375f + 139.453125f);
+                dst[j+2] = saturate_cast<uchar>(buf[j+2]*0.9732824427480916f + 136.259541984732824f);
             }
         }
     }
@@ -2076,7 +2076,7 @@ struct Luv2RGB_b
             {
                 buf[j] = src[j]*(100.f/255.f);
                 buf[j+1] = (float)(src[j+1]*1.388235294117647f - 134.f);
-                buf[j+2] = (float)(src[j+2]*1.003921568627451f - 140.f);
+                buf[j+2] = (float)(src[j+2]*1.027450980392157f - 140.f);
             }
             cvt(buf, buf, dn);
 
diff --git a/modules/imgproc/test/test_color.cpp b/modules/imgproc/test/test_color.cpp
index 55499855c4..47f79f8ec0 100644
--- a/modules/imgproc/test/test_color.cpp
+++ b/modules/imgproc/test/test_color.cpp
@@ -1168,8 +1168,8 @@ void CV_ColorLuvTest::convert_row_bgr2abc_32f_c3( const float* src_row, float* d
     {
         u_scale = 0.720338983f;
         u_bias = 96.5254237f;
-        v_scale = 0.99609375f;
-        v_bias = 139.453125f;
+        v_scale = 0.973282442f;
+        v_bias = 136.2595419f;
     }
 
     for( j = 0; j < n*3; j += 3 )
@@ -1221,8 +1221,8 @@ void CV_ColorLuvTest::convert_row_abc2bgr_32f_c3( const float* src_row, float* d
     {
         u_scale = 1.f/0.720338983f;
         u_bias = 96.5254237f;
-        v_scale = 1.f/0.99609375f;
-        v_bias = 139.453125f;
+        v_scale = 1.f/0.973282442f;
+        v_bias = 136.2595419f;
     }
 
     for( j = 0; j < n*3; j += 3 )

From bb93c5394876d1e11ca86e5fd5d4e3544a33b1c7 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Tue, 2 Sep 2014 11:54:54 +0400
Subject: [PATCH 14/20] OpenCV version++.

---
 modules/core/include/opencv2/core/version.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/core/include/opencv2/core/version.hpp b/modules/core/include/opencv2/core/version.hpp
index 63c2935282..2107552b71 100644
--- a/modules/core/include/opencv2/core/version.hpp
+++ b/modules/core/include/opencv2/core/version.hpp
@@ -50,7 +50,7 @@
 #define CV_VERSION_EPOCH    2
 #define CV_VERSION_MAJOR    4
 #define CV_VERSION_MINOR    9
-#define CV_VERSION_REVISION 0
+#define CV_VERSION_REVISION 1
 
 #define CVAUX_STR_EXP(__A)  #__A
 #define CVAUX_STR(__A)      CVAUX_STR_EXP(__A)

From 562796e41b80c9d22b7f8f942a432b22de3d05de Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Fri, 1 Aug 2014 11:33:29 +0400
Subject: [PATCH 15/20] fix cv::gpu::resize for INTER_LINEAR, now it produces
 the same result as CPU version (cherry picked from commit
 da9be8231fc153fd70ac4f4d41091d1653d00fd2)

---
 modules/gpu/src/cuda/resize.cu   | 34 +++++++++++++++++-------
 modules/gpu/test/test_resize.cpp | 44 +++++++++++++++++++++++++++-----
 2 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu
index 110e62d036..d679c32263 100644
--- a/modules/gpu/src/cuda/resize.cu
+++ b/modules/gpu/src/cuda/resize.cu
@@ -77,8 +77,8 @@ namespace cv { namespace gpu { namespace device
 
         if (dst_x < dst.cols && dst_y < dst.rows)
         {
-            const float src_x = dst_x * fx;
-            const float src_y = dst_y * fy;
+            const float src_x = (dst_x + 0.5f) * fx - 0.5f;
+            const float src_y = (dst_y + 0.5f) * fy - 0.5f;
 
             work_type out = VecTraits<work_type>::all(0);
 
@@ -86,16 +86,18 @@ namespace cv { namespace gpu { namespace device
             const int y1 = __float2int_rd(src_y);
             const int x2 = x1 + 1;
             const int y2 = y1 + 1;
-            const int x2_read = ::min(x2, src.cols - 1);
-            const int y2_read = ::min(y2, src.rows - 1);
+            const int x1_read = ::max(::min(x1, src.cols - 1), 0);
+            const int y1_read = ::max(::min(y1, src.rows - 1), 0);
+            const int x2_read = ::max(::min(x2, src.cols - 1), 0);
+            const int y2_read = ::max(::min(y2, src.rows - 1), 0);
 
-            T src_reg = src(y1, x1);
+            T src_reg = src(y1_read, x1_read);
             out = out + src_reg * ((x2 - src_x) * (y2 - src_y));
 
-            src_reg = src(y1, x2_read);
+            src_reg = src(y1_read, x2_read);
             out = out + src_reg * ((src_x - x1) * (y2 - src_y));
 
-            src_reg = src(y2_read, x1);
+            src_reg = src(y2_read, x1_read);
             out = out + src_reg * ((x2 - src_x) * (src_y - y1));
 
             src_reg = src(y2_read, x2_read);
@@ -119,6 +121,20 @@ namespace cv { namespace gpu { namespace device
         }
     }
 
+    // Resize kernel for filtered sources: one thread per dst pixel, sampled at pixel-centre-aligned src coordinates.
+    template <class Ptr2D, typename T> __global__ void resize_linear(const Ptr2D src, PtrStepSz<T> dst, const float fy, const float fx)
+    {
+        const int col = blockDim.x * blockIdx.x + threadIdx.x;
+        const int row = blockDim.y * blockIdx.y + threadIdx.y;
+        if (col >= dst.cols || row >= dst.rows)
+            return; // thread lies outside the destination image
+
+        // src is indexed with float coordinates; any interpolation is left to the Ptr2D type
+        const float src_x = (col + 0.5f) * fx - 0.5f;
+        const float src_y = (row + 0.5f) * fy - 0.5f;
+        dst(row, col) = src(src_y, src_x);
+    }
+
     template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, PtrStepSz<T> dst)
     {
         const int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -231,7 +247,7 @@ namespace cv { namespace gpu { namespace device
             TextureAccessor<T> texSrc = texAccessor(src, 0, 0);
             LinearFilter< TextureAccessor<T> > filteredSrc(texSrc);
 
-            resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
+            resize_linear<<<grid, block>>>(filteredSrc, dst, fy, fx);
         }
         else
         {
@@ -241,7 +257,7 @@ namespace cv { namespace gpu { namespace device
             BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd);
             LinearFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
 
-            resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
+            resize_linear<<<grid, block>>>(filteredSrc, dst, fy, fx);
         }
 
         cudaSafeCall( cudaGetLastError() );
diff --git a/modules/gpu/test/test_resize.cpp b/modules/gpu/test/test_resize.cpp
index b59e7b33f4..25f0f0e2bb 100644
--- a/modules/gpu/test/test_resize.cpp
+++ b/modules/gpu/test/test_resize.cpp
@@ -73,6 +73,28 @@ namespace
         }
     }
 
+    // Reference linear resize for the tests: samples src at pixel-centre-aligned coordinates with replicated borders.
+    template <typename T, template <typename> class Interpolator>
+    void resizeLinearImpl(const cv::Mat& src, cv::Mat& dst, double fx, double fy)
+    {
+        const int channels = src.channels();
+        const cv::Size outSize(cv::saturate_cast<int>(src.cols * fx), cv::saturate_cast<int>(src.rows * fy));
+        dst.create(outSize, src.type());
+        const float invScaleX = static_cast<float>(1.0 / fx);
+        const float invScaleY = static_cast<float>(1.0 / fy);
+
+        for (int row = 0; row < outSize.height; ++row)
+        {
+            const float srcY = (row + 0.5f) * invScaleY - 0.5f;
+            for (int col = 0; col < outSize.width; ++col)
+            {
+                const float srcX = (col + 0.5f) * invScaleX - 0.5f;
+                for (int ch = 0; ch < channels; ++ch)
+                    dst.at<T>(row, col * channels + ch) = Interpolator<T>::getValue(src, srcY, srcX, ch, cv::BORDER_REPLICATE);
+            }
+        }
+    }
+
     void resizeGold(const cv::Mat& src, cv::Mat& dst, double fx, double fy, int interpolation)
     {
         typedef void (*func_t)(const cv::Mat& src, cv::Mat& dst, double fx, double fy);
@@ -90,12 +112,12 @@ namespace
 
         static const func_t linear_funcs[] =
         {
-            resizeImpl<unsigned char, LinearInterpolator>,
-            resizeImpl<signed char, LinearInterpolator>,
-            resizeImpl<unsigned short, LinearInterpolator>,
-            resizeImpl<short, LinearInterpolator>,
-            resizeImpl<int, LinearInterpolator>,
-            resizeImpl<float, LinearInterpolator>
+            resizeLinearImpl<unsigned char, LinearInterpolator>,
+            resizeLinearImpl<signed char, LinearInterpolator>,
+            resizeLinearImpl<unsigned short, LinearInterpolator>,
+            resizeLinearImpl<short, LinearInterpolator>,
+            resizeLinearImpl<int, LinearInterpolator>,
+            resizeLinearImpl<float, LinearInterpolator>
         };
 
         static const func_t cubic_funcs[] =
@@ -204,7 +226,15 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine(
     DIFFERENT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
     testing::Values(0.3, 0.5),
-    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_AREA)),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_AREA)),
+    WHOLE_SUBMAT));
+
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc2, ResizeSameAsHost, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(0.3, 0.5, 1.5, 2.0),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)),
     WHOLE_SUBMAT));
 
 #endif // HAVE_CUDA

From cb1e9adc633772c09c0613b57b2d2b19316e7376 Mon Sep 17 00:00:00 2001
From: Alexander Karsakov <alexander.karsakov@itseez.com>
Date: Mon, 18 Aug 2014 13:01:19 +0400
Subject: [PATCH 16/20] Fixed getConversionInfo() for YUV2RGBA_* conversions

(cherry picked from commit 023a42ba55859932861f4a849fbec4cf1bd3ead7)
---
 modules/imgproc/perf/perf_cvt_color.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/imgproc/perf/perf_cvt_color.cpp b/modules/imgproc/perf/perf_cvt_color.cpp
index 966a442f36..86100ee834 100644
--- a/modules/imgproc/perf/perf_cvt_color.cpp
+++ b/modules/imgproc/perf/perf_cvt_color.cpp
@@ -155,14 +155,14 @@ ChPair getConversionInfo(int cvtMode)
     case CV_BGR5552BGR: case CV_BGR5552RGB:
     case CV_BGR5652BGR: case CV_BGR5652RGB:
     case CV_YUV2RGB_UYVY: case CV_YUV2BGR_UYVY:
-    case CV_YUV2RGBA_UYVY: case CV_YUV2BGRA_UYVY:
     case CV_YUV2RGB_YUY2: case CV_YUV2BGR_YUY2:
     case CV_YUV2RGB_YVYU: case CV_YUV2BGR_YVYU:
-    case CV_YUV2RGBA_YUY2: case CV_YUV2BGRA_YUY2:
-    case CV_YUV2RGBA_YVYU: case CV_YUV2BGRA_YVYU:
         return ChPair(2,3);
     case CV_BGR5552BGRA: case CV_BGR5552RGBA:
     case CV_BGR5652BGRA: case CV_BGR5652RGBA:
+    case CV_YUV2RGBA_UYVY: case CV_YUV2BGRA_UYVY:
+    case CV_YUV2RGBA_YUY2: case CV_YUV2BGRA_YUY2:
+    case CV_YUV2RGBA_YVYU: case CV_YUV2BGRA_YVYU:
         return ChPair(2,4);
     case CV_BGR2GRAY: case CV_RGB2GRAY:
     case CV_RGB2YUV_IYUV: case CV_RGB2YUV_YV12:

From fb81e4df1c6c5881d3a3f09b5762ee9d7c9741f3 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 21 Aug 2014 14:14:06 +0400
Subject: [PATCH 17/20] fix CUDA cvtColor after corresponding change in CPU
 version

see https://github.com/Itseez/opencv/pull/3137
(cherry picked from commit ebe36d6e7c99430b16f61e6b2ee832b91f155ff7)
---
 .../gpu/include/opencv2/gpu/device/detail/color_detail.hpp    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp b/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp
index f938b90801..a8952f1e61 100644
--- a/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp
+++ b/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp
@@ -1821,7 +1821,7 @@ namespace cv { namespace gpu { namespace device
 
             dst.x = saturate_cast<uchar>(dstf.x * 2.55f);
             dst.y = saturate_cast<uchar>(dstf.y * 0.72033898305084743f + 96.525423728813564f);
-            dst.z = saturate_cast<uchar>(dstf.z * 0.99609375f + 139.453125f);
+            dst.z = saturate_cast<uchar>(dstf.z * 0.9732824427480916f + 136.259541984732824f);
         }
 
         template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct RGB2Luv;
@@ -1915,7 +1915,7 @@ namespace cv { namespace gpu { namespace device
 
             srcf.x = src.x * (100.f / 255.f);
             srcf.y = src.y * 1.388235294117647f - 134.f;
-            srcf.z = src.z * 1.003921568627451f - 140.f;
+            srcf.z = src.z * 1.027450980392157f - 140.f;
 
             Luv2RGBConvert_f<srgb, blueIdx>(srcf, dstf);
 

From 4664a339ae4131589223e9cad1a16f5b0dbeb88e Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Thu, 8 May 2014 15:55:30 +0400
Subject: [PATCH 18/20] Fix non-Android cross compilation with
 OpenCVConfig.cmake

(cherry picked from commit e8376c789d675d9d4b536066320e2981b9981b49)
---
 cmake/templates/OpenCVConfig.cmake.in | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/cmake/templates/OpenCVConfig.cmake.in b/cmake/templates/OpenCVConfig.cmake.in
index 3b011109aa..6468aea5b3 100644
--- a/cmake/templates/OpenCVConfig.cmake.in
+++ b/cmake/templates/OpenCVConfig.cmake.in
@@ -60,7 +60,11 @@ set(OpenCV_USE_CUFFT    @HAVE_CUFFT@)
 set(OpenCV_USE_NVCUVID  @HAVE_NVCUVID@)
 
 # Android API level from which OpenCV has been compiled is remembered
-set(OpenCV_ANDROID_NATIVE_API_LEVEL @OpenCV_ANDROID_NATIVE_API_LEVEL_CONFIGCMAKE@)
+if(ANDROID)
+  set(OpenCV_ANDROID_NATIVE_API_LEVEL @OpenCV_ANDROID_NATIVE_API_LEVEL_CONFIGCMAKE@)
+else
+  set(OpenCV_ANDROID_NATIVE_API_LEVEL 0)
+endif()
 
 # Some additional settings are required if OpenCV is built as static libs
 set(OpenCV_SHARED @BUILD_SHARED_LIBS@)
@@ -71,8 +75,8 @@ set(OpenCV_USE_MANGLED_PATHS @OpenCV_USE_MANGLED_PATHS_CONFIGCMAKE@)
 # Extract the directory where *this* file has been installed (determined at cmake run-time)
 get_filename_component(OpenCV_CONFIG_PATH "${CMAKE_CURRENT_LIST_FILE}" PATH CACHE)
 
-if(NOT WIN32 OR OpenCV_ANDROID_NATIVE_API_LEVEL GREATER 0)
-  if(OpenCV_ANDROID_NATIVE_API_LEVEL GREATER 0)
+if(NOT WIN32 OR ANDROID)
+  if(ANDROID)
     set(OpenCV_INSTALL_PATH "${OpenCV_CONFIG_PATH}/../../..")
   else()
     set(OpenCV_INSTALL_PATH "${OpenCV_CONFIG_PATH}/../..")

From 10a2c51c52e19678264adfe7920a507e57835248 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 13 May 2014 11:37:21 +0400
Subject: [PATCH 19/20] fix OpenCVConfig.cmake template - missing parentheses
 (cherry picked from commit 67b562d543154b29e3b5f8f9c79a03790da40712)

---
 cmake/templates/OpenCVConfig.cmake.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/templates/OpenCVConfig.cmake.in b/cmake/templates/OpenCVConfig.cmake.in
index 6468aea5b3..6d1c1a990b 100644
--- a/cmake/templates/OpenCVConfig.cmake.in
+++ b/cmake/templates/OpenCVConfig.cmake.in
@@ -62,7 +62,7 @@ set(OpenCV_USE_NVCUVID  @HAVE_NVCUVID@)
 # Android API level from which OpenCV has been compiled is remembered
 if(ANDROID)
   set(OpenCV_ANDROID_NATIVE_API_LEVEL @OpenCV_ANDROID_NATIVE_API_LEVEL_CONFIGCMAKE@)
-else
+else()
   set(OpenCV_ANDROID_NATIVE_API_LEVEL 0)
 endif()
 

From e929dd4d98cc705a5434125b12322a491d431480 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Tue, 2 Sep 2014 20:24:49 +0400
Subject: [PATCH 20/20] GPU MatchTemplate32F test epsilon increased.

---
 modules/gpu/test/test_imgproc.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp
index 9ce32d12b8..2fde0d9d84 100644
--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -738,7 +738,7 @@ GPU_TEST_P(MatchTemplate32F, Regression)
     cv::Mat dst_gold;
     cv::matchTemplate(image, templ, dst_gold, method);
 
-    EXPECT_MAT_NEAR(dst_gold, dst, templ_size.area() * 1e-1);
+    EXPECT_MAT_NEAR(dst_gold, dst, templ_size.area() * 1.1e-1);
 }
 
 INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(