diff --git a/CMakeLists.txt b/CMakeLists.txt index cd2095db86..71d714b9b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,6 +194,7 @@ OCV_OPTION(BUILD_WITH_STATIC_CRT "Enables use of staticaly linked CRT for sta OCV_OPTION(BUILD_FAT_JAVA_LIB "Create fat java wrapper containing the whole OpenCV library" ON IF NOT BUILD_SHARED_LIBS AND CMAKE_COMPILER_IS_GNUCXX ) OCV_OPTION(BUILD_ANDROID_SERVICE "Build OpenCV Manager for Google Play" OFF IF ANDROID AND ANDROID_SOURCE_TREE ) OCV_OPTION(BUILD_ANDROID_PACKAGE "Build platform-specific package for Google Play" OFF IF ANDROID ) +OCV_OPTION(BUILD_TINY_GPU_MODULE "Build tiny gpu module with limited image format support" OFF ) # 3rd party libs OCV_OPTION(BUILD_ZLIB "Build zlib from source" WIN32 OR APPLE ) @@ -996,6 +997,7 @@ if(HAVE_CUDA) status(" NVIDIA GPU arch:" ${OPENCV_CUDA_ARCH_BIN}) status(" NVIDIA PTX archs:" ${OPENCV_CUDA_ARCH_PTX}) status(" Use fast math:" CUDA_FAST_MATH THEN YES ELSE NO) + status(" Tiny gpu module:" BUILD_TINY_GPU_MODULE THEN YES ELSE NO) endif() if(HAVE_OPENCL) diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 6a93d008ee..a69ac8c7e6 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -63,6 +63,10 @@ if(OPENCV_CAN_BREAK_BINARY_COMPATIBILITY) add_definitions(-DOPENCV_CAN_BREAK_BINARY_COMPATIBILITY) endif() +if(BUILD_TINY_GPU_MODULE) + add_definitions(-DOPENCV_TINY_GPU_MODULE) +endif() + if(CMAKE_COMPILER_IS_GNUCXX) # High level of warnings. add_extra_compiler_option(-W) diff --git a/modules/gpu/perf/perf_core.cpp b/modules/gpu/perf/perf_core.cpp index ae6ed865b1..87e22c4695 100644 --- a/modules/gpu/perf/perf_core.cpp +++ b/modules/gpu/perf/perf_core.cpp @@ -46,7 +46,11 @@ using namespace std; using namespace testing; using namespace perf; +#ifdef OPENCV_TINY_GPU_MODULE +#define ARITHM_MAT_DEPTH Values(CV_8U, CV_32F) +#else #define ARITHM_MAT_DEPTH Values(CV_8U, CV_16U, CV_32F, CV_64F) +#endif ////////////////////////////////////////////////////////////////////// // Merge @@ -524,9 +528,17 @@ PERF_TEST_P(Sz_Depth, Core_AbsDiffScalar, ////////////////////////////////////////////////////////////////////// // Abs -PERF_TEST_P(Sz_Depth, Core_Abs, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_16S, CV_32F))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth, Core_Abs, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)) +)) +#else +PERF_TEST_P(Sz_Depth, Core_Abs, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_16S, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -552,9 +564,17 @@ PERF_TEST_P(Sz_Depth, Core_Abs, ////////////////////////////////////////////////////////////////////// // Sqr -PERF_TEST_P(Sz_Depth, Core_Sqr, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16S, CV_32F))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth, Core_Sqr, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)) +)) +#else +PERF_TEST_P(Sz_Depth, Core_Sqr, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -580,9 +600,17 @@ PERF_TEST_P(Sz_Depth, Core_Sqr, ////////////////////////////////////////////////////////////////////// // Sqrt -PERF_TEST_P(Sz_Depth, Core_Sqrt, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16S, CV_32F))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth, Core_Sqrt, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)) +)) +#else +PERF_TEST_P(Sz_Depth, Core_Sqrt, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -612,9 +640,17 @@ PERF_TEST_P(Sz_Depth, Core_Sqrt, ////////////////////////////////////////////////////////////////////// // Log -PERF_TEST_P(Sz_Depth, Core_Log, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16S, CV_32F))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth, Core_Log, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)) +)) +#else +PERF_TEST_P(Sz_Depth, Core_Log, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -644,9 +680,17 @@ PERF_TEST_P(Sz_Depth, Core_Log, ////////////////////////////////////////////////////////////////////// // Exp -PERF_TEST_P(Sz_Depth, Core_Exp, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16S, CV_32F))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth, Core_Exp, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)) +)) +#else +PERF_TEST_P(Sz_Depth, Core_Exp, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -678,10 +722,19 @@ PERF_TEST_P(Sz_Depth, Core_Exp, DEF_PARAM_TEST(Sz_Depth_Power, cv::Size, MatDepth, double); -PERF_TEST_P(Sz_Depth_Power, Core_Pow, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16S, CV_32F), - Values(0.3, 2.0, 2.4))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Power, Core_Pow, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)), + Values(0.3, 2.0, 2.4) +)) +#else +PERF_TEST_P(Sz_Depth_Power, Core_Pow, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F), + Values(0.3, 2.0, 2.4) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -859,10 +912,19 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseAndMat, ////////////////////////////////////////////////////////////////////// // BitwiseAndScalar -PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32S), - GPU_CHANNELS_1_3_4)) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_8U)), + testing::Values(MatCn(Gray)) +)) +#else +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseAndScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + GPU_CHANNELS_1_3_4 +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -935,10 +997,19 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseOrMat, ////////////////////////////////////////////////////////////////////// // BitwiseOrScalar -PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32S), - GPU_CHANNELS_1_3_4)) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_8U)), + testing::Values(MatCn(Gray)) +)) +#else +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseOrScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + GPU_CHANNELS_1_3_4 +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1011,10 +1082,19 @@ PERF_TEST_P(Sz_Depth, Core_BitwiseXorMat, ////////////////////////////////////////////////////////////////////// // BitwiseXorScalar -PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32S), - GPU_CHANNELS_1_3_4)) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_8U)), + testing::Values(MatCn(Gray)) +)) +#else +PERF_TEST_P(Sz_Depth_Cn, Core_BitwiseXorScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + GPU_CHANNELS_1_3_4 +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1155,9 +1235,17 @@ PERF_TEST_P(Sz_Depth, Core_MinMat, ////////////////////////////////////////////////////////////////////// // MinScalar -PERF_TEST_P(Sz_Depth, Core_MinScalar, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth, Core_MinScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F) +)) +#else +PERF_TEST_P(Sz_Depth, Core_MinScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1226,9 +1314,17 @@ PERF_TEST_P(Sz_Depth, Core_MaxMat, ////////////////////////////////////////////////////////////////////// // MaxScalar -PERF_TEST_P(Sz_Depth, Core_MaxScalar, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth, Core_MaxScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F) +)) +#else +PERF_TEST_P(Sz_Depth, Core_MaxScalar, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1263,11 +1359,21 @@ PERF_TEST_P(Sz_Depth, Core_MaxScalar, DEF_PARAM_TEST(Sz_3Depth, cv::Size, MatDepth, MatDepth, MatDepth); -PERF_TEST_P(Sz_3Depth, Core_AddWeighted, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F, CV_64F), - Values(CV_8U, CV_16U, CV_32F, CV_64F), - Values(CV_8U, CV_16U, CV_32F, CV_64F))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_3Depth, Core_AddWeighted, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(MatDepth(CV_32F)), + Values(MatDepth(CV_32F)), + Values(MatDepth(CV_32F)) +)) +#else +PERF_TEST_P(Sz_3Depth, Core_AddWeighted, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(CV_8U, CV_16U, CV_32F, CV_64F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth1 = GET_PARAM(1); @@ -1782,10 +1888,19 @@ PERF_TEST_P(Sz, Core_MeanStdDev, DEF_PARAM_TEST(Sz_Depth_Norm, cv::Size, MatDepth, NormType); -PERF_TEST_P(Sz_Depth_Norm, Core_Norm, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32S, CV_32F), - Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Norm, Core_Norm, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)) +)) +#else +PERF_TEST_P(Sz_Depth_Norm, Core_Norm, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S, CV_32F), + Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1859,10 +1974,19 @@ PERF_TEST_P(Sz_Norm, Core_NormDiff, ////////////////////////////////////////////////////////////////////// // Sum -PERF_TEST_P(Sz_Depth_Cn, Core_Sum, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4)) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn, Core_Sum, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + testing::Values(MatCn(Gray)) +)) +#else +PERF_TEST_P(Sz_Depth_Cn, Core_Sum, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4 +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1896,10 +2020,19 @@ PERF_TEST_P(Sz_Depth_Cn, Core_Sum, ////////////////////////////////////////////////////////////////////// // SumAbs -PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4)) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + testing::Values(MatCn(Gray)) +)) +#else +PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4 +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1929,10 +2062,19 @@ PERF_TEST_P(Sz_Depth_Cn, Core_SumAbs, ////////////////////////////////////////////////////////////////////// // SumSqr -PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4)) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + testing::Values(MatCn(Gray)) +)) +#else +PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4 +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1962,9 +2104,17 @@ PERF_TEST_P(Sz_Depth_Cn, Core_SumSqr, ////////////////////////////////////////////////////////////////////// // MinMax -PERF_TEST_P(Sz_Depth, Core_MinMax, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F, CV_64F))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth, Core_MinMax, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F) +)) +#else +PERF_TEST_P(Sz_Depth, Core_MinMax, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2000,9 +2150,17 @@ PERF_TEST_P(Sz_Depth, Core_MinMax, ////////////////////////////////////////////////////////////////////// // MinMaxLoc -PERF_TEST_P(Sz_Depth, Core_MinMaxLoc, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F, CV_64F))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth, Core_MinMaxLoc, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F) +)) +#else +PERF_TEST_P(Sz_Depth, Core_MinMaxLoc, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2040,9 +2198,17 @@ PERF_TEST_P(Sz_Depth, Core_MinMaxLoc, ////////////////////////////////////////////////////////////////////// // CountNonZero -PERF_TEST_P(Sz_Depth, Core_CountNonZero, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F, CV_64F))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth, Core_CountNonZero, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F) +)) +#else +PERF_TEST_P(Sz_Depth, Core_CountNonZero, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F) +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2079,12 +2245,23 @@ CV_ENUM(ReduceDim, Rows, Cols) DEF_PARAM_TEST(Sz_Depth_Cn_Code_Dim, cv::Size, MatDepth, MatCn, ReduceCode, ReduceDim); -PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_16S, CV_32F), - Values(1, 2, 3, 4), - ReduceCode::all(), - ReduceDim::all())) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + Values(1, 2, 3, 4), + ReduceCode::all(), + ReduceDim::all() +)) +#else +PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_16S, CV_32F), + Values(1, 2, 3, 4), + ReduceCode::all(), + ReduceDim::all() +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -2120,13 +2297,25 @@ PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Core_Reduce, DEF_PARAM_TEST(Sz_Depth_NormType, cv::Size, MatDepth, NormType); -PERF_TEST_P(Sz_Depth_NormType, Core_Normalize, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F, CV_64F), - Values(NormType(cv::NORM_INF), - NormType(cv::NORM_L1), - NormType(cv::NORM_L2), - NormType(cv::NORM_MINMAX)))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_NormType, Core_Normalize, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + Values(NormType(cv::NORM_INF), + NormType(cv::NORM_L1), + NormType(cv::NORM_L2), + NormType(cv::NORM_MINMAX)) +)) +#else +PERF_TEST_P(Sz_Depth_NormType, Core_Normalize, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(NormType(cv::NORM_INF), + NormType(cv::NORM_L1), + NormType(cv::NORM_L2), + NormType(cv::NORM_MINMAX)) +)) +#endif { const cv::Size size = GET_PARAM(0); const int type = GET_PARAM(1); diff --git a/modules/gpu/perf/perf_features2d.cpp b/modules/gpu/perf/perf_features2d.cpp index 2b1ab58129..5a21acdff6 100644 --- a/modules/gpu/perf/perf_features2d.cpp +++ b/modules/gpu/perf/perf_features2d.cpp @@ -145,9 +145,17 @@ PERF_TEST_P(Image_NFeatures, Features2D_ORB, DEF_PARAM_TEST(DescSize_Norm, int, NormType); -PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, - Combine(Values(64, 128, 256), - Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING)))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine( + Values(64, 128, 256), + Values(NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING)) +)) +#else +PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine( + Values(64, 128, 256), + Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING)) +)) +#endif { declare.time(20.0); @@ -202,10 +210,19 @@ static void toOneRowMatches(const std::vector< std::vector >& src, s DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType); -PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, - Combine(Values(64, 128, 256), - Values(2, 3), - Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2)))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine( + Values(64, 128, 256), + Values(2, 3), + Values(NormType(cv::NORM_L2)) +)) +#else +PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine( + Values(64, 128, 256), + Values(2, 3), + Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2)) +)) +#endif { declare.time(30.0); @@ -257,9 +274,17 @@ PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, ////////////////////////////////////////////////////////////////////// // BFRadiusMatch -PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, - Combine(Values(64, 128, 256), - Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2)))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine( + Values(64, 128, 256), + Values(NormType(cv::NORM_L2)) +)) +#else +PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine( + Values(64, 128, 256), + Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2)) +)) +#endif { declare.time(30.0); diff --git a/modules/gpu/perf/perf_filters.cpp b/modules/gpu/perf/perf_filters.cpp index adfc294f6d..f064dd395e 100644 --- a/modules/gpu/perf/perf_filters.cpp +++ b/modules/gpu/perf/perf_filters.cpp @@ -87,7 +87,19 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, ////////////////////////////////////////////////////////////////////// // Sobel -PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8UC1, CV_8UC4, CV_32FC1), + Values(3, 5, 7) +)) +#else +PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8UC1, CV_8UC4, CV_32FC1), + Values(3, 5, 7, 9, 11, 13, 15) +)) +#endif { declare.time(20.0); @@ -154,7 +166,19 @@ PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U ////////////////////////////////////////////////////////////////////// // GaussianBlur -PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8UC1, CV_8UC4, CV_32FC1), + Values(3, 5, 7) +)) +#else +PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8UC1, CV_8UC4, CV_32FC1), + Values(3, 5, 7, 9, 11, 13, 15) +)) +#endif { declare.time(20.0); diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index c7c1022941..f2762e07c0 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -91,13 +91,25 @@ void generateMap(cv::Mat& map_x, cv::Mat& map_y, int remapMode) DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border_Mode, cv::Size, MatDepth, MatCn, Interpolation, BorderMode, RemapMode); -PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4, - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), - ALL_BORDER_MODES, - RemapMode::all())) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + ALL_BORDER_MODES, + RemapMode::all() +)) +#else +PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), + ALL_BORDER_MODES, + RemapMode::all() +)) +#endif { declare.time(20.0); @@ -143,12 +155,23 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap, DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Scale, cv::Size, MatDepth, MatCn, Interpolation, double); -PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4, - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), - Values(0.5, 0.3, 2.0))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + Values(0.5, 0.3, 2.0) +)) +#else +PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), + Values(0.5, 0.3, 2.0) +)) +#endif { declare.time(20.0); @@ -187,11 +210,21 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize, DEF_PARAM_TEST(Sz_Depth_Cn_Scale, cv::Size, MatDepth, MatCn, double); -PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4, - Values(0.2, 0.1, 0.05))) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(0.2, 0.1, 0.05) +)) +#else +PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(0.2, 0.1, 0.05) +)) +#endif { declare.time(1.0); @@ -230,12 +263,23 @@ PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea, DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border, cv::Size, MatDepth, MatCn, Interpolation, BorderMode); -PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4, - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), - ALL_BORDER_MODES)) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + ALL_BORDER_MODES) +) +#else +PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), + ALL_BORDER_MODES) +) +#endif { declare.time(20.0); @@ -280,12 +324,23 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine, ////////////////////////////////////////////////////////////////////// // WarpPerspective -PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4, - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), - ALL_BORDER_MODES)) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + ALL_BORDER_MODES) +) +#else +PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4, + Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), + ALL_BORDER_MODES) +) +#endif { declare.time(20.0); @@ -330,11 +385,21 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective, DEF_PARAM_TEST(Sz_Depth_Cn_Border, cv::Size, MatDepth, MatCn, BorderMode); -PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4, - ALL_BORDER_MODES)) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4, + ALL_BORDER_MODES) +) +#else +PERF_TEST_P(Sz_Depth_Cn_Border, ImgProc_CopyMakeBorder, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4, + ALL_BORDER_MODES) +) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -372,10 +437,19 @@ CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp); -PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F, CV_64F), - ThreshOp::all())) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + ThreshOp::all() +)) +#else +PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + ThreshOp::all() +)) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -672,10 +746,19 @@ PERF_TEST_P(Sz, ImgProc_ColumnSum, DEF_PARAM_TEST(Image_AppertureSz_L2gradient, string, int, bool); -PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny, - Combine(Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"), - Values(3, 5), - Bool())) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny, Combine( + Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"), + Values(3), + Bool() +)) +#else +PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny, Combine( + Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"), + Values(3, 5), + Bool() +)) +#endif { const string fileName = GET_PARAM(0); const int apperture_size = GET_PARAM(1); @@ -1300,10 +1383,19 @@ PERF_TEST_P(Sz_Depth_Cn_Inter, ImgProc_Rotate, ////////////////////////////////////////////////////////////////////// // PyrDown -PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4)) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4) +) +#else +PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4) +) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); @@ -1336,10 +1428,19 @@ PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown, ////////////////////////////////////////////////////////////////////// // PyrUp -PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4)) +#ifdef OPENCV_TINY_GPU_MODULE +PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_32F), + GPU_CHANNELS_1_3_4) +) +#else +PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp, Combine( + GPU_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + GPU_CHANNELS_1_3_4) +) +#endif { const cv::Size size = GET_PARAM(0); const int depth = GET_PARAM(1); diff --git a/modules/gpu/src/brute_force_matcher.cpp b/modules/gpu/src/brute_force_matcher.cpp index 5da22e156b..e39bce2270 100644 --- a/modules/gpu/src/brute_force_matcher.cpp +++ b/modules/gpu/src/brute_force_matcher.cpp @@ -204,6 +204,26 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[3][6] = + { + { + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/, + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/, + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/ + }, + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }, + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + 0/*matchHamming_gpu*/, 0/*matchHamming_gpu*/, + 0/*matchHamming_gpu*/, 0/*matchHamming_gpu*/ + } + }; +#else static const caller_t callers[3][6] = { { @@ -222,6 +242,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const matchHamming_gpu, 0/*matchHamming_gpu*/ } }; +#endif CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.cols == query.cols && train.type() == query.type()); @@ -334,6 +355,16 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, const GpuMat& masks, Stream& stream) { +#ifdef OPENCV_TINY_GPU_MODULE + (void)query; + (void)trainCollection; + (void)trainIdx; + (void)imgIdx; + (void)distance; + (void)masks; + (void)stream; + CV_Error(CV_StsNotImplemented, "not available in tiny build"); +#else if (query.empty() || trainCollection.empty()) return; @@ -374,6 +405,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c CV_Assert(func != 0); func(query, trainCollection, masks, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream)); +#endif } void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, vector& matches) @@ -451,6 +483,26 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[3][6] = + { + { + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/, + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/, + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/ + }, + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }, + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + 0/*matchHamming_gpu*/, 0/*matchHamming_gpu*/, + 0/*matchHamming_gpu*/, 0/*matchHamming_gpu*/ + } + }; +#else static const caller_t callers[3][6] = { { @@ -469,6 +521,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co matchHamming_gpu, 0/*matchHamming_gpu*/ } }; +#endif CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(train.type() == query.type() && train.cols == query.cols); @@ -568,6 +621,16 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, const GpuMat& maskCollection, Stream& stream) { +#ifdef OPENCV_TINY_GPU_MODULE + (void)query; + (void)trainCollection; + (void)trainIdx; + (void)imgIdx; + (void)distance; + (void)maskCollection; + (void)stream; + CV_Error(CV_StsNotImplemented, "not available in tiny build"); +#else if (query.empty() || trainCollection.empty()) return; @@ -613,6 +676,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer CV_Assert(func != 0); func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream)); +#endif } void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, @@ -755,6 +819,26 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[3][6] = + { + { + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/, + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/, + 0/*matchL1_gpu*/, 0/*matchL1_gpu*/ + }, + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }, + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + 0/*matchHamming_gpu*/, 0/*matchHamming_gpu*/, + 0/*matchHamming_gpu*/, 0/*matchHamming_gpu*/ + } + }; +#else static const caller_t callers[3][6] = { { @@ -773,6 +857,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query, matchHamming_gpu, 0/*matchHamming_gpu*/ } }; +#endif const int nQuery = query.rows; const int nTrain = train.rows; @@ -872,6 +957,17 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& query, const void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, const vector& masks, Stream& stream) { +#ifdef OPENCV_TINY_GPU_MODULE + (void)query; + (void)trainIdx; + (void)imgIdx; + (void)distance; + (void)nMatches; + (void)maxDistance; + (void)masks; + (void)stream; + CV_Error(CV_StsNotImplemented, "not available in tiny build"); +#else if (query.empty() || empty()) return; @@ -926,6 +1022,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu func(query, &trains_[0], static_cast(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0], trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream)); +#endif } void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches, diff --git a/modules/gpu/src/color.cpp b/modules/gpu/src/color.cpp index 66a1ad6791..09b8be526f 100644 --- a/modules/gpu/src/color.cpp +++ b/modules/gpu/src/color.cpp @@ -71,6 +71,12 @@ namespace cv { namespace gpu { using namespace ::cv::gpu::device; +#ifdef OPENCV_TINY_GPU_MODULE + #define APPEND_16U(func) 0 +#else + #define APPEND_16U(func) func ## _16u +#endif + namespace { typedef void (*gpu_func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); @@ -78,10 +84,11 @@ namespace void bgr_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgr_to_rgb_8u, 0, bgr_to_rgb_16u, 0, 0, bgr_to_rgb_32f}; + static const gpu_func_t funcs[] = {bgr_to_rgb_8u, 0, APPEND_16U(bgr_to_rgb), 0, 0, bgr_to_rgb_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 3)); @@ -91,10 +98,11 @@ namespace void bgr_to_bgra(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgr_to_bgra_8u, 0, bgr_to_bgra_16u, 0, 0, bgr_to_bgra_32f}; + static const gpu_func_t funcs[] = {bgr_to_bgra_8u, 0, APPEND_16U(bgr_to_bgra), 0, 0, bgr_to_bgra_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 4)); @@ -104,10 +112,11 @@ namespace void bgr_to_rgba(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgr_to_rgba_8u, 0, bgr_to_rgba_16u, 0, 0, bgr_to_rgba_32f}; + static const gpu_func_t funcs[] = {bgr_to_rgba_8u, 0, APPEND_16U(bgr_to_rgba), 0, 0, bgr_to_rgba_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 4)); @@ -117,10 +126,11 @@ namespace void bgra_to_bgr(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgra_to_bgr_8u, 0, bgra_to_bgr_16u, 0, 0, bgra_to_bgr_32f}; + static const gpu_func_t funcs[] = {bgra_to_bgr_8u, 0, APPEND_16U(bgra_to_bgr), 0, 0, bgra_to_bgr_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 4); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 3)); @@ -130,10 +140,11 @@ namespace void bgra_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgra_to_rgb_8u, 0, bgra_to_rgb_16u, 0, 0, bgra_to_rgb_32f}; + static const gpu_func_t funcs[] = {bgra_to_rgb_8u, 0, APPEND_16U(bgra_to_rgb), 0, 0, bgra_to_rgb_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 4); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 3)); @@ -143,10 +154,11 @@ namespace void bgra_to_rgba(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgra_to_rgba_8u, 0, bgra_to_rgba_16u, 0, 0, bgra_to_rgba_32f}; + static const gpu_func_t funcs[] = {bgra_to_rgba_8u, 0, APPEND_16U(bgra_to_rgba), 0, 0, bgra_to_rgba_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 4); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 4)); @@ -316,10 +328,11 @@ namespace void gray_to_bgr(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {gray_to_bgr_8u, 0, gray_to_bgr_16u, 0, 0, gray_to_bgr_32f}; + static const gpu_func_t funcs[] = {gray_to_bgr_8u, 0, APPEND_16U(gray_to_bgr), 0, 0, gray_to_bgr_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 1); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 3)); @@ -329,10 +342,11 @@ namespace void gray_to_bgra(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {gray_to_bgra_8u, 0, gray_to_bgra_16u, 0, 0, gray_to_bgra_32f}; + static const gpu_func_t funcs[] = {gray_to_bgra_8u, 0, APPEND_16U(gray_to_bgra), 0, 0, gray_to_bgra_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 1); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 4)); @@ -382,10 +396,11 @@ namespace void rgb_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {rgb_to_gray_8u, 0, rgb_to_gray_16u, 0, 0, rgb_to_gray_32f}; + static const gpu_func_t funcs[] = {rgb_to_gray_8u, 0, APPEND_16U(rgb_to_gray), 0, 0, rgb_to_gray_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); @@ -395,10 +410,11 @@ namespace void bgr_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgr_to_gray_8u, 0, bgr_to_gray_16u, 0, 0, bgr_to_gray_32f}; + static const gpu_func_t funcs[] = {bgr_to_gray_8u, 0, APPEND_16U(bgr_to_gray), 0, 0, bgr_to_gray_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); @@ -408,10 +424,11 @@ namespace void rgba_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {rgba_to_gray_8u, 0, rgba_to_gray_16u, 0, 0, rgba_to_gray_32f}; + static const gpu_func_t funcs[] = {rgba_to_gray_8u, 0, APPEND_16U(rgba_to_gray), 0, 0, rgba_to_gray_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 4); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); @@ -421,10 +438,11 @@ namespace void bgra_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) { using namespace cv::gpu::device; - static const gpu_func_t funcs[] = {bgra_to_gray_8u, 0, bgra_to_gray_16u, 0, 0, bgra_to_gray_32f}; + static const gpu_func_t funcs[] = {bgra_to_gray_8u, 0, APPEND_16U(bgra_to_gray), 0, 0, bgra_to_gray_32f}; CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 4); + CV_Assert(funcs[src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); @@ -437,12 +455,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {rgb_to_yuv_8u, 0, rgb_to_yuv_16u, 0, 0, rgb_to_yuv_32f}, - {rgba_to_yuv_8u, 0, rgba_to_yuv_16u, 0, 0, rgba_to_yuv_32f} + {rgb_to_yuv_8u, 0, APPEND_16U(rgb_to_yuv), 0, 0, rgb_to_yuv_32f}, + {rgba_to_yuv_8u, 0, APPEND_16U(rgba_to_yuv), 0, 0, rgba_to_yuv_32f} }, { - {rgb_to_yuv4_8u, 0, rgb_to_yuv4_16u, 0, 0, rgb_to_yuv4_32f}, - {rgba_to_yuv4_8u, 0, rgba_to_yuv4_16u, 0, 0, rgba_to_yuv4_32f} + {rgb_to_yuv4_8u, 0, APPEND_16U(rgb_to_yuv4), 0, 0, rgb_to_yuv4_32f}, + {rgba_to_yuv4_8u, 0, APPEND_16U(rgba_to_yuv4), 0, 0, rgba_to_yuv4_32f} } }; @@ -451,6 +469,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -463,12 +482,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {bgr_to_yuv_8u, 0, bgr_to_yuv_16u, 0, 0, bgr_to_yuv_32f}, - {bgra_to_yuv_8u, 0, bgra_to_yuv_16u, 0, 0, bgra_to_yuv_32f} + {bgr_to_yuv_8u, 0, APPEND_16U(bgr_to_yuv), 0, 0, bgr_to_yuv_32f}, + {bgra_to_yuv_8u, 0, APPEND_16U(bgra_to_yuv), 0, 0, bgra_to_yuv_32f} }, { - {bgr_to_yuv4_8u, 0, bgr_to_yuv4_16u, 0, 0, bgr_to_yuv4_32f}, - {bgra_to_yuv4_8u, 0, bgra_to_yuv4_16u, 0, 0, bgra_to_yuv4_32f} + {bgr_to_yuv4_8u, 0, APPEND_16U(bgr_to_yuv4), 0, 0, bgr_to_yuv4_32f}, + {bgra_to_yuv4_8u, 0, APPEND_16U(bgra_to_yuv4), 0, 0, bgra_to_yuv4_32f} } }; @@ -477,6 +496,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -489,12 +509,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {yuv_to_rgb_8u, 0, yuv_to_rgb_16u, 0, 0, yuv_to_rgb_32f}, - {yuv4_to_rgb_8u, 0, yuv4_to_rgb_16u, 0, 0, yuv4_to_rgb_32f} + {yuv_to_rgb_8u, 0, APPEND_16U(yuv_to_rgb), 0, 0, yuv_to_rgb_32f}, + {yuv4_to_rgb_8u, 0, APPEND_16U(yuv4_to_rgb), 0, 0, yuv4_to_rgb_32f} }, { - {yuv_to_rgba_8u, 0, yuv_to_rgba_16u, 0, 0, yuv_to_rgba_32f}, - {yuv4_to_rgba_8u, 0, yuv4_to_rgba_16u, 0, 0, yuv4_to_rgba_32f} + {yuv_to_rgba_8u, 0, APPEND_16U(yuv_to_rgba), 0, 0, yuv_to_rgba_32f}, + {yuv4_to_rgba_8u, 0, APPEND_16U(yuv4_to_rgba), 0, 0, yuv4_to_rgba_32f} } }; @@ -503,6 +523,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -515,12 +536,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {yuv_to_bgr_8u, 0, yuv_to_bgr_16u, 0, 0, yuv_to_bgr_32f}, - {yuv4_to_bgr_8u, 0, yuv4_to_bgr_16u, 0, 0, yuv4_to_bgr_32f} + {yuv_to_bgr_8u, 0, APPEND_16U(yuv_to_bgr), 0, 0, yuv_to_bgr_32f}, + {yuv4_to_bgr_8u, 0, APPEND_16U(yuv4_to_bgr), 0, 0, yuv4_to_bgr_32f} }, { - {yuv_to_bgra_8u, 0, yuv_to_bgra_16u, 0, 0, yuv_to_bgra_32f}, - {yuv4_to_bgra_8u, 0, yuv4_to_bgra_16u, 0, 0, yuv4_to_bgra_32f} + {yuv_to_bgra_8u, 0, APPEND_16U(yuv_to_bgra), 0, 0, yuv_to_bgra_32f}, + {yuv4_to_bgra_8u, 0, APPEND_16U(yuv4_to_bgra), 0, 0, yuv4_to_bgra_32f} } }; @@ -529,6 +550,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -541,12 +563,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {rgb_to_YCrCb_8u, 0, rgb_to_YCrCb_16u, 0, 0, rgb_to_YCrCb_32f}, - {rgba_to_YCrCb_8u, 0, rgba_to_YCrCb_16u, 0, 0, rgba_to_YCrCb_32f} + {rgb_to_YCrCb_8u, 0, APPEND_16U(rgb_to_YCrCb), 0, 0, rgb_to_YCrCb_32f}, + {rgba_to_YCrCb_8u, 0, APPEND_16U(rgba_to_YCrCb), 0, 0, rgba_to_YCrCb_32f} }, { - {rgb_to_YCrCb4_8u, 0, rgb_to_YCrCb4_16u, 0, 0, rgb_to_YCrCb4_32f}, - {rgba_to_YCrCb4_8u, 0, rgba_to_YCrCb4_16u, 0, 0, rgba_to_YCrCb4_32f} + {rgb_to_YCrCb4_8u, 0, APPEND_16U(rgb_to_YCrCb4), 0, 0, rgb_to_YCrCb4_32f}, + {rgba_to_YCrCb4_8u, 0, APPEND_16U(rgba_to_YCrCb4), 0, 0, rgba_to_YCrCb4_32f} } }; @@ -555,6 +577,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -567,12 +590,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {bgr_to_YCrCb_8u, 0, bgr_to_YCrCb_16u, 0, 0, bgr_to_YCrCb_32f}, - {bgra_to_YCrCb_8u, 0, bgra_to_YCrCb_16u, 0, 0, bgra_to_YCrCb_32f} + {bgr_to_YCrCb_8u, 0, APPEND_16U(bgr_to_YCrCb), 0, 0, bgr_to_YCrCb_32f}, + {bgra_to_YCrCb_8u, 0, APPEND_16U(bgra_to_YCrCb), 0, 0, bgra_to_YCrCb_32f} }, { - {bgr_to_YCrCb4_8u, 0, bgr_to_YCrCb4_16u, 0, 0, bgr_to_YCrCb4_32f}, - {bgra_to_YCrCb4_8u, 0, bgra_to_YCrCb4_16u, 0, 0, bgra_to_YCrCb4_32f} + {bgr_to_YCrCb4_8u, 0, APPEND_16U(bgr_to_YCrCb4), 0, 0, bgr_to_YCrCb4_32f}, + {bgra_to_YCrCb4_8u, 0, APPEND_16U(bgra_to_YCrCb4), 0, 0, bgra_to_YCrCb4_32f} } }; @@ -581,6 +604,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -593,12 +617,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {YCrCb_to_rgb_8u, 0, YCrCb_to_rgb_16u, 0, 0, YCrCb_to_rgb_32f}, - {YCrCb4_to_rgb_8u, 0, YCrCb4_to_rgb_16u, 0, 0, YCrCb4_to_rgb_32f} + {YCrCb_to_rgb_8u, 0, APPEND_16U(YCrCb_to_rgb), 0, 0, YCrCb_to_rgb_32f}, + {YCrCb4_to_rgb_8u, 0, APPEND_16U(YCrCb4_to_rgb), 0, 0, YCrCb4_to_rgb_32f} }, { - {YCrCb_to_rgba_8u, 0, YCrCb_to_rgba_16u, 0, 0, YCrCb_to_rgba_32f}, - {YCrCb4_to_rgba_8u, 0, YCrCb4_to_rgba_16u, 0, 0, YCrCb4_to_rgba_32f} + {YCrCb_to_rgba_8u, 0, APPEND_16U(YCrCb_to_rgba), 0, 0, YCrCb_to_rgba_32f}, + {YCrCb4_to_rgba_8u, 0, APPEND_16U(YCrCb4_to_rgba), 0, 0, YCrCb4_to_rgba_32f} } }; @@ -607,6 +631,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -619,12 +644,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {YCrCb_to_bgr_8u, 0, YCrCb_to_bgr_16u, 0, 0, YCrCb_to_bgr_32f}, - {YCrCb4_to_bgr_8u, 0, YCrCb4_to_bgr_16u, 0, 0, YCrCb4_to_bgr_32f} + {YCrCb_to_bgr_8u, 0, APPEND_16U(YCrCb_to_bgr), 0, 0, YCrCb_to_bgr_32f}, + {YCrCb4_to_bgr_8u, 0, APPEND_16U(YCrCb4_to_bgr), 0, 0, YCrCb4_to_bgr_32f} }, { - {YCrCb_to_bgra_8u, 0, YCrCb_to_bgra_16u, 0, 0, YCrCb_to_bgra_32f}, - {YCrCb4_to_bgra_8u, 0, YCrCb4_to_bgra_16u, 0, 0, YCrCb4_to_bgra_32f} + {YCrCb_to_bgra_8u, 0, APPEND_16U(YCrCb_to_bgra), 0, 0, YCrCb_to_bgra_32f}, + {YCrCb4_to_bgra_8u, 0, APPEND_16U(YCrCb4_to_bgra), 0, 0, YCrCb4_to_bgra_32f} } }; @@ -633,6 +658,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -645,12 +671,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {rgb_to_xyz_8u, 0, rgb_to_xyz_16u, 0, 0, rgb_to_xyz_32f}, - {rgba_to_xyz_8u, 0, rgba_to_xyz_16u, 0, 0, rgba_to_xyz_32f} + {rgb_to_xyz_8u, 0, APPEND_16U(rgb_to_xyz), 0, 0, rgb_to_xyz_32f}, + {rgba_to_xyz_8u, 0, APPEND_16U(rgba_to_xyz), 0, 0, rgba_to_xyz_32f} }, { - {rgb_to_xyz4_8u, 0, rgb_to_xyz4_16u, 0, 0, rgb_to_xyz4_32f}, - {rgba_to_xyz4_8u, 0, rgba_to_xyz4_16u, 0, 0, rgba_to_xyz4_32f} + {rgb_to_xyz4_8u, 0, APPEND_16U(rgb_to_xyz4), 0, 0, rgb_to_xyz4_32f}, + {rgba_to_xyz4_8u, 0, APPEND_16U(rgba_to_xyz4), 0, 0, rgba_to_xyz4_32f} } }; @@ -659,6 +685,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -671,12 +698,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {bgr_to_xyz_8u, 0, bgr_to_xyz_16u, 0, 0, bgr_to_xyz_32f}, - {bgra_to_xyz_8u, 0, bgra_to_xyz_16u, 0, 0, bgra_to_xyz_32f} + {bgr_to_xyz_8u, 0, APPEND_16U(bgr_to_xyz), 0, 0, bgr_to_xyz_32f}, + {bgra_to_xyz_8u, 0, APPEND_16U(bgra_to_xyz), 0, 0, bgra_to_xyz_32f} }, { - {bgr_to_xyz4_8u, 0, bgr_to_xyz4_16u, 0, 0, bgr_to_xyz4_32f}, - {bgra_to_xyz4_8u, 0, bgra_to_xyz4_16u, 0, 0, bgra_to_xyz4_32f} + {bgr_to_xyz4_8u, 0, APPEND_16U(bgr_to_xyz4), 0, 0, bgr_to_xyz4_32f}, + {bgra_to_xyz4_8u, 0, APPEND_16U(bgra_to_xyz4), 0, 0, bgra_to_xyz4_32f} } }; @@ -685,6 +712,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -697,12 +725,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {xyz_to_rgb_8u, 0, xyz_to_rgb_16u, 0, 0, xyz_to_rgb_32f}, - {xyz4_to_rgb_8u, 0, xyz4_to_rgb_16u, 0, 0, xyz4_to_rgb_32f} + {xyz_to_rgb_8u, 0, APPEND_16U(xyz_to_rgb), 0, 0, xyz_to_rgb_32f}, + {xyz4_to_rgb_8u, 0, APPEND_16U(xyz4_to_rgb), 0, 0, xyz4_to_rgb_32f} }, { - {xyz_to_rgba_8u, 0, xyz_to_rgba_16u, 0, 0, xyz_to_rgba_32f}, - {xyz4_to_rgba_8u, 0, xyz4_to_rgba_16u, 0, 0, xyz4_to_rgba_32f} + {xyz_to_rgba_8u, 0, APPEND_16U(xyz_to_rgba), 0, 0, xyz_to_rgba_32f}, + {xyz4_to_rgba_8u, 0, APPEND_16U(xyz4_to_rgba), 0, 0, xyz4_to_rgba_32f} } }; @@ -711,6 +739,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); @@ -723,12 +752,12 @@ namespace static const gpu_func_t funcs[2][2][6] = { { - {xyz_to_bgr_8u, 0, xyz_to_bgr_16u, 0, 0, xyz_to_bgr_32f}, - {xyz4_to_bgr_8u, 0, xyz4_to_bgr_16u, 0, 0, xyz4_to_bgr_32f} + {xyz_to_bgr_8u, 0, APPEND_16U(xyz_to_bgr), 0, 0, xyz_to_bgr_32f}, + {xyz4_to_bgr_8u, 0, APPEND_16U(xyz4_to_bgr), 0, 0, xyz4_to_bgr_32f} }, { - {xyz_to_bgra_8u, 0, xyz_to_bgra_16u, 0, 0, xyz_to_bgra_32f}, - {xyz4_to_bgra_8u, 0, xyz4_to_bgra_16u, 0, 0, xyz4_to_bgra_32f} + {xyz_to_bgra_8u, 0, APPEND_16U(xyz_to_bgra), 0, 0, xyz_to_bgra_32f}, + {xyz4_to_bgra_8u, 0, APPEND_16U(xyz4_to_bgra), 0, 0, xyz4_to_bgra_32f} } }; @@ -737,6 +766,7 @@ namespace CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); CV_Assert(src.channels() == 3 || src.channels() == 4); CV_Assert(dcn == 3 || dcn == 4); + CV_Assert(funcs[dcn == 4][src.channels() == 4][src.depth()] != 0); dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); diff --git a/modules/gpu/src/cuda/bf_knnmatch.cu b/modules/gpu/src/cuda/bf_knnmatch.cu index 3e5bc741ff..640dafb816 100644 --- a/modules/gpu/src/cuda/bf_knnmatch.cu +++ b/modules/gpu/src/cuda/bf_knnmatch.cu @@ -1168,12 +1168,14 @@ namespace cv { namespace gpu { namespace device matchDispatcher< L1Dist >(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), k, WithOutMask(), trainIdx, distance, allDist, stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); //template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, @@ -1185,11 +1187,13 @@ namespace cv { namespace gpu { namespace device matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), k, WithOutMask(), trainIdx, distance, allDist, stream); } +#ifndef OPENCV_TINY_GPU_MODULE //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, @@ -1203,10 +1207,12 @@ namespace cv { namespace gpu { namespace device } template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream); +#endif template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, @@ -1218,12 +1224,14 @@ namespace cv { namespace gpu { namespace device match2Dispatcher< L1Dist >(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); +#endif template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, @@ -1235,12 +1243,14 @@ namespace cv { namespace gpu { namespace device match2Dispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, stream); } +#ifndef OPENCV_TINY_GPU_MODULE //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); +#endif template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, @@ -1252,11 +1262,13 @@ namespace cv { namespace gpu { namespace device match2Dispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); //template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream); +#endif } // namespace bf_knnmatch }}} // namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/src/cuda/bf_match.cu b/modules/gpu/src/cuda/bf_match.cu index c2ae48bb30..baaf979e5d 100644 --- a/modules/gpu/src/cuda/bf_match.cu +++ b/modules/gpu/src/cuda/bf_match.cu @@ -644,12 +644,14 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, @@ -669,11 +671,13 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, @@ -695,10 +699,12 @@ namespace cv { namespace gpu { namespace device } template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream); +#endif template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, @@ -718,12 +724,14 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, @@ -743,12 +751,14 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& maskCollection, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); +#endif template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, @@ -768,11 +778,13 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream); +#endif } // namespace bf_match }}} // namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/src/cuda/bf_radius_match.cu b/modules/gpu/src/cuda/bf_radius_match.cu index d83f9f7f96..8493b4e065 100644 --- a/modules/gpu/src/cuda/bf_radius_match.cu +++ b/modules/gpu/src/cuda/bf_radius_match.cu @@ -356,12 +356,14 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, @@ -381,11 +383,13 @@ namespace cv { namespace gpu { namespace device } } +#ifndef OPENCV_TINY_GPU_MODULE //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, @@ -407,10 +411,12 @@ namespace cv { namespace gpu { namespace device } template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#endif template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, @@ -421,12 +427,14 @@ namespace cv { namespace gpu { namespace device stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#endif template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, @@ -437,12 +445,14 @@ namespace cv { namespace gpu { namespace device stream); } +#ifndef OPENCV_TINY_GPU_MODULE //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#endif template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, @@ -453,11 +463,13 @@ namespace cv { namespace gpu { namespace device stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); //template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream); +#endif } // namespace bf_radius_match }}} // namespace cv { namespace gpu { namespace device diff --git a/modules/gpu/src/cuda/bilateral_filter.cu b/modules/gpu/src/cuda/bilateral_filter.cu index 15e72a8b75..69f58aabd9 100644 --- a/modules/gpu/src/cuda/bilateral_filter.cu +++ b/modules/gpu/src/cuda/bilateral_filter.cu @@ -149,6 +149,16 @@ namespace cv { namespace gpu { namespace device { typedef void (*caller_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static caller_t funcs[] = + { + bilateral_caller, + bilateral_caller, + 0, + 0, + 0, + }; +#else static caller_t funcs[] = { bilateral_caller, @@ -157,7 +167,13 @@ namespace cv { namespace gpu { namespace device bilateral_caller, bilateral_caller, }; - funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream); +#endif + + const caller_t caller = funcs[borderMode]; + if (!caller) + cv::gpu::error("Unsupported input parameters for bilateral_filter", __FILE__, __LINE__, ""); + + caller(src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream); } } }}} @@ -171,6 +187,7 @@ OCV_INSTANTIATE_BILATERAL_FILTER(uchar) OCV_INSTANTIATE_BILATERAL_FILTER(uchar3) OCV_INSTANTIATE_BILATERAL_FILTER(uchar4) +#ifndef OPENCV_TINY_GPU_MODULE //OCV_INSTANTIATE_BILATERAL_FILTER(schar) //OCV_INSTANTIATE_BILATERAL_FILTER(schar2) //OCV_INSTANTIATE_BILATERAL_FILTER(schar3) @@ -190,6 +207,7 @@ OCV_INSTANTIATE_BILATERAL_FILTER(ushort4) //OCV_INSTANTIATE_BILATERAL_FILTER(int2) //OCV_INSTANTIATE_BILATERAL_FILTER(int3) //OCV_INSTANTIATE_BILATERAL_FILTER(int4) +#endif OCV_INSTANTIATE_BILATERAL_FILTER(float) //OCV_INSTANTIATE_BILATERAL_FILTER(float2) diff --git a/modules/gpu/src/cuda/color.cu b/modules/gpu/src/cuda/color.cu index 5d8f6cbbb5..3ac0c111c8 100644 --- a/modules/gpu/src/cuda/color.cu +++ b/modules/gpu/src/cuda/color.cu @@ -235,10 +235,16 @@ namespace cv { namespace gpu { namespace device #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \ OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, name ## _traits) -#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(name) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _16u, name ## _traits) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits) +#ifdef OPENCV_TINY_GPU_MODULE + #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(name) \ + OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits) \ + OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits) +#else + #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(name) \ + OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits) \ + OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _16u, name ## _traits) \ + OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits) +#endif #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(name) \ OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits) \ diff --git a/modules/gpu/src/cuda/column_filter.10.cu b/modules/gpu/src/cuda/column_filter.10.cu index b71e25207e..81e4fe7a0a 100644 --- a/modules/gpu/src/cuda/column_filter.10.cu +++ b/modules/gpu/src/cuda/column_filter.10.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.11.cu b/modules/gpu/src/cuda/column_filter.11.cu index ccfbf8e773..34a065453b 100644 --- a/modules/gpu/src/cuda/column_filter.11.cu +++ b/modules/gpu/src/cuda/column_filter.11.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.12.cu b/modules/gpu/src/cuda/column_filter.12.cu index a38f93b531..bc0a45bc3f 100644 --- a/modules/gpu/src/cuda/column_filter.12.cu +++ b/modules/gpu/src/cuda/column_filter.12.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.13.cu b/modules/gpu/src/cuda/column_filter.13.cu index 40eec7a83f..b7facb6c03 100644 --- a/modules/gpu/src/cuda/column_filter.13.cu +++ b/modules/gpu/src/cuda/column_filter.13.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.14.cu b/modules/gpu/src/cuda/column_filter.14.cu index 08151ac6d0..6db983786b 100644 --- a/modules/gpu/src/cuda/column_filter.14.cu +++ b/modules/gpu/src/cuda/column_filter.14.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.3.cu b/modules/gpu/src/cuda/column_filter.3.cu index 7304565b96..339bdabc68 100644 --- a/modules/gpu/src/cuda/column_filter.3.cu +++ b/modules/gpu/src/cuda/column_filter.3.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.4.cu b/modules/gpu/src/cuda/column_filter.4.cu index 8c9db6985b..37f9bd718e 100644 --- a/modules/gpu/src/cuda/column_filter.4.cu +++ b/modules/gpu/src/cuda/column_filter.4.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.8.cu b/modules/gpu/src/cuda/column_filter.8.cu index 0a63a1dd43..b4ad5bd02e 100644 --- a/modules/gpu/src/cuda/column_filter.8.cu +++ b/modules/gpu/src/cuda/column_filter.8.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.9.cu b/modules/gpu/src/cuda/column_filter.9.cu index 758d9289d9..da64c32225 100644 --- a/modules/gpu/src/cuda/column_filter.9.cu +++ b/modules/gpu/src/cuda/column_filter.9.cu @@ -44,9 +44,13 @@ #include "column_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/column_filter.h b/modules/gpu/src/cuda/column_filter.h index 46e3583153..139a6ef20a 100644 --- a/modules/gpu/src/cuda/column_filter.h +++ b/modules/gpu/src/cuda/column_filter.h @@ -183,6 +183,186 @@ namespace filter { typedef void (*caller_t)(PtrStepSz src, PtrStepSz dst, int anchor, int cc, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[5][33] = + { + { + 0, + 0, + 0, + column_filter::caller< 3, T, D, BrdColReflect101>, + 0, + column_filter::caller< 5, T, D, BrdColReflect101>, + 0, + column_filter::caller< 7, T, D, BrdColReflect101>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + column_filter::caller< 3, T, D, BrdColReplicate>, + 0, + column_filter::caller< 5, T, D, BrdColReplicate>, + 0, + column_filter::caller< 7, T, D, BrdColReplicate>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + column_filter::caller< 3, T, D, BrdColConstant>, + 0, + column_filter::caller< 5, T, D, BrdColConstant>, + 0, + column_filter::caller< 7, T, D, BrdColConstant>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + column_filter::caller< 3, T, D, BrdColReflect>, + 0, + column_filter::caller< 5, T, D, BrdColReflect>, + 0, + column_filter::caller< 7, T, D, BrdColReflect>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + } + }; +#else static const caller_t callers[5][33] = { { @@ -361,12 +541,17 @@ namespace filter column_filter::caller<32, T, D, BrdColWrap> } }; +#endif + + const caller_t caller = callers[brd_type][ksize]; + if (!caller) + cv::gpu::error("Unsupported input parameters for column_filter", __FILE__, __LINE__, ""); if (stream == 0) cudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); else cudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); - callers[brd_type][ksize]((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); + caller((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); } } diff --git a/modules/gpu/src/cuda/copy_make_border.cu b/modules/gpu/src/cuda/copy_make_border.cu index 5553810ebd..ca5a4f779e 100644 --- a/modules/gpu/src/cuda/copy_make_border.cu +++ b/modules/gpu/src/cuda/copy_make_border.cu @@ -90,10 +90,18 @@ namespace cv { namespace gpu { namespace device CopyMakeBorderDispatcher::call, CopyMakeBorderDispatcher::call, CopyMakeBorderDispatcher::call, + #ifdef OPENCV_TINY_GPU_MODULE + 0, + #else CopyMakeBorderDispatcher::call + #endif }; - callers[borderMode](PtrStepSz(src), PtrStepSz(dst), top, left, borderValue, stream); + const caller_t caller = callers[borderMode]; + if (!caller) + cv::gpu::error("Unsupported input parameters for copyMakeBorder", __FILE__, __LINE__, ""); + + caller(PtrStepSz(src), PtrStepSz(dst), top, left, borderValue, stream); } template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream); @@ -101,6 +109,7 @@ namespace cv { namespace gpu { namespace device template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream); template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream); //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream); //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream); @@ -120,6 +129,7 @@ namespace cv { namespace gpu { namespace device //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream); //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream); //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream); +#endif template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream); //template void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream); diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index f606f0c8c1..bca89ad643 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -234,6 +234,7 @@ namespace arithm } template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -278,7 +279,9 @@ namespace arithm //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -288,6 +291,7 @@ namespace arithm //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -329,6 +333,7 @@ namespace arithm } template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -373,7 +378,9 @@ namespace arithm //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -383,6 +390,7 @@ namespace arithm //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -469,6 +477,7 @@ namespace arithm } template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -513,7 +522,9 @@ namespace arithm //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -523,6 +534,7 @@ namespace arithm //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -542,6 +554,7 @@ namespace arithm } template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -586,7 +599,9 @@ namespace arithm //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -596,6 +611,7 @@ namespace arithm //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -700,6 +716,7 @@ namespace arithm } template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -744,7 +761,9 @@ namespace arithm //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -754,6 +773,7 @@ namespace arithm //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -791,6 +811,7 @@ namespace arithm } template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -835,7 +856,9 @@ namespace arithm //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -845,6 +868,7 @@ namespace arithm //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -968,6 +992,7 @@ namespace arithm } template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -1012,7 +1037,9 @@ namespace arithm //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); @@ -1022,6 +1049,7 @@ namespace arithm //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1037,6 +1065,7 @@ namespace arithm } template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1081,7 +1110,9 @@ namespace arithm //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1091,6 +1122,7 @@ namespace arithm //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1128,6 +1160,7 @@ namespace arithm } template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1172,7 +1205,9 @@ namespace arithm //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); @@ -1182,6 +1217,7 @@ namespace arithm //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1278,12 +1314,16 @@ namespace arithm } template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1323,12 +1363,16 @@ namespace arithm } template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1349,13 +1393,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, abs_func(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1390,13 +1438,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, Sqr(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1417,13 +1469,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, sqrt_func(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1444,13 +1500,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, log_func(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -1486,13 +1546,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, Exp(), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////////////////// @@ -1620,36 +1684,52 @@ namespace arithm } template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////////////////// @@ -1824,52 +1904,76 @@ namespace arithm } template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////////////////// @@ -1981,19 +2085,25 @@ namespace arithm } template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2067,18 +2177,27 @@ namespace arithm } template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream) { transform((PtrStepSz) src1, (PtrStepSz) dst, cv::gpu::device::bind2nd(minimum(), src2), WithOutMask(), stream); } +#ifdef OPENCV_TINY_GPU_MODULE + template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); + template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); + template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#else template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); @@ -2086,6 +2205,7 @@ namespace arithm template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2159,12 +2279,16 @@ namespace arithm } template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream) { @@ -2172,12 +2296,16 @@ namespace arithm } template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2233,12 +2361,16 @@ namespace arithm } template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#endif template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2312,13 +2444,17 @@ namespace arithm transform((PtrStepSz) src, (PtrStepSz) dst, PowOp(power), WithOutMask(), stream); } +#ifndef OPENCV_TINY_GPU_MODULE template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#endif template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#endif } ////////////////////////////////////////////////////////////////////////// @@ -2397,6 +2533,7 @@ namespace arithm } template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2451,9 +2588,10 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif - +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2501,9 +2639,10 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif - +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2543,9 +2682,10 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif - +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2577,9 +2717,10 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif - +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2603,15 +2744,18 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif - +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2621,9 +2765,11 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif +#ifndef OPENCV_TINY_GPU_MODULE template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); @@ -2631,6 +2777,7 @@ namespace arithm template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); template void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#endif } #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index 067dfaf640..2a1bca4ad9 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -985,6 +985,16 @@ namespace cv { namespace gpu { namespace device int borderMode, const float* borderValue, cudaStream_t stream) { typedef void (*func_t)(const PtrStepSz srcWhole, int xoff, int yoff, PtrStepSz dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + Filter2DCaller::call, + Filter2DCaller::call, + Filter2DCaller::call, + Filter2DCaller::call, + 0 + }; +#else static const func_t funcs[] = { Filter2DCaller::call, @@ -993,19 +1003,26 @@ namespace cv { namespace gpu { namespace device Filter2DCaller::call, Filter2DCaller::call }; +#endif + + const func_t func = funcs[borderMode]; + if (!func) + cv::gpu::error("Unsupported input parameters for filter2D", __FILE__, __LINE__, ""); if (stream == 0) cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); else cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); - funcs[borderMode](static_cast< PtrStepSz >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream); + func(static_cast< PtrStepSz >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream); } template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); +#endif template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); } // namespace imgproc diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu index 745daca1db..15d6612832 100644 --- a/modules/gpu/src/cuda/matrix_reductions.cu +++ b/modules/gpu/src/cuda/matrix_reductions.cu @@ -462,6 +462,7 @@ namespace sum } template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -485,8 +486,10 @@ namespace sum template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -495,6 +498,7 @@ namespace sum template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask) @@ -504,6 +508,7 @@ namespace sum } template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -527,8 +532,10 @@ namespace sum template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -537,6 +544,7 @@ namespace sum template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template struct Sqr : unary_function { @@ -553,6 +561,7 @@ namespace sum } template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -576,8 +585,10 @@ namespace sum template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -586,6 +597,7 @@ namespace sum template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif } ///////////////////////////////////////////////////////////// @@ -773,12 +785,16 @@ namespace minMax } template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#endif template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#endif } ///////////////////////////////////////////////////////////// @@ -955,12 +971,16 @@ namespace minMaxLoc } template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#endif template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#endif } ///////////////////////////////////////////////////////////// @@ -1079,12 +1099,16 @@ namespace countNonZero } template int run(const PtrStepSzb src, PtrStep buf); +#ifndef OPENCV_TINY_GPU_MODULE template int run(const PtrStepSzb src, PtrStep buf); template int run(const PtrStepSzb src, PtrStep buf); template int run(const PtrStepSzb src, PtrStep buf); template int run(const PtrStepSzb src, PtrStep buf); +#endif template int run(const PtrStepSzb src, PtrStep buf); +#ifndef OPENCV_TINY_GPU_MODULE template int run(const PtrStepSzb src, PtrStep buf); +#endif } ////////////////////////////////////////////////////////////////////////////// @@ -1257,6 +1281,11 @@ namespace reduce funcs[op]((PtrStepSz) src, (D*) dst, stream); } +#ifdef OPENCV_TINY_GPU_MODULE + template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); + template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); + template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); +#else template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); @@ -1280,6 +1309,7 @@ namespace reduce template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); +#endif /////////////////////////////////////////////////////////// @@ -1338,6 +1368,11 @@ namespace reduce funcs[cn][op](src, dst, stream); } +#ifdef OPENCV_TINY_GPU_MODULE + template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); + template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); + template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); +#else template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); @@ -1361,6 +1396,7 @@ namespace reduce template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); +#endif } #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/pyr_down.cu b/modules/gpu/src/cuda/pyr_down.cu index eac7928826..af0e18d888 100644 --- a/modules/gpu/src/cuda/pyr_down.cu +++ b/modules/gpu/src/cuda/pyr_down.cu @@ -197,6 +197,7 @@ namespace cv { namespace gpu { namespace device template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); @@ -216,6 +217,7 @@ namespace cv { namespace gpu { namespace device //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); diff --git a/modules/gpu/src/cuda/pyr_up.cu b/modules/gpu/src/cuda/pyr_up.cu index b14d124e7c..ffb6276622 100644 --- a/modules/gpu/src/cuda/pyr_up.cu +++ b/modules/gpu/src/cuda/pyr_up.cu @@ -166,6 +166,7 @@ namespace cv { namespace gpu { namespace device template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); @@ -185,6 +186,7 @@ namespace cv { namespace gpu { namespace device //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#endif template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); diff --git a/modules/gpu/src/cuda/remap.cu b/modules/gpu/src/cuda/remap.cu index f40ada0302..77bf976140 100644 --- a/modules/gpu/src/cuda/remap.cu +++ b/modules/gpu/src/cuda/remap.cu @@ -209,6 +209,7 @@ namespace cv { namespace gpu { namespace device typedef void (*caller_t)(PtrStepSz src, PtrStepSz srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSz dst, const float* borderValue, cudaStream_t stream, bool cc20); +#ifdef OPENCV_TINY_GPU_MODULE static const caller_t callers[3][5] = { { @@ -216,25 +217,55 @@ namespace cv { namespace gpu { namespace device RemapDispatcher::call, RemapDispatcher::call, RemapDispatcher::call, - RemapDispatcher::call + 0/*RemapDispatcher::call*/, }, { RemapDispatcher::call, RemapDispatcher::call, RemapDispatcher::call, RemapDispatcher::call, - RemapDispatcher::call + 0/*RemapDispatcher::call*/, + }, + { + 0/*RemapDispatcher::call*/, + 0/*RemapDispatcher::call*/, + 0/*RemapDispatcher::call*/, + 0/*RemapDispatcher::call*/, + 0/*RemapDispatcher::call*/, + } + }; +#else + static const caller_t callers[3][5] = + { + { + RemapDispatcher::call, + RemapDispatcher::call, + RemapDispatcher::call, + RemapDispatcher::call, + RemapDispatcher::call, + }, + { + RemapDispatcher::call, + RemapDispatcher::call, + RemapDispatcher::call, + RemapDispatcher::call, + RemapDispatcher::call, }, { RemapDispatcher::call, RemapDispatcher::call, RemapDispatcher::call, RemapDispatcher::call, - RemapDispatcher::call + RemapDispatcher::call, } }; +#endif - callers[interpolation][borderMode](static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(srcWhole), xoff, yoff, xmap, ymap, + const caller_t caller = callers[interpolation][borderMode]; + if (!caller) + cv::gpu::error("Unsupported input parameters for remap", __FILE__, __LINE__, ""); + + caller(static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(srcWhole), xoff, yoff, xmap, ymap, static_cast< PtrStepSz >(dst), borderValue, stream, cc20); } @@ -243,6 +274,7 @@ namespace cv { namespace gpu { namespace device template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#ifndef OPENCV_TINY_GPU_MODULE //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); @@ -262,6 +294,7 @@ namespace cv { namespace gpu { namespace device //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#endif template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu index 1998b3b07c..11a90ab243 100644 --- a/modules/gpu/src/cuda/resize.cu +++ b/modules/gpu/src/cuda/resize.cu @@ -342,11 +342,13 @@ namespace cv { namespace gpu { namespace device template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; +#ifndef OPENCV_TINY_GPU_MODULE template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; +#endif template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; template <> struct ResizeNearestDispatcher : SelectImplForNearest {}; @@ -380,11 +382,13 @@ namespace cv { namespace gpu { namespace device template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; +#ifndef OPENCV_TINY_GPU_MODULE template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; +#endif template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; template <> struct ResizeLinearDispatcher : SelectImplForLinear {}; @@ -410,6 +414,7 @@ namespace cv { namespace gpu { namespace device } }; +#ifndef OPENCV_TINY_GPU_MODULE template <> struct ResizeCubicDispatcher : SelectImplForCubic {}; template <> struct ResizeCubicDispatcher : SelectImplForCubic {}; @@ -421,6 +426,7 @@ namespace cv { namespace gpu { namespace device template <> struct ResizeCubicDispatcher : SelectImplForCubic {}; template <> struct ResizeCubicDispatcher : SelectImplForCubic {}; +#endif // ResizeAreaDispatcher @@ -467,7 +473,11 @@ namespace cv { namespace gpu { namespace device { ResizeNearestDispatcher::call, ResizeLinearDispatcher::call, +#ifdef OPENCV_TINY_GPU_MODULE + 0, +#else ResizeCubicDispatcher::call, +#endif ResizeAreaDispatcher::call }; @@ -475,13 +485,18 @@ namespace cv { namespace gpu { namespace device if (interpolation == 3 && (fx <= 1.f || fy <= 1.f)) interpolation = 1; - funcs[interpolation](static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(srcWhole), yoff, xoff, static_cast< PtrStepSz >(dst), fy, fx, stream); + const func_t func = funcs[interpolation]; + if (!func) + cv::gpu::error("Unsupported input parameters for resize", __FILE__, __LINE__, ""); + + func(static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(srcWhole), yoff, xoff, static_cast< PtrStepSz >(dst), fy, fx, stream); } template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); +#ifndef OPENCV_TINY_GPU_MODULE template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); @@ -489,6 +504,7 @@ namespace cv { namespace gpu { namespace device template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); +#endif template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); template void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); diff --git a/modules/gpu/src/cuda/row_filter.10.cu b/modules/gpu/src/cuda/row_filter.10.cu index 7d93ee31ac..c910270a64 100644 --- a/modules/gpu/src/cuda/row_filter.10.cu +++ b/modules/gpu/src/cuda/row_filter.10.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.11.cu b/modules/gpu/src/cuda/row_filter.11.cu index 31bccc48b6..c5e1fbcd96 100644 --- a/modules/gpu/src/cuda/row_filter.11.cu +++ b/modules/gpu/src/cuda/row_filter.11.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.12.cu b/modules/gpu/src/cuda/row_filter.12.cu index 7be543f6b2..017aff8e7f 100644 --- a/modules/gpu/src/cuda/row_filter.12.cu +++ b/modules/gpu/src/cuda/row_filter.12.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.13.cu b/modules/gpu/src/cuda/row_filter.13.cu index bd700b1bb2..676f5ae826 100644 --- a/modules/gpu/src/cuda/row_filter.13.cu +++ b/modules/gpu/src/cuda/row_filter.13.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.14.cu b/modules/gpu/src/cuda/row_filter.14.cu index 97df2f128a..e8d0ec501a 100644 --- a/modules/gpu/src/cuda/row_filter.14.cu +++ b/modules/gpu/src/cuda/row_filter.14.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.3.cu b/modules/gpu/src/cuda/row_filter.3.cu index fe84666954..57013781c5 100644 --- a/modules/gpu/src/cuda/row_filter.3.cu +++ b/modules/gpu/src/cuda/row_filter.3.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.4.cu b/modules/gpu/src/cuda/row_filter.4.cu index 050f7af04e..277ab7f87d 100644 --- a/modules/gpu/src/cuda/row_filter.4.cu +++ b/modules/gpu/src/cuda/row_filter.4.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.8.cu b/modules/gpu/src/cuda/row_filter.8.cu index b899e87a7a..e9dfd7f4a3 100644 --- a/modules/gpu/src/cuda/row_filter.8.cu +++ b/modules/gpu/src/cuda/row_filter.8.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.9.cu b/modules/gpu/src/cuda/row_filter.9.cu index 516dd8fe7c..eaad54d343 100644 --- a/modules/gpu/src/cuda/row_filter.9.cu +++ b/modules/gpu/src/cuda/row_filter.9.cu @@ -44,9 +44,13 @@ #include "row_filter.h" +#ifndef OPENCV_TINY_GPU_MODULE + namespace filter { template void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } +#endif + #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/row_filter.h b/modules/gpu/src/cuda/row_filter.h index 933f900293..9bfaf7f3d8 100644 --- a/modules/gpu/src/cuda/row_filter.h +++ b/modules/gpu/src/cuda/row_filter.h @@ -182,6 +182,186 @@ namespace filter { typedef void (*caller_t)(PtrStepSz src, PtrStepSz dst, int anchor, int cc, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[5][33] = + { + { + 0, + 0, + 0, + row_filter::caller< 3, T, D, BrdRowReflect101>, + 0, + row_filter::caller< 5, T, D, BrdRowReflect101>, + 0, + row_filter::caller< 7, T, D, BrdRowReflect101>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + row_filter::caller< 3, T, D, BrdRowReplicate>, + 0, + row_filter::caller< 5, T, D, BrdRowReplicate>, + 0, + row_filter::caller< 7, T, D, BrdRowReplicate>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + row_filter::caller< 3, T, D, BrdRowConstant>, + 0, + row_filter::caller< 5, T, D, BrdRowConstant>, + 0, + row_filter::caller< 7, T, D, BrdRowConstant>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + row_filter::caller< 3, T, D, BrdRowReflect>, + 0, + row_filter::caller< 5, T, D, BrdRowReflect>, + 0, + row_filter::caller< 7, T, D, BrdRowReflect>, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + }, + { + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + } + }; +#else static const caller_t callers[5][33] = { { @@ -360,12 +540,17 @@ namespace filter row_filter::caller<32, T, D, BrdRowWrap> } }; +#endif + + const caller_t caller = callers[brd_type][ksize]; + if (!caller) + cv::gpu::error("Unsupported input parameters for row_filter", __FILE__, __LINE__, ""); if (stream == 0) cudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); else cudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); - callers[brd_type][ksize]((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); + caller((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); } } diff --git a/modules/gpu/src/cuda/stereobm.cu b/modules/gpu/src/cuda/stereobm.cu index ad256357b8..6082e6c3cc 100644 --- a/modules/gpu/src/cuda/stereobm.cu +++ b/modules/gpu/src/cuda/stereobm.cu @@ -330,24 +330,55 @@ namespace cv { namespace gpu { namespace device typedef void (*kernel_caller_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, cudaStream_t & stream); +#ifdef OPENCV_TINY_GPU_MODULE + const static kernel_caller_t callers[] = + { + 0, + kernel_caller< 1>, + kernel_caller< 2>, + kernel_caller< 3>, + kernel_caller< 4>, + kernel_caller< 5>, + 0/*kernel_caller< 6>*/, + 0/*kernel_caller< 7>*/, + 0/*kernel_caller< 8>*/, + kernel_caller< 9>, + 0/*kernel_caller<10>*/, + 0/*kernel_caller<11>*/, + 0/*kernel_caller<12>*/, + 0/*kernel_caller<13>*/, + 0/*kernel_caller<14>*/, + kernel_caller<15>, + 0/*kernel_caller<16>*/, + 0/*kernel_caller<17>*/, + 0/*kernel_caller<18>*/, + 0/*kernel_caller<19>*/, + 0/*kernel_caller<20>*/, + 0/*kernel_caller<21>*/, + 0/*kernel_caller<22>*/, + 0/*kernel_caller<23>*/, + 0/*kernel_caller<24>*/, + 0/*kernel_caller<25>*/, + }; +#else const static kernel_caller_t callers[] = { 0, kernel_caller< 1>, kernel_caller< 2>, kernel_caller< 3>, kernel_caller< 4>, kernel_caller< 5>, kernel_caller< 6>, kernel_caller< 7>, kernel_caller< 8>, kernel_caller< 9>, kernel_caller<10>, - kernel_caller<11>, kernel_caller<12>, kernel_caller<13>, kernel_caller<15>, kernel_caller<15>, + kernel_caller<11>, kernel_caller<12>, kernel_caller<13>, kernel_caller<14>, kernel_caller<15>, kernel_caller<16>, kernel_caller<17>, kernel_caller<18>, kernel_caller<19>, kernel_caller<20>, kernel_caller<21>, kernel_caller<22>, kernel_caller<23>, kernel_caller<24>, kernel_caller<25> - - //0,0,0, 0,0,0, 0,0,kernel_caller<9> }; +#endif + const int calles_num = sizeof(callers)/sizeof(callers[0]); void stereoBM_GPU(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, int winsz, const PtrStepSz& minSSD_buf, cudaStream_t& stream) { int winsz2 = winsz >> 1; - if (winsz2 == 0 || winsz2 >= calles_num) + if (winsz2 == 0 || winsz2 >= calles_num || callers[winsz2] == 0) cv::gpu::error("Unsupported window size", __FILE__, __LINE__, "stereoBM_GPU"); //cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) ); diff --git a/modules/gpu/src/cuda/warp.cu b/modules/gpu/src/cuda/warp.cu index 49130d9405..ad867601ed 100644 --- a/modules/gpu/src/cuda/warp.cu +++ b/modules/gpu/src/cuda/warp.cu @@ -278,6 +278,7 @@ namespace cv { namespace gpu { namespace device { typedef void (*func_t)(PtrStepSz src, PtrStepSz srcWhole, int xoff, int yoff, PtrStepSz dst, const float* borderValue, cudaStream_t stream, bool cc20); +#ifdef OPENCV_TINY_GPU_MODULE static const func_t funcs[3][5] = { { @@ -285,25 +286,55 @@ namespace cv { namespace gpu { namespace device WarpDispatcher::call, WarpDispatcher::call, WarpDispatcher::call, - WarpDispatcher::call + 0/*WarpDispatcher::call*/, }, { WarpDispatcher::call, WarpDispatcher::call, WarpDispatcher::call, WarpDispatcher::call, - WarpDispatcher::call + 0/*WarpDispatcher::call*/, + }, + { + 0/*WarpDispatcher::call*/, + 0/*WarpDispatcher::call*/, + 0/*WarpDispatcher::call*/, + 0/*WarpDispatcher::call*/, + 0/*WarpDispatcher::call*/, + } + }; +#else + static const func_t funcs[3][5] = + { + { + WarpDispatcher::call, + WarpDispatcher::call, + WarpDispatcher::call, + WarpDispatcher::call, + WarpDispatcher::call, + }, + { + WarpDispatcher::call, + WarpDispatcher::call, + WarpDispatcher::call, + WarpDispatcher::call, + WarpDispatcher::call, }, { WarpDispatcher::call, WarpDispatcher::call, WarpDispatcher::call, WarpDispatcher::call, - WarpDispatcher::call + WarpDispatcher::call, } }; +#endif - funcs[interpolation][borderMode](static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(srcWhole), xoff, yoff, + const func_t func = funcs[interpolation][borderMode]; + if (!func) + cv::gpu::error("Unsupported input parameters for warp_caller", __FILE__, __LINE__, ""); + + func(static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(srcWhole), xoff, yoff, static_cast< PtrStepSz >(dst), borderValue, stream, cc20); } @@ -320,6 +351,7 @@ namespace cv { namespace gpu { namespace device template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#ifndef OPENCV_TINY_GPU_MODULE //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); @@ -339,6 +371,7 @@ namespace cv { namespace gpu { namespace device //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#endif template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); @@ -358,6 +391,7 @@ namespace cv { namespace gpu { namespace device template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#ifndef OPENCV_TINY_GPU_MODULE //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); @@ -377,6 +411,7 @@ namespace cv { namespace gpu { namespace device //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#endif template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); //template void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); diff --git a/modules/gpu/src/cvt_color_internal.h b/modules/gpu/src/cvt_color_internal.h index 1b7c68f35f..f108da827f 100644 --- a/modules/gpu/src/cvt_color_internal.h +++ b/modules/gpu/src/cvt_color_internal.h @@ -48,10 +48,16 @@ namespace cv { namespace gpu { namespace device #define OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name) \ void name(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -#define OPENCV_GPU_DECLARE_CVTCOLOR_ALL(name) \ - OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \ - OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _16u) \ - OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _32f) +#ifdef OPENCV_TINY_GPU_MODULE + #define OPENCV_GPU_DECLARE_CVTCOLOR_ALL(name) \ + OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \ + OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _32f) +#else + #define OPENCV_GPU_DECLARE_CVTCOLOR_ALL(name) \ + OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \ + OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _16u) \ + OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _32f) +#endif #define OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(name) \ OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \ diff --git a/modules/gpu/src/denoising.cpp b/modules/gpu/src/denoising.cpp index 50fab0cc2e..3eb74a92e6 100644 --- a/modules/gpu/src/denoising.cpp +++ b/modules/gpu/src/denoising.cpp @@ -77,6 +77,17 @@ void cv::gpu::bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, f typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t s); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {bilateral_filter_gpu , 0 /*bilateral_filter_gpu*/ , bilateral_filter_gpu , bilateral_filter_gpu }, + {0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ }, + {0 /*bilateral_filter_gpu*/, 0 /*bilateral_filter_gpu*/, 0 /*bilateral_filter_gpu*/, 0 /*bilateral_filter_gpu*/}, + {0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ }, + {0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ }, + {bilateral_filter_gpu , 0 /*bilateral_filter_gpu*/ , bilateral_filter_gpu , bilateral_filter_gpu } + }; +#else static const func_t funcs[6][4] = { {bilateral_filter_gpu , 0 /*bilateral_filter_gpu*/ , bilateral_filter_gpu , bilateral_filter_gpu }, @@ -86,6 +97,7 @@ void cv::gpu::bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, f {0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ , 0 /*bilateral_filter_gpu*/ }, {bilateral_filter_gpu , 0 /*bilateral_filter_gpu*/ , bilateral_filter_gpu , bilateral_filter_gpu } }; +#endif sigma_color = (sigma_color <= 0 ) ? 1 : sigma_color; sigma_spatial = (sigma_spatial <= 0 ) ? 1 : sigma_spatial; diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index 354d614d42..780745d733 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -275,6 +275,75 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + addMat, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + addMat, + 0/*addMat*/, + }, + { + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0 /*addMat*/, + 0/*addMat*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -284,7 +353,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat, addMat, addMat, - addMat + addMat, }, { addMat, @@ -293,7 +362,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat, addMat, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -302,7 +371,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat, addMat, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -311,7 +380,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu addMat, addMat, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -320,7 +389,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu 0 /*addMat*/, addMat, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -329,7 +398,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu 0 /*addMat*/, 0 /*addMat*/, addMat, - addMat + addMat, }, { 0 /*addMat*/, @@ -338,9 +407,10 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu 0 /*addMat*/, 0 /*addMat*/, 0 /*addMat*/, - addMat + addMat, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -421,6 +491,75 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + addScalar, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + addScalar, + 0/*addScalar*/, + }, + { + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0 /*addScalar*/, + 0/*addScalar*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -430,7 +569,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar, addScalar, addScalar, - addScalar + addScalar, }, { addScalar, @@ -439,7 +578,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar, addScalar, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -448,7 +587,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar, addScalar, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -457,7 +596,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat addScalar, addScalar, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -466,7 +605,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat 0 /*addScalar*/, addScalar, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -475,7 +614,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat 0 /*addScalar*/, 0 /*addScalar*/, addScalar, - addScalar + addScalar, }, { 0 /*addScalar*/, @@ -484,9 +623,10 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat 0 /*addScalar*/, 0 /*addScalar*/, 0 /*addScalar*/, - addScalar + addScalar, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -555,6 +695,75 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + subMat, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + subMat, + 0/*subMat*/, + }, + { + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0 /*subMat*/, + 0/*subMat*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -564,7 +773,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat, subMat, subMat, - subMat + subMat, }, { subMat, @@ -573,7 +782,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat, subMat, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -582,7 +791,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat, subMat, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -591,7 +800,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons subMat, subMat, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -600,7 +809,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons 0 /*subMat*/, subMat, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -609,7 +818,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons 0 /*subMat*/, 0 /*subMat*/, subMat, - subMat + subMat, }, { 0 /*subMat*/, @@ -618,9 +827,10 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons 0 /*subMat*/, 0 /*subMat*/, 0 /*subMat*/, - subMat + subMat, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -701,6 +911,75 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + subScalar, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + subScalar, + 0/*subScalar*/, + }, + { + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0 /*subScalar*/, + 0/*subScalar*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -710,7 +989,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar, subScalar, subScalar, - subScalar + subScalar, }, { subScalar, @@ -719,7 +998,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar, subScalar, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -728,7 +1007,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar, subScalar, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -737,7 +1016,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G subScalar, subScalar, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -746,7 +1025,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G 0 /*subScalar*/, subScalar, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -755,7 +1034,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G 0 /*subScalar*/, 0 /*subScalar*/, subScalar, - subScalar + subScalar, }, { 0 /*subScalar*/, @@ -764,9 +1043,10 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G 0 /*subScalar*/, 0 /*subScalar*/, 0 /*subScalar*/, - subScalar + subScalar, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -856,6 +1136,75 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub else { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + mulMat, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + mulMat, + 0/*mulMat*/, + }, + { + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0 /*mulMat*/, + 0/*mulMat*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -865,7 +1214,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat, mulMat, mulMat, - mulMat + mulMat, }, { mulMat, @@ -874,7 +1223,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat, mulMat, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -883,7 +1232,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat, mulMat, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -892,7 +1241,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub mulMat, mulMat, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -901,7 +1250,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub 0 /*mulMat*/, mulMat, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -910,7 +1259,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub 0 /*mulMat*/, 0 /*mulMat*/, mulMat, - mulMat + mulMat, }, { 0 /*mulMat*/, @@ -919,9 +1268,10 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub 0 /*mulMat*/, 0 /*mulMat*/, 0 /*mulMat*/, - mulMat + mulMat, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -965,6 +1315,75 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + mulScalar, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + mulScalar, + 0/*mulScalar*/, + }, + { + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0 /*mulScalar*/, + 0/*mulScalar*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -974,7 +1393,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar, mulScalar, mulScalar, - mulScalar + mulScalar, }, { mulScalar, @@ -983,7 +1402,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar, mulScalar, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -992,7 +1411,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar, mulScalar, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -1001,7 +1420,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double mulScalar, mulScalar, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -1010,7 +1429,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double 0 /*mulScalar*/, mulScalar, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -1019,7 +1438,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double 0 /*mulScalar*/, 0 /*mulScalar*/, mulScalar, - mulScalar + mulScalar, }, { 0 /*mulScalar*/, @@ -1028,9 +1447,10 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double 0 /*mulScalar*/, 0 /*mulScalar*/, 0 /*mulScalar*/, - mulScalar + mulScalar, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -1121,6 +1541,75 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double else { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + divMat, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + divMat, + 0/*divMat*/, + }, + { + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0 /*divMat*/, + 0/*divMat*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -1130,7 +1619,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat, divMat, divMat, - divMat + divMat, }, { divMat, @@ -1139,7 +1628,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat, divMat, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1148,7 +1637,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat, divMat, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1157,7 +1646,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double divMat, divMat, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1166,7 +1655,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double 0 /*divMat*/, divMat, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1175,7 +1664,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double 0 /*divMat*/, 0 /*divMat*/, divMat, - divMat + divMat, }, { 0 /*divMat*/, @@ -1184,9 +1673,10 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double 0 /*divMat*/, 0 /*divMat*/, 0 /*divMat*/, - divMat + divMat, } }; +#endif if (dtype < 0) dtype = src1.depth(); @@ -1230,6 +1720,75 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + divScalar, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + divScalar, + 0/*divScalar*/, + }, + { + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0 /*divScalar*/, + 0/*divScalar*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -1239,7 +1798,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar, divScalar, divScalar, - divScalar + divScalar, }, { divScalar, @@ -1248,7 +1807,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar, divScalar, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1257,7 +1816,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar, divScalar, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1266,7 +1825,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc divScalar, divScalar, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1275,7 +1834,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc 0 /*divScalar*/, divScalar, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1284,7 +1843,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc 0 /*divScalar*/, 0 /*divScalar*/, divScalar, - divScalar + divScalar, }, { 0 /*divScalar*/, @@ -1293,9 +1852,10 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc 0 /*divScalar*/, 0 /*divScalar*/, 0 /*divScalar*/, - divScalar + divScalar, } }; +#endif typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = @@ -1359,6 +1919,75 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + divInv, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + divInv, + 0/*divInv*/, + }, + { + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0 /*divInv*/, + 0/*divInv*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -1368,7 +1997,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv, divInv, divInv, - divInv + divInv, }, { divInv, @@ -1377,7 +2006,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv, divInv, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1386,7 +2015,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv, divInv, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1395,7 +2024,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St divInv, divInv, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1404,7 +2033,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St 0 /*divInv*/, divInv, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1413,7 +2042,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St 0 /*divInv*/, 0 /*divInv*/, divInv, - divInv + divInv, }, { 0 /*divInv*/, @@ -1422,9 +2051,10 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St 0 /*divInv*/, 0 /*divInv*/, 0 /*divInv*/, - divInv + divInv, } }; +#endif if (dtype < 0) dtype = src.depth(); @@ -1471,6 +2101,19 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + absDiffMat, + 0/*absDiffMat*/, + 0/*absDiffMat*/, + 0/*absDiffMat*/, + 0/*absDiffMat*/, + absDiffMat, + 0/*absDiffMat*/, + }; +#else static const func_t funcs[] = { absDiffMat, @@ -1479,8 +2122,9 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea absDiffMat, absDiffMat, absDiffMat, - absDiffMat + absDiffMat, }; +#endif const int depth = src1.depth(); const int cn = src1.channels(); @@ -1556,6 +2200,19 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + absDiffScalar, + 0/*absDiffScalar*/, + 0/*absDiffScalar*/, + 0/*absDiffScalar*/, + 0/*absDiffScalar*/, + absDiffScalar, + 0/*absDiffScalar*/, + }; +#else static const func_t funcs[] = { absDiffScalar, @@ -1564,8 +2221,9 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea absDiffScalar, absDiffScalar, absDiffScalar, - absDiffScalar + absDiffScalar, }; +#endif const int depth = src1.depth(); @@ -1578,9 +2236,13 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src1.size(), src1.type()); - funcs[depth](src1, src2.val[0], dst, StreamAccessor::getStream(stream)); + func(src1, src2.val[0], dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1597,6 +2259,19 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*absMat*/, + 0/*absMat*/, + 0/*absMat*/, + 0/*absMat*/, + 0/*absMat*/, + absMat, + 0/*absMat*/, + }; +#else static const func_t funcs[] = { absMat, @@ -1605,8 +2280,9 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) absMat, absMat, absMat, - absMat + absMat, }; +#endif const int depth = src.depth(); @@ -1619,9 +2295,13 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1638,6 +2318,19 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*sqrMat*/, + 0/*sqrMat*/, + 0/*sqrMat*/, + 0/*sqrMat*/, + 0/*sqrMat*/, + sqrMat, + 0/*sqrMat*/, + }; +#else static const func_t funcs[] = { sqrMat, @@ -1646,8 +2339,9 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) sqrMat, sqrMat, sqrMat, - sqrMat + sqrMat, }; +#endif const int depth = src.depth(); @@ -1660,9 +2354,13 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1679,6 +2377,19 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*sqrtMat*/, + 0/*sqrtMat*/, + 0/*sqrtMat*/, + 0/*sqrtMat*/, + 0/*sqrtMat*/, + sqrtMat, + 0/*sqrtMat*/, + }; +#else static const func_t funcs[] = { sqrtMat, @@ -1687,8 +2398,9 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) sqrtMat, sqrtMat, sqrtMat, - sqrtMat + sqrtMat, }; +#endif const int depth = src.depth(); @@ -1701,9 +2413,13 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -1720,6 +2436,19 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*logMat*/, + 0/*logMat*/, + 0/*logMat*/, + 0/*logMat*/, + 0/*logMat*/, + logMat, + 0/*logMat*/, + }; +#else static const func_t funcs[] = { logMat, @@ -1728,8 +2457,9 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) logMat, logMat, logMat, - logMat + logMat, }; +#endif const int depth = src.depth(); @@ -1742,9 +2472,13 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -1761,6 +2495,19 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*expMat*/, + 0/*expMat*/, + 0/*expMat*/, + 0/*expMat*/, + 0/*expMat*/, + expMat, + 0/*expMat*/, + }; +#else static const func_t funcs[] = { expMat, @@ -1769,8 +2516,9 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) expMat, expMat, expMat, - expMat + expMat, }; +#endif const int depth = src.depth(); @@ -1783,9 +2531,13 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth](src, dst, StreamAccessor::getStream(stream)); + func(src, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -1809,6 +2561,19 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][4] = + { + {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe }, + {0, 0, 0, 0}, + }; +#else static const func_t funcs[7][4] = { {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe }, @@ -1819,6 +2584,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe }, {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe } }; +#endif typedef void (*func_v4_t)(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); static const func_v4_t funcs_v4[] = @@ -1839,10 +2605,6 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } - dst.create(src1.size(), CV_MAKE_TYPE(CV_8U, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); - static const int codes[] = { 0, 2, 3, 2, 3, 1 @@ -1857,6 +2619,15 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c }; const int code = codes[cmpop]; + + const func_t func = funcs[depth][code]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + dst.create(src1.size(), CV_MAKE_TYPE(CV_8U, cn)); + + cudaStream_t stream = StreamAccessor::getStream(s); + PtrStepSzb src1_(src1.rows, src1.cols * cn, psrc1[cmpop]->data, psrc1[cmpop]->step); PtrStepSzb src2_(src1.rows, src1.cols * cn, psrc2[cmpop]->data, psrc2[cmpop]->step); PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); @@ -1882,8 +2653,6 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c } } - const func_t func = funcs[depth][code]; - func(src1_, src2_, dst_, stream); } @@ -1913,6 +2682,31 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre using namespace arithm; typedef void (*func_t)(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); + typedef void (*cast_func_t)(Scalar& sc); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][6] = + { + {cmpScalarEq , cmpScalarGt , cmpScalarGe , cmpScalarLt , cmpScalarLe , cmpScalarNe }, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {cmpScalarEq , cmpScalarGt , cmpScalarGe , cmpScalarLt , cmpScalarLe , cmpScalarNe }, + {0, 0, 0, 0, 0, 0}, + }; + + static const cast_func_t cast_func[] = + { + castScalar, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + castScalar, + 0/*castScalar*/, + }; +#else static const func_t funcs[7][6] = { {cmpScalarEq , cmpScalarGt , cmpScalarGe , cmpScalarLt , cmpScalarLe , cmpScalarNe }, @@ -1924,11 +2718,11 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre {cmpScalarEq , cmpScalarGt , cmpScalarGe , cmpScalarLt , cmpScalarLe , cmpScalarNe } }; - typedef void (*cast_func_t)(Scalar& sc); static const cast_func_t cast_func[] = { castScalar, castScalar, castScalar, castScalar, castScalar, castScalar, castScalar }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -1943,11 +2737,15 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth][cmpop]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), CV_MAKE_TYPE(CV_8U, cn)); cast_func[depth](sc); - funcs[depth][cmpop](src, cn, sc.val, dst, StreamAccessor::getStream(stream)); + func(src, cn, sc.val, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -2391,14 +3189,56 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE static const func_t funcs[5][4] = { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarAnd >::call}, + { + BitScalar >::call, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + } }; +#else + static const func_t funcs[5][4] = + { + { + BitScalar >::call, + 0, + NppBitwiseC::call, + BitScalar4< bitScalarAnd >::call + }, + {0,0,0,0}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + }, + {0,0,0,0}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + } + }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2406,9 +3246,13 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + const func_t func = funcs[depth][cn - 1]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + func(src, sc, dst, StreamAccessor::getStream(stream)); } void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) @@ -2416,14 +3260,56 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE static const func_t funcs[5][4] = { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarOr >::call}, + { + BitScalar >::call, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + } }; +#else + static const func_t funcs[5][4] = + { + { + BitScalar >::call, + 0, + NppBitwiseC::call, + BitScalar4< bitScalarOr >::call + }, + {0,0,0,0}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + }, + {0,0,0,0}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + } + }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2431,9 +3317,13 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + const func_t func = funcs[depth][cn - 1]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + func(src, sc, dst, StreamAccessor::getStream(stream)); } void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) @@ -2441,14 +3331,56 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE static const func_t funcs[5][4] = { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarXor >::call}, + { + BitScalar >::call, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + }, {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + { + 0/*BitScalar >::call*/, + 0, + 0/*NppBitwiseC::call*/, + 0/*NppBitwiseC::call*/, + } }; +#else + static const func_t funcs[5][4] = + { + { + BitScalar >::call, + 0, + NppBitwiseC::call, + BitScalar4< bitScalarXor >::call + }, + {0,0,0,0}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + }, + {0,0,0,0}, + { + BitScalar >::call, + 0, + NppBitwiseC::call, + NppBitwiseC::call + } + }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2456,9 +3388,13 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + const func_t func = funcs[depth][cn - 1]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + dst.create(src.size(), src.type()); - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + func(src, sc, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// @@ -2578,6 +3514,19 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + minMat, + 0/*minMat*/, + 0/*minMat*/, + 0/*minMat*/, + 0/*minMat*/, + minMat, + 0/*minMat*/, + }; +#else static const func_t funcs[] = { minMat, @@ -2586,8 +3535,9 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s minMat, minMat, minMat, - minMat + minMat, }; +#endif const int depth = src1.depth(); const int cn = src1.channels(); @@ -2657,6 +3607,19 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + maxMat, + 0/*maxMat*/, + 0/*maxMat*/, + 0/*maxMat*/, + 0/*maxMat*/, + maxMat, + 0/*maxMat*/, + }; +#else static const func_t funcs[] = { maxMat, @@ -2665,8 +3628,9 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s maxMat, maxMat, maxMat, - maxMat + maxMat, }; +#endif const int depth = src1.depth(); const int cn = src1.channels(); @@ -2744,6 +3708,31 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); + typedef double (*cast_func_t)(double sc); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + minScalar, + 0/*minScalar*/, + 0/*minScalar*/, + 0/*minScalar*/, + minScalar, + minScalar, + 0/*minScalar*/, + }; + + static const cast_func_t cast_func[] = + { + castScalar, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + castScalar, + castScalar, + 0/*castScalar*/, + }; +#else static const func_t funcs[] = { minScalar, @@ -2752,14 +3741,20 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) minScalar, minScalar, minScalar, - minScalar + minScalar, }; - typedef double (*cast_func_t)(double sc); static const cast_func_t cast_func[] = { - castScalar, castScalar, castScalar, castScalar, castScalar, castScalar, castScalar + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, }; +#endif const int depth = src.depth(); @@ -2772,9 +3767,13 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); + dst.create(src.size(), src.type()); - funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); + func(src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); } void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) @@ -2782,6 +3781,31 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); + typedef double (*cast_func_t)(double sc); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + maxScalar, + 0/*maxScalar*/, + 0/*maxScalar*/, + 0/*maxScalar*/, + 0/*maxScalar*/, + maxScalar, + 0/*maxScalar*/, + }; + + static const cast_func_t cast_func[] = + { + castScalar, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + 0/*castScalar*/, + castScalar, + 0/*castScalar*/ + }; +#else static const func_t funcs[] = { maxScalar, @@ -2790,14 +3814,20 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) maxScalar, maxScalar, maxScalar, - maxScalar + maxScalar, }; - typedef double (*cast_func_t)(double sc); static const cast_func_t cast_func[] = { - castScalar, castScalar, castScalar, castScalar, castScalar, castScalar, castScalar + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, + castScalar, }; +#endif const int depth = src.depth(); @@ -2810,9 +3840,13 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); + dst.create(src.size(), src.type()); - funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); + func(src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -2858,6 +3892,18 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double else { typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + arithm::threshold, + 0/*arithm::threshold*/, + 0/*arithm::threshold*/, + 0/*arithm::threshold*/, + 0/*arithm::threshold*/, + arithm::threshold, + 0/*arithm::threshold*/ + }; +#else static const func_t funcs[] = { arithm::threshold, @@ -2868,6 +3914,11 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double arithm::threshold, arithm::threshold }; +#endif + + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); if (depth != CV_32F && depth != CV_64F) { @@ -2875,7 +3926,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal = cvRound(maxVal); } - funcs[depth](src, dst, thresh, maxVal, type, stream); + func(src, dst, thresh, maxVal, type, stream); } return thresh; @@ -2892,6 +3943,18 @@ namespace arithm void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) { typedef void (*func_t)(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + 0/*arithm::pow*/, + 0/*arithm::pow*/, + 0/*arithm::pow*/, + 0/*arithm::pow*/, + 0/*arithm::pow*/, + arithm::pow, + 0/*arithm::pow*/, + }; +#else static const func_t funcs[] = { arithm::pow, @@ -2902,6 +3965,7 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) arithm::pow, arithm::pow }; +#endif const int depth = src.depth(); const int cn = src.channels(); @@ -2914,12 +3978,16 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); } + const func_t func = funcs[depth]; + if (!func) + CV_Error(CV_StsNotImplemented, "not available in tiny build"); + dst.create(src.size(), src.type()); PtrStepSzb src_(src.rows, src.cols * cn, src.data, src.step); PtrStepSzb dst_(src.rows, src.cols * cn, dst.data, dst.step); - funcs[depth](src_, power, dst_, StreamAccessor::getStream(stream)); + func(src_, power, dst_, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// @@ -3007,6 +4075,466 @@ namespace arithm void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int ddepth, Stream& stream) { typedef void (*func_t)(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7][7] = + { + { + { + arithm::addWeighted, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/ + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + }, + { + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + }, + { + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + }, + { + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + }, + { + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + }, + { + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + arithm::addWeighted, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + }, + { + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + }, + { + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + 0/*arithm::addWeighted*/, + } + } + }; +#else static const func_t funcs[7][7][7] = { { @@ -3465,6 +4993,7 @@ void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, } } }; +#endif int sdepth1 = src1.depth(); int sdepth2 = src2.depth(); diff --git a/modules/gpu/src/filtering.cpp b/modules/gpu/src/filtering.cpp index 8905eaed64..c7fd61a9c9 100644 --- a/modules/gpu/src/filtering.cpp +++ b/modules/gpu/src/filtering.cpp @@ -789,12 +789,14 @@ Ptr cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const case CV_8UC4: func = filter2D_gpu; break; +#ifndef OPENCV_TINY_GPU_MODULE case CV_16UC1: func = filter2D_gpu; break; case CV_16UC4: func = filter2D_gpu; break; +#endif case CV_32FC1: func = filter2D_gpu; break; @@ -893,6 +895,18 @@ namespace Ptr cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType) { +#ifdef OPENCV_TINY_GPU_MODULE + static const gpuFilter1D_t funcs[7][4] = + { + {filter::linearRow, 0, filter::linearRow, filter::linearRow}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {filter::linearRow, 0, filter::linearRow, filter::linearRow}, + {0, 0, 0, 0} + }; +#else static const gpuFilter1D_t funcs[7][4] = { {filter::linearRow, 0, filter::linearRow, filter::linearRow}, @@ -903,6 +917,7 @@ Ptr cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, {filter::linearRow, 0, filter::linearRow, filter::linearRow}, {0, 0, 0, 0} }; +#endif static const nppFilter1D_t npp_funcs[] = { 0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R @@ -998,6 +1013,18 @@ namespace Ptr cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType) { +#ifdef OPENCV_TINY_GPU_MODULE + static const gpuFilter1D_t funcs[7][4] = + { + {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, + {0, 0, 0, 0} + }; +#else static const gpuFilter1D_t funcs[7][4] = { {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, @@ -1008,6 +1035,7 @@ Ptr cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, {0, 0, 0, 0} }; +#endif static const nppFilter1D_t npp_funcs[] = { 0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp index 66f838f77a..c0dfc44d13 100644 --- a/modules/gpu/src/imgproc.cpp +++ b/modules/gpu/src/imgproc.cpp @@ -336,6 +336,17 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom #endif { typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const caller_t callers[6][4] = + { + { copyMakeBorder_caller , copyMakeBorder_caller , copyMakeBorder_caller , copyMakeBorder_caller}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + { copyMakeBorder_caller , 0/*copyMakeBorder_caller*/, copyMakeBorder_caller , copyMakeBorder_caller} + }; +#else static const caller_t callers[6][4] = { { copyMakeBorder_caller , copyMakeBorder_caller , copyMakeBorder_caller , copyMakeBorder_caller}, @@ -345,6 +356,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom {0/*copyMakeBorder_caller*/, 0/*copyMakeBorder_caller*/ , 0/*copyMakeBorder_caller*/, 0/*copyMakeBorder_caller*/}, { copyMakeBorder_caller , 0/*copyMakeBorder_caller*/ , copyMakeBorder_caller , copyMakeBorder_caller} }; +#endif caller_t func = callers[src.depth()][src.channels() - 1]; CV_Assert(func != 0); diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp index 4e09246e78..c22790e35d 100644 --- a/modules/gpu/src/matrix_reductions.cpp +++ b/modules/gpu/src/matrix_reductions.cpp @@ -261,6 +261,18 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf) Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) { typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][5] = + { + {0, ::sum::run, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, ::sum::run, 0, 0, 0}, + {0, 0, 0, 0, 0}, + }; +#else static const func_t funcs[7][5] = { {0, ::sum::run, ::sum::run, ::sum::run, ::sum::run}, @@ -271,6 +283,7 @@ Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) {0, ::sum::run, ::sum::run, ::sum::run, ::sum::run}, {0, ::sum::run, ::sum::run, ::sum::run, ::sum::run} }; +#endif CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); @@ -286,6 +299,8 @@ Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) buf.setTo(Scalar::all(0)); const func_t func = funcs[src.depth()][src.channels()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double result[4]; func(src, buf.data, result, mask); @@ -307,6 +322,18 @@ Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf) Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) { typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][5] = + { + {0, ::sum::runAbs, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, ::sum::runAbs, 0, 0, 0}, + {0, 0, 0, 0, 0}, + }; +#else static const func_t funcs[7][5] = { {0, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs}, @@ -317,6 +344,7 @@ Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) {0, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs}, {0, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs} }; +#endif CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); @@ -332,6 +360,8 @@ Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) buf.setTo(Scalar::all(0)); const func_t func = funcs[src.depth()][src.channels()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double result[4]; func(src, buf.data, result, mask); @@ -353,6 +383,18 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf) Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) { typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][5] = + { + {0, ::sum::runSqr, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, ::sum::runSqr, 0, 0, 0}, + {0, 0, 0, 0, 0}, + }; +#else static const func_t funcs[7][5] = { {0, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr}, @@ -363,6 +405,7 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) {0, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr}, {0, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr} }; +#endif CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); @@ -378,6 +421,8 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) buf.setTo(Scalar::all(0)); const func_t func = funcs[src.depth()][src.channels()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double result[4]; func(src, buf.data, result, mask); @@ -405,6 +450,18 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf) { typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + ::minMax::run, + 0/*::minMax::run*/, + 0/*::minMax::run*/, + 0/*::minMax::run*/, + 0/*::minMax::run*/, + ::minMax::run, + 0/*::minMax::run*/, + }; +#else static const func_t funcs[] = { ::minMax::run, @@ -413,8 +470,9 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp ::minMax::run, ::minMax::run, ::minMax::run, - ::minMax::run + ::minMax::run, }; +#endif CV_Assert( src.channels() == 1 ); CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) ); @@ -430,6 +488,8 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp ensureSizeIsEnough(buf_size, CV_8U, buf); const func_t func = funcs[src.depth()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double temp1, temp2; func(src, mask, minVal ? minVal : &temp1, maxVal ? maxVal : &temp2, buf); @@ -456,6 +516,18 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf) { typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + ::minMaxLoc::run, + 0/*::minMaxLoc::run*/, + 0/*::minMaxLoc::run*/, + 0/*::minMaxLoc::run*/, + ::minMaxLoc::run, + ::minMaxLoc::run, + 0/*::minMaxLoc::run*/, + }; +#else static const func_t funcs[] = { ::minMaxLoc::run, @@ -464,8 +536,9 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point ::minMaxLoc::run, ::minMaxLoc::run, ::minMaxLoc::run, - ::minMaxLoc::run + ::minMaxLoc::run, }; +#endif CV_Assert( src.channels() == 1 ); CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) ); @@ -482,6 +555,8 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point ensureSizeIsEnough(locbuf_size, CV_8U, locBuf); const func_t func = funcs[src.depth()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double temp1, temp2; Point temp3, temp4; @@ -508,6 +583,18 @@ int cv::gpu::countNonZero(const GpuMat& src) int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) { typedef int (*func_t)(const PtrStepSzb src, PtrStep buf); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + ::countNonZero::run, + 0/*::countNonZero::run*/, + 0/*::countNonZero::run*/, + 0/*::countNonZero::run*/, + 0/*::countNonZero::run*/, + ::countNonZero::run, + 0/*::countNonZero::run*/, + }; +#else static const func_t funcs[] = { ::countNonZero::run, @@ -516,8 +603,9 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) ::countNonZero::run, ::countNonZero::run, ::countNonZero::run, - ::countNonZero::run + ::countNonZero::run, }; +#endif CV_Assert(src.channels() == 1); @@ -532,6 +620,8 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) ensureSizeIsEnough(buf_size, CV_8U, buf); const func_t func = funcs[src.depth()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); return func(src, buf); } @@ -562,6 +652,74 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int if (dim == 0) { typedef void (*func_t)(PtrStepSzb src, void* dst, int op, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + ::reduce::rows, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + ::reduce::rows, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + ::reduce::rows, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -571,7 +729,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, ::reduce::rows, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -580,7 +738,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, 0/*::reduce::rows*/, 0/*::reduce::rows*/, - 0/*::reduce::rows*/ + 0/*::reduce::rows*/, }, { 0/*::reduce::rows*/, @@ -589,7 +747,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, ::reduce::rows, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -598,7 +756,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int ::reduce::rows, ::reduce::rows, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -607,7 +765,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, ::reduce::rows, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -616,7 +774,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, 0/*::reduce::rows*/, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -625,9 +783,10 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, 0/*::reduce::rows*/, 0/*::reduce::rows*/, - ::reduce::rows + ::reduce::rows, } }; +#endif const func_t func = funcs[src.depth()][dst.depth()]; @@ -639,6 +798,74 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int else { typedef void (*func_t)(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + ::reduce::cols, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + ::reduce::cols, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + ::reduce::cols, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -648,7 +875,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, ::reduce::cols, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -657,7 +884,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, 0/*::reduce::cols*/, 0/*::reduce::cols*/, - 0/*::reduce::cols*/ + 0/*::reduce::cols*/, }, { 0/*::reduce::cols*/, @@ -666,7 +893,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, ::reduce::cols, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -675,7 +902,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int ::reduce::cols, ::reduce::cols, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -684,7 +911,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, ::reduce::cols, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -693,7 +920,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, 0/*::reduce::cols*/, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -702,9 +929,10 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, 0/*::reduce::cols*/, 0/*::reduce::cols*/, - ::reduce::cols + ::reduce::cols, } }; +#endif const func_t func = funcs[src.depth()][dst.depth()]; diff --git a/modules/gpu/src/pyramids.cpp b/modules/gpu/src/pyramids.cpp index 85fb99040c..b4d4676587 100644 --- a/modules/gpu/src/pyramids.cpp +++ b/modules/gpu/src/pyramids.cpp @@ -68,6 +68,17 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream) typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {pyrDown_gpu , 0 /*pyrDown_gpu*/ , pyrDown_gpu , pyrDown_gpu }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {pyrDown_gpu , 0 /*pyrDown_gpu*/ , pyrDown_gpu , pyrDown_gpu } + }; +#else static const func_t funcs[6][4] = { {pyrDown_gpu , 0 /*pyrDown_gpu*/ , pyrDown_gpu , pyrDown_gpu }, @@ -77,6 +88,7 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream) {0 /*pyrDown_gpu*/ , 0 /*pyrDown_gpu*/ , 0 /*pyrDown_gpu*/ , 0 /*pyrDown_gpu*/ }, {pyrDown_gpu , 0 /*pyrDown_gpu*/ , pyrDown_gpu , pyrDown_gpu } }; +#endif CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); @@ -106,6 +118,17 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream) typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {pyrUp_gpu , 0 /*pyrUp_gpu*/ , pyrUp_gpu , pyrUp_gpu }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {pyrUp_gpu , 0 /*pyrUp_gpu*/ , pyrUp_gpu , pyrUp_gpu } + }; +#else static const func_t funcs[6][4] = { {pyrUp_gpu , 0 /*pyrUp_gpu*/ , pyrUp_gpu , pyrUp_gpu }, @@ -115,6 +138,7 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream) {0 /*pyrUp_gpu*/ , 0 /*pyrUp_gpu*/ , 0 /*pyrUp_gpu*/ , 0 /*pyrUp_gpu*/ }, {pyrUp_gpu , 0 /*pyrUp_gpu*/ , pyrUp_gpu , pyrUp_gpu } }; +#endif CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); diff --git a/modules/gpu/src/remap.cpp b/modules/gpu/src/remap.cpp index 4b87286331..3e13c7285c 100644 --- a/modules/gpu/src/remap.cpp +++ b/modules/gpu/src/remap.cpp @@ -65,6 +65,17 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {remap_gpu , 0 /*remap_gpu*/ , remap_gpu , remap_gpu }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {remap_gpu , 0 /*remap_gpu*/ , remap_gpu , remap_gpu } + }; +#else static const func_t funcs[6][4] = { {remap_gpu , 0 /*remap_gpu*/ , remap_gpu , remap_gpu }, @@ -74,6 +85,7 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp {0 /*remap_gpu*/ , 0 /*remap_gpu*/ , 0 /*remap_gpu*/ , 0 /*remap_gpu*/ }, {remap_gpu , 0 /*remap_gpu*/ , remap_gpu , remap_gpu } }; +#endif CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); CV_Assert(xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size()); diff --git a/modules/gpu/src/resize.cpp b/modules/gpu/src/resize.cpp index e1b502672a..66a771668d 100644 --- a/modules/gpu/src/resize.cpp +++ b/modules/gpu/src/resize.cpp @@ -57,6 +57,18 @@ namespace cv { namespace gpu { namespace device void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& stream) { typedef void (*func_t)(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); + +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {device::resize , 0 /*device::resize*/ , device::resize , device::resize }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {device::resize , 0 /*device::resize*/ , device::resize , device::resize } + }; +#else static const func_t funcs[6][4] = { {device::resize , 0 /*device::resize*/ , device::resize , device::resize }, @@ -66,6 +78,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub {0 /*device::resize*/ , 0 /*device::resize*/ , 0 /*device::resize*/ , 0 /*device::resize*/ }, {device::resize , 0 /*device::resize*/ , device::resize , device::resize } }; +#endif CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 ); CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA ); diff --git a/modules/gpu/src/warp.cpp b/modules/gpu/src/warp.cpp index 827d5219f1..c963235b72 100644 --- a/modules/gpu/src/warp.cpp +++ b/modules/gpu/src/warp.cpp @@ -277,6 +277,17 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {warpAffine_gpu , 0 /*warpAffine_gpu*/ , warpAffine_gpu , warpAffine_gpu }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {warpAffine_gpu , 0 /*warpAffine_gpu*/ , warpAffine_gpu , warpAffine_gpu } + }; +#else static const func_t funcs[6][4] = { {warpAffine_gpu , 0 /*warpAffine_gpu*/ , warpAffine_gpu , warpAffine_gpu }, @@ -286,6 +297,7 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz {0 /*warpAffine_gpu*/ , 0 /*warpAffine_gpu*/ , 0 /*warpAffine_gpu*/ , 0 /*warpAffine_gpu*/ }, {warpAffine_gpu , 0 /*warpAffine_gpu*/ , warpAffine_gpu , warpAffine_gpu } }; +#endif const func_t func = funcs[src.depth()][src.channels() - 1]; CV_Assert(func != 0); @@ -415,6 +427,17 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[6][4] = + { + {warpPerspective_gpu , 0 /*warpPerspective_gpu*/ , warpPerspective_gpu , warpPerspective_gpu }, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {warpPerspective_gpu , 0 /*warpPerspective_gpu*/ , warpPerspective_gpu , warpPerspective_gpu } + }; +#else static const func_t funcs[6][4] = { {warpPerspective_gpu , 0 /*warpPerspective_gpu*/ , warpPerspective_gpu , warpPerspective_gpu }, @@ -424,6 +447,7 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size {0 /*warpPerspective_gpu*/ , 0 /*warpPerspective_gpu*/ , 0 /*warpPerspective_gpu*/ , 0 /*warpPerspective_gpu*/ }, {warpPerspective_gpu , 0 /*warpPerspective_gpu*/ , warpPerspective_gpu , warpPerspective_gpu } }; +#endif const func_t func = funcs[src.depth()][src.channels() - 1]; CV_Assert(func != 0); diff --git a/modules/gpu/test/test_color.cpp b/modules/gpu/test/test_color.cpp index 5720e0c9d0..6d4c8c5423 100644 --- a/modules/gpu/test/test_color.cpp +++ b/modules/gpu/test/test_color.cpp @@ -2285,11 +2285,19 @@ GPU_TEST_P(CvtColor, BayerGR2Gray) EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CvtColor, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CvtColor, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)), WHOLE_SUBMAT)); +#endif /////////////////////////////////////////////////////////////////////////////////////////////////////// // Demosaicing diff --git a/modules/gpu/test/test_copy_make_border.cpp b/modules/gpu/test/test_copy_make_border.cpp index 24a75c0235..b06f795411 100644 --- a/modules/gpu/test/test_copy_make_border.cpp +++ b/modules/gpu/test/test_copy_make_border.cpp @@ -87,6 +87,20 @@ GPU_TEST_P(CopyMakeBorder, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, 0.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), + MatType(CV_8UC3), + MatType(CV_8UC4), + MatType(CV_32FC1), + MatType(CV_32FC3), + MatType(CV_32FC4)), + testing::Values(Border(1), Border(10), Border(50)), + ALL_BORDER_TYPES, + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -102,5 +116,6 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine( testing::Values(Border(1), Border(10), Border(50)), ALL_BORDER_TYPES, WHOLE_SUBMAT)); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp index b8b83ef10c..dae80c72df 100644 --- a/modules/gpu/test/test_core.cpp +++ b/modules/gpu/test/test_core.cpp @@ -1341,11 +1341,19 @@ GPU_TEST_P(Abs, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, 0.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core, Abs, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_32F)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Core, Abs, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(MatDepth(CV_16S), MatDepth(CV_32F)), WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // Sqr @@ -1381,6 +1389,13 @@ GPU_TEST_P(Sqr, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, 0.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core, Sqr, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_32F)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Core, Sqr, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -1389,6 +1404,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Sqr, testing::Combine( MatDepth(CV_16S), MatDepth(CV_32F)), WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // Sqrt @@ -1451,6 +1467,13 @@ GPU_TEST_P(Sqrt, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core, Sqrt, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_32F)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Core, Sqrt, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -1459,6 +1482,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Sqrt, testing::Combine( MatDepth(CV_16S), MatDepth(CV_32F)), WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // Log @@ -1521,6 +1545,13 @@ GPU_TEST_P(Log, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-6); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_32F)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -1529,6 +1560,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine( MatDepth(CV_16S), MatDepth(CV_32F)), WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // Exp @@ -1601,6 +1633,13 @@ GPU_TEST_P(Exp, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-2); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_32F)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -1609,6 +1648,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine( MatDepth(CV_16S), MatDepth(CV_32F)), WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // Compare_Array @@ -1775,12 +1815,21 @@ GPU_TEST_P(Compare_Scalar, Accuracy) } } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core, Compare_Scalar, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + CmpCode::all(), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Core, Compare_Scalar, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, TYPES(CV_8U, CV_64F, 1, 4), CmpCode::all(), WHOLE_SUBMAT)); +#endif ////////////////////////////////////////////////////////////////////////////// // Bitwise_Array @@ -1936,11 +1985,19 @@ GPU_TEST_P(Bitwise_Scalar, Xor) EXPECT_MAT_NEAR(dst_gold, dst, 0.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core, Bitwise_Scalar, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U)), + testing::Values(Channels(1)))); +#else INSTANTIATE_TEST_CASE_P(GPU_Core, Bitwise_Scalar, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32S)), IMAGE_CHANNELS)); +#endif ////////////////////////////////////////////////////////////////////////////// // RShift @@ -2317,11 +2374,19 @@ GPU_TEST_P(Pow, Accuracy) } } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core, Pow, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_32F)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Core, Pow, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, ALL_DEPTH, WHOLE_SUBMAT)); +#endif ////////////////////////////////////////////////////////////////////////////// // AddWeighted @@ -2380,6 +2445,23 @@ GPU_TEST_P(AddWeighted, Accuracy) } } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core_1, AddWeighted, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U)), + testing::Values(MatDepth(CV_8U)), + testing::Values(MatDepth(CV_8U)), + WHOLE_SUBMAT)); + +INSTANTIATE_TEST_CASE_P(GPU_Core_2, AddWeighted, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_32F)), + testing::Values(MatDepth(CV_32F)), + testing::Values(MatDepth(CV_32F)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Core, AddWeighted, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -2387,6 +2469,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, AddWeighted, testing::Combine( ALL_DEPTH, ALL_DEPTH, WHOLE_SUBMAT)); +#endif ////////////////////////////////////////////////////////////////////////////// // GEMM @@ -2953,6 +3036,15 @@ GPU_TEST_P(Norm, Accuracy) EXPECT_NEAR(val_gold, val, depth < CV_32F ? 0.0 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core, Norm, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), + MatDepth(CV_32F)), + testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Core, Norm, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -2964,6 +3056,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Norm, testing::Combine( MatDepth(CV_32F)), testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2), NormCode(cv::NORM_INF)), WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // normDiff @@ -3136,11 +3229,19 @@ GPU_TEST_P(Sum, Sqr) EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core, Sum, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Core, Sum, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, TYPES(CV_8U, CV_64F, 1, 4), WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // MinMax @@ -3513,11 +3614,19 @@ PARAM_TEST_CASE(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, Reduc type = CV_MAKE_TYPE(depth, channels); if (reduceOp == CV_REDUCE_MAX || reduceOp == CV_REDUCE_MIN) + { dst_depth = depth; + } +#ifndef OPENCV_TINY_GPU_MODULE else if (reduceOp == CV_REDUCE_SUM) + { dst_depth = depth == CV_8U ? CV_32S : depth < CV_64F ? CV_32F : depth; + } +#endif else + { dst_depth = depth < CV_32F ? CV_32F : depth; + } dst_type = CV_MAKE_TYPE(dst_depth, channels); } @@ -3553,6 +3662,16 @@ GPU_TEST_P(Reduce, Cols) EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 0.0 : 0.02); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Core, Reduce, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), + MatDepth(CV_32F)), + ALL_CHANNELS, + ALL_REDUCE_CODES, + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Core, Reduce, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -3564,6 +3683,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Reduce, testing::Combine( ALL_CHANNELS, ALL_REDUCE_CODES, WHOLE_SUBMAT)); +#endif ////////////////////////////////////////////////////////////////////////////// // Normalize diff --git a/modules/gpu/test/test_features2d.cpp b/modules/gpu/test/test_features2d.cpp index 697483657e..99d9b2e8f6 100644 --- a/modules/gpu/test/test_features2d.cpp +++ b/modules/gpu/test/test_features2d.cpp @@ -310,6 +310,7 @@ GPU_TEST_P(BruteForceMatcher, Match_Single) ASSERT_EQ(0, badCount); } +#ifndef OPENCV_TINY_GPU_MODULE GPU_TEST_P(BruteForceMatcher, Match_Collection) { cv::gpu::BFMatcher_GPU matcher(normCode); @@ -363,6 +364,7 @@ GPU_TEST_P(BruteForceMatcher, Match_Collection) ASSERT_EQ(0, badCount); } +#endif GPU_TEST_P(BruteForceMatcher, KnnMatch_2_Single) { @@ -442,6 +444,7 @@ GPU_TEST_P(BruteForceMatcher, KnnMatch_3_Single) ASSERT_EQ(0, badCount); } +#ifndef OPENCV_TINY_GPU_MODULE GPU_TEST_P(BruteForceMatcher, KnnMatch_2_Collection) { cv::gpu::BFMatcher_GPU matcher(normCode); @@ -565,6 +568,7 @@ GPU_TEST_P(BruteForceMatcher, KnnMatch_3_Collection) ASSERT_EQ(0, badCount); } +#endif GPU_TEST_P(BruteForceMatcher, RadiusMatch_Single) { @@ -615,6 +619,7 @@ GPU_TEST_P(BruteForceMatcher, RadiusMatch_Single) } } +#ifndef OPENCV_TINY_GPU_MODULE GPU_TEST_P(BruteForceMatcher, RadiusMatch_Collection) { cv::gpu::BFMatcher_GPU matcher(normCode); @@ -693,11 +698,20 @@ GPU_TEST_P(BruteForceMatcher, RadiusMatch_Collection) ASSERT_EQ(0, badCount); } } +#endif +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine( + ALL_DEVICES, + testing::Values(NormCode(cv::NORM_L2)), + testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304)), + testing::Values(UseMask(false), UseMask(true)))); +#else INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine( ALL_DEVICES, testing::Values(NormCode(cv::NORM_L1), NormCode(cv::NORM_L2)), testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304)), testing::Values(UseMask(false), UseMask(true)))); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_filters.cpp b/modules/gpu/test/test_filters.cpp index cbb6db8a2a..cac3c70d79 100644 --- a/modules/gpu/test/test_filters.cpp +++ b/modules/gpu/test/test_filters.cpp @@ -164,6 +164,21 @@ GPU_TEST_P(Sobel, Accuracy) EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.1); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Filter, Sobel, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), + IMAGE_CHANNELS, + testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7))), + testing::Values(Deriv_X(0), Deriv_X(1), Deriv_X(2)), + testing::Values(Deriv_Y(0), Deriv_Y(1), Deriv_Y(2)), + testing::Values(BorderType(cv::BORDER_REFLECT101), + BorderType(cv::BORDER_REPLICATE), + BorderType(cv::BORDER_CONSTANT), + BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Filter, Sobel, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -177,6 +192,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Filter, Sobel, testing::Combine( BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); +#endif ///////////////////////////////////////////////////////////////////////////////////////////////// // Scharr @@ -227,6 +243,20 @@ GPU_TEST_P(Scharr, Accuracy) EXPECT_MAT_NEAR(getInnerROI(dst_gold, cv::Size(3, 3)), getInnerROI(dst, cv::Size(3, 3)), CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.1); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Filter, Scharr, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), + IMAGE_CHANNELS, + testing::Values(Deriv_X(0), Deriv_X(1)), + testing::Values(Deriv_Y(0), Deriv_Y(1)), + testing::Values(BorderType(cv::BORDER_REFLECT101), + BorderType(cv::BORDER_REPLICATE), + BorderType(cv::BORDER_CONSTANT), + BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Filter, Scharr, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -239,6 +269,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Filter, Scharr, testing::Combine( BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); +#endif ///////////////////////////////////////////////////////////////////////////////////////////////// // GaussianBlur @@ -301,6 +332,21 @@ GPU_TEST_P(GaussianBlur, Accuracy) } } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), + IMAGE_CHANNELS, + testing::Values(KSize(cv::Size(3, 3)), + KSize(cv::Size(5, 5)), + KSize(cv::Size(7, 7))), + testing::Values(BorderType(cv::BORDER_REFLECT101), + BorderType(cv::BORDER_REPLICATE), + BorderType(cv::BORDER_CONSTANT), + BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -326,6 +372,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine( BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); +#endif ///////////////////////////////////////////////////////////////////////////////////////////////// // Laplacian @@ -565,6 +612,16 @@ GPU_TEST_P(Filter2D, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_Filter, Filter2D, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)), + testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))), + testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), + testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_Filter, Filter2D, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -573,5 +630,6 @@ INSTANTIATE_TEST_CASE_P(GPU_Filter, Filter2D, testing::Combine( testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp index aa27bfe206..c6c0bf1868 100644 --- a/modules/gpu/test/test_imgproc.cpp +++ b/modules/gpu/test/test_imgproc.cpp @@ -357,11 +357,19 @@ GPU_TEST_P(Canny, Accuracy) } } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine( + ALL_DEVICES, + testing::Values(AppertureSize(3)), + testing::Values(L2gradient(false), L2gradient(true)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine( ALL_DEVICES, testing::Values(AppertureSize(3), AppertureSize(5)), testing::Values(L2gradient(false), L2gradient(true)), WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////////////////////////////// // MeanShift diff --git a/modules/gpu/test/test_pyramids.cpp b/modules/gpu/test/test_pyramids.cpp index 6b0540fc10..5ddecf49ac 100644 --- a/modules/gpu/test/test_pyramids.cpp +++ b/modules/gpu/test/test_pyramids.cpp @@ -80,11 +80,19 @@ GPU_TEST_P(PyrDown, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-4 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), WHOLE_SUBMAT)); +#endif //////////////////////////////////////////////////////// // pyrUp @@ -120,10 +128,18 @@ GPU_TEST_P(PyrUp, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-4 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), WHOLE_SUBMAT)); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_remap.cpp b/modules/gpu/test/test_remap.cpp index eb4b9ece85..cd0520070b 100644 --- a/modules/gpu/test/test_remap.cpp +++ b/modules/gpu/test/test_remap.cpp @@ -169,6 +169,15 @@ GPU_TEST_P(Remap, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-3 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Remap, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Remap, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -176,5 +185,6 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Remap, testing::Combine( testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)), WHOLE_SUBMAT)); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_resize.cpp b/modules/gpu/test/test_resize.cpp index 25f0f0e2bb..99cbfec3b4 100644 --- a/modules/gpu/test/test_resize.cpp +++ b/modules/gpu/test/test_resize.cpp @@ -174,6 +174,15 @@ GPU_TEST_P(Resize, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-2 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + testing::Values(0.3, 0.5, 1.5, 2.0), + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -181,6 +190,7 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine( testing::Values(0.3, 0.5, 1.5, 2.0), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), WHOLE_SUBMAT)); +#endif ///////////////// @@ -221,6 +231,15 @@ GPU_TEST_P(ResizeSameAsHost, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-2 : src.depth() == CV_8U ? 4.0 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + testing::Values(0.3, 0.5), + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_AREA)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -228,7 +247,17 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine( testing::Values(0.3, 0.5), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_AREA)), WHOLE_SUBMAT)); +#endif +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc2, ResizeSameAsHost, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + testing::Values(0.3, 0.5, 1.5, 2.0), + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc2, ResizeSameAsHost, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -236,5 +265,6 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc2, ResizeSameAsHost, testing::Combine( testing::Values(0.3, 0.5, 1.5, 2.0), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), WHOLE_SUBMAT)); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_threshold.cpp b/modules/gpu/test/test_threshold.cpp index 52ebd7f592..cd06c17dd9 100644 --- a/modules/gpu/test/test_threshold.cpp +++ b/modules/gpu/test/test_threshold.cpp @@ -83,11 +83,20 @@ GPU_TEST_P(Threshold, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, 0.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Threshold, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)), + ThreshOp::all(), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Threshold, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, testing::Values(MatType(CV_8UC1), MatType(CV_16SC1), MatType(CV_32FC1)), ThreshOp::all(), WHOLE_SUBMAT)); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_warp_affine.cpp b/modules/gpu/test/test_warp_affine.cpp index 43bf0f6d9e..a20bbbeb4d 100644 --- a/modules/gpu/test/test_warp_affine.cpp +++ b/modules/gpu/test/test_warp_affine.cpp @@ -222,6 +222,16 @@ GPU_TEST_P(WarpAffine, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-1 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffine, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + DIRECT_INVERSE, + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffine, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -230,6 +240,7 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffine, testing::Combine( testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)), WHOLE_SUBMAT)); +#endif /////////////////////////////////////////////////////////////////// // Test NPP @@ -271,10 +282,18 @@ GPU_TEST_P(WarpAffineNPP, Accuracy) EXPECT_MAT_SIMILAR(dst_gold, dst, 2e-2); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffineNPP, testing::Combine( + ALL_DEVICES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + DIRECT_INVERSE, + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)))); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffineNPP, testing::Combine( ALL_DEVICES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), DIRECT_INVERSE, testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)))); +#endif #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_warp_perspective.cpp b/modules/gpu/test/test_warp_perspective.cpp index d225e58b66..892704dd30 100644 --- a/modules/gpu/test/test_warp_perspective.cpp +++ b/modules/gpu/test/test_warp_perspective.cpp @@ -225,6 +225,16 @@ GPU_TEST_P(WarpPerspective, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-1 : 1.0); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspective, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + DIRECT_INVERSE, + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), + testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspective, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, @@ -233,6 +243,7 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspective, testing::Combine( testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)), WHOLE_SUBMAT)); +#endif /////////////////////////////////////////////////////////////////// // Test NPP @@ -274,10 +285,18 @@ GPU_TEST_P(WarpPerspectiveNPP, Accuracy) EXPECT_MAT_SIMILAR(dst_gold, dst, 2e-2); } +#ifdef OPENCV_TINY_GPU_MODULE +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspectiveNPP, testing::Combine( + ALL_DEVICES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + DIRECT_INVERSE, + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)))); +#else INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspectiveNPP, testing::Combine( ALL_DEVICES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), DIRECT_INVERSE, testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)))); +#endif #endif // HAVE_CUDA diff --git a/modules/ts/include/opencv2/ts/gpu_perf.hpp b/modules/ts/include/opencv2/ts/gpu_perf.hpp index b7b73b7bc5..d74d7ea031 100644 --- a/modules/ts/include/opencv2/ts/gpu_perf.hpp +++ b/modules/ts/include/opencv2/ts/gpu_perf.hpp @@ -50,8 +50,13 @@ namespace perf { +#ifdef OPENCV_TINY_GPU_MODULE + #define ALL_BORDER_MODES testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT)) + #define ALL_INTERPOLATIONS testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_AREA)) +#else #define ALL_BORDER_MODES BorderMode::all() #define ALL_INTERPOLATIONS Interpolation::all() +#endif CV_ENUM(BorderMode, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP) CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA) diff --git a/modules/ts/include/opencv2/ts/gpu_test.hpp b/modules/ts/include/opencv2/ts/gpu_test.hpp index 943a3536ca..01737bc951 100644 --- a/modules/ts/include/opencv2/ts/gpu_test.hpp +++ b/modules/ts/include/opencv2/ts/gpu_test.hpp @@ -215,6 +215,12 @@ namespace cvtest using perf::MatDepth; +#ifdef OPENCV_TINY_GPU_MODULE + #define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)) + + #define DEPTH_PAIRS testing::Values(std::make_pair(MatDepth(CV_8U), MatDepth(CV_8U)), \ + std::make_pair(MatDepth(CV_32F), MatDepth(CV_32F))) +#else #define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F), MatDepth(CV_64F)) #define DEPTH_PAIRS testing::Values(std::make_pair(MatDepth(CV_8U), MatDepth(CV_8U)), \ @@ -242,6 +248,7 @@ namespace cvtest std::make_pair(MatDepth(CV_32F), MatDepth(CV_64F)), \ \ std::make_pair(MatDepth(CV_64F), MatDepth(CV_64F))) +#endif // Type @@ -318,7 +325,11 @@ namespace cvtest CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA) CV_ENUM(BorderType, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP) +#ifdef OPENCV_TINY_GPU_MODULE + #define ALL_BORDER_TYPES testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)) +#else #define ALL_BORDER_TYPES testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)) +#endif CV_FLAGS(WarpFlags, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, WARP_INVERSE_MAP)